Serialization
Overview
Strands Evals provides JSON serialization for experiments and reports, enabling you to save, load, version, and share evaluation work.
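A typical round trip looks like this (a minimal sketch, assuming you already have an experiment object built from your cases and evaluators):

# Persist the experiment to JSON, then reload it later or on another machine
experiment.to_file("my_experiment.json")
restored = Experiment.from_file("my_experiment.json")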
Saving Experiments
from strands_evals import Experiment
# Save to file
experiment.to_file("my_experiment.json")
experiment.to_file("my_experiment")  # .json added automatically
# Relative path
experiment.to_file("experiments/baseline.json")
# Absolute path
experiment.to_file("/path/to/experiments/baseline.json")

Loading Experiments
# Load from file
experiment = Experiment.from_file("my_experiment.json")
print(f"Loaded {len(experiment.cases)} cases")print(f"Evaluators: {[e.get_type_name() for e in experiment.evaluators]}")Custom Evaluators
Pass custom evaluator classes when loading:
from strands_evals.evaluators import Evaluator
class CustomEvaluator(Evaluator):
    def evaluate(self, evaluation_case):
        # Custom logic
        return EvaluationOutput(score=1.0, test_pass=True, reason="...")
# Save with custom evaluator
experiment = Experiment(
    cases=cases,
    evaluators=[CustomEvaluator()]
)
experiment.to_file("custom.json")
# Load with custom evaluator class
loaded = Experiment.from_file(
    "custom.json",
    custom_evaluators=[CustomEvaluator]
)

Dictionary Conversion
# To dictionary
experiment_dict = experiment.to_dict()
# From dictionary
experiment = Experiment.from_dict(experiment_dict)
# With custom evaluators
experiment = Experiment.from_dict(
    experiment_dict,
    custom_evaluators=[CustomEvaluator]
)

Saving Reports
import json
# Run evaluation
reports = experiment.run_evaluations(task_function)
# Save reports
for i, report in enumerate(reports):
    report_data = {
        "evaluator": experiment.evaluators[i].get_type_name(),
        "overall_score": report.overall_score,
        "scores": report.scores,
        "test_passes": report.test_passes,
        "reasons": report.reasons
    }
with open(f"report_{i}.json", "w") as f: json.dump(report_data, f, indent=2)Versioning Strategies
Versioning Strategies

Timestamp Versioning
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment.to_file(f"experiment_{timestamp}.json")

Semantic Versioning
experiment.to_file("experiment_v1.json")
experiment.to_file("experiment_v2.json")
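If you prefer not to track version numbers by hand, one option is a small helper that scans existing files and picks the next number. This is a sketch, not part of the library; next_version_path is a hypothetical name:

from pathlib import Path

def next_version_path(directory, prefix="experiment"):
    # Find existing prefix_vN.json files and return the path for the next version
    versions = []
    for path in Path(directory).glob(f"{prefix}_v*.json"):
        suffix = path.stem.rsplit("_v", 1)[1]
        if suffix.isdigit():
            versions.append(int(suffix))
    return Path(directory) / f"{prefix}_v{max(versions, default=0) + 1}.json"

experiment.to_file(next_version_path("experiments"))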
Organizing Files

Directory Structure
experiments/
├── baseline/
│   ├── experiment.json
│   └── reports/
├── iteration_1/
│   ├── experiment.json
│   └── reports/
└── final/
    ├── experiment.json
    └── reports/

Organized Saving
from pathlib import Path
base_dir = Path("experiments/iteration_1")
base_dir.mkdir(parents=True, exist_ok=True)
# Save experiment
experiment.to_file(base_dir / "experiment.json")
# Save reports
reports_dir = base_dir / "reports"
reports_dir.mkdir(exist_ok=True)

Saving Experiments with Reports
from pathlib import Path
import json
def save_with_reports(experiment, reports, base_name):
    base_path = Path(f"evaluations/{base_name}")
    base_path.mkdir(parents=True, exist_ok=True)
    # Save experiment
    experiment.to_file(base_path / "experiment.json")
    # Save reports
    for i, report in enumerate(reports):
        evaluator_name = experiment.evaluators[i].get_type_name()
        report_data = {
            "evaluator": evaluator_name,
            "overall_score": report.overall_score,
            "pass_rate": sum(report.test_passes) / len(report.test_passes),
            "scores": report.scores
        }
        with open(base_path / f"report_{evaluator_name}.json", "w") as f:
            json.dump(report_data, f, indent=2)
# Usage
reports = experiment.run_evaluations(task_function)
save_with_reports(experiment, reports, "baseline_20250115")

Error Handling
from pathlib import Path
def safe_load(path, custom_evaluators=None):
    try:
        file_path = Path(path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        if file_path.suffix != ".json":
            raise ValueError(f"Expected .json file, got: {file_path.suffix}")
        experiment = Experiment.from_file(path, custom_evaluators=custom_evaluators)
        print(f"✓ Loaded {len(experiment.cases)} cases")
        return experiment
    except Exception as e:
        print(f"✗ Failed to load: {e}")
        return None
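Usage, reusing the file path and custom evaluator class from earlier sections (the fallback behavior is up to you):

experiment = safe_load("experiments/baseline.json", custom_evaluators=[CustomEvaluator])
if experiment is None:
    # Fall back to a default experiment or stop the run
    raise SystemExit(1)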
Best Practices

1. Use Consistent Naming
# Good
experiment.to_file("customer_service_baseline_v1.json")
# Less helpful
experiment.to_file("test.json")

2. Validate After Loading
experiment = Experiment.from_file("experiment.json")
assert len(experiment.cases) > 0, "No cases loaded"
assert len(experiment.evaluators) > 0, "No evaluators loaded"
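You can extend these checks to whatever invariants matter for your setup. For example, assuming get_type_name() returns the evaluator class name, you can confirm that the expected evaluators were reconstructed:

# Verify the expected evaluator types came back after deserialization
loaded_types = {e.get_type_name() for e in experiment.evaluators}
assert "CustomEvaluator" in loaded_types, f"Missing evaluator, got: {loaded_types}"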
3. Include Metadata

experiment_data = experiment.to_dict()
experiment_data["metadata"] = {
    "created_date": datetime.now().isoformat(),
    "description": "Baseline evaluation",
    "version": "1.0"
}
with open("experiment.json", "w") as f: json.dump(experiment_data, f, indent=2)Related Documentation
Related Documentation

- Experiment Management: Organize experiments
- Experiment Generator: Generate experiments
- Quickstart Guide: Get started with Strands Evals