Skip to content

Experiment Management

Test cases in Strands Evals are organized into Experiment objects. This guide covers practical patterns for managing experiments and test cases.

from strands_evals import Case
# Attach metadata to each case so cases can be filtered and organized later.
cases = [
    Case(
        name="easy-math",
        input="What is 2 + 2?",
        metadata={
            "category": "math",
            "difficulty": "easy",
            "tags": ["arithmetic"],
        },
    ),
    Case(
        name="hard-math",
        input="Solve x^2 + 5x + 6 = 0",
        metadata={
            "category": "math",
            "difficulty": "hard",
            "tags": ["algebra"],
        },
    ),
]

# Filter by metadata. A case can be created without any metadata
# (e.g. Case(input="test")), so fall back to an empty dict in case the
# attribute is then None; calling .get on None would raise AttributeError.
easy_cases = [
    c for c in cases
    if (c.metadata or {}).get("difficulty") == "easy"
]
# Naming pattern for cases: {category}-{subcategory}-{number}
Case(
    name="knowledge-geography-001",
    input="...",
),
Case(
    name="math-arithmetic-001",
    input="...",
),
from strands_evals import Experiment
# Group related experiments under descriptive keys.
# `evaluators=[...]` is a placeholder: substitute the evaluators to apply.
experiments = {
    "baseline": Experiment(cases=baseline_cases, evaluators=[...]),
    "with_tools": Experiment(cases=tool_cases, evaluators=[...]),
    "edge_cases": Experiment(cases=edge_cases, evaluators=[...]),
}

# Run all experiments. Results are stored per experiment name so that an
# earlier experiment's reports are not lost when the next one runs.
reports_by_experiment = {}
for name, exp in experiments.items():
    print(f"Running {name}...")
    reports = exp.run_evaluations(task_function)
    reports_by_experiment[name] = reports
# OutputEvaluator is used below, so bring it into scope explicitly.
from strands_evals.evaluators import OutputEvaluator

# Merge cases from multiple experiments into one experiment.
# List concatenation builds a new list; the source experiments' own
# case lists are left untouched.
combined = Experiment(
    cases=exp1.cases + exp2.cases + exp3.cases,
    evaluators=[OutputEvaluator()],
)
# Add a single case: appended in place to the experiment's existing cases
experiment.cases.append(new_case)
# Add multiple cases at once: every item of `additional_cases` is appended in place
experiment.cases.extend(additional_cases)
# Both evaluators used below must be imported; OutputEvaluator was missing.
from strands_evals.evaluators import HelpfulnessEvaluator, OutputEvaluator

# Replace the experiment's evaluators: the attribute is reassigned
# wholesale, so the previous list is discarded rather than extended.
experiment.evaluators = [
    OutputEvaluator(),
    HelpfulnessEvaluator(),
]

Each case gets a unique session ID automatically:

# No session_id supplied: one is generated for the case
case = Case(input="test")
print(case.session_id)  # Auto-generated UUID

# Alternatively, supply a custom session ID
case = Case(
    input="test",
    session_id="custom-123",
)
# Good: the name describes the scenario being tested
Case(
    name="customer-service-refund-request",
    input="...",
)
# Less helpful: the name says nothing about what is tested
Case(
    name="test1",
    input="...",
)
# A case carrying descriptive metadata for later filtering and auditing
Case(
    name="complex-query",
    input="...",
    metadata={
        "category": "customer_service",
        "difficulty": "medium",
        "expected_tools": ["search_orders"],
        "created_date": "2025-01-15",
    },
)
# Save each revision under an explicit version suffix
experiment.to_file("experiment_v1.json")
experiment.to_file("experiment_v2.json")

# Or put a timestamp in the file name instead
from datetime import datetime

# format() on a datetime applies the strftime pattern given as the spec
timestamp = format(datetime.now(), "%Y%m%d_%H%M%S")
experiment.to_file(f"experiment_{timestamp}.json")