Class peagen.plugins.evaluators.benchmark.PytestBenchmarkEvaluator

PytestBenchmarkEvaluator(**_)

Bases: Evaluator

Run pytest-benchmark and score by median time.

Source code in peagen/plugins/evaluators/base.py
def __init__(self, **_: Any) -> None:
    self.last_result = None

last_result instance-attribute

last_result = None
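
A quick instantiation sketch (illustrative, not part of the generated reference): the constructor ignores its keyword arguments, and last_result stays None until run() populates it. The import path follows the module shown above.

from peagen.plugins.evaluators.benchmark import PytestBenchmarkEvaluator

evaluator = PytestBenchmarkEvaluator()
assert evaluator.last_result is None  # set by run()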

run

run(workspace, bench_cmd, runs=1, **kw)
Source code in peagen/plugins/evaluators/benchmark.py
def run(self, workspace: Path, bench_cmd: str, runs: int = 1, **kw: Any) -> float:
    workspace = Path(workspace)
    times: List[float] = []
    # Base pytest invocation: run only benchmarks and export results to benchmark.json.
    bench_args = [
        "pytest",
        "--benchmark-only",
        "--benchmark-json=benchmark.json",
        "--json-report",
        "-q",
    ]
    bench_args.extend(shlex.split(bench_cmd))

    for _ in range(max(1, runs)):
        subprocess.run(bench_args, cwd=workspace, capture_output=True, text=True)
        bench_file = workspace / "benchmark.json"
        if bench_file.exists():
            data = json.loads(bench_file.read_text())
            # Collect each benchmark's median, converted from seconds to milliseconds.
            for entry in data.get("benchmarks", []):
                median_s = entry.get("stats", {}).get("median")
                if isinstance(median_s, (int, float)):
                    times.append(median_s * 1000)
            bench_file.unlink()
        # Remove the pytest-json-report artifact between runs.
        report_file = workspace / ".report.json"
        report_file.unlink(missing_ok=True)

    if not times:
        self.last_result = {"median_ms": None}
        return float("-inf")

    # Negate the median so that faster code yields a higher score.
    median_ms = statistics.median(times)
    self.last_result = {"median_ms": median_ms, "runs": len(times)}
    return -median_ms
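
A minimal usage sketch, not taken from the package docs: it assumes a workspace containing a pytest-benchmark test suite (with pytest-benchmark and pytest-json-report available, given the flags above) and passes the test selection as bench_cmd. The paths "my_project" and "tests/" are placeholders. The return value is the negated median time in milliseconds (higher is better), or -inf when no benchmark data was produced; details land in last_result.

from pathlib import Path

from peagen.plugins.evaluators.benchmark import PytestBenchmarkEvaluator

evaluator = PytestBenchmarkEvaluator()

# Run the benchmarks three times and aggregate their medians.
score = evaluator.run(
    workspace=Path("my_project"),  # directory containing the test suite (placeholder)
    bench_cmd="tests/",            # extra arguments appended to the pytest command
    runs=3,
)

if score == float("-inf"):
    print("no benchmark results were collected")
else:
    print(f"median time: {-score:.3f} ms")
    print(evaluator.last_result)   # {'median_ms': ..., 'runs': ...}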