a945207aab
Ran minimal agent loop (Ollama /api/chat + read_file/write_file/run_bash) on steel141 3090 Ti against 3 models on a broken-median-function task: - gemma4:31b-it-q4_K_M: PASS (8 iters, 1 write, 44s) — textbook trace - qwen3-coder:30b: PASS (15 iters, 1 write, 22s) — correct but chatty - gemma4:26b: FAIL (6 iters, 0 writes) — silently stops with eval=4 after reading source. Reproduced on second run. One-shot probe confirms 26b CAN produce the correct fix — failure is specifically at the write_file tool-call argument boundary. Updates GOTCHAS with a new HIGH-severity entry, SYNTHESIS model-selection table, CORPUS_cli_coding_agent.md empirical-follow-up pointer, and adds docs/reference/bakeoff-2026-04-18.md with the full writeup.
36 lines
857 B
Python
36 lines
857 B
Python
"""Basic statistics helpers."""
|
|
|
|
|
|
def mean(numbers):
    """Return the arithmetic mean (sum divided by count) of *numbers*.

    The input must be non-empty; an empty sequence makes the division
    fail with ``ZeroDivisionError``.
    """
    total = sum(numbers)
    count = len(numbers)
    return total / count
|
|
|
|
|
|
def median(numbers):
    """Return the median of a list of numbers.

    The values are sorted into a new list; the input is not modified.

    * Odd number of values: the single middle element is returned as-is.
    * Even number of values: the average of the two middle elements is
      returned (computed with true division, so ints yield a float).

    Raises:
        IndexError: if *numbers* is empty.
    """
    s = sorted(numbers)
    n = len(s)
    mid = n // 2
    if n % 2:
        # Odd length: exactly one central element.
        return s[mid]
    # Even length: there is no single central element, so average the two
    # values that straddle the middle. (For n == 0 the index lookup raises
    # IndexError, matching the previous behavior on empty input.)
    return (s[mid - 1] + s[mid]) / 2
|
|
|
|
|
|
def mode(numbers):
    """Return the value that occurs most often in *numbers*.

    When several values share the highest count, the one that appears
    first in *numbers* wins. An empty input yields ``None``.
    """
    # First pass: tally how many times each value occurs.
    tally = {}
    for value in numbers:
        if value in tally:
            tally[value] += 1
        else:
            tally[value] = 1

    # Second pass over the input: max() keeps the first element that
    # reaches the highest count, which gives the first-occurrence tie-break.
    return max(numbers, key=tally.__getitem__, default=None)
|
|
|
|
|
|
def variance(numbers):
    """Return the sample variance of *numbers* (denominator is n - 1).

    Raises:
        ValueError: if fewer than two values are supplied.
    """
    count = len(numbers)
    if count < 2:
        raise ValueError("variance requires at least 2 values")

    center = mean(numbers)
    # Sum of squared deviations from the mean.
    squared_deviations = sum((value - center) ** 2 for value in numbers)
    return squared_deviations / (count - 1)
|