{ "host": "pve197", "gpu": "Tesla V100-PCIE-32GB", "vram_gb": 32, "model_alias": "gemma4:31b", "model_tag": "gemma4:31b-it-q4_K_M", "prompt_key": "long", "prompt_chars": 1614, "num_predict": 256, "num_ctx": 4096, "runs": [ { "prompt_tokens": 318, "prompt_eval_ms": 728.7, "prefill_tok_per_s": 436.37, "output_tokens": 256, "eval_ms": 163511.0, "decode_tok_per_s": 1.57, "load_ms": 495.0, "total_ms": 164970.4, "harness_wall_s": 164.977, "done_reason": "length" }, { "prompt_tokens": 318, "prompt_eval_ms": 682.8, "prefill_tok_per_s": 465.71, "output_tokens": 256, "eval_ms": 168727.1, "decode_tok_per_s": 1.52, "load_ms": 545.3, "total_ms": 170207.4, "harness_wall_s": 170.214, "done_reason": "length" }, { "prompt_tokens": 318, "prompt_eval_ms": 950.0, "prefill_tok_per_s": 334.75, "output_tokens": 256, "eval_ms": 163102.9, "decode_tok_per_s": 1.57, "load_ms": 507.9, "total_ms": 164801.8, "harness_wall_s": 164.809, "done_reason": "length" } ], "warmup": { "prompt_tokens": 318, "prompt_eval_ms": 3883.3, "prefill_tok_per_s": 81.89, "output_tokens": 256, "eval_ms": 172199.4, "decode_tok_per_s": 1.49, "load_ms": 528.0, "total_ms": 176864.8, "harness_wall_s": 176.871, "done_reason": "length" }, "summary": { "prefill_tok_per_s": { "min": 334.75, "median": 436.37, "max": 465.71, "n": 3 }, "decode_tok_per_s": { "min": 1.52, "median": 1.57, "max": 1.57, "n": 3 }, "total_ms": { "min": 164801.8, "median": 164970.4, "max": 170207.4, "n": 3 } } }