{ "host": "pve197", "gpu": "Tesla V100-PCIE-32GB", "vram_gb": 32, "model_alias": "gemma4:31b", "model_tag": "gemma4:31b-it-q4_K_M", "prompt_key": "short", "prompt_chars": 78, "num_predict": 256, "num_ctx": 4096, "runs": [ { "prompt_tokens": 27, "prompt_eval_ms": 665.6, "prefill_tok_per_s": 40.56, "output_tokens": 256, "eval_ms": 164631.1, "decode_tok_per_s": 1.55, "load_ms": 512.6, "total_ms": 166062.7, "harness_wall_s": 166.067, "done_reason": "length" }, { "prompt_tokens": 27, "prompt_eval_ms": 660.3, "prefill_tok_per_s": 40.89, "output_tokens": 256, "eval_ms": 159594.3, "decode_tok_per_s": 1.6, "load_ms": 523.6, "total_ms": 161012.3, "harness_wall_s": 161.016, "done_reason": "length" }, { "prompt_tokens": 27, "prompt_eval_ms": 887.8, "prefill_tok_per_s": 30.41, "output_tokens": 256, "eval_ms": 167584.3, "decode_tok_per_s": 1.53, "load_ms": 486.8, "total_ms": 169188.9, "harness_wall_s": 169.194, "done_reason": "length" } ], "warmup": { "prompt_tokens": 27, "prompt_eval_ms": 6642.4, "prefill_tok_per_s": 4.06, "output_tokens": 256, "eval_ms": 173530.1, "decode_tok_per_s": 1.48, "load_ms": 20142.1, "total_ms": 200836.5, "harness_wall_s": 200.841, "done_reason": "length" }, "summary": { "prefill_tok_per_s": { "min": 30.41, "median": 40.56, "max": 40.89, "n": 3 }, "decode_tok_per_s": { "min": 1.53, "median": 1.55, "max": 1.6, "n": 3 }, "total_ms": { "min": 161012.3, "median": 166062.7, "max": 169188.9, "n": 3 } } }