{ "MMLU": { "score": 62.6, "correct": 8786, "total": 14042 }, "GSM8K": { "score": 18.9, "correct": 249, "total": 1319 }, "speed": { "avg_tok_s": 106.3, "max_tok_s": 106.6, "device": "cuda", "gpu": "NVIDIA GeForce RTX 5090", "dtype": "bfloat16" }, "ttft": { "avg_ms": 11.2, "p50_ms": 11.2, "p99_ms": 11.3 }, "vram": { "used_gb": 6.43, "total_gb": 33.67, "utilization_pct": 19.1 } }