29 lines
468 B
JSON
29 lines
468 B
JSON
{
|
|
"MMLU": {
|
|
"score": 62.6,
|
|
"correct": 8786,
|
|
"total": 14042
|
|
},
|
|
"GSM8K": {
|
|
"score": 18.9,
|
|
"correct": 249,
|
|
"total": 1319
|
|
},
|
|
"speed": {
|
|
"avg_tok_s": 106.3,
|
|
"max_tok_s": 106.6,
|
|
"device": "cuda",
|
|
"gpu": "NVIDIA GeForce RTX 5090",
|
|
"dtype": "bfloat16"
|
|
},
|
|
"ttft": {
|
|
"avg_ms": 11.2,
|
|
"p50_ms": 11.2,
|
|
"p99_ms": 11.3
|
|
},
|
|
"vram": {
|
|
"used_gb": 6.43,
|
|
"total_gb": 33.67,
|
|
"utilization_pct": 19.1
|
|
}
|
|
} |