{ "model": "Qwen3-4B", "total_examples": 5000, "exact_match_accuracy": 0.8564, "error_detection": { "precision": 0.9969188634029442, "recall": 0.9921635434412266, "f1": 0.994535519125683, "true_positives": 2912, "false_positives": 9, "true_negatives": 1370, "false_negatives": 23 }, "has_error_accuracy": 0.8041977354322011, "no_error_accuracy": 0.9934735315445975, "inference_speed_tokens_per_sec": 58.816495560243375, "total_inference_time_sec": 1150.6635911464691, "total_tokens_generated": 67678 }