{ "model": "Qwen3-1.7B", "total_examples": 5000, "exact_match_accuracy": 0.8224, "error_detection": { "precision": 0.99457111834962, "recall": 0.9803781662504459, "f1": 0.9874236435501258, "true_positives": 2748, "false_positives": 15, "true_negatives": 1364, "false_negatives": 55 }, "has_error_accuracy": 0.7589063794531897, "no_error_accuracy": 0.9891225525743292, "inference_speed_tokens_per_sec": 75.49899709914501, "total_inference_time_sec": 896.7403886318207, "total_tokens_generated": 67703 }