77 lines
2.3 KiB
JSON
77 lines
2.3 KiB
JSON
{
|
|
"model": "semantic-turn-taking/production-v1/ckpt-7000",
|
|
"base_model": "Qwen/Qwen2.5-0.5B-Instruct",
|
|
"checkpoint_step": 7000,
|
|
"benchmarks": {
|
|
"ten": {
|
|
"dataset": "TEN Turn Detection",
|
|
"examples": 528,
|
|
"binary_accuracy": 0.869,
|
|
"binary_f1_macro": 0.868,
|
|
"finished_recall": 0.88,
|
|
"unfinished_recall": 0.96,
|
|
"wait_recall": 0.66
|
|
},
|
|
"swda_v2": {
|
|
"dataset": "SwDA (improved heuristic v2)",
|
|
"examples": 4087,
|
|
"four_class_accuracy": 0.6337,
|
|
"four_class_f1_macro": 0.4364,
|
|
"binary_accuracy": 0.7149,
|
|
"binary_f1_macro": 0.7122,
|
|
"per_class_accuracy": {
|
|
"start_speaking": 0.618,
|
|
"continue_listening": 0.479,
|
|
"start_listening": 0.0,
|
|
"continue_speaking": 0.847
|
|
},
|
|
"note": "v2 fixes false start_listening from agent backchannels"
|
|
},
|
|
"internal_test_en": {
|
|
"dataset": "Hand-crafted Internal Test (English)",
|
|
"examples": 48,
|
|
"four_class_accuracy": 0.7708,
|
|
"four_class_f1_macro": 0.7421,
|
|
"binary_accuracy": 0.9583,
|
|
"binary_f1_macro": 0.9583,
|
|
"per_class_accuracy": {
|
|
"start_speaking": 0.833,
|
|
"continue_listening": 1.0,
|
|
"start_listening": 0.25,
|
|
"continue_speaking": 1.0
|
|
}
|
|
},
|
|
"internal_test_es": {
|
|
"dataset": "Hand-crafted Internal Test (Spanish)",
|
|
"examples": 48,
|
|
"four_class_accuracy": 0.6667,
|
|
"four_class_f1_macro": 0.631,
|
|
"binary_accuracy": 0.8542,
|
|
"binary_f1_macro": 0.8536,
|
|
"per_class_accuracy": {
|
|
"start_speaking": 0.667,
|
|
"continue_listening": 1.0,
|
|
"start_listening": 0.167,
|
|
"continue_speaking": 0.833
|
|
},
|
|
"note": "Model was never trained on Spanish — cross-lingual transfer from Qwen2.5"
|
|
},
|
|
"synthetic_eval": {
|
|
"dataset": "Synthetic validation set",
|
|
"eval_accuracy": 0.900,
|
|
"eval_f1_macro": 0.767
|
|
}
|
|
},
|
|
"livekit_comparison": {
|
|
"model": "livekit/turn-detector@v0.4.1-intl",
|
|
"ten_binary_accuracy": 0.667,
|
|
"ten_binary_f1_macro": 0.592,
|
|
"swda_v2_binary_accuracy": 0.367,
|
|
"swda_v2_binary_f1_macro": 0.289,
|
|
"internal_en_binary_accuracy": 0.708,
|
|
"internal_en_binary_f1_macro": 0.681,
|
|
"internal_es_binary_accuracy": 0.625,
|
|
"internal_es_binary_f1_macro": 0.578
|
|
}
|
|
}
|