Files
semantic-turn-taking/test_metrics.json
ModelHub XC a288e309b5 初始化项目,由ModelHub XC社区提供模型
Model: anyreach-ai/semantic-turn-taking
Source: Original Platform
2026-04-29 12:06:38 +08:00

77 lines
2.3 KiB
JSON

{
"model": "semantic-turn-taking/production-v1/ckpt-7000",
"base_model": "Qwen/Qwen2.5-0.5B-Instruct",
"checkpoint_step": 7000,
"benchmarks": {
"ten": {
"dataset": "TEN Turn Detection",
"examples": 528,
"binary_accuracy": 0.869,
"binary_f1_macro": 0.868,
"finished_recall": 0.88,
"unfinished_recall": 0.96,
"wait_recall": 0.66
},
"swda_v2": {
"dataset": "SwDA (improved heuristic v2)",
"examples": 4087,
"four_class_accuracy": 0.6337,
"four_class_f1_macro": 0.4364,
"binary_accuracy": 0.7149,
"binary_f1_macro": 0.7122,
"per_class_accuracy": {
"start_speaking": 0.618,
"continue_listening": 0.479,
"start_listening": 0.0,
"continue_speaking": 0.847
},
"note": "v2 fixes false start_listening from agent backchannels"
},
"internal_test_en": {
"dataset": "Hand-crafted Internal Test (English)",
"examples": 48,
"four_class_accuracy": 0.7708,
"four_class_f1_macro": 0.7421,
"binary_accuracy": 0.9583,
"binary_f1_macro": 0.9583,
"per_class_accuracy": {
"start_speaking": 0.833,
"continue_listening": 1.0,
"start_listening": 0.25,
"continue_speaking": 1.0
}
},
"internal_test_es": {
"dataset": "Hand-crafted Internal Test (Spanish)",
"examples": 48,
"four_class_accuracy": 0.6667,
"four_class_f1_macro": 0.631,
"binary_accuracy": 0.8542,
"binary_f1_macro": 0.8536,
"per_class_accuracy": {
"start_speaking": 0.667,
"continue_listening": 1.0,
"start_listening": 0.167,
"continue_speaking": 0.833
},
"note": "Model was never trained on Spanish — cross-lingual transfer from Qwen2.5"
},
"synthetic_eval": {
"dataset": "Synthetic validation set",
"eval_accuracy": 0.900,
"eval_f1_macro": 0.767
}
},
"livekit_comparison": {
"model": "livekit/turn-detector@v0.4.1-intl",
"ten_binary_accuracy": 0.667,
"ten_binary_f1_macro": 0.592,
"swda_v2_binary_accuracy": 0.367,
"swda_v2_binary_f1_macro": 0.289,
"internal_en_binary_accuracy": 0.708,
"internal_en_binary_f1_macro": 0.681,
"internal_es_binary_accuracy": 0.625,
"internal_es_binary_f1_macro": 0.578
}
}