Add C++ runtime for SenseVoice models (#1148)

2024-07-18 22:54:18 +08:00
parent 3bae5c3fe5
commit 25f0a10468
34 changed files with 1160 additions and 39 deletions
--- a/scripts/sense-voice/export-onnx.py
+++ b/scripts/sense-voice/export-onnx.py
@@ -162,7 +162,9 @@ def main():
        "neg_mean": neg_mean,
        "inv_stddev": inv_stddev,
        "model_type": "sense_voice_ctc",
-        "version": "1",
+        # version 1: MatMul weights are quantized with QuantType.QInt8
+        # version 2: MatMul weights are quantized with QuantType.QUInt8
+        "version": "2",
        "model_author": "iic",
        "maintainer": "k2-fsa",
        "vocab_size": vocab_size,
@@ -185,7 +187,10 @@ def main():
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
-        weight_type=QuantType.QInt8,
+        # Note that we have to use QUInt8 here.
+        #
+        # With QInt8 weights, the C++ onnxruntime produces incorrect results.
+        weight_type=QuantType.QUInt8,
    )