Add C++ runtime for SenseVoice models (#1148)

This commit is contained in:
Fangjun Kuang
2024-07-18 22:54:18 +08:00
committed by GitHub
parent 3bae5c3fe5
commit 25f0a10468
34 changed files with 1160 additions and 39 deletions

View File

@@ -162,7 +162,9 @@ def main():
"neg_mean": neg_mean,
"inv_stddev": inv_stddev,
"model_type": "sense_voice_ctc",
"version": "1",
# version 1: the int8 model is quantized with QInt8 weights
# version 2: the int8 model is quantized with QUInt8 weights
"version": "2",
"model_author": "iic",
"maintainer": "k2-fsa",
"vocab_size": vocab_size,
@@ -185,7 +187,10 @@ def main():
model_input=filename,
model_output=filename_int8,
op_types_to_quantize=["MatMul"],
weight_type=QuantType.QInt8,
# Note: weight_type must be QUInt8 here.
#
# With QInt8 weights, the C++ onnxruntime produces incorrect results.
weight_type=QuantType.QUInt8,
)