Refactor exporting NeMo models (#2362)

Refactors and extends model export support to include new NeMo Parakeet TDT int8 variants for English and Japanese, updating the Kotlin API, export scripts, test runners, and CI workflows.

- Added support for two new int8 model types in OfflineRecognizer.kt.
- Enhanced Python export scripts to perform dynamic quantization and metadata injection.
- Updated shell scripts and GitHub workflows to package, test, and publish int8 model artifacts.
2025-07-09 16:02:12 +08:00
parent f1405779cf
commit 6122a678f5
19 changed files with 671 additions and 23 deletions
--- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py
+++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py
@@ -6,6 +6,7 @@ from typing import Dict
 import nemo.collections.asr as nemo_asr
 import onnx
 import torch
+from onnxruntime.quantization import QuantType, quantize_dynamic


 def get_args():
@@ -122,6 +123,13 @@ def main():
    }
    add_meta_data("encoder.onnx", meta_data)

+    for m in ["encoder", "decoder", "joiner"]:
+        quantize_dynamic(
+            model_input=f"{m}.onnx",
+            model_output=f"{m}.int8.onnx",
+            weight_type=QuantType.QUInt8,
+        )
+
    print(meta_data)