Refactor exporting NeMo models (#2362)
Refactors and extends model export support to include new NeMo Parakeet TDT int8 variants for English and Japanese, updating the Kotlin API, export scripts, test runners, and CI workflows.
- Adds support for two new int8 model types in OfflineRecognizer.kt.
- Enhances the Python export scripts to perform dynamic quantization and metadata injection.
- Updates the shell scripts and GitHub workflows to package, test, and publish the int8 model artifacts.
This commit is contained in:
@@ -6,6 +6,7 @@ from typing import Dict
|
||||
import nemo.collections.asr as nemo_asr
|
||||
import onnx
|
||||
import torch
|
||||
from onnxruntime.quantization import QuantType, quantize_dynamic
|
||||
|
||||
|
||||
def get_args():
|
||||
@@ -114,6 +115,11 @@ def main():
|
||||
"comment": "Only the CTC branch is exported",
|
||||
}
|
||||
add_meta_data(filename, meta_data)
|
||||
quantize_dynamic(
|
||||
model_input="./model.onnx",
|
||||
model_output="./model.int8.onnx",
|
||||
weight_type=QuantType.QUInt8,
|
||||
)
|
||||
|
||||
print(meta_data)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user