Add int8 quantized whisper large models (#1126)

2024-07-13 22:30:06 +08:00
parent 3951a12f8d
commit ab71c3976d
3 changed files with 41 additions and 20 deletions
--- a/scripts/whisper/export-onnx.py
+++ b/scripts/whisper/export-onnx.py
@@ -582,9 +582,6 @@ def main():
            location=decoder_external_filename + ".weights",
        )

-    if "large" in args.model:
-        # it causes errors for large models, so skip it.
-        return
    # Generate int8 quantization models
    # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection