Add int8 quantized whisper large models (#1126)

This commit is contained in:
Fangjun Kuang
2024-07-13 22:30:06 +08:00
committed by GitHub
parent 3951a12f8d
commit ab71c3976d
3 changed files with 41 additions and 20 deletions

View File

@@ -582,9 +582,6 @@ def main():
location=decoder_external_filename + ".weights",
)
if "large" in args.model:
# it causes errors for large models, so skip it.
return
# Generate int8 quantization models
# See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection