Provide models for mobile-only platforms by fixing batch size to 1 (#1276)

2024-08-22 19:36:24 +08:00
parent d8001d6edc
commit 0e0d04a97a
6 changed files with 287 additions and 0 deletions
--- a/.github/workflows/mobile-asr-models.yaml
+++ b/.github/workflows/mobile-asr-models.yaml
@@ -0,0 +1,52 @@
 # Workflow that runs scripts/mobile-asr-models/run.sh and uploads the
 # resulting *.tar.bz2 archives as release assets.
 name: mobile-asr-models
 # Runs on pushes to the asr-mobile branch, or when started manually
 # (workflow_dispatch).
 on:
  push:
    branches:
      - asr-mobile
  workflow_dispatch:
 # One concurrency group per git ref: a newer run on the same ref cancels the
 # run that is still in progress.
 concurrency:
  group: mobile-asr-models-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  mobile-asr-models:
    # Only run for the listed repository owners, so that other forks skip
    # this job.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      # Single-entry matrix: one OS and one Python version.
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # onnxruntime and onnx are pinned to fixed versions; the conversion
      # scripts invoke onnxruntime tools through "python3 -m ...".
      - name: Install dependencies
        shell: bash
        run: |
          python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0
      # run.sh moves the archives it creates two directories up, i.e. to the
      # repository root when started from scripts/mobile-asr-models.
      - name: Run
        shell: bash
        run: |
          cd scripts/mobile-asr-models
          ./run.sh
      # Upload every *.tar.bz2 found in the working directory to the
      # "asr-models" tag of k2-fsa/sherpa-onnx, with overwrite enabled.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
--- a/scripts/mobile-asr-models/README.md
+++ b/scripts/mobile-asr-models/README.md
@@ -0,0 +1,18 @@
 # Introduction
 This folder contains scripts that convert ASR models for mobile platforms.
 The converted models support only a batch size of 1.
 The advantage of fixing the batch size to 1 is that it provides more
 opportunities for model optimization and quantization.
 To give you a concrete example, for the following model
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
 | | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
 |---|---|---|
 |Dynamic batch size| 315 MB| 174 MB|
 |Batch size fixed to 1| 242 MB | 100 MB |
 The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing)
 provides examples of how to use the above two models.
--- a/scripts/mobile-asr-models/dynamic_quantization.py
+++ b/scripts/mobile-asr-models/dynamic_quantization.py
@@ -0,0 +1,38 @@
 #!/usr/bin/env python3
 import argparse
 from onnxruntime.quantization import QuantType, quantize_dynamic
 def get_args():
    """Parse the command line of this script.

    Two string options are accepted and both are required:

      --input   Input onnx model
      --output  Output onnx model

    Returns:
      The ``argparse.Namespace`` holding ``input`` and ``output``.
    """
    cli = argparse.ArgumentParser()

    # (flag, help text) for every required string option.
    required_options = (
        ("--input", "Input onnx model"),
        ("--output", "Output onnx model"),
    )
    for flag, description in required_options:
        cli.add_argument(flag, type=str, required=True, help=description)

    return cli.parse_args()
 def main():
    """Quantize the model given by --input and write it to --output.

    Prints the parsed arguments, then calls ``quantize_dynamic`` restricted
    to ``MatMul`` operators with ``QuantType.QInt8`` weights.
    """
    cli_args = get_args()
    print(vars(cli_args))

    quantization_options = dict(
        model_input=cli_args.input,
        model_output=cli_args.output,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )
    quantize_dynamic(**quantization_options)


 # Run the conversion only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/scripts/mobile-asr-models/parse_options.sh
+++ b/scripts/mobile-asr-models/parse_options.sh
@@ -0,0 +1,97 @@
 #!/usr/bin/env bash
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
 #                 Arnab Ghoshal, Karel Vesely
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #  http://www.apache.org/licenses/LICENSE-2.0
 #
 # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 # MERCHANTABLITY OR NON-INFRINGEMENT.
 # See the Apache 2 License for the specific language governing permissions and
 # limitations under the License.
 # Parse command-line options.
 # To be sourced by another script (as in ". parse_options.sh").
 # Option format is: --option-name arg
 # and shell variable "option_name" gets set to value "arg."
 # The exception is --help, which takes no arguments, but prints the
 # $help_message variable (if defined).
 #
 # Only options whose variable has already been defined by the sourcing script
 # are accepted; anything else is reported as an invalid option (exit 1).
 # An option whose current value is "true" or "false" only accepts "true" or
 # "false" as its new value.
 # Parsing stops at the first argument that is empty or does not start with
 # "--"; the parsed options are shifted off the positional parameters of the
 # sourcing script, so "$@" holds the remaining arguments afterwards.
 ###
 ### The --config file options have lower priority to command line
 ### options, so we need to import them first...
 ###
 # Now import all the configs specified by command-line, in left-to-right order
 for ((argpos=1; argpos<$#; argpos++)); do
   if [ "${!argpos}" == "--config" ]; then
     argpos_plus1=$((argpos+1))
     config=${!argpos_plus1}
     # Quoted, so that a path containing spaces works and an empty path is
     # reported as missing instead of slipping through the test.
     [ ! -r "$config" ] && echo "$0: missing config '$config'" && exit 1
     . "$config"  # source the config file.
   fi
 done
 ###
 ### Now we process the command line options
 ###
 # A name that may safely be used as a shell variable.
 valid_name_re='^[A-Za-z_][A-Za-z0-9_]*$'
 while true; do
   [ -z "${1:-}" ] && break;  # break if there are no arguments
   case "$1" in
     # If the enclosing script is called with --help option, print the help
     # message and exit.  Scripts should put help messages in $help_message
     --help|-h)
       if [ -z "${help_message:-}" ]; then
         echo "No help found." 1>&2
       else
         # %b still interprets backslash escapes inside the message, but the
         # message is no longer used as the format string, so a literal '%'
         # in it is printed as-is.
         printf '%b\n' "$help_message" 1>&2
       fi
       exit 0 ;;
     --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
       exit 1 ;;
     # If the first command-line argument begins with "--" (e.g. --foo-bar),
     # then work out the variable name as $name, which will equal "foo_bar".
     --*)
       name=${1#--}
       name=${name//-/_}
       # Next we test whether the variable in question is undefined-- if so
       # it's an invalid option and we die.  Note: $0 evaluates to the name of
       # the enclosing script.  A name that is not a valid identifier can never
       # be a defined variable, so it is rejected the same way; this also
       # keeps arbitrary text out of the indirect expansions below.
       if [[ ! "$name" =~ $valid_name_re ]] || [ -z "${!name+xxx}" ]; then
         echo "$0: invalid option $1" 1>&2
         exit 1
       fi
       oldval=${!name}
       # Work out whether we seem to be expecting a Boolean argument.
       if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
         was_bool=true;
       else
         was_bool=false;
       fi
       # Every option takes a value.  Without this check "shift 2" below fails
       # without shifting and the loop would never terminate.
       if [ $# -lt 2 ]; then
         echo "$0: option $1 requires an argument" 1>&2
         exit 1
       fi
       # Check that Boolean-valued arguments are really Boolean.
       if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
         echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
         exit 1;
       fi
       # Set the variable to the given value verbatim.  printf -v does not
       # re-evaluate the value, so spaces, quotes and '$' are kept as typed,
       # e.g. --cmd "queue.pl -sync y"
       printf -v "$name" '%s' "$2"
       shift 2;
       ;;
     *) break;
   esac
 done
 # Check for an empty argument to the --cmd option, which can easily occur as a
 # result of scripting errors.
 [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
 true; # so this script returns exit code 0.
--- a/scripts/mobile-asr-models/run-impl.sh
+++ b/scripts/mobile-asr-models/run-impl.sh
@@ -0,0 +1,42 @@
 #!/usr/bin/env bash
 #
 # usage of this file:
 #  ./run-impl.sh --input in.onnx --output1 out1.onnx --output2 out2.onnx
 # where out1.onnx is a float32 model with batch size fixed to 1
 # and out2.onnx is an int8 quantized version of out1.onnx
 #
 # The optional --batch-dim NAME sets the name that is passed as --dim_param
 # to make_dynamic_shape_fixed (default: N).
 set -ex
 input=
 output1=
 output2=
 batch_dim=N
 # Locate the helper files next to this script, so that the script also works
 # when it is started from another directory.
 script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)
 source "$script_dir/parse_options.sh"
 if [ -z "$input" ]; then
   echo 'Please provide input model filename'
   exit 1
 fi
 if [ -z "$output1" ]; then
   echo 'Please provide output1 model filename'
   exit 1
 fi
 if [ -z "$output2" ]; then
   echo 'Please provide output2 model filename'
   exit 1
 fi
 echo "input: $input"
 echo "output1: $output1"
 echo "output2: $output2"
 # Intermediate model with the batch dimension fixed.  The EXIT trap removes
 # it on every exit path, including a failure of one of the steps below.
 tmp_model=tmp.fixed.onnx
 trap 'rm -f -- "$tmp_model"' EXIT
 # 1. fix the batch dimension to 1
 python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param "$batch_dim" --dim_value 1 "$input" "$tmp_model"
 # 2. run the quantization pre-processing step; its output is the float32 model
 python3 -m onnxruntime.quantization.preprocess --input "$tmp_model" --output "$output1"
 # 3. quantize the float32 model to int8
 python3 "$script_dir/dynamic_quantization.py" --input "$output1" --output "$output2"
 ls -lh "$input" "$tmp_model" "$output1" "$output2"
--- a/scripts/mobile-asr-models/run.sh
+++ b/scripts/mobile-asr-models/run.sh
@@ -0,0 +1,40 @@
 #!/usr/bin/env bash
 # Download the streaming zipformer bilingual zh-en model, convert its encoder
 # with ./run-impl.sh, and package the result as $dst.tar.bz2, which is moved
 # two directories up.
 set -ex
 src=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
 dst=$src-mobile
 # --fail makes curl return a non-zero status on an HTTP error instead of
 # saving the error page under the archive name.
 curl --fail -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2"
 tar xvf "$src.tar.bz2"
 rm "$src.tar.bz2"
 mkdir -p "$dst"
 # Only the encoder is converted; the remaining files are copied unchanged.
 ./run-impl.sh \
   --input "$src/encoder-epoch-99-avg-1.onnx" \
   --output1 "$dst/encoder-epoch-99-avg-1.onnx" \
   --output2 "$dst/encoder-epoch-99-avg-1.int8.onnx"
 cp -v "$src/README.md" "$dst/"
 cp -v "$src/tokens.txt" "$dst/"
 cp -av "$src/test_wavs" "$dst/"
 cp -v "$src/decoder-epoch-99-avg-1.onnx" "$dst/"
 cp -v "$src/joiner-epoch-99-avg-1.int8.onnx" "$dst/"
 cat > "$dst/notes.md" <<EOF
 # Introduction
 This model is converted from
 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
 and it supports only batch size equal to 1.
 EOF
 echo "---$src---"
 ls -lh "$src"
 echo "---$dst---"
 ls -lh "$dst"
 rm -rf "$src"
 tar cjfv "$dst.tar.bz2" "$dst"
 mv ./*.tar.bz2 ../../
 rm -rf "$dst"