Add non-streaming websocket server for python (#259)

2023-08-11 15:56:24 +08:00
parent 6c0f002825
commit b094868fb8
24 changed files with 1247 additions and 92 deletions
--- a/.github/workflows/test-pip-install.yaml
+++ b/.github/workflows/test-pip-install.yaml
@@ -23,12 +23,12 @@ permissions:
 jobs:
  test_pip_install:
    runs-on: ${{ matrix.os }}
-    name: Test pip install on ${{ matrix.os }}
+    name: ${{ matrix.os }} ${{ matrix.python-version }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v2
@@ -50,3 +50,15 @@ jobs:
        run: |
          python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__)"
          python3 -c "import sherpa_onnx; print(sherpa_onnx.__version__)"
          sherpa-onnx --help
          sherpa-onnx-offline --help
          sherpa-onnx-microphone --help
          sherpa-onnx-microphone-offline --help
          sherpa-onnx-offline-websocket-server --help
          sherpa-onnx-offline-websocket-client --help
          sherpa-onnx-online-websocket-server --help
          sherpa-onnx-online-websocket-client --help
--- a/.github/workflows/test-python-offline-websocket-server.yaml
+++ b/.github/workflows/test-python-offline-websocket-server.yaml
@@ -0,0 +1,174 @@
 name: Python offline websocket server
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 concurrency:
  group: python-offline-websocket-server-${{ github.ref }}
  cancel-in-progress: true
 permissions:
  contents: read
 jobs:
  python_offline_websocket_server:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.model_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
        model_type: ["transducer", "paraformer", "nemo_ctc", "whisper"]
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy
      - name: Install sherpa-onnx
        shell: bash
        run: |
          python3 -m pip install --no-deps --verbose .
          python3 -m pip install websockets
      - name: Start server for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26
          cd sherpa-onnx-zipformer-en-2023-06-26
          git lfs pull --include "*.onnx"
          cd ..
          python3 ./python-api-examples/non_streaming_server.py \
            --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
            --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \
            --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \
            --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10
      - name: Start client for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/1.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/8k.wav
          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/1.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/8k.wav
      - name: Start server for paraformer models
        if: matrix.model_type == 'paraformer'
        shell: bash
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
          cd sherpa-onnx-paraformer-zh-2023-03-28
          git lfs pull --include "*.onnx"
          cd ..
          python3 ./python-api-examples/non_streaming_server.py \
            --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
            --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10
      - name: Start client for paraformer models
        if: matrix.model_type == 'paraformer'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
            ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
      - name: Start server for nemo_ctc models
        if: matrix.model_type == 'nemo_ctc'
        shell: bash
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
          cd sherpa-onnx-nemo-ctc-en-conformer-medium
          git lfs pull --include "*.onnx"
          cd ..
          python3 ./python-api-examples/non_streaming_server.py \
            --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
            --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10
      - name: Start client for nemo_ctc models
        if: matrix.model_type == 'nemo_ctc'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
      - name: Start server for whisper models
        if: matrix.model_type == 'whisper'
        shell: bash
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
          cd sherpa-onnx-whisper-tiny.en
          git lfs pull --include "*.onnx"
          cd ..
          python3 ./python-api-examples/non_streaming_server.py \
            --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
            --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
            --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10
      - name: Start client for whisper models
        if: matrix.model_type == 'whisper'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
--- a/.github/workflows/test-python-online-websocket-server.yaml
+++ b/.github/workflows/test-python-online-websocket-server.yaml
@@ -0,0 +1,73 @@
 name: Python online websocket server
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 concurrency:
  group: python-online-websocket-server-${{ github.ref }}
  cancel-in-progress: true
 permissions:
  contents: read
 jobs:
  python_online_websocket_server:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
        model_type: ["transducer"]
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy
      - name: Install sherpa-onnx
        shell: bash
        run: |
          python3 -m pip install --no-deps --verbose .
          python3 -m pip install websockets
      - name: Start server for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
          cd sherpa-onnx-streaming-zipformer-en-2023-06-26
          git lfs pull --include "*.onnx"
          cd ..
          python3 ./python-api-examples/streaming_server.py \
            --encoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
            --decoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \
            --joiner ./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \
            --tokens ./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10
      - name: Start client for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)
-set(SHERPA_ONNX_VERSION "1.7.1")
+set(SHERPA_ONNX_VERSION "1.7.2")
 # Disable warning about
 #
--- a/c-api-examples/README.md
+++ b/c-api-examples/README.md
@@ -0,0 +1,9 @@
 # Introduction
 This folder contains C API examples for [sherpa-onnx][sherpa-onnx].
 Please refer to the documentation
 https://k2-fsa.github.io/sherpa/onnx/c-api/index.html
 for details.
 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
--- a/dotnet-examples/README.md
+++ b/dotnet-examples/README.md
@@ -0,0 +1,9 @@
 # Introduction
 This folder contains C# API examples for [sherpa-onnx][sherpa-onnx].
 Please refer to the documentation
 https://k2-fsa.github.io/sherpa/onnx/csharp-api/index.html
 for details.
 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
--- a/go-api-examples/README.md
+++ b/go-api-examples/README.md
@@ -0,0 +1,9 @@
 # Introduction
 This folder contains Go API examples for [sherpa-onnx][sherpa-onnx].
 Please refer to the documentation
 https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
 for details.
 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
--- a/python-api-examples/non_streaming_server.py
+++ b/python-api-examples/non_streaming_server.py
@@ -0,0 +1,835 @@
 #!/usr/bin/env python3
 # Copyright      2022-2023  Xiaomi Corp.
 """
 A server for non-streaming speech recognition. Non-streaming means you send all
 the content of the audio at once for recognition.
 It supports multiple clients sending at the same time.
 Usage:
    ./non_streaming_server.py --help
 Please refer to
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html
 for pre-trained models to download.
 Usage examples:
 (1) Use a non-streaming transducer model
 cd /path/to/sherpa-onnx
 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26
 cd sherpa-onnx-zipformer-en-2023-06-26
 git lfs pull --include "*.onnx"
 cd ..
 python3 ./python-api-examples/non_streaming_server.py \
  --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
  --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt
 (2) Use a non-streaming paraformer
 cd /path/to/sherpa-onnx
 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
 cd sherpa-onnx-paraformer-zh-2023-03-28
 git lfs pull --include "*.onnx"
 cd ..
 python3 ./python-api-examples/non_streaming_server.py \
  --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt
 (3) Use a non-streaming CTC model from NeMo
 cd /path/to/sherpa-onnx
 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
 cd sherpa-onnx-nemo-ctc-en-conformer-medium
 git lfs pull --include "*.onnx"
 cd ..
 python3 ./python-api-examples/non_streaming_server.py \
  --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt
 (4) Use a Whisper model
 cd /path/to/sherpa-onnx
 GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
 cd sherpa-onnx-whisper-tiny.en
 git lfs pull --include "*.onnx"
 cd ..
 python3 ./python-api-examples/non_streaming_server.py \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt
 ----
 To serve over https (and wss://), additionally pass a certificate, e.g.,
 python3 ./python-api-examples/non_streaming_server.py \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --certificate=/path/to/your/cert.pem
 If you don't have a certificate, please run:
    cd ./python-api-examples/web
    ./generate-certificate.py
 It will generate 3 files, one of which is the required `cert.pem`.
 """  # noqa
 import argparse
 import asyncio
 import http
 import logging
 import socket
 import ssl
 import sys
 import warnings
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from pathlib import Path
 from typing import Optional, Tuple
 import numpy as np
 import sherpa_onnx
 import websockets
 from http_server import HttpServer
 def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
 ) -> None:
    """Configure the root logger to write to a timestamped log file.

    Args:
      log_filename:
        Prefix of the log file. The current date-time and the suffix
        ``.txt`` are appended to it. Missing parent directories are
        created.
      log_level:
        One of "debug", "info", "warning", "error", "critical". Any other
        value is treated like "error".
      use_console:
        True to additionally attach a console handler to the root logger.
    """
    fmt = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    # Anything not listed here (including "error" itself) maps to ERROR.
    name2level = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "critical": logging.CRITICAL,
    }
    level = name2level.get(log_level, logging.ERROR)

    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    target = f"{log_filename}-{timestamp}.txt"
    Path(target).parent.mkdir(parents=True, exist_ok=True)

    logging.basicConfig(
        level=level,
        format=fmt,
        filename=target,
        filemode="w",
    )
    if not use_console:
        return
    # Mirror the file output on the console with the same level and format.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    stream_handler.setFormatter(logging.Formatter(fmt))
    logging.getLogger("").addHandler(stream_handler)
 def add_transducer_model_args(parser: argparse.ArgumentParser):
    """Register --encoder, --decoder and --joiner on ``parser``.

    Each option is a string that defaults to the empty string.
    """
    for component in ("encoder", "decoder", "joiner"):
        parser.add_argument(
            f"--{component}",
            type=str,
            default="",
            help=f"Path to the transducer {component} model",
        )
 def add_paraformer_model_args(parser: argparse.ArgumentParser):
    """Register the --paraformer option (string, empty by default)."""
    option = "--paraformer"
    description = "Path to the model.onnx from Paraformer"
    parser.add_argument(option, help=description, type=str, default="")
 def add_nemo_ctc_model_args(parser: argparse.ArgumentParser):
    """Register the --nemo-ctc option (string, empty by default)."""
    option = "--nemo-ctc"
    description = "Path to the model.onnx from NeMo CTC"
    parser.add_argument(option, help=description, type=str, default="")
 def add_whisper_model_args(parser: argparse.ArgumentParser):
    """Register --whisper-encoder and --whisper-decoder on ``parser``.

    Both options are strings that default to the empty string.
    """
    for part in ("encoder", "decoder"):
        parser.add_argument(
            f"--whisper-{part}",
            type=str,
            default="",
            help=f"Path to whisper {part} model",
        )
 def add_model_args(parser: argparse.ArgumentParser):
    """Register every model-related option on ``parser``.

    This covers the per-model-type options (transducer, paraformer,
    NeMo CTC, whisper) followed by the options shared by all model types:
    --tokens, --num-threads and --provider.
    """
    per_model_registrars = (
        add_transducer_model_args,
        add_paraformer_model_args,
        add_nemo_ctc_model_args,
        add_whisper_model_args,
    )
    for register in per_model_registrars:
        register(parser)

    # Options common to all model types.
    parser.add_argument("--tokens", type=str, help="Path to tokens.txt")
    parser.add_argument(
        "--num-threads",
        default=2,
        type=int,
        help="Number of threads to run the neural network model",
    )
    parser.add_argument(
        "--provider",
        default="cpu",
        type=str,
        help="Valid values: cpu, cuda, coreml",
    )
 def add_feature_config_args(parser: argparse.ArgumentParser):
    """Register the feature-extraction options --sample-rate and --feat-dim."""
    # (flag, default, help); both options are integers.
    int_options = (
        (
            "--sample-rate",
            16000,
            "Sample rate of the data used to train the model. ",
        ),
        ("--feat-dim", 80, "Feature dimension of the model"),
    )
    for flag, default_value, description in int_options:
        parser.add_argument(
            flag,
            default=default_value,
            type=int,
            help=description,
        )
 def add_decoding_args(parser: argparse.ArgumentParser):
    """Register --decoding-method plus the modified_beam_search options."""
    method_help = """Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search  (for transducer models only)
        """
    parser.add_argument(
        "--decoding-method",
        default="greedy_search",
        type=str,
        help=method_help,
    )
    add_modified_beam_search_args(parser)
 def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    """Register --max-active-paths (integer, 4 by default) on ``parser``."""
    paths_help = """Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """
    parser.add_argument(
        "--max-active-paths",
        default=4,
        type=int,
        help=paths_help,
    )
 def check_args(args):
    """Validate the parsed command-line arguments.

    Args:
      args:
        The namespace produced by the argument parser. ``tokens`` and
        ``decoding_method`` are always read. For modified_beam_search,
        ``max_active_paths``, ``encoder``, ``decoder`` and ``joiner`` are
        read as well.

    Raises:
      ValueError:
        If the tokens file is missing, the decoding method is unsupported,
        or (for modified_beam_search) the number of active paths is not
        positive or one of the transducer model files does not exist.
    """
    # --tokens has no default, so it is None when omitted on the command line.
    if not args.tokens or not Path(args.tokens).is_file():
        raise ValueError(f"{args.tokens} does not exist")
    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")
    if args.decoding_method != "modified_beam_search":
        return
    # The option is declared as --max-active-paths, hence the attribute name.
    if args.max_active_paths <= 0:
        raise ValueError(
            f"--max-active-paths must be positive. Given {args.max_active_paths}"
        )
    # modified_beam_search is only available for transducer models, so all
    # three transducer files have to be present.
    for name in ("encoder", "decoder", "joiner"):
        model_path = getattr(args, name)
        if not Path(model_path).is_file():
            raise ValueError(f"--{name}: {model_path} does not exist")
 def get_args():
    """Build the command-line parser for the server and parse the arguments.

    Returns:
      The namespace returned by ``parser.parse_args()``. It holds the model,
      feature and decoding options plus the server options defined below.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    for register in (
        add_model_args,
        add_feature_config_args,
        add_decoding_args,
    ):
        register(parser)

    option = parser.add_argument

    option(
        "--port",
        default=6006,
        type=int,
        help="The server will listen on this port",
    )
    # Batching of requests before they are sent to the recognizer.
    option(
        "--max-batch-size",
        default=25,
        type=int,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )
    option(
        "--max-wait-ms",
        default=5,
        type=float,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the feature queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )
    option(
        "--nn-pool-size",
        default=1,
        type=int,
        help="Number of threads for NN computation and decoding.",
    )
    # Limits applied to websocket connections.
    option(
        "--max-message-size",
        default=1 << 20,
        type=int,
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )
    option(
        "--max-queue-size",
        default=32,
        type=int,
        help="Max number of messages in the queue for each connection.",
    )
    option(
        "--max-active-connections",
        default=500,
        type=int,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )
    # HTTPS / static file serving.
    option(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use ./web/generate-certificate.py
        to generate the certificate `cert.pem`.
        Note ./web/generate-certificate.py will generate three files but you
        only need to pass the generated cert.pem to this option.
        """,
    )
    option(
        "--doc-root",
        default="./python-api-examples/web",
        type=str,
        help="Path to the web root",
    )

    return parser.parse_args()
 class NonStreamingServer:
    def __init__(
        self,
        recognizer: sherpa_onnx.OfflineRecognizer,
        max_batch_size: int,
        max_wait_ms: float,
        nn_pool_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        doc_root: str,
        certificate: Optional[str] = None,
    ):
        """Store the configuration and create the helper objects.

        Args:
          recognizer:
            The sherpa_onnx.OfflineRecognizer used to decode all requests.
          max_batch_size:
            Upper bound on the number of streams decoded in one batch.
          max_wait_ms:
            How long (in milliseconds) the batching task sleeps when there
            is nothing to decode.
          nn_pool_size:
            Number of worker threads in the pool that runs NN computation
            and decoding.
          max_message_size:
            Maximum size in bytes of a single websocket message.
          max_queue_size:
            Maximum number of queued messages per connection.
          max_active_connections:
            Once this many clients are connected, further websocket
            connections are refused.
          doc_root:
            Directory containing the files (e.g., index.html) served by
            the HTTP server.
          certificate:
            Optional. When given, a secure websocket is used.
            ./web/generate-certificate.py can generate one (its default
            output filename is `cert.pem`).
        """
        self.recognizer = recognizer
        self.certificate = certificate

        self.http_server = HttpServer(doc_root)
        self.nn_pool = ThreadPoolExecutor(
            thread_name_prefix="nn",
            max_workers=nn_pool_size,
        )
        # Holds (stream, future) pairs waiting to be decoded.
        self.stream_queue = asyncio.Queue()

        # Batching parameters.
        self.max_batch_size = max_batch_size
        self.max_wait_ms = max_wait_ms

        # Websocket limits.
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections
        self.current_active_connections = 0

        feat_config = recognizer.config.feat_config
        self.sample_rate = int(feat_config.sampling_rate)
    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        if "sec-websocket-key" not in request_headers:
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"
            if path[-1] == "?":
                path = path[:-1]
            if path == "/streaming_record.html":
                response = r"""
 <!doctype html><html><head>
 <title>Speech recognition with next-gen Kaldi</title><body>
 <h2>Only
 <a href="/upload.html">/upload.html</a>
 and
 <a href="/offline_record.html">/offline_record.html</a>
 is available for the non-streaming server.<h2>
 <br/>
 <br/>
 Go back to <a href="/upload.html">/upload.html</a>
 or <a href="/offline_record.html">/offline_record.html</a>
 </body></head></html>
 """
                found = True
                mime_type = "text/html"
            else:
                found, response, mime_type = self.http_server.process_request(path)
            if isinstance(response, str):
                response = response.encode("utf-8")
            if not found:
                status = http.HTTPStatus.NOT_FOUND
            else:
                status = http.HTTPStatus.OK
            header = {"Content-Type": mime_type}
            return status, header, response
        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None
        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."
        return status, header, response
    async def run(self, port: int):
        """Start the batching task and serve requests on ``port`` forever.

        Args:
          port:
            The port to listen on.
        """
        logging.info("started")
        consumer = asyncio.create_task(self.stream_consumer_task())

        ssl_context = None
        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            logging.info("No certificate provided")

        server = websockets.serve(
            self.handle_connection,
            host="",
            port=port,
            max_size=self.max_message_size,
            max_queue=self.max_queue_size,
            process_request=self.process_request,
            ssl=ssl_context,
        )
        async with server:
            hosts = ["localhost"]
            if ssl_context:
                hosts.extend(
                    [
                        "0.0.0.0",
                        "127.0.0.1",
                        socket.gethostbyname(socket.gethostname()),
                    ]
                )
            scheme = "https://" if ssl_context is not None else "http://"
            addresses = "".join(f"  {scheme}{host}:{port}\n" for host in hosts)
            logging.info(
                "Please visit one of the following addresses:\n\n" + addresses
            )

            await asyncio.Future()  # run forever

        await consumer  # not reachable
    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Tuple[Optional[np.ndarray], Optional[float]]:
        """Receive the audio samples of one utterance from the client.

        The first message is either the text "Done", meaning the client
        won't send anything in the future, or a **bytes** buffer starting
        with an 8-byte header:

          - bytes 0-3: sample rate of the audio samples (little endian)
          - bytes 4-7: number of bytes of audio samples that follow
            (little endian)

        Audio bytes may follow the header in the same message. Since there
        is a limit on the message size, the client may spread the audio
        samples over several subsequent messages.

        Please refer to ./offline-websocket-client-decode-files-paralell.py
        and ./offline-websocket-client-decode-files-sequential.py
        for how the client sends the message.

        Args:
          socket:
            The socket for communicating with the client.
        Returns:
          Return a tuple containing:
            - 1-D np.float32 array containing the audio samples
            - sample rate of the audio samples
          or return (None, None) if the client sent "Done".
        Raises:
          ValueError:
            If the client does not follow the protocol described above.
        """
        header = await socket.recv()
        if header == "Done":
            return None, None

        # The data comes from an untrusted client, so it is validated with
        # explicit checks; `assert` statements are removed by `python -O`.
        if isinstance(header, str):
            raise ValueError(
                "The first message should be either 'Done' or a bytes buffer"
            )
        if len(header) < 8:
            raise ValueError(
                "The first message should contain at least 8 bytes. "
                f"Given {len(header)}"
            )

        sample_rate = int.from_bytes(header[:4], "little", signed=True)
        expected_num_bytes = int.from_bytes(header[4:8], "little", signed=True)
        if expected_num_bytes < 0:
            raise ValueError(
                f"Invalid number of bytes in the header: {expected_num_bytes}"
            )

        received = []
        num_received_bytes = 0
        if len(header) > 8:
            # Audio samples may start right after the 8-byte header.
            received.append(header[8:])
            num_received_bytes += len(header) - 8

        if num_received_bytes < expected_num_bytes:
            async for message in socket:
                received.append(message)
                num_received_bytes += len(message)
                if num_received_bytes >= expected_num_bytes:
                    break

        if num_received_bytes != expected_num_bytes:
            raise ValueError(
                f"Expected {expected_num_bytes} bytes of audio samples, "
                f"but received {num_received_bytes}"
            )

        samples = b"".join(received)
        array = np.frombuffer(samples, dtype=np.float32)
        return array, sample_rate
    async def stream_consumer_task(self):
        """Take streams from the queue, batch them up, and decode them with
        the recognizer in the thread pool.

        Each queue item is a (stream, future) pair. Once a batch has been
        processed, the future of every item in it is resolved: with None on
        success, or with the raised exception if decoding failed, so that
        the coroutines waiting on the futures never hang and this task keeps
        serving later requests.
        """
        loop = asyncio.get_running_loop()
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()
                    batch.append(item)
            except asyncio.QueueEmpty:
                pass

            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            error = None
            try:
                await loop.run_in_executor(
                    self.nn_pool,
                    self.recognizer.decode_streams,
                    stream_list,
                )
            except asyncio.CancelledError:
                # CancelledError derives from Exception before Python 3.8;
                # never swallow the cancellation of this task.
                raise
            except Exception as e:
                logging.exception("Failed to decode a batch of streams")
                error = e

            for f in future_list:
                self.stream_queue.task_done()
                if f.done():
                    # E.g., the waiter was cancelled in the meantime. Setting
                    # a result now would raise InvalidStateError.
                    continue
                if error is None:
                    f.set_result(None)
                else:
                    f.set_exception(error)
    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OfflineStream,
    ) -> None:
        """Queue a stream for batched decoding and wait until it is handled.

        The stream is paired with a fresh future and pushed onto
        ``self.stream_queue``; this coroutine then suspends until that future
        is completed.

        Args:
          stream:
            The stream to be decoded. Note: It is changed in-place.
        """
        done = asyncio.get_running_loop().create_future()
        await self.stream_queue.put((stream, done))
        await done
    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Serve one client connection and release its slot when it ends.

        The actual work is delegated to ``handle_connection_impl``. A
        ``ConnectionClosedError`` is logged and swallowed; whatever happens,
        the active-connection counter is decremented afterwards.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosedError:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Free the slot so that a new connection can be accepted.
            self.current_active_connections -= 1

            peer = socket.remote_address
            active = self.current_active_connections
            limit = self.max_active_connections
            logging.info(
                f"Disconnected: {peer}. "
                f"Number of connections: {active}/{limit}"
            )
    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Decode every utterance sent over one connection.

        For each utterance: receive the samples, feed them to a new stream,
        wait for the batched decoder, and send the recognized text back. The
        loop ends when ``recv_audio_samples`` returns ``None`` for the samples.

        Args:
          socket:
            The socket for communicating with the client.
        """
        peer = socket.remote_address
        active = self.current_active_connections
        limit = self.max_active_connections
        logging.info(
            f"Connected: {peer}. " f"Number of connections: {active}/{limit}"
        )

        while True:
            stream = self.recognizer.create_stream()

            samples, sample_rate = await self.recv_audio_samples(socket)
            if samples is None:
                return

            # accept_waveform() is called directly in this coroutine, not in
            # the worker pool used for decoding.
            stream.accept_waveform(sample_rate, samples)
            await self.compute_and_decode(stream)

            text = stream.result.text
            logging.info(f"result: {text}")

            # Never send an empty string: according to the original author's
            # note, socket.send("") is a no-op and the client would wait for
            # a reply indefinitely. A placeholder is sent instead.
            await socket.send(text or "<EMPTY>")
 def assert_file_exists(filename: str):
    """Raise AssertionError if ``filename`` is not an existing regular file.

    The check uses an explicit ``raise`` rather than an ``assert`` statement
    so that it is still performed when Python runs with ``-O``, which strips
    ``assert`` statements.

    Args:
      filename:
        Path of the file that must exist.
    Raises:
      AssertionError: If the path does not exist or is not a regular file.
    """
    if Path(filename).is_file():
        return

    raise AssertionError(
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )
 def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    """Build the offline recognizer selected by the command-line arguments.

    The model family is chosen by the first non-empty option among
    ``args.encoder`` (transducer), ``args.paraformer``, ``args.nemo_ctc`` and
    ``args.whisper_encoder``. The options of the families checked later must
    be empty, and the files of the chosen model are checked with
    ``assert_file_exists`` before the recognizer is constructed.

    Args:
      args:
        The parsed command-line arguments.
    Returns:
      The recognizer returned by the matching ``OfflineRecognizer.from_*``
      factory.
    Raises:
      ValueError: If none of the model options is given.
    """
    if args.encoder:
        for other in (
            "paraformer",
            "nemo_ctc",
            "whisper_encoder",
            "whisper_decoder",
        ):
            assert len(getattr(args, other)) == 0, getattr(args, other)

        for model_file in (args.encoder, args.decoder, args.joiner):
            assert_file_exists(model_file)

        return sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            max_active_paths=args.max_active_paths,
        )

    if args.paraformer:
        for other in ("nemo_ctc", "whisper_encoder", "whisper_decoder"):
            assert len(getattr(args, other)) == 0, getattr(args, other)

        assert_file_exists(args.paraformer)

        return sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
        )

    if args.nemo_ctc:
        for other in ("whisper_encoder", "whisper_decoder"):
            assert len(getattr(args, other)) == 0, getattr(args, other)

        assert_file_exists(args.nemo_ctc)

        return sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
            model=args.nemo_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
        )

    if args.whisper_encoder:
        for model_file in (args.whisper_encoder, args.whisper_decoder):
            assert_file_exists(model_file)

        return sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
        )

    raise ValueError("Please specify at least one model")
 def main():
    """Parse the arguments, build the recognizer and run the server.

    Raises:
      ValueError: If a certificate is given but is not a file, or if the
        document root is not a directory.
    """
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    recognizer = create_recognizer(args)

    # The certificate is optional; it is only validated when one is given.
    if args.certificate and not Path(args.certificate).is_file():
        raise ValueError(f"{args.certificate} does not exist")

    if not Path(args.doc_root).is_dir():
        raise ValueError(f"Directory {args.doc_root} does not exist")

    server = NonStreamingServer(
        recognizer=recognizer,
        max_wait_ms=args.max_wait_ms,
        max_batch_size=args.max_batch_size,
        nn_pool_size=args.nn_pool_size,
        max_message_size=args.max_message_size,
        max_queue_size=args.max_queue_size,
        max_active_connections=args.max_active_connections,
        certificate=args.certificate,
        doc_root=args.doc_root,
    )
    asyncio.run(server.run(args.port))
 if __name__ == "__main__":
    # Configure logging (filename prefix below) before starting the server.
    setup_logger("log/log-non-streaming-server")
    main()
--- a/python-api-examples/offline-websocket-client-decode-files-paralell.py
+++ b/python-api-examples/offline-websocket-client-decode-files-paralell.py
@@ -119,7 +119,13 @@ async def run(
        buf += (samples.size * 4).to_bytes(4, byteorder="little")
        buf += samples.tobytes()
-        await websocket.send(buf)
+        payload_len = 10240
        while len(buf) > payload_len:
            await websocket.send(buf[:payload_len])
            buf = buf[payload_len:]
        if buf:
            await websocket.send(buf)
        decoding_results = await websocket.recv()
        logging.info(f"{wave_filename}\n{decoding_results}")
--- a/python-api-examples/offline-websocket-client-decode-files-sequential.py
+++ b/python-api-examples/offline-websocket-client-decode-files-sequential.py
@@ -116,11 +116,18 @@ async def run(
            assert isinstance(sample_rate, int)
            assert samples.dtype == np.float32, samples.dtype
            assert samples.ndim == 1, samples.dim
            buf = sample_rate.to_bytes(4, byteorder="little")  # 4 bytes
            buf += (samples.size * 4).to_bytes(4, byteorder="little")
            buf += samples.tobytes()
-            await websocket.send(buf)
+            payload_len = 10240
            while len(buf) > payload_len:
                await websocket.send(buf[:payload_len])
                buf = buf[payload_len:]
            if buf:
                await websocket.send(buf)
            decoding_results = await websocket.recv()
            print(decoding_results)
--- a/python-api-examples/online-websocket-client-decode-file.py
+++ b/python-api-examples/online-websocket-client-decode-file.py
@@ -15,10 +15,9 @@ Usage:
 (Note: You have to first start the server before starting the client)
-You can find the server at
+You can find the C++ server at
 https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-server.cc
-
+or use the python server ./python-api-examples/streaming_server.py
 Note: The server is implemented in C++.
 There is also a C++ version of the client. Please see
 https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-client.cc
@@ -115,7 +114,8 @@ async def receive_results(socket: websockets.WebSocketServerProtocol):
            last_message = message
            logging.info(message)
        else:
-            return last_message
+            break
    return last_message
 async def run(
@@ -142,6 +142,7 @@ async def run(
            await websocket.send(d)
            # Simulate streaming. You can remove the sleep if you want
            await asyncio.sleep(seconds_per_message)  # in seconds
            start += samples_per_message
--- a/python-api-examples/online-websocket-client-microphone.py
+++ b/python-api-examples/online-websocket-client-microphone.py
@@ -12,10 +12,9 @@ Usage:
 (Note: You have to first start the server before starting the client)
-You can find the server at
+You can find the C++ server at
 https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-server.cc
-
+or use the python server ./python-api-examples/streaming_server.py
 Note: The server is implemented in C++.
 There is also a C++ version of the client. Please see
 https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-client.cc
--- a/python-api-examples/streaming_server.py
+++ b/python-api-examples/streaming_server.py
@@ -13,11 +13,37 @@ Usage:
 Example:
 (1) Without a certificate
 python3 ./python-api-examples/streaming_server.py \
  --encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
 (2) With a certificate
 (a) Generate a certificate first:
    cd python-api-examples/web
    ./generate-certificate.py
    cd ../..
 (b) Start the server
 python3 ./python-api-examples/streaming_server.py \
  --encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --certificate ./python-api-examples/web/cert.pem
 Please refer to
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
 to download pre-trained models.
 The model in the above help messages is from
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
 """
 import argparse
@@ -35,6 +61,7 @@ from typing import List, Optional, Tuple
 import numpy as np
 import sherpa_onnx
 import websockets
 from http_server import HttpServer
@@ -269,8 +296,8 @@ def get_args():
    parser.add_argument(
        "--num-threads",
        type=int,
-        default=1,
+        default=2,
-        help="Sets the number of threads used for interop parallelism (e.g. in JIT interpreter) on CPU.",
+        help="Number of threads to run the neural network model",
    )
    parser.add_argument(
@@ -278,8 +305,10 @@ def get_args():
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
-        You can use sherpa/bin/web/generate-certificate.py
+        You can use ./web/generate-certificate.py
        to generate the certificate `cert.pem`.
        Note ./web/generate-certificate.py will generate three files but you
        only need to pass the generated cert.pem to this option.
        """,
    )
@@ -287,7 +316,7 @@ def get_args():
        "--doc-root",
        type=str,
        default="./python-api-examples/web",
-        help="""Path to the web root""",
+        help="Path to the web root",
    )
    return parser.parse_args()
@@ -299,9 +328,9 @@ def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
        encoder=args.encoder_model,
        decoder=args.decoder_model,
        joiner=args.joiner_model,
-        num_threads=1,
+        num_threads=args.num_threads,
-        sample_rate=16000,
+        sample_rate=args.sample_rate,
-        feature_dim=80,
+        feature_dim=args.feat_dim,
        decoding_method=args.decoding_method,
        max_active_paths=args.num_active_paths,
        enable_endpoint_detection=args.use_endpoint != 0,
@@ -359,7 +388,7 @@ class StreamingServer(object):
            server locate.
          certificate:
            Optional. If not None, it will use secure websocket.
-            You can use ./sherpa/bin/web/generate-certificate.py to generate
+            You can use ./web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        self.recognizer = recognizer
@@ -373,6 +402,7 @@ class StreamingServer(object):
        )
        self.stream_queue = asyncio.Queue()
        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
@@ -382,11 +412,10 @@ class StreamingServer(object):
        self.current_active_connections = 0
        self.sample_rate = int(recognizer.config.feat_config.sampling_rate)
        self.decoding_method = recognizer.config.decoding_method
    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
-        them to the RNN-T model for computation and decoding.
+        them to the neural network model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
@@ -442,7 +471,22 @@ class StreamingServer(object):
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"
-            found, response, mime_type = self.http_server.process_request(path)
+
            if path in ("/upload.html", "/offline_record.html"):
                response = r"""
 <!doctype html><html><head>
 <title>Speech recognition with next-gen Kaldi</title><body>
 <h2>Only /streaming_record.html is available for the streaming server.<h2>
 <br/>
 <br/>
 Go back to <a href="/streaming_record.html">/streaming_record.html</a>
 </body></head></html>
 """
                found = True
                mime_type = "text/html"
            else:
                found, response, mime_type = self.http_server.process_request(path)
            if isinstance(response, str):
                response = response.encode("utf-8")
@@ -484,12 +528,21 @@ class StreamingServer(object):
            process_request=self.process_request,
            ssl=ssl_context,
        ):
-            ip_list = ["0.0.0.0", "localhost", "127.0.0.1"]
+            ip_list = ["localhost"]
-            ip_list.append(socket.gethostbyname(socket.gethostname()))
+            if ssl_context:
                ip_list += ["0.0.0.0", "127.0.0.1"]
                ip_list.append(socket.gethostbyname(socket.gethostname()))
            proto = "http://" if ssl_context is None else "https://"
            s = "Please visit one of the following addresses:\n\n"
            for p in ip_list:
                s += "  " + proto + p + f":{port}" "\n"
            if not ssl_context:
                s += "\nSince you are not providing a certificate, you cannot "
                s += "use your microphone from within the browser using "
                s += "public IP addresses. Only localhost can be used."
                s += "You also cannot use 0.0.0.0 or 127.0.0.1"
            logging.info(s)
            await asyncio.Future()  # run forever
@@ -525,7 +578,7 @@ class StreamingServer(object):
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
-        deocoding result back to the client.
+        decoding result back to the client.
        Args:
          socket:
@@ -560,8 +613,6 @@ class StreamingServer(object):
                    self.recognizer.reset(stream)
                    segment += 1
                print(message)
                await socket.send(json.dumps(message))
        tail_padding = np.zeros(int(self.sample_rate * 0.3)).astype(np.float32)
@@ -583,7 +634,7 @@ class StreamingServer(object):
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Optional[np.ndarray]:
-        """Receives a tensor from the client.
+        """Receive a tensor from the client.
        Each message contains either a bytes buffer containing audio samples
        in 16 kHz or contains "Done" meaning the end of utterance.
@@ -660,6 +711,6 @@ def main():
 if __name__ == "__main__":
-    log_filename = "log/log-streaming-zipformer"
+    log_filename = "log/log-streaming-server"
    setup_logger(log_filename)
    main()
--- a/python-api-examples/web/README.md
+++ b/python-api-examples/web/README.md
@@ -1,34 +0,0 @@
 # How to use
 ```bash
 git clone https://github.com/k2-fsa/sherpa
 cd sherpa/sherpa/bin/web
 python3 -m http.server 6009
 ```
 and then go to <http://localhost:6009>
 You will see a page like the following screenshot:
 ![Screenshot if you visit http://localhost:6009](./pic/web-ui.png)
 If your server is listening at the port *6006* with address **localhost**,
 then you can either click **Upload**, **Streaming_Record** or **Offline_Record** to play with it.
 ## File descriptions
 ### ./css/bootstrap.min.css
 It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css
 ### ./js/jquery-3.6.0.min.js
 It is downloaded from https://code.jquery.com/jquery-3.6.0.min.js
 ### ./js/popper.min.js
 It is downloaded from https://cdn.jsdelivr.net/npm/popper.js@1.14.7/dist/umd/popper.min.js
 ### ./js/bootstrap.min.js
 It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/js/bootstrap.min.js
--- a/python-api-examples/web/generate-certificate.py
+++ b/python-api-examples/web/generate-certificate.py
@@ -35,8 +35,8 @@ Otherwise, you may get the following error from within you browser:
 def cert_gen(
-    emailAddress="https://github.com/k2-fsa/k2",
+    emailAddress="https://github.com/k2-fsa/sherpa-onnx",
-    commonName="sherpa",
+    commonName="sherpa-onnx",
    countryName="CN",
    localityName="k2-fsa",
    stateOrProvinceName="k2-fsa",
@@ -70,17 +70,13 @@ def cert_gen(
    cert.set_pubkey(k)
    cert.sign(k, "sha512")
    with open(CERT_FILE, "wt") as f:
-        f.write(
+        f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8"))
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    with open(KEY_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
    with open(ALL_IN_ONE_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
-        f.write(
+        f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8"))
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    print(f"Generated {CERT_FILE}")
    print(f"Generated {KEY_FILE}")
    print(f"Generated {ALL_IN_ONE_FILE}")
--- a/python-api-examples/web/index.html
+++ b/python-api-examples/web/index.html
@@ -53,7 +53,7 @@
  </ul>
  Code is available at
-  <a href="https://github.com/k2-fsa/sherpa"> https://github.com/k2-fsa/sherpa</a>
+  <a href="https://github.com/k2-fsa/sherpa-onnx"> https://github.com/k2-fsa/sherpa-onnx</a>
  <!-- Optional JavaScript -->
  <!-- jQuery first, then Popper.js, then Bootstrap JS -->
--- a/python-api-examples/web/js/offline_record.js
+++ b/python-api-examples/web/js/offline_record.js
@@ -60,6 +60,7 @@ const soundClips = document.getElementById('sound-clips');
 const canvas = document.getElementById('canvas');
 const mainSection = document.querySelector('.container');
 recordBtn.disabled = true;
 stopBtn.disabled = true;
 window.onload = (event) => {
@@ -95,9 +96,10 @@ clearBtn.onclick = function() {
 };
 function send_header(n) {
-  const header = new ArrayBuffer(4);
+  const header = new ArrayBuffer(8);
-  new DataView(header).setInt32(0, n, true /* littleEndian */);
+  new DataView(header).setInt32(0, expectedSampleRate, true /* littleEndian */);
-  socket.send(new Int32Array(header, 0, 1));
+  new DataView(header).setInt32(4, n, true /* littleEndian */);
  socket.send(new Int32Array(header, 0, 2));
 }
 // copied/modified from https://mdn.github.io/web-dictaphone/
--- a/python-api-examples/web/js/streaming_record.js
+++ b/python-api-examples/web/js/streaming_record.js
@@ -88,6 +88,7 @@ const canvas = document.getElementById('canvas');
 const mainSection = document.querySelector('.container');
 stopBtn.disabled = true;
 recordBtn.disabled = true;
 let audioCtx;
 const canvasCtx = canvas.getContext('2d');
--- a/python-api-examples/web/js/upload.js
+++ b/python-api-examples/web/js/upload.js
@@ -74,9 +74,11 @@ connectBtn.onclick = function() {
 };
 function send_header(n) {
-  const header = new ArrayBuffer(4);
+  const header = new ArrayBuffer(8);
-  new DataView(header).setInt32(0, n, true /* littleEndian */);
+  // assume the uploaded wave is 16000 Hz
-  socket.send(new Int32Array(header, 0, 1));
+  new DataView(header).setInt32(0, 16000, true /* littleEndian */);
  new DataView(header).setInt32(4, n, true /* littleEndian */);
  socket.send(new Int32Array(header, 0, 2));
 }
 function onFileChange() {
--- a/python-api-examples/web/offline_record.html
+++ b/python-api-examples/web/offline_record.html
@@ -33,9 +33,9 @@
        <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
      </div>
      <span class="input-group-text" id="ws-protocol">ws://</span>
-      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
+      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
      <span class="input-group-text">:</span>
-      <input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
+      <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
    </div>
    <div class="row">
--- a/python-api-examples/web/streaming_record.html
+++ b/python-api-examples/web/streaming_record.html
@@ -33,9 +33,9 @@
        <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
      </div>
      <span class="input-group-text" id="ws-protocol">ws://</span>
-      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
+      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
      <span class="input-group-text">:</span>
-      <input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
+      <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
    </div>
    <div class="row">
--- a/python-api-examples/web/upload.html
+++ b/python-api-examples/web/upload.html
@@ -32,9 +32,9 @@
      <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
    </div>
    <span class="input-group-text" id="ws-protocol">ws://</span>
-    <input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
+    <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
    <span class="input-group-text">:</span>
-    <input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
+    <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
  </div>
  <form>
--- a/sherpa-onnx/python/sherpa_onnx/init.py
+++ b/sherpa-onnx/python/sherpa_onnx/init.py
@@ -1,12 +1,7 @@
 from typing import Dict, List, Optional
-from _sherpa_onnx import Display
+from _sherpa_onnx import Display, OfflineStream, OnlineStream
 from .online_recognizer import OnlineRecognizer
 from .online_recognizer import OnlineStream
 from .offline_recognizer import OfflineRecognizer
-
+from .online_recognizer import OnlineRecognizer
 from .utils import encode_contexts
--- a/sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
+++ b/sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
@@ -41,6 +41,7 @@ class OfflineRecognizer(object):
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        max_active_paths: int = 4,
        context_score: float = 1.5,
        debug: bool = False,
        provider: str = "cpu",
@@ -72,6 +73,9 @@ class OfflineRecognizer(object):
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values: greedy_search, modified_beam_search.
          max_active_paths:
            Maximum number of active paths to keep. Used only when
            decoding_method is modified_beam_search.
          debug:
            True to show debug messages.
          provider:
@@ -103,6 +107,7 @@ class OfflineRecognizer(object):
            context_score=context_score,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self
    @classmethod
@@ -166,6 +171,7 @@ class OfflineRecognizer(object):
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self
    @classmethod
@@ -229,6 +235,7 @@ class OfflineRecognizer(object):
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self
    @classmethod
@@ -291,6 +298,7 @@ class OfflineRecognizer(object):
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self
    def create_stream(self, contexts_list: Optional[List[List[int]]] = None):