add streaming-server with web client (#164)
* add streaming-server with web client * small fixes
This commit is contained in:
82
python-api-examples/http_server.py
Normal file
82
python-api-examples/http_server.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||
#
|
||||
# See ../../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Tuple, Union
|
||||
|
||||
# Please sort it alphabetically
|
||||
_static_files = (
|
||||
("/css/bootstrap.min.css", "text/css"),
|
||||
("/css/bootstrap.min.css.map", "text/css"),
|
||||
("/index.html", "text/html"),
|
||||
("/js/bootstrap.min.js", "application/javascript"),
|
||||
("/js/bootstrap.min.js.map", "application/javascript"),
|
||||
("/js/jquery-3.6.0.min.js", "application/javascript"),
|
||||
("/js/offline_record.js", "application/javascript"),
|
||||
("/js/offline_record.js", "application/javascript"),
|
||||
("/js/popper.min.js", "application/javascript"),
|
||||
("/js/popper.min.js.map", "application/javascript"),
|
||||
("/js/streaming_record.js", "application/javascript"),
|
||||
("/js/upload.js", "application/javascript"),
|
||||
("/k2-logo.png", "image/png"),
|
||||
("/nav-partial.html", "text/html"),
|
||||
("/offline_record.html", "text/html"),
|
||||
("/streaming_record.html", "text/html"),
|
||||
("/upload.html", "text/html"),
|
||||
)
|
||||
|
||||
_404_page = r"""
|
||||
<!doctype html><html><head>
|
||||
<title>Speech recognition with next-gen Kaldi</title><body>
|
||||
<h1>404 ERROR! Please re-check your URL</h1>
|
||||
</body></head></html>
|
||||
"""
|
||||
|
||||
|
||||
def read_file(root: str, name: str) -> Union[str, bytes]:
    """Read a file under ``root`` and return its content.

    The file is first opened in text mode; if decoding fails (e.g. for
    images such as ``k2-logo.png``), it is re-read in binary mode.

    Args:
      root:
        The directory containing the static files.
      name:
        Path of the file relative to ``root``.
    Returns:
      The file content: a ``str`` for text files, ``bytes`` for binary
      files.
    """
    path = f"{root}/{name}"
    try:
        with open(path) as f:
            return f.read()
    except UnicodeDecodeError:
        # Not decodable as text; fall back to raw bytes. A missing file
        # now raises FileNotFoundError directly instead of being hidden
        # by the previous bare ``except``.
        with open(path, "rb") as f:
            return f.read()
|
||||
|
||||
|
||||
class HttpServer:
    """A simple HTTP server that hosts only static files.

    Every file listed in ``_static_files`` is read into memory at
    construction time, so serving a request never touches the disk.
    """

    def __init__(self, doc_root: str):
        """
        Args:
          doc_root:
            Path to the directory containing the static files.
        """
        content = {}
        for f, mime_type in _static_files:
            content[f] = (read_file(doc_root, f), mime_type)
        self.content = content

    # Fix: the annotation was Tuple[str, str, str]; the method actually
    # returns (bool, str-or-bytes content, str MIME type).
    def process_request(self, f: str) -> Tuple[bool, Union[str, bytes], str]:
        """
        Args:
          f:
            The filename to read.
        Returns:
          Return a tuple:
            - a bool, True if the given file is found. False otherwise.
            - the content of the file if found. Otherwise, it
              contains the content for the 404 page
            - a str, the MIME type of the returned content
        """
        if f in self.content:
            data, mime_type = self.content[f]
            return True, data, mime_type
        else:
            return False, _404_page, "text/html"
|
||||
@@ -119,7 +119,9 @@ def get_args():
|
||||
"--sample-rate",
|
||||
type=int,
|
||||
default=16000,
|
||||
help="Sample rate of the feature extractor. Must match the one expected by the model. Note: The input sound files can have a different sample rate from this argument.",
|
||||
help="""Sample rate of the feature extractor. Must match the one
|
||||
expected by the model. Note: The input sound files can have a
|
||||
different sample rate from this argument.""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
657
python-api-examples/streaming_server.py
Executable file
657
python-api-examples/streaming_server.py
Executable file
@@ -0,0 +1,657 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2022-2023 Xiaomi Corp.
|
||||
#
|
||||
"""
|
||||
A server for streaming ASR recognition. By streaming it means the audio samples
|
||||
are coming in real-time. You don't need to wait until all audio samples are
|
||||
captured before sending them for recognition.
|
||||
|
||||
It supports multiple clients sending at the same time.
|
||||
|
||||
Usage:
|
||||
./streaming_server.py --help
|
||||
|
||||
Example:
|
||||
|
||||
python3 ./python-api-examples/streaming_server.py \
|
||||
--encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
|
||||
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import http
|
||||
import json
|
||||
import logging
|
||||
import socket
|
||||
import ssl
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
import websockets
|
||||
from http_server import HttpServer
|
||||
|
||||
|
||||
def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
) -> None:
    """Configure the root logger.

    Logs go to ``<log_filename>-<timestamp>.txt``; parent directories
    are created as needed.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    """
    date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    log_filename = f"{log_filename}-{date_time}.txt"

    Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

    # Any unrecognized level name (including "error") maps to ERROR,
    # matching the original if/elif chain's fall-through default.
    level = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "critical": logging.CRITICAL,
    }.get(log_level, logging.ERROR)

    logging.basicConfig(
        filename=log_filename,
        format=formatter,
        level=level,
        filemode="w",
    )
    if use_console:
        # Mirror the file log on stderr with the same format and level.
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(logging.Formatter(formatter))
        logging.getLogger("").addHandler(console)
|
||||
|
||||
|
||||
def add_model_args(parser: argparse.ArgumentParser):
    """Register model-related command-line options on ``parser``."""
    # The four model files are all required string paths.
    for flag, help_text in (
        ("--encoder-model", "Path to the encoder model"),
        ("--decoder-model", "Path to the decoder model."),
        ("--joiner-model", "Path to the joiner model."),
        ("--tokens", "Path to tokens.txt"),
    ):
        parser.add_argument(flag, type=str, required=True, help=help_text)

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate of the data used to train the model. "
        "Caution: If your input sound files have a different sampling rate, "
        "we will do resampling inside",
    )

    parser.add_argument(
        "--feat-dim",
        type=int,
        default=80,
        help="Feature dimension of the model",
    )
|
||||
|
||||
|
||||
def add_decoding_args(parser: argparse.ArgumentParser):
    """Register decoding-related command-line options on ``parser``."""
    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search
        """,
    )

    # Options that only take effect with --decoding-method=modified_beam_search.
    add_modified_beam_search_args(parser)
|
||||
|
||||
|
||||
def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    """Register options specific to modified_beam_search decoding."""
    parser.add_argument(
        "--num-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )
|
||||
|
||||
|
||||
def add_endpointing_args(parser: argparse.ArgumentParser):
    """Register endpoint-detection command-line options on ``parser``."""
    parser.add_argument(
        "--use-endpoint",
        type=int,
        default=1,
        # Fix: typo "endpoiting" -> "endpointing".
        help="1 to enable endpointing. 0 to disable it",
    )

    parser.add_argument(
        "--rule1-min-trailing-silence",
        type=float,
        default=2.4,
        # Fix: "in seconds)" was missing its opening parenthesis.
        help="""This endpointing rule1 requires duration of trailing silence
        (in seconds) to be >= this value""",
    )

    parser.add_argument(
        "--rule2-min-trailing-silence",
        type=float,
        default=1.2,
        help="""This endpointing rule2 requires duration of trailing silence
        (in seconds) to be >= this value.""",
    )

    parser.add_argument(
        "--rule3-min-utterance-length",
        type=float,
        default=20,
        help="""This endpointing rule3 requires utterance-length (in seconds)
        to be >= this value.""",
    )
|
||||
|
||||
|
||||
def get_args():
    """Parse and return all command-line arguments for the server."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Model / decoding / endpointing options live in dedicated helpers.
    add_model_args(parser)
    add_decoding_args(parser)
    add_endpointing_args(parser)

    parser.add_argument(
        "--port",
        type=int,
        default=6006,
        help="The server will listen on this port",
    )

    parser.add_argument(
        "--nn-pool-size",
        type=int,
        default=1,
        help="Number of threads for NN computation and decoding.",
    )

    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=50,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )

    parser.add_argument(
        "--max-wait-ms",
        type=float,
        default=10,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the stream queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )

    parser.add_argument(
        "--max-message-size",
        type=int,
        # 1 MiB per websocket message.
        default=(1 << 20),
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )

    parser.add_argument(
        "--max-queue-size",
        type=int,
        default=32,
        help="Max number of messages in the queue for each connection.",
    )

    parser.add_argument(
        "--max-active-connections",
        type=int,
        default=500,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Sets the number of threads used for interop parallelism (e.g. in JIT interpreter) on CPU.",
    )

    parser.add_argument(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use sherpa/bin/web/generate-certificate.py
        to generate the certificate `cert.pem`.
        """,
    )

    parser.add_argument(
        "--doc-root",
        type=str,
        default="./python-api-examples/web",
        help="""Path to the web root""",
    )

    return parser.parse_args()
|
||||
|
||||
|
||||
def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
    """Create a streaming recognizer from parsed command-line arguments.

    Args:
      args:
        The namespace returned by :func:`get_args`.
    Returns:
      A configured ``sherpa_onnx.OnlineRecognizer``.
    """
    # Fix: num_threads, sample_rate, and feature_dim were hard-coded to
    # 1, 16000, and 80, silently ignoring the --num-threads,
    # --sample-rate, and --feat-dim options declared in add_model_args()
    # and get_args(). The defaults keep the previous behavior.
    recognizer = sherpa_onnx.OnlineRecognizer(
        tokens=args.tokens,
        encoder=args.encoder_model,
        decoder=args.decoder_model,
        joiner=args.joiner_model,
        num_threads=args.num_threads,
        sample_rate=args.sample_rate,
        feature_dim=args.feat_dim,
        decoding_method=args.decoding_method,
        max_active_paths=args.num_active_paths,
        enable_endpoint_detection=args.use_endpoint != 0,
        rule1_min_trailing_silence=args.rule1_min_trailing_silence,
        rule2_min_trailing_silence=args.rule2_min_trailing_silence,
        rule3_min_utterance_length=args.rule3_min_utterance_length,
    )

    return recognizer
|
||||
|
||||
|
||||
def format_timestamps(timestamps: List[float]) -> List[str]:
    """Render each timestamp (in seconds) with millisecond precision."""
    return [f"{t:.3f}" for t in timestamps]
|
||||
|
||||
|
||||
class StreamingServer(object):
    """Websocket server for streaming ASR, plus a tiny HTTP file server.

    Normal HTTP requests on the same port are answered with static files
    from ``doc_root``; websocket upgrades become recognition sessions.
    """

    def __init__(
        self,
        recognizer: sherpa_onnx.OnlineRecognizer,
        nn_pool_size: int,
        max_wait_ms: float,
        max_batch_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        doc_root: str,
        certificate: Optional[str] = None,
    ):
        """
        Args:
          recognizer:
            An instance of online recognizer.
          nn_pool_size:
            Number of threads for the thread pool that is responsible for
            neural network computation and decoding.
          max_wait_ms:
            Max wait time in milliseconds in order to build a batch of
            `batch_size`.
          max_batch_size:
            Max batch size for inference.
          max_message_size:
            Max size in bytes per message.
          max_queue_size:
            Max number of messages in the queue for each connection.
          max_active_connections:
            Max number of active connections. Once number of active client
            equals to this limit, the server refuses to accept new connections.
          doc_root:
            Path to the directory where files like index.html for the HTTP
            server locate.
          certificate:
            Optional. If not None, it will use secure websocket.
            You can use ./sherpa/bin/web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        # Note: the original docstring also documented beam_search_params
        # and online_endpoint_config, which are not parameters of this
        # method; those entries have been removed.
        self.recognizer = recognizer

        self.certificate = certificate
        self.http_server = HttpServer(doc_root)

        self.nn_pool = ThreadPoolExecutor(
            max_workers=nn_pool_size,
            thread_name_prefix="nn",
        )

        self.stream_queue = asyncio.Queue()
        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections

        self.current_active_connections = 0

        self.sample_rate = int(recognizer.config.feat_config.sampling_rate)
        self.decoding_method = recognizer.config.decoding_method

    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
        them to the RNN-T model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            # Drain up to max_batch_size items that are already queued;
            # QueueEmpty simply ends the batch early.
            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()

                    assert self.recognizer.is_ready(item[0])

                    batch.append(item)
            except asyncio.QueueEmpty:
                pass
            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            # Run the blocking NN computation on the thread pool so the
            # event loop stays responsive.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self.nn_pool,
                self.recognizer.decode_streams,
                stream_list,
            )

            # Wake up every producer waiting in compute_and_decode().
            for f in future_list:
                self.stream_queue.task_done()
                f.set_result(None)

    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OnlineStream,
    ) -> None:
        """Put the stream into the queue and wait it to be processed by the
        consumer task.

        Args:
          stream:
            The stream to be processed. Note: It is changed in-place.
        """
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        await self.stream_queue.put((stream, future))
        await future

    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        """Serve plain HTTP requests; admit or refuse websocket upgrades.

        Returning None lets the websockets library proceed with the
        upgrade handshake.
        """
        if "sec-websocket-key" not in request_headers:
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"
            found, response, mime_type = self.http_server.process_request(path)
            if isinstance(response, str):
                response = response.encode("utf-8")

            if not found:
                status = http.HTTPStatus.NOT_FOUND
            else:
                status = http.HTTPStatus.OK
            header = {"Content-Type": mime_type}
            return status, header, response

        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None

        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."

        return status, header, response

    async def run(self, port: int):
        """Start the consumer task and serve websocket/HTTP on ``port``."""
        task = asyncio.create_task(self.stream_consumer_task())

        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            ssl_context = None
            logging.info("No certificate provided")

        async with websockets.serve(
            self.handle_connection,
            host="",
            port=port,
            max_size=self.max_message_size,
            max_queue=self.max_queue_size,
            process_request=self.process_request,
            ssl=ssl_context,
        ):
            ip_list = ["0.0.0.0", "localhost", "127.0.0.1"]
            ip_list.append(socket.gethostbyname(socket.gethostname()))
            proto = "http://" if ssl_context is None else "https://"
            s = "Please visit one of the following addresses:\n\n"
            for p in ip_list:
                s += " " + proto + p + f":{port}" "\n"
            logging.info(s)

            await asyncio.Future()  # run forever

        await task  # not reachable

    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosedError:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Decrement so that it can accept new connections
            self.current_active_connections -= 1

            logging.info(
                f"Disconnected: {socket.remote_address}. "
                f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
            )

    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        logging.info(
            f"Connected: {socket.remote_address}. "
            f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
        )

        stream = self.recognizer.create_stream()
        segment = 0

        while True:
            samples = await self.recv_audio_samples(socket)
            if samples is None:
                break

            # TODO(fangjun): At present, we assume the sampling rate
            # of the received audio samples equal to --sample-rate
            stream.accept_waveform(sample_rate=self.sample_rate, waveform=samples)

            while self.recognizer.is_ready(stream):
                await self.compute_and_decode(stream)
                result = self.recognizer.get_result(stream)

                message = {
                    "text": result,
                    "segment": segment,
                }
                if self.recognizer.is_endpoint(stream):
                    self.recognizer.reset(stream)
                    segment += 1

                print(message)

                await socket.send(json.dumps(message))

        # Fix: the original called np.rand(...), which does not exist in
        # NumPy and raised AttributeError at the end of every session.
        # Pad with 0.3 seconds of silence so the last word gets decoded.
        tail_padding = np.zeros(int(self.sample_rate * 0.3), dtype=np.float32)
        # Fix: use the sample_rate= keyword here as well (the call above
        # used sample_rate= while this one used sampling_rate=).
        stream.accept_waveform(sample_rate=self.sample_rate, waveform=tail_padding)
        stream.input_finished()
        while self.recognizer.is_ready(stream):
            await self.compute_and_decode(stream)

        result = self.recognizer.get_result(stream)

        message = {
            "text": result,
            "segment": segment,
        }

        await socket.send(json.dumps(message))

    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Optional[np.ndarray]:
        """Receives a tensor from the client.

        Each message contains either a bytes buffer containing audio samples
        in 16 kHz or contains "Done" meaning the end of utterance.

        Args:
          socket:
            The socket for communicating with the client.
        Returns:
          Return a 1-D np.float32 tensor containing the audio samples or
          return None.
        """
        message = await socket.recv()
        if message == "Done":
            return None

        return np.frombuffer(message, dtype=np.float32)
|
||||
|
||||
|
||||
def check_args(args):
    """Validate command-line arguments.

    Raises:
      ValueError: if a model file is missing, the decoding method is
        unknown, or the beam-search configuration is invalid.

    Using explicit exceptions instead of ``assert`` keeps the checks
    active when Python runs with -O, and matches the ValueError already
    used for the tokens file.
    """
    for path in (
        args.encoder_model,
        args.decoder_model,
        args.joiner_model,
        args.tokens,
    ):
        if not Path(path).is_file():
            raise ValueError(f"{path} does not exist")

    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")

    if args.decoding_method == "modified_beam_search":
        if args.num_active_paths <= 0:
            raise ValueError(
                f"--num-active-paths must be > 0, given: {args.num_active_paths}"
            )
|
||||
|
||||
|
||||
def main():
    """Entry point: parse arguments, build the recognizer, run the server."""
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    recognizer = create_recognizer(args)

    # Validate filesystem-dependent options before starting the server.
    if args.certificate and not Path(args.certificate).is_file():
        raise ValueError(f"{args.certificate} does not exist")

    if not Path(args.doc_root).is_dir():
        raise ValueError(f"Directory {args.doc_root} does not exist")

    server = StreamingServer(
        recognizer=recognizer,
        nn_pool_size=args.nn_pool_size,
        max_batch_size=args.max_batch_size,
        max_wait_ms=args.max_wait_ms,
        max_message_size=args.max_message_size,
        max_queue_size=args.max_queue_size,
        max_active_connections=args.max_active_connections,
        certificate=args.certificate,
        doc_root=args.doc_root,
    )
    asyncio.run(server.run(args.port))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # setup_logger() appends a timestamp and ".txt" to this prefix and
    # creates the log/ directory if needed.
    log_filename = "log/log-streaming-zipformer"
    setup_logger(log_filename)
    main()
|
||||
3
python-api-examples/web/.gitignore
vendored
Normal file
3
python-api-examples/web/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*.pem
|
||||
*.key
|
||||
*.crt
|
||||
34
python-api-examples/web/README.md
Normal file
34
python-api-examples/web/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# How to use
|
||||
|
||||
```bash
|
||||
git clone https://github.com/k2-fsa/sherpa
|
||||
|
||||
cd sherpa/sherpa/bin/web
|
||||
python3 -m http.server 6009
|
||||
```
|
||||
and then go to <http://localhost:6009>
|
||||
|
||||
You will see a page like the following screenshot:
|
||||
|
||||

|
||||
|
||||
If your server is listening at the port *6006* with address **localhost**,
|
||||
then you can either click **Upload**, **Streaming_Record** or **Offline_Record** to play with it.
|
||||
|
||||
## File descriptions
|
||||
|
||||
### ./css/bootstrap.min.css
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css
|
||||
|
||||
### ./js/jquery-3.6.0.min.js
|
||||
|
||||
It is downloaded from https://code.jquery.com/jquery-3.6.0.min.js
|
||||
|
||||
### ./js/popper.min.js
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/popper.js@1.14.7/dist/umd/popper.min.js
|
||||
|
||||
### ./js/bootstrap.min.js
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/js/bootstrap.min.js
|
||||
7
python-api-examples/web/css/bootstrap.min.css
vendored
Normal file
7
python-api-examples/web/css/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/css/bootstrap.min.css.map
Normal file
1
python-api-examples/web/css/bootstrap.min.css.map
Normal file
File diff suppressed because one or more lines are too long
89
python-api-examples/web/generate-certificate.py
Executable file
89
python-api-examples/web/generate-certificate.py
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
pip install pyopenssl
|
||||
"""
|
||||
|
||||
from OpenSSL import crypto
|
||||
|
||||
# The code in this file is modified from
|
||||
# https://stackoverflow.com/questions/27164354/create-a-self-signed-x509-certificate-in-python
|
||||
|
||||
"""
|
||||
This script generates 3 files:
|
||||
- private.key
|
||||
- selfsigned.crt
|
||||
- cert.pem
|
||||
|
||||
You need cert.pem when you start a https server
|
||||
or a secure websocket server.
|
||||
|
||||
Note: You need to change serialNumber if you want to generate
|
||||
a new certificate as two different certificates cannot share
|
||||
the same serial number if they are issued by the same organization.
|
||||
|
||||
Otherwise, you may get the following error from within you browser:
|
||||
|
||||
An error occurred during a connection to 127.0.0.1:6007. You have received an
|
||||
invalid certificate. Please contact the server administrator or email
|
||||
correspondent and give them the following information: Your certificate
|
||||
contains the same serial number as another certificate issued by the
|
||||
certificate authority. Please get a new certificate containing a unique
|
||||
serial number. Error code: SEC_ERROR_REUSED_ISSUER_AND_SERIAL
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def cert_gen(
    emailAddress="https://github.com/k2-fsa/k2",
    commonName="sherpa",
    countryName="CN",
    localityName="k2-fsa",
    stateOrProvinceName="k2-fsa",
    organizationName="k2-fsa",
    organizationUnitName="k2-fsa",
    serialNumber=3,
    validityStartInSeconds=0,
    validityEndInSeconds=10 * 365 * 24 * 60 * 60,
    KEY_FILE="private.key",
    CERT_FILE="selfsigned.crt",
    ALL_IN_ONE_FILE="cert.pem",
):
    """Generate a self-signed X.509 certificate and private key.

    Three files are written: ``KEY_FILE`` (private key), ``CERT_FILE``
    (certificate), and ``ALL_IN_ONE_FILE`` (key + certificate in one
    PEM file, for servers expecting a single file).

    Note: Re-using serialNumber for a new certificate from the same
    issuer makes browsers reject it; bump it when regenerating.
    """
    # can look at generated file using openssl:
    # openssl x509 -inform pem -in selfsigned.crt -noout -text
    # create a key pair
    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 4096)
    # create a self-signed cert
    cert = crypto.X509()
    cert.get_subject().C = countryName
    cert.get_subject().ST = stateOrProvinceName
    cert.get_subject().L = localityName
    cert.get_subject().O = organizationName  # noqa
    cert.get_subject().OU = organizationUnitName
    cert.get_subject().CN = commonName
    cert.get_subject().emailAddress = emailAddress
    cert.set_serial_number(serialNumber)
    # Fix: honor validityStartInSeconds instead of a hard-coded 0 so the
    # parameter actually takes effect (the default 0 keeps the previous
    # behavior).
    cert.gmtime_adj_notBefore(validityStartInSeconds)
    cert.gmtime_adj_notAfter(validityEndInSeconds)
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(k)
    cert.sign(k, "sha512")
    with open(CERT_FILE, "wt") as f:
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    with open(KEY_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))

    with open(ALL_IN_ONE_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    print(f"Generated {CERT_FILE}")
    print(f"Generated {KEY_FILE}")
    print(f"Generated {ALL_IN_ONE_FILE}")


cert_gen()
|
||||
71
python-api-examples/web/index.html
Normal file
71
python-api-examples/web/index.html
Normal file
@@ -0,0 +1,71 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
<link rel="icon"
|
||||
type="image/png"
|
||||
href="./k2-logo.png">
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<ul class="list-unstyled">
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Upload</h5>
|
||||
<p>Recognition from a selected file</p>
|
||||
</div>
|
||||
      </li>
|
||||
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Streaming_Record</h5>
|
||||
<p>Recognition from real-time recordings</p>
|
||||
</div>
|
||||
</li>
|
||||
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Offline_Record</h5>
|
||||
<p>Recognition from offline recordings</p>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
Code is available at
|
||||
<a href="https://github.com/k2-fsa/sherpa"> https://github.com/k2-fsa/sherpa</a>
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
7
python-api-examples/web/js/bootstrap.min.js
vendored
Normal file
7
python-api-examples/web/js/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/js/bootstrap.min.js.map
Normal file
1
python-api-examples/web/js/bootstrap.min.js.map
Normal file
File diff suppressed because one or more lines are too long
2
python-api-examples/web/js/jquery-3.6.0.min.js
vendored
Normal file
2
python-api-examples/web/js/jquery-3.6.0.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
395
python-api-examples/web/js/offline_record.js
Normal file
395
python-api-examples/web/js/offline_record.js
Normal file
@@ -0,0 +1,395 @@
|
||||
// This file copies and modifies code
|
||||
// from https://mdn.github.io/web-dictaphone/scripts/app.js
|
||||
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
|
||||
|
||||
var socket;
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const uploadBtn = document.getElementById('file');
|
||||
|
||||
// Open a WebSocket to the ASR server named by the ip/port inputs and
// wire up handlers that toggle the record/connect buttons and display
// the single recognition result the server sends back.
function initWebSocket() {
  // Use wss:// when the page itself is served over https, ws:// otherwise.
  const scheme = window.location.protocol == 'https:' ? 'wss://' : 'ws://';

  const host = serverIpInput.value;
  const port = serverPortInput.value;
  console.log('protocol: ', scheme);
  console.log('server_ip: ', host);
  console.log('server_port: ', port);

  const uri = scheme + host + ':' + port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: recording becomes possible, connecting is disabled.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable recording until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server sends the final recognition result as one message;
  // show it, acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
|
||||
|
||||
const recordBtn = document.getElementById('offline_record');
|
||||
const stopBtn = document.getElementById('offline_stop');
|
||||
const clearBtn = document.getElementById('clear');
|
||||
const soundClips = document.getElementById('sound-clips');
|
||||
const canvas = document.getElementById('canvas');
|
||||
const mainSection = document.querySelector('.container');
|
||||
|
||||
stopBtn.disabled = true;
|
||||
|
||||
window.onload = (event) => {
|
||||
console.log('page is fully loaded');
|
||||
console.log('protocol', window.location.protocol);
|
||||
console.log('port', window.location.port);
|
||||
if (window.location.protocol == 'https:') {
|
||||
document.getElementById('ws-protocol').textContent = 'wss://';
|
||||
}
|
||||
serverIpInput.value = window.location.hostname;
|
||||
serverPortInput.value = window.location.port;
|
||||
};
|
||||
|
||||
connectBtn.onclick = function() {
|
||||
initWebSocket();
|
||||
};
|
||||
|
||||
|
||||
let audioCtx;
|
||||
const canvasCtx = canvas.getContext('2d');
|
||||
let mediaStream;
|
||||
let analyser;
|
||||
|
||||
let expectedSampleRate = 16000;
|
||||
let recordSampleRate; // the sampleRate of the microphone
|
||||
let recorder = null; // the microphone
|
||||
let leftchannel = []; // TODO: Use a single channel
|
||||
|
||||
let recordingLength = 0; // number of samples so far
|
||||
|
||||
clearBtn.onclick = function() {
|
||||
document.getElementById('results').value = '';
|
||||
};
|
||||
|
||||
// Tell the server how many payload bytes follow by sending a
// 4-byte little-endian int32 length header over the websocket.
function send_header(n) {
  const headerBuf = new ArrayBuffer(4);
  const dv = new DataView(headerBuf);
  dv.setInt32(0, n, true /* littleEndian */);
  socket.send(new Int32Array(headerBuf, 0, 1));
}
|
||||
|
||||
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Capture microphone audio, visualize it on the canvas, and on "stop"
// build a playable clip and send the float32 samples to the server.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // fallback for very old browsers
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For every filled capture buffer: downsample to expectedSampleRate,
    // clamp to [-1, 1], convert to int16 and keep the chunk for later.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;
        buf[i] = s * 32767;
      }
      leftchannel.push(buf);
      // Fix: count the samples actually stored (after downsampling)
      // rather than the raw capture-buffer size.
      recordingLength += buf.length;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      // Rescale int16 back to float32 in [-1, 1] for the server payload.
      let buf = new Float32Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        buf[i] = s / 32767.0;
      }
      const blob = toWav(samples);

      leftchannel = [];
      // Fix: reset the counter too, so a second recording does not
      // inherit the previous clip's length.
      recordingLength = 0;
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };

      buf = buf.buffer

      // Send the float32 samples in 4 KiB slices, preceded by a
      // 4-byte length header (see send_header).
      let n = 1024 * 4;  // send this number of bytes per request.
      console.log('buf length, ' + buf.byteLength);
      send_header(buf.byteLength);

      for (let start = 0; start < buf.byteLength; start += n) {
        socket.send(buf.slice(start, start + n));
      }
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
|
||||
|
||||
// Draw a live oscilloscope of the microphone signal on the canvas.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  // Creating the source node ties the stream into this AudioContext.
  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw();

  // Paint one waveform frame and schedule the next via rAF.
  function draw() {
    const WIDTH = canvas.width;
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    const step = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      // Samples are unsigned bytes centered at 128; map to canvas y.
      const y = (dataArray[i] / 128.0) * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += step;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}
|
||||
|
||||
// Keep the canvas as wide as its container.
window.onresize = () => {
  canvas.width = mainSection.offsetWidth;
};

// Size it once at load time as well.
window.onresize();
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into one Int16Array.
function flatten(listOfSamples) {
  let total = 0;
  for (const chunk of listOfSamples) {
    total += chunk.length;
  }

  const ans = new Int16Array(total);
  let offset = 0;
  for (const chunk of listOfSamples) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM mono samples (at expectedSampleRate) in a WAV
// container and return it as a Blob.
// Field layout: http://soundfile.sapp.org/doc/WaveFormat/
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // Magic strings are written as little-endian u32, hence byte-reversed:
  // 'RIFF', 'WAVE', 'fmt ', 'data'.
  view.setUint32(0, 0x46464952, true);               // chunkID: 'RIFF'
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  view.setUint32(8, 0x45564157, true);               // format: 'WAVE'
  view.setUint32(12, 0x20746d66, true);              // subchunk1ID: 'fmt '
  view.setUint32(16, 16, true);  // subchunk1Size, 16 for PCM
  // Fix: audioFormat is a 16-bit field at offset 20. The previous
  // 32-bit write spilled into numChannels and only produced correct
  // bytes because numChannels was written immediately afterwards.
  view.setUint16(20, 1, true);   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);   // blockAlign
  view.setUint16(34, 16, true);  // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID: 'data'
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
|
||||
|
||||
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Reduce the sample rate of `buffer` (captured at recordSampleRate) to
// exportSampleRate by averaging the source samples mapped to each
// output slot. Returns the input unchanged when the rates match.
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let srcPos = 0;
  for (let dst = 0; dst < out.length; ++dst) {
    const srcEnd = Math.round((dst + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = srcPos; i < srcEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[dst] = sum / count;
    srcPos = srcEnd;
  }
  return out;
}
|
||||
5
python-api-examples/web/js/popper.min.js
vendored
Normal file
5
python-api-examples/web/js/popper.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/js/popper.min.js.map
Normal file
1
python-api-examples/web/js/popper.min.js.map
Normal file
File diff suppressed because one or more lines are too long
401
python-api-examples/web/js/streaming_record.js
Normal file
401
python-api-examples/web/js/streaming_record.js
Normal file
@@ -0,0 +1,401 @@
|
||||
// This file copies and modifies code
|
||||
// from https://mdn.github.io/web-dictaphone/scripts/app.js
|
||||
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
|
||||
|
||||
var socket;
|
||||
var recognition_text = [];
|
||||
|
||||
// Build the text for the results box: one numbered line per non-empty
// recognized segment, numbering only the lines actually shown.
function getDisplayResult() {
  let ans = '';
  let lineNo = 0;
  for (const idx in recognition_text) {
    const text = recognition_text[idx];
    if (text == '') continue;

    ans += '' + lineNo + ': ' + text + '\n';
    lineNo += 1;
  }
  return ans;
}
|
||||
|
||||
// Open a WebSocket to the streaming ASR server named by the ip/port
// inputs and wire up the UI: button state on open/close, and
// incremental recognition results on message.
function initWebSocket() {
  console.log('Creating websocket')
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);
  // socket = new WebSocket('wss://localhost:6006/');

  // Connection opened
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    // Fix: also disable the stop button (as offline_record.js does);
    // otherwise it stays clickable after a mid-recording disconnect.
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Listen for messages. Each message is JSON {segment, text}: an
  // already-seen segment index is updated in place, a new one appended.
  socket.addEventListener('message', function(event) {
    let message = JSON.parse(event.data);
    if (message.segment in recognition_text) {
      recognition_text[message.segment] = message.text;
    } else {
      recognition_text.push(message.text);
    }
    let text_area = document.getElementById('results');
    text_area.value = getDisplayResult();
    text_area.scrollTop = text_area.scrollHeight;  // auto scroll
    console.log('Received message: ', event.data);
  });
}
|
||||
|
||||
// Pre-fill the server address inputs from the page's own location and
// display wss:// when the page is served over https.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  const secure = window.location.protocol == 'https:';
  if (secure) {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const recordBtn = document.getElementById('streaming_record');
|
||||
const stopBtn = document.getElementById('streaming_stop');
|
||||
const clearBtn = document.getElementById('clear');
|
||||
const soundClips = document.getElementById('sound-clips');
|
||||
const canvas = document.getElementById('canvas');
|
||||
const mainSection = document.querySelector('.container');
|
||||
|
||||
stopBtn.disabled = true;
|
||||
|
||||
let audioCtx;
|
||||
const canvasCtx = canvas.getContext('2d');
|
||||
let mediaStream;
|
||||
let analyser;
|
||||
|
||||
let expectedSampleRate = 16000;
|
||||
let recordSampleRate; // the sampleRate of the microphone
|
||||
let recorder = null; // the microphone
|
||||
let leftchannel = []; // TODO: Use a single channel
|
||||
|
||||
let recordingLength = 0; // number of samples so far
|
||||
|
||||
// Clear the results box and forget all recognized segments.
clearBtn.onclick = () => {
  document.getElementById('results').value = '';
  recognition_text = [];
};

// Connect on demand rather than at page load.
connectBtn.onclick = () => {
  initWebSocket();
};
|
||||
|
||||
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Capture microphone audio and stream each downsampled, clamped
// float32 chunk to the server as it is produced; on "stop" send
// 'Done', close the socket, and build a playable clip locally.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // fallback for very old browsers
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For every filled capture buffer: downsample, clamp to [-1, 1],
    // stream the float samples to the server, and keep an int16 copy
    // so a clip can be assembled when recording stops.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      socket.send(samples);

      leftchannel.push(buf);
      // Fix: count the samples actually stored (after downsampling)
      // rather than the raw capture-buffer size.
      recordingLength += buf.length;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      socket.send('Done');
      console.log('Sent Done');

      socket.close();

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      // Fix: reset the counter too, so a second recording does not
      // inherit the previous clip's length.
      recordingLength = 0;
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
|
||||
|
||||
// Draw a live oscilloscope of the microphone signal on the canvas.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  // Creating the source node ties the stream into this AudioContext.
  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw();

  // Paint one waveform frame and schedule the next via rAF.
  function draw() {
    const WIDTH = canvas.width;
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    const step = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      // Samples are unsigned bytes centered at 128; map to canvas y.
      const y = (dataArray[i] / 128.0) * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += step;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}
|
||||
|
||||
// Keep the canvas as wide as its container.
window.onresize = () => {
  canvas.width = mainSection.offsetWidth;
};

// Size it once at load time as well.
window.onresize();
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into one Int16Array.
function flatten(listOfSamples) {
  let total = 0;
  for (const chunk of listOfSamples) {
    total += chunk.length;
  }

  const ans = new Int16Array(total);
  let offset = 0;
  for (const chunk of listOfSamples) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM mono samples (at expectedSampleRate) in a WAV
// container and return it as a Blob.
// Field layout: http://soundfile.sapp.org/doc/WaveFormat/
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // Magic strings are written as little-endian u32, hence byte-reversed:
  // 'RIFF', 'WAVE', 'fmt ', 'data'.
  view.setUint32(0, 0x46464952, true);               // chunkID: 'RIFF'
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  view.setUint32(8, 0x45564157, true);               // format: 'WAVE'
  view.setUint32(12, 0x20746d66, true);              // subchunk1ID: 'fmt '
  view.setUint32(16, 16, true);  // subchunk1Size, 16 for PCM
  // Fix: audioFormat is a 16-bit field at offset 20. The previous
  // 32-bit write spilled into numChannels and only produced correct
  // bytes because numChannels was written immediately afterwards.
  view.setUint16(20, 1, true);   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);   // blockAlign
  view.setUint16(34, 16, true);  // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID: 'data'
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
|
||||
|
||||
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Reduce the sample rate of `buffer` (captured at recordSampleRate) to
// exportSampleRate by averaging the source samples mapped to each
// output slot. Returns the input unchanged when the rates match.
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let srcPos = 0;
  for (let dst = 0; dst < out.length; ++dst) {
    const srcEnd = Math.round((dst + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = srcPos; i < srcEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[dst] = sum / count;
    srcPos = srcEnd;
  }
  return out;
}
|
||||
136
python-api-examples/web/js/upload.js
Normal file
136
python-api-examples/web/js/upload.js
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
References
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/FileList
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/FileReader
|
||||
https://javascript.info/arraybuffer-binary-arrays
|
||||
https://developer.mozilla.org/zh-CN/docs/Web/API/WebSocket
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/WebSocket/send
|
||||
*/
|
||||
|
||||
var socket;
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const uploadBtn = document.getElementById('file');
|
||||
|
||||
// Open a WebSocket to the ASR server named by the ip/port inputs. The
// upload button is enabled only while the connection is alive, and the
// single result message from the server is shown in the results box.
function initWebSocket() {
  const scheme = window.location.protocol == 'https:' ? 'wss://' : 'ws://';
  const host = serverIpInput.value;
  const port = serverPortInput.value;
  console.log('protocol: ', scheme);
  console.log('server_ip: ', host);
  console.log('server_port: ', port);

  const uri = scheme + host + ':' + port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: uploading becomes possible.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    uploadBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable uploading until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    uploadBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server replies once with the full recognition result; show it,
  // acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
|
||||
|
||||
// Pre-fill the server address inputs from the page's own location and
// display wss:// when the page is served over https.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  const secure = window.location.protocol == 'https:';
  if (secure) {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

// Connect on demand rather than at page load.
connectBtn.onclick = () => {
  initWebSocket();
};
|
||||
|
||||
// Tell the server how many payload bytes follow by sending a
// 4-byte little-endian int32 length header over the websocket.
function send_header(n) {
  const headerBuf = new ArrayBuffer(4);
  const dv = new DataView(headerBuf);
  dv.setInt32(0, n, true /* littleEndian */);
  socket.send(new Int32Array(headerBuf, 0, 1));
}
|
||||
|
||||
// Read the selected file, convert its int16 samples to float32 in
// [-1, 1), and send them to the server in 4 KiB slices preceded by a
// length header.
//
// NOTE(review): the file is assumed to be a 16-bit WAV with a plain
// 44-byte header (22 int16 slots are skipped); files with extra header
// chunks would be mis-parsed — the original carries the same TODO.
function onFileChange() {
  var chosen = document.getElementById('file').files;

  if (chosen.length == 0) {
    console.log('No file selected');
    return;
  }

  console.log('files: ' + chosen);

  const wavFile = chosen[0];
  console.log(wavFile);
  console.log('file.name ' + wavFile.name);
  console.log('file.type ' + wavFile.type);
  console.log('file.size ' + wavFile.size);

  const reader = new FileReader();
  reader.onload = () => {
    console.log('reading file!');
    const wavView = new Int16Array(reader.result);
    // we assume the input file is a wav file.
    // TODO: add some checks here.
    const int16_samples = wavView.subarray(22);  // header has 44 bytes == 22 shorts
    const num_samples = int16_samples.length;
    const float32_samples = new Float32Array(num_samples);
    console.log('num_samples ' + num_samples)

    for (let k = 0; k < num_samples; ++k) {
      float32_samples[k] = int16_samples[k] / 32768.
    }

    // Send 1024 audio samples per request.
    //
    // It has two purposes:
    // (1) Simulate streaming
    // (2) There is a limit on the number of bytes in the payload that can be
    //     sent by websocket, which is 1MB, I think. We can send a large
    //     audio file for decoding in this approach.
    const payload = float32_samples.buffer;
    const chunkBytes = 1024 * 4;  // send this number of bytes per request.
    console.log('buf length, ' + payload.byteLength);
    send_header(payload.byteLength);
    for (let start = 0; start < payload.byteLength; start += chunkBytes) {
      socket.send(payload.slice(start, start + chunkBytes));
    }
  };

  reader.readAsArrayBuffer(wavFile);
}
|
||||
|
||||
// Wire up the "Clear results" button: clicking it empties the results box.
const clearBtn = document.getElementById('clear');
clearBtn.onclick = () => {
  console.log('clicked');
  document.getElementById('results').value = '';
};
|
||||
BIN
python-api-examples/web/k2-logo.png
Normal file
BIN
python-api-examples/web/k2-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
26
python-api-examples/web/nav-partial.html
Normal file
26
python-api-examples/web/nav-partial.html
Normal file
@@ -0,0 +1,26 @@
|
||||
<nav class="navbar navbar-expand-lg navbar-light bg-light">
|
||||
<a class="navbar-brand" href="index.html">Next-gen Kaldi demo</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item active">
|
||||
<a class="nav-link" href="index.html">Home <span class="sr-only">(current)</span></a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="upload.html">Upload</a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="streaming_record.html">Streaming-Record</a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="offline_record.html">Offline-Record</a>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
81
python-api-examples/web/offline_record.html
Normal file
81
python-api-examples/web/offline_record.html
Normal file
@@ -0,0 +1,81 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Offline record for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from offline recordings</h3>
|
||||
<div class="container">
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12">
|
||||
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="offline_record">Offline-Record</button>
|
||||
</div>
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="offline_stop">Offline-Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
|
||||
<section flex="1" overflow="auto" id="sound-clips">
|
||||
</section>
|
||||
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/offline_record.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
BIN
python-api-examples/web/pic/web-ui.png
Normal file
BIN
python-api-examples/web/pic/web-ui.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
75
python-api-examples/web/start-https-server.py
Executable file
75
python-api-examples/web/start-https-server.py
Executable file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Code in this file is modified from
|
||||
# https://stackoverflow.com/questions/19705785/python-3-simple-https-server
|
||||
|
||||
import argparse
|
||||
import http.server
|
||||
import ssl
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
"""
|
||||
Usage:
|
||||
|
||||
./start-https-server.py \
|
||||
--server-address 0.0.0.0 \
|
||||
--server-port 6007 \
|
||||
--cert ./cert.pem
|
||||
"""
|
||||
|
||||
|
||||
def get_args():
    """Parse and return command-line options for the HTTPS static-file server.

    Returns:
      An ``argparse.Namespace`` with ``server_address``, ``server_port``,
      and ``certificate`` attributes.
    """
    ap = argparse.ArgumentParser()

    ap.add_argument(
        "--server-address",
        type=str,
        default="0.0.0.0",
        help="""IP address which this server will bind to""",
    )
    ap.add_argument(
        "--server-port",
        type=int,
        default=6007,
        help="""Port number on which this server will listen""",
    )
    ap.add_argument(
        "--certificate",
        type=str,
        default="cert.pem",
        help="""Path to the X.509 certificate. You can use
        ./generate-certificate.py to generate it""",
    )

    return ap.parse_args()
|
||||
|
||||
|
||||
def main():
    """Start an HTTPS server serving files from the current directory.

    Reads the bind address, port, and certificate path from the command
    line (see ``get_args``), wraps the listening socket in TLS, and
    serves forever. Exits with a non-zero status if the certificate
    file does not exist.
    """
    args = get_args()
    print(f"{vars(args)}")

    # Fail fast (before binding the port) if the certificate is missing.
    if not Path(args.certificate).is_file():
        print("Please run ./generate-certificate.py to generate a certificate")
        sys.exit(-1)

    server_address = (args.server_address, args.server_port)
    httpd = http.server.HTTPServer(
        server_address, http.server.SimpleHTTPRequestHandler
    )

    # ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12;
    # use an SSLContext configured for the server side of a TLS connection.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    context.load_cert_chain(certfile=args.certificate)
    httpd.socket = context.wrap_socket(httpd.socket, server_side=True)

    print(
        "The server is listening at the following address:\n"
        f"https://{args.server_address}:{args.server_port}"
    )
    httpd.serve_forever()
|
||||
|
||||
|
||||
# Run the HTTPS server only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||
81
python-api-examples/web/streaming_record.html
Normal file
81
python-api-examples/web/streaming_record.html
Normal file
@@ -0,0 +1,81 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Streaming record for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from real-time recordings</h3>
|
||||
<div class="container">
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12">
|
||||
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="streaming_record">Streaming-Record</button>
|
||||
</div>
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="streaming_stop">Streaming-Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
|
||||
<section flex="1" overflow="auto" id="sound-clips">
|
||||
</section>
|
||||
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/streaming_record.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
68
python-api-examples/web/upload.html
Normal file
68
python-api-examples/web/upload.html
Normal file
@@ -0,0 +1,68 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Upload file for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from a selected file</h3>
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<form>
|
||||
<div class="mb-3">
|
||||
<label for="file" class="form-label">Select file</label>
|
||||
<input class="form-control" type="file" id="file" accept=".wav" onchange="onFileChange()" disabled="true"></input>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
</form>
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/upload.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,4 +1,5 @@
|
||||
from _sherpa_onnx import Display
|
||||
|
||||
from .online_recognizer import OnlineRecognizer
|
||||
from .online_recognizer import OnlineStream
|
||||
from .offline_recognizer import OfflineRecognizer
|
||||
|
||||
@@ -127,6 +127,7 @@ class OnlineRecognizer(object):
|
||||
)
|
||||
|
||||
self.recognizer = _Recognizer(recognizer_config)
|
||||
self.config = recognizer_config
|
||||
|
||||
    def create_stream(self):
        """Create and return a new stream from the underlying recognizer."""
        return self.recognizer.create_stream()
|
||||
|
||||
Reference in New Issue
Block a user