add streaming-server with web client (#164)
* add streaming-server with web client * small fixes
This commit is contained in:
82
python-api-examples/http_server.py
Normal file
82
python-api-examples/http_server.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||
#
|
||||
# See ../../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Tuple, Union
|
||||
|
||||
# Please sort it alphabetically
|
||||
_static_files = (
|
||||
("/css/bootstrap.min.css", "text/css"),
|
||||
("/css/bootstrap.min.css.map", "text/css"),
|
||||
("/index.html", "text/html"),
|
||||
("/js/bootstrap.min.js", "application/javascript"),
|
||||
("/js/bootstrap.min.js.map", "application/javascript"),
|
||||
("/js/jquery-3.6.0.min.js", "application/javascript"),
|
||||
("/js/offline_record.js", "application/javascript"),
|
||||
("/js/offline_record.js", "application/javascript"),
|
||||
("/js/popper.min.js", "application/javascript"),
|
||||
("/js/popper.min.js.map", "application/javascript"),
|
||||
("/js/streaming_record.js", "application/javascript"),
|
||||
("/js/upload.js", "application/javascript"),
|
||||
("/k2-logo.png", "image/png"),
|
||||
("/nav-partial.html", "text/html"),
|
||||
("/offline_record.html", "text/html"),
|
||||
("/streaming_record.html", "text/html"),
|
||||
("/upload.html", "text/html"),
|
||||
)
|
||||
|
||||
_404_page = r"""
|
||||
<!doctype html><html><head>
|
||||
<title>Speech recognition with next-gen Kaldi</title><body>
|
||||
<h1>404 ERROR! Please re-check your URL</h1>
|
||||
</body></head></html>
|
||||
"""
|
||||
|
||||
|
||||
def read_file(root: str, name: str) -> Union[str, bytes]:
    """Read a file under ``root`` and return its content.

    The file is first opened in text mode; if decoding fails (e.g. for
    images such as ``k2-logo.png``), it is re-read in binary mode.

    Args:
      root:
        The directory containing the static files.
      name:
        Path of the file relative to ``root``.
    Returns:
      The file content: a ``str`` for text files, ``bytes`` for binary
      files.
    """
    path = f"{root}/{name}"
    try:
        with open(path) as f:
            return f.read()
    except UnicodeDecodeError:
        # Not decodable as text; fall back to raw bytes. A missing file
        # now raises FileNotFoundError directly instead of being hidden
        # by the previous bare ``except``.
        with open(path, "rb") as f:
            return f.read()
|
||||
|
||||
|
||||
class HttpServer:
    """A simple HTTP server that hosts only static files.

    Every file listed in ``_static_files`` is read into memory at
    construction time, so serving a request never touches the disk.
    """

    def __init__(self, doc_root: str):
        """
        Args:
          doc_root:
            Path to the directory containing the static files.
        """
        content = {}
        for f, mime_type in _static_files:
            content[f] = (read_file(doc_root, f), mime_type)
        self.content = content

    # Fix: the annotation was Tuple[str, str, str]; the method actually
    # returns (bool, str-or-bytes content, str MIME type).
    def process_request(self, f: str) -> Tuple[bool, Union[str, bytes], str]:
        """
        Args:
          f:
            The filename to read.
        Returns:
          Return a tuple:
            - a bool, True if the given file is found. False otherwise.
            - the content of the file if found. Otherwise, it
              contains the content for the 404 page
            - a str, the MIME type of the returned content
        """
        if f in self.content:
            data, mime_type = self.content[f]
            return True, data, mime_type
        else:
            return False, _404_page, "text/html"
|
||||
@@ -119,7 +119,9 @@ def get_args():
|
||||
"--sample-rate",
|
||||
type=int,
|
||||
default=16000,
|
||||
help="Sample rate of the feature extractor. Must match the one expected by the model. Note: The input sound files can have a different sample rate from this argument.",
|
||||
help="""Sample rate of the feature extractor. Must match the one
|
||||
expected by the model. Note: The input sound files can have a
|
||||
different sample rate from this argument.""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
657
python-api-examples/streaming_server.py
Executable file
657
python-api-examples/streaming_server.py
Executable file
@@ -0,0 +1,657 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2022-2023 Xiaomi Corp.
|
||||
#
|
||||
"""
|
||||
A server for streaming ASR recognition. By streaming it means the audio samples
|
||||
are coming in real-time. You don't need to wait until all audio samples are
|
||||
captured before sending them for recognition.
|
||||
|
||||
It supports multiple clients sending at the same time.
|
||||
|
||||
Usage:
|
||||
./streaming_server.py --help
|
||||
|
||||
Example:
|
||||
|
||||
python3 ./python-api-examples/streaming_server.py \
|
||||
--encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
|
||||
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import http
|
||||
import json
|
||||
import logging
|
||||
import socket
|
||||
import ssl
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
import websockets
|
||||
from http_server import HttpServer
|
||||
|
||||
|
||||
def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
) -> None:
    """Configure the root logger.

    Logs go to ``<log_filename>-<timestamp>.txt``; parent directories
    are created as needed.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    """
    date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    log_filename = f"{log_filename}-{date_time}.txt"

    Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

    # Any unrecognized level name (including "error") maps to ERROR,
    # matching the original if/elif chain's fall-through default.
    level = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "critical": logging.CRITICAL,
    }.get(log_level, logging.ERROR)

    logging.basicConfig(
        filename=log_filename,
        format=formatter,
        level=level,
        filemode="w",
    )
    if use_console:
        # Mirror the file log on stderr with the same format and level.
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(logging.Formatter(formatter))
        logging.getLogger("").addHandler(console)
|
||||
|
||||
|
||||
def add_model_args(parser: argparse.ArgumentParser):
    """Register model-related command-line options on ``parser``."""
    # The four model files are all required string paths.
    for flag, help_text in (
        ("--encoder-model", "Path to the encoder model"),
        ("--decoder-model", "Path to the decoder model."),
        ("--joiner-model", "Path to the joiner model."),
        ("--tokens", "Path to tokens.txt"),
    ):
        parser.add_argument(flag, type=str, required=True, help=help_text)

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate of the data used to train the model. "
        "Caution: If your input sound files have a different sampling rate, "
        "we will do resampling inside",
    )

    parser.add_argument(
        "--feat-dim",
        type=int,
        default=80,
        help="Feature dimension of the model",
    )
|
||||
|
||||
|
||||
def add_decoding_args(parser: argparse.ArgumentParser):
    """Register decoding-related command-line options on ``parser``."""
    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search
        """,
    )

    # Options that only take effect with --decoding-method=modified_beam_search.
    add_modified_beam_search_args(parser)
|
||||
|
||||
|
||||
def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    """Register options specific to modified_beam_search decoding."""
    parser.add_argument(
        "--num-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )
|
||||
|
||||
|
||||
def add_endpointing_args(parser: argparse.ArgumentParser):
    """Register endpoint-detection command-line options on ``parser``."""
    parser.add_argument(
        "--use-endpoint",
        type=int,
        default=1,
        # Fix: typo "endpoiting" -> "endpointing".
        help="1 to enable endpointing. 0 to disable it",
    )

    parser.add_argument(
        "--rule1-min-trailing-silence",
        type=float,
        default=2.4,
        # Fix: "in seconds)" was missing its opening parenthesis.
        help="""This endpointing rule1 requires duration of trailing silence
        (in seconds) to be >= this value""",
    )

    parser.add_argument(
        "--rule2-min-trailing-silence",
        type=float,
        default=1.2,
        help="""This endpointing rule2 requires duration of trailing silence
        (in seconds) to be >= this value.""",
    )

    parser.add_argument(
        "--rule3-min-utterance-length",
        type=float,
        default=20,
        help="""This endpointing rule3 requires utterance-length (in seconds)
        to be >= this value.""",
    )
|
||||
|
||||
|
||||
def get_args():
    """Parse and return all command-line arguments for the server."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Model / decoding / endpointing options live in dedicated helpers.
    add_model_args(parser)
    add_decoding_args(parser)
    add_endpointing_args(parser)

    parser.add_argument(
        "--port",
        type=int,
        default=6006,
        help="The server will listen on this port",
    )

    parser.add_argument(
        "--nn-pool-size",
        type=int,
        default=1,
        help="Number of threads for NN computation and decoding.",
    )

    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=50,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )

    parser.add_argument(
        "--max-wait-ms",
        type=float,
        default=10,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the stream queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )

    parser.add_argument(
        "--max-message-size",
        type=int,
        # 1 MiB per websocket message.
        default=(1 << 20),
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )

    parser.add_argument(
        "--max-queue-size",
        type=int,
        default=32,
        help="Max number of messages in the queue for each connection.",
    )

    parser.add_argument(
        "--max-active-connections",
        type=int,
        default=500,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Sets the number of threads used for interop parallelism (e.g. in JIT interpreter) on CPU.",
    )

    parser.add_argument(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use sherpa/bin/web/generate-certificate.py
        to generate the certificate `cert.pem`.
        """,
    )

    parser.add_argument(
        "--doc-root",
        type=str,
        default="./python-api-examples/web",
        help="""Path to the web root""",
    )

    return parser.parse_args()
|
||||
|
||||
|
||||
def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
    """Create a streaming recognizer from parsed command-line arguments.

    Args:
      args:
        The namespace returned by :func:`get_args`.
    Returns:
      A configured ``sherpa_onnx.OnlineRecognizer``.
    """
    # Fix: num_threads, sample_rate, and feature_dim were hard-coded to
    # 1, 16000, and 80, silently ignoring the --num-threads,
    # --sample-rate, and --feat-dim options declared in add_model_args()
    # and get_args(). The defaults keep the previous behavior.
    recognizer = sherpa_onnx.OnlineRecognizer(
        tokens=args.tokens,
        encoder=args.encoder_model,
        decoder=args.decoder_model,
        joiner=args.joiner_model,
        num_threads=args.num_threads,
        sample_rate=args.sample_rate,
        feature_dim=args.feat_dim,
        decoding_method=args.decoding_method,
        max_active_paths=args.num_active_paths,
        enable_endpoint_detection=args.use_endpoint != 0,
        rule1_min_trailing_silence=args.rule1_min_trailing_silence,
        rule2_min_trailing_silence=args.rule2_min_trailing_silence,
        rule3_min_utterance_length=args.rule3_min_utterance_length,
    )

    return recognizer
|
||||
|
||||
|
||||
def format_timestamps(timestamps: List[float]) -> List[str]:
    """Render each timestamp (in seconds) with millisecond precision."""
    return [f"{t:.3f}" for t in timestamps]
|
||||
|
||||
|
||||
class StreamingServer(object):
    """Websocket server for streaming ASR, plus a tiny HTTP file server.

    Normal HTTP requests on the same port are answered with static files
    from ``doc_root``; websocket upgrades become recognition sessions.
    """

    def __init__(
        self,
        recognizer: sherpa_onnx.OnlineRecognizer,
        nn_pool_size: int,
        max_wait_ms: float,
        max_batch_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        doc_root: str,
        certificate: Optional[str] = None,
    ):
        """
        Args:
          recognizer:
            An instance of online recognizer.
          nn_pool_size:
            Number of threads for the thread pool that is responsible for
            neural network computation and decoding.
          max_wait_ms:
            Max wait time in milliseconds in order to build a batch of
            `batch_size`.
          max_batch_size:
            Max batch size for inference.
          max_message_size:
            Max size in bytes per message.
          max_queue_size:
            Max number of messages in the queue for each connection.
          max_active_connections:
            Max number of active connections. Once number of active client
            equals to this limit, the server refuses to accept new connections.
          doc_root:
            Path to the directory where files like index.html for the HTTP
            server locate.
          certificate:
            Optional. If not None, it will use secure websocket.
            You can use ./sherpa/bin/web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        # Note: the original docstring also documented beam_search_params
        # and online_endpoint_config, which are not parameters of this
        # method; those entries have been removed.
        self.recognizer = recognizer

        self.certificate = certificate
        self.http_server = HttpServer(doc_root)

        self.nn_pool = ThreadPoolExecutor(
            max_workers=nn_pool_size,
            thread_name_prefix="nn",
        )

        self.stream_queue = asyncio.Queue()
        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections

        self.current_active_connections = 0

        self.sample_rate = int(recognizer.config.feat_config.sampling_rate)
        self.decoding_method = recognizer.config.decoding_method

    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
        them to the RNN-T model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            # Drain up to max_batch_size items that are already queued;
            # QueueEmpty simply ends the batch early.
            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()

                    assert self.recognizer.is_ready(item[0])

                    batch.append(item)
            except asyncio.QueueEmpty:
                pass
            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            # Run the blocking NN computation on the thread pool so the
            # event loop stays responsive.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self.nn_pool,
                self.recognizer.decode_streams,
                stream_list,
            )

            # Wake up every producer waiting in compute_and_decode().
            for f in future_list:
                self.stream_queue.task_done()
                f.set_result(None)

    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OnlineStream,
    ) -> None:
        """Put the stream into the queue and wait it to be processed by the
        consumer task.

        Args:
          stream:
            The stream to be processed. Note: It is changed in-place.
        """
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        await self.stream_queue.put((stream, future))
        await future

    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        """Serve plain HTTP requests; admit or refuse websocket upgrades.

        Returning None lets the websockets library proceed with the
        upgrade handshake.
        """
        if "sec-websocket-key" not in request_headers:
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"
            found, response, mime_type = self.http_server.process_request(path)
            if isinstance(response, str):
                response = response.encode("utf-8")

            if not found:
                status = http.HTTPStatus.NOT_FOUND
            else:
                status = http.HTTPStatus.OK
            header = {"Content-Type": mime_type}
            return status, header, response

        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None

        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."

        return status, header, response

    async def run(self, port: int):
        """Start the consumer task and serve websocket/HTTP on ``port``."""
        task = asyncio.create_task(self.stream_consumer_task())

        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            ssl_context = None
            logging.info("No certificate provided")

        async with websockets.serve(
            self.handle_connection,
            host="",
            port=port,
            max_size=self.max_message_size,
            max_queue=self.max_queue_size,
            process_request=self.process_request,
            ssl=ssl_context,
        ):
            ip_list = ["0.0.0.0", "localhost", "127.0.0.1"]
            ip_list.append(socket.gethostbyname(socket.gethostname()))
            proto = "http://" if ssl_context is None else "https://"
            s = "Please visit one of the following addresses:\n\n"
            for p in ip_list:
                s += " " + proto + p + f":{port}" "\n"
            logging.info(s)

            await asyncio.Future()  # run forever

        await task  # not reachable

    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosedError:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Decrement so that it can accept new connections
            self.current_active_connections -= 1

            logging.info(
                f"Disconnected: {socket.remote_address}. "
                f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
            )

    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        logging.info(
            f"Connected: {socket.remote_address}. "
            f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
        )

        stream = self.recognizer.create_stream()
        segment = 0

        while True:
            samples = await self.recv_audio_samples(socket)
            if samples is None:
                break

            # TODO(fangjun): At present, we assume the sampling rate
            # of the received audio samples equal to --sample-rate
            stream.accept_waveform(sample_rate=self.sample_rate, waveform=samples)

            while self.recognizer.is_ready(stream):
                await self.compute_and_decode(stream)
                result = self.recognizer.get_result(stream)

                message = {
                    "text": result,
                    "segment": segment,
                }
                if self.recognizer.is_endpoint(stream):
                    self.recognizer.reset(stream)
                    segment += 1

                print(message)

                await socket.send(json.dumps(message))

        # Fix: the original called np.rand(...), which does not exist in
        # NumPy and raised AttributeError at the end of every session.
        # Pad with 0.3 seconds of silence so the last word gets decoded.
        tail_padding = np.zeros(int(self.sample_rate * 0.3), dtype=np.float32)
        # Fix: use the sample_rate= keyword here as well (the call above
        # used sample_rate= while this one used sampling_rate=).
        stream.accept_waveform(sample_rate=self.sample_rate, waveform=tail_padding)
        stream.input_finished()
        while self.recognizer.is_ready(stream):
            await self.compute_and_decode(stream)

        result = self.recognizer.get_result(stream)

        message = {
            "text": result,
            "segment": segment,
        }

        await socket.send(json.dumps(message))

    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Optional[np.ndarray]:
        """Receives a tensor from the client.

        Each message contains either a bytes buffer containing audio samples
        in 16 kHz or contains "Done" meaning the end of utterance.

        Args:
          socket:
            The socket for communicating with the client.
        Returns:
          Return a 1-D np.float32 tensor containing the audio samples or
          return None.
        """
        message = await socket.recv()
        if message == "Done":
            return None

        return np.frombuffer(message, dtype=np.float32)
|
||||
|
||||
|
||||
def check_args(args):
    """Validate command-line arguments.

    Raises:
      ValueError: if a model file is missing, the decoding method is
        unknown, or the beam-search configuration is invalid.

    Using explicit exceptions instead of ``assert`` keeps the checks
    active when Python runs with -O, and matches the ValueError already
    used for the tokens file.
    """
    for path in (
        args.encoder_model,
        args.decoder_model,
        args.joiner_model,
        args.tokens,
    ):
        if not Path(path).is_file():
            raise ValueError(f"{path} does not exist")

    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")

    if args.decoding_method == "modified_beam_search":
        if args.num_active_paths <= 0:
            raise ValueError(
                f"--num-active-paths must be > 0, given: {args.num_active_paths}"
            )
|
||||
|
||||
|
||||
def main():
    """Entry point: parse arguments, build the recognizer, run the server."""
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    recognizer = create_recognizer(args)

    # Validate filesystem-dependent options before starting the server.
    if args.certificate and not Path(args.certificate).is_file():
        raise ValueError(f"{args.certificate} does not exist")

    if not Path(args.doc_root).is_dir():
        raise ValueError(f"Directory {args.doc_root} does not exist")

    server = StreamingServer(
        recognizer=recognizer,
        nn_pool_size=args.nn_pool_size,
        max_batch_size=args.max_batch_size,
        max_wait_ms=args.max_wait_ms,
        max_message_size=args.max_message_size,
        max_queue_size=args.max_queue_size,
        max_active_connections=args.max_active_connections,
        certificate=args.certificate,
        doc_root=args.doc_root,
    )
    asyncio.run(server.run(args.port))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # setup_logger() appends a timestamp and ".txt" to this prefix and
    # creates the log/ directory if needed.
    log_filename = "log/log-streaming-zipformer"
    setup_logger(log_filename)
    main()
|
||||
3
python-api-examples/web/.gitignore
vendored
Normal file
3
python-api-examples/web/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*.pem
|
||||
*.key
|
||||
*.crt
|
||||
34
python-api-examples/web/README.md
Normal file
34
python-api-examples/web/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# How to use
|
||||
|
||||
```bash
|
||||
git clone https://github.com/k2-fsa/sherpa
|
||||
|
||||
cd sherpa/sherpa/bin/web
|
||||
python3 -m http.server 6009
|
||||
```
|
||||
and then go to <http://localhost:6009>
|
||||
|
||||
You will see a page like the following screenshot:
|
||||
|
||||

|
||||
|
||||
If your server is listening at the port *6006* with address **localhost**,
|
||||
then you can either click **Upload**, **Streaming_Record** or **Offline_Record** to play with it.
|
||||
|
||||
## File descriptions
|
||||
|
||||
### ./css/bootstrap.min.css
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css
|
||||
|
||||
### ./js/jquery-3.6.0.min.js
|
||||
|
||||
It is downloaded from https://code.jquery.com/jquery-3.6.0.min.js
|
||||
|
||||
### ./js/popper.min.js
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/popper.js@1.14.7/dist/umd/popper.min.js
|
||||
|
||||
### ./js/bootstrap.min.js
|
||||
|
||||
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/js/bootstrap.min.js
|
||||
7
python-api-examples/web/css/bootstrap.min.css
vendored
Normal file
7
python-api-examples/web/css/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/css/bootstrap.min.css.map
Normal file
1
python-api-examples/web/css/bootstrap.min.css.map
Normal file
File diff suppressed because one or more lines are too long
89
python-api-examples/web/generate-certificate.py
Executable file
89
python-api-examples/web/generate-certificate.py
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
pip install pyopenssl
|
||||
"""
|
||||
|
||||
from OpenSSL import crypto
|
||||
|
||||
# The code in this file is modified from
|
||||
# https://stackoverflow.com/questions/27164354/create-a-self-signed-x509-certificate-in-python
|
||||
|
||||
"""
|
||||
This script generates 3 files:
|
||||
- private.key
|
||||
- selfsigned.crt
|
||||
- cert.pem
|
||||
|
||||
You need cert.pem when you start a https server
|
||||
or a secure websocket server.
|
||||
|
||||
Note: You need to change serialNumber if you want to generate
|
||||
a new certificate as two different certificates cannot share
|
||||
the same serial number if they are issued by the same organization.
|
||||
|
||||
Otherwise, you may get the following error from within you browser:
|
||||
|
||||
An error occurred during a connection to 127.0.0.1:6007. You have received an
|
||||
invalid certificate. Please contact the server administrator or email
|
||||
correspondent and give them the following information: Your certificate
|
||||
contains the same serial number as another certificate issued by the
|
||||
certificate authority. Please get a new certificate containing a unique
|
||||
serial number. Error code: SEC_ERROR_REUSED_ISSUER_AND_SERIAL
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def cert_gen(
    emailAddress="https://github.com/k2-fsa/k2",
    commonName="sherpa",
    countryName="CN",
    localityName="k2-fsa",
    stateOrProvinceName="k2-fsa",
    organizationName="k2-fsa",
    organizationUnitName="k2-fsa",
    serialNumber=3,
    validityStartInSeconds=0,
    validityEndInSeconds=10 * 365 * 24 * 60 * 60,
    KEY_FILE="private.key",
    CERT_FILE="selfsigned.crt",
    ALL_IN_ONE_FILE="cert.pem",
):
    """Generate a self-signed X.509 certificate and private key.

    Three files are written: ``KEY_FILE`` (private key), ``CERT_FILE``
    (certificate), and ``ALL_IN_ONE_FILE`` (key + certificate in one
    PEM file, for servers expecting a single file).

    Note: Re-using serialNumber for a new certificate from the same
    issuer makes browsers reject it; bump it when regenerating.
    """
    # can look at generated file using openssl:
    # openssl x509 -inform pem -in selfsigned.crt -noout -text
    # create a key pair
    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 4096)
    # create a self-signed cert
    cert = crypto.X509()
    cert.get_subject().C = countryName
    cert.get_subject().ST = stateOrProvinceName
    cert.get_subject().L = localityName
    cert.get_subject().O = organizationName  # noqa
    cert.get_subject().OU = organizationUnitName
    cert.get_subject().CN = commonName
    cert.get_subject().emailAddress = emailAddress
    cert.set_serial_number(serialNumber)
    # Fix: honor validityStartInSeconds instead of a hard-coded 0 so the
    # parameter actually takes effect (the default 0 keeps the previous
    # behavior).
    cert.gmtime_adj_notBefore(validityStartInSeconds)
    cert.gmtime_adj_notAfter(validityEndInSeconds)
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(k)
    cert.sign(k, "sha512")
    with open(CERT_FILE, "wt") as f:
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    with open(KEY_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))

    with open(ALL_IN_ONE_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )
    print(f"Generated {CERT_FILE}")
    print(f"Generated {KEY_FILE}")
    print(f"Generated {ALL_IN_ONE_FILE}")


cert_gen()
|
||||
71
python-api-examples/web/index.html
Normal file
71
python-api-examples/web/index.html
Normal file
@@ -0,0 +1,71 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
<link rel="icon"
|
||||
type="image/png"
|
||||
href="./k2-logo.png">
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<ul class="list-unstyled">
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Upload</h5>
|
||||
<p>Recognition from a selected file</p>
|
||||
</div>
|
||||
      </li>
|
||||
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Streaming_Record</h5>
|
||||
<p>Recognition from real-time recordings</p>
|
||||
</div>
|
||||
</li>
|
||||
|
||||
<li class="media">
|
||||
<div class="media-body">
|
||||
<h5 class="mt-0 mb-1">Offline_Record</h5>
|
||||
<p>Recognition from offline recordings</p>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
Code is available at
|
||||
<a href="https://github.com/k2-fsa/sherpa"> https://github.com/k2-fsa/sherpa</a>
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
7
python-api-examples/web/js/bootstrap.min.js
vendored
Normal file
7
python-api-examples/web/js/bootstrap.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/js/bootstrap.min.js.map
Normal file
1
python-api-examples/web/js/bootstrap.min.js.map
Normal file
File diff suppressed because one or more lines are too long
2
python-api-examples/web/js/jquery-3.6.0.min.js
vendored
Normal file
2
python-api-examples/web/js/jquery-3.6.0.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
395
python-api-examples/web/js/offline_record.js
Normal file
395
python-api-examples/web/js/offline_record.js
Normal file
@@ -0,0 +1,395 @@
|
||||
// This file copies and modifies code
|
||||
// from https://mdn.github.io/web-dictaphone/scripts/app.js
|
||||
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
|
||||
|
||||
var socket;
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const uploadBtn = document.getElementById('file');
|
||||
|
||||
// Open a WebSocket to the ASR server named by the ip/port inputs and
// wire up handlers that toggle the record/connect buttons and display
// the single recognition result the server sends back.
function initWebSocket() {
  // Use wss:// when the page itself is served over https, ws:// otherwise.
  const scheme = window.location.protocol == 'https:' ? 'wss://' : 'ws://';

  const host = serverIpInput.value;
  const port = serverPortInput.value;
  console.log('protocol: ', scheme);
  console.log('server_ip: ', host);
  console.log('server_port: ', port);

  const uri = scheme + host + ':' + port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: recording becomes possible, connecting is disabled.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable recording until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server sends the final recognition result as one message;
  // show it, acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
|
||||
|
||||
const recordBtn = document.getElementById('offline_record');
|
||||
const stopBtn = document.getElementById('offline_stop');
|
||||
const clearBtn = document.getElementById('clear');
|
||||
const soundClips = document.getElementById('sound-clips');
|
||||
const canvas = document.getElementById('canvas');
|
||||
const mainSection = document.querySelector('.container');
|
||||
|
||||
stopBtn.disabled = true;
|
||||
|
||||
window.onload = (event) => {
|
||||
console.log('page is fully loaded');
|
||||
console.log('protocol', window.location.protocol);
|
||||
console.log('port', window.location.port);
|
||||
if (window.location.protocol == 'https:') {
|
||||
document.getElementById('ws-protocol').textContent = 'wss://';
|
||||
}
|
||||
serverIpInput.value = window.location.hostname;
|
||||
serverPortInput.value = window.location.port;
|
||||
};
|
||||
|
||||
connectBtn.onclick = function() {
|
||||
initWebSocket();
|
||||
};
|
||||
|
||||
|
||||
let audioCtx;
|
||||
const canvasCtx = canvas.getContext('2d');
|
||||
let mediaStream;
|
||||
let analyser;
|
||||
|
||||
let expectedSampleRate = 16000;
|
||||
let recordSampleRate; // the sampleRate of the microphone
|
||||
let recorder = null; // the microphone
|
||||
let leftchannel = []; // TODO: Use a single channel
|
||||
|
||||
let recordingLength = 0; // number of samples so far
|
||||
|
||||
clearBtn.onclick = function() {
|
||||
document.getElementById('results').value = '';
|
||||
};
|
||||
|
||||
// Tell the server how many payload bytes follow by sending a
// 4-byte little-endian int32 length header over the websocket.
function send_header(n) {
  const headerBuf = new ArrayBuffer(4);
  const dv = new DataView(headerBuf);
  dv.setInt32(0, n, true /* littleEndian */);
  socket.send(new Int32Array(headerBuf, 0, 1));
}
|
||||
|
||||
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Capture microphone audio, visualize it on the canvas, and on "stop"
// build a playable clip and send the float32 samples to the server.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // fallback for very old browsers
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For every filled capture buffer: downsample to expectedSampleRate,
    // clamp to [-1, 1], convert to int16 and keep the chunk for later.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;
        buf[i] = s * 32767;
      }
      leftchannel.push(buf);
      // Fix: count the samples actually stored (after downsampling)
      // rather than the raw capture-buffer size.
      recordingLength += buf.length;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      // Rescale int16 back to float32 in [-1, 1] for the server payload.
      let buf = new Float32Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        buf[i] = s / 32767.0;
      }
      const blob = toWav(samples);

      leftchannel = [];
      // Fix: reset the counter too, so a second recording does not
      // inherit the previous clip's length.
      recordingLength = 0;
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };

      buf = buf.buffer

      // Send the float32 samples in 4 KiB slices, preceded by a
      // 4-byte length header (see send_header).
      let n = 1024 * 4;  // send this number of bytes per request.
      console.log('buf length, ' + buf.byteLength);
      send_header(buf.byteLength);

      for (let start = 0; start < buf.byteLength; start += n) {
        socket.send(buf.slice(start, start + n));
      }
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
|
||||
|
||||
// Draw a live oscilloscope of the microphone signal on the canvas.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  // Creating the source node ties the stream into this AudioContext.
  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw();

  // Paint one waveform frame and schedule the next via rAF.
  function draw() {
    const WIDTH = canvas.width;
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    const step = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      // Samples are unsigned bytes centered at 128; map to canvas y.
      const y = (dataArray[i] / 128.0) * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += step;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}
|
||||
|
||||
// Keep the canvas as wide as its container.
window.onresize = () => {
  canvas.width = mainSection.offsetWidth;
};

// Size it once at load time as well.
window.onresize();
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into one Int16Array.
function flatten(listOfSamples) {
  let total = 0;
  for (const chunk of listOfSamples) {
    total += chunk.length;
  }

  const ans = new Int16Array(total);
  let offset = 0;
  for (const chunk of listOfSamples) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM mono samples (at expectedSampleRate) in a WAV
// container and return it as a Blob.
// Field layout: http://soundfile.sapp.org/doc/WaveFormat/
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // Magic strings are written as little-endian u32, hence byte-reversed:
  // 'RIFF', 'WAVE', 'fmt ', 'data'.
  view.setUint32(0, 0x46464952, true);               // chunkID: 'RIFF'
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  view.setUint32(8, 0x45564157, true);               // format: 'WAVE'
  view.setUint32(12, 0x20746d66, true);              // subchunk1ID: 'fmt '
  view.setUint32(16, 16, true);  // subchunk1Size, 16 for PCM
  // Fix: audioFormat is a 16-bit field at offset 20. The previous
  // 32-bit write spilled into numChannels and only produced correct
  // bytes because numChannels was written immediately afterwards.
  view.setUint16(20, 1, true);   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);   // blockAlign
  view.setUint16(34, 16, true);  // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID: 'data'
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
|
||||
|
||||
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Reduce the sample rate of `buffer` (captured at recordSampleRate) to
// exportSampleRate by averaging the source samples mapped to each
// output slot. Returns the input unchanged when the rates match.
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let srcPos = 0;
  for (let dst = 0; dst < out.length; ++dst) {
    const srcEnd = Math.round((dst + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = srcPos; i < srcEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[dst] = sum / count;
    srcPos = srcEnd;
  }
  return out;
}
|
||||
5
python-api-examples/web/js/popper.min.js
vendored
Normal file
5
python-api-examples/web/js/popper.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
python-api-examples/web/js/popper.min.js.map
Normal file
1
python-api-examples/web/js/popper.min.js.map
Normal file
File diff suppressed because one or more lines are too long
401
python-api-examples/web/js/streaming_record.js
Normal file
401
python-api-examples/web/js/streaming_record.js
Normal file
@@ -0,0 +1,401 @@
|
||||
// This file copies and modifies code
|
||||
// from https://mdn.github.io/web-dictaphone/scripts/app.js
|
||||
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
|
||||
|
||||
var socket;
|
||||
var recognition_text = [];
|
||||
|
||||
// Build the text for the results box: one numbered line per non-empty
// recognized segment, numbering only the lines actually shown.
function getDisplayResult() {
  let ans = '';
  let lineNo = 0;
  for (const idx in recognition_text) {
    const text = recognition_text[idx];
    if (text == '') continue;

    ans += '' + lineNo + ': ' + text + '\n';
    lineNo += 1;
  }
  return ans;
}
|
||||
|
||||
// Open a WebSocket to the streaming ASR server named by the ip/port
// inputs and wire up the UI: button state on open/close, and
// incremental recognition results on message.
function initWebSocket() {
  console.log('Creating websocket')
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);
  // socket = new WebSocket('wss://localhost:6006/');

  // Connection opened
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    // Fix: also disable the stop button (as offline_record.js does);
    // otherwise it stays clickable after a mid-recording disconnect.
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Listen for messages. Each message is JSON {segment, text}: an
  // already-seen segment index is updated in place, a new one appended.
  socket.addEventListener('message', function(event) {
    let message = JSON.parse(event.data);
    if (message.segment in recognition_text) {
      recognition_text[message.segment] = message.text;
    } else {
      recognition_text.push(message.text);
    }
    let text_area = document.getElementById('results');
    text_area.value = getDisplayResult();
    text_area.scrollTop = text_area.scrollHeight;  // auto scroll
    console.log('Received message: ', event.data);
  });
}
|
||||
|
||||
// Pre-fill the server address inputs from the page's own location and
// display wss:// when the page is served over https.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  const secure = window.location.protocol == 'https:';
  if (secure) {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const recordBtn = document.getElementById('streaming_record');
|
||||
const stopBtn = document.getElementById('streaming_stop');
|
||||
const clearBtn = document.getElementById('clear');
|
||||
const soundClips = document.getElementById('sound-clips');
|
||||
const canvas = document.getElementById('canvas');
|
||||
const mainSection = document.querySelector('.container');
|
||||
|
||||
stopBtn.disabled = true;
|
||||
|
||||
let audioCtx;
|
||||
const canvasCtx = canvas.getContext('2d');
|
||||
let mediaStream;
|
||||
let analyser;
|
||||
|
||||
let expectedSampleRate = 16000;
|
||||
let recordSampleRate; // the sampleRate of the microphone
|
||||
let recorder = null; // the microphone
|
||||
let leftchannel = []; // TODO: Use a single channel
|
||||
|
||||
let recordingLength = 0; // number of samples so far
|
||||
|
||||
// Clear the results box and forget all recognized segments.
clearBtn.onclick = () => {
  document.getElementById('results').value = '';
  recognition_text = [];
};

// Connect on demand rather than at page load.
connectBtn.onclick = () => {
  initWebSocket();
};
|
||||
|
||||
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Capture microphone audio and stream each downsampled, clamped
// float32 chunk to the server as it is produced; on "stop" send
// 'Done', close the socket, and build a playable clip locally.
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // fallback for very old browsers
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For every filled capture buffer: downsample, clamp to [-1, 1],
    // stream the float samples to the server, and keep an int16 copy
    // so a clip can be assembled when recording stops.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      socket.send(samples);

      leftchannel.push(buf);
      // Fix: count the samples actually stored (after downsampling)
      // rather than the raw capture-buffer size.
      recordingLength += buf.length;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      socket.send('Done');
      console.log('Sent Done');

      socket.close();

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      // Fix: reset the counter too, so a second recording does not
      // inherit the previous clip's length.
      recordingLength = 0;
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
|
||||
|
||||
// Draw a live oscilloscope of the microphone signal on the canvas.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  // Creating the source node ties the stream into this AudioContext.
  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw();

  // Paint one waveform frame and schedule the next via rAF.
  function draw() {
    const WIDTH = canvas.width;
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    const step = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      // Samples are unsigned bytes centered at 128; map to canvas y.
      const y = (dataArray[i] / 128.0) * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += step;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}
|
||||
|
||||
// Keep the canvas as wide as its container.
window.onresize = () => {
  canvas.width = mainSection.offsetWidth;
};

// Size it once at load time as well.
window.onresize();
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into one Int16Array.
function flatten(listOfSamples) {
  let total = 0;
  for (const chunk of listOfSamples) {
    total += chunk.length;
  }

  const ans = new Int16Array(total);
  let offset = 0;
  for (const chunk of listOfSamples) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||||
|
||||
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM mono samples (at expectedSampleRate) in a WAV
// container and return it as a Blob.
// Field layout: http://soundfile.sapp.org/doc/WaveFormat/
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // Magic strings are written as little-endian u32, hence byte-reversed:
  // 'RIFF', 'WAVE', 'fmt ', 'data'.
  view.setUint32(0, 0x46464952, true);               // chunkID: 'RIFF'
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  view.setUint32(8, 0x45564157, true);               // format: 'WAVE'
  view.setUint32(12, 0x20746d66, true);              // subchunk1ID: 'fmt '
  view.setUint32(16, 16, true);  // subchunk1Size, 16 for PCM
  // Fix: audioFormat is a 16-bit field at offset 20. The previous
  // 32-bit write spilled into numChannels and only produced correct
  // bytes because numChannels was written immediately afterwards.
  view.setUint16(20, 1, true);   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);   // blockAlign
  view.setUint16(34, 16, true);  // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID: 'data'
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}
|
||||
|
||||
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Reduce the sample rate of `buffer` (captured at recordSampleRate) to
// exportSampleRate by averaging the source samples mapped to each
// output slot. Returns the input unchanged when the rates match.
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let srcPos = 0;
  for (let dst = 0; dst < out.length; ++dst) {
    const srcEnd = Math.round((dst + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = srcPos; i < srcEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[dst] = sum / count;
    srcPos = srcEnd;
  }
  return out;
}
|
||||
136
python-api-examples/web/js/upload.js
Normal file
136
python-api-examples/web/js/upload.js
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
References
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/FileList
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/FileReader
|
||||
https://javascript.info/arraybuffer-binary-arrays
|
||||
https://developer.mozilla.org/zh-CN/docs/Web/API/WebSocket
|
||||
https://developer.mozilla.org/en-US/docs/Web/API/WebSocket/send
|
||||
*/
|
||||
|
||||
var socket;
|
||||
|
||||
const serverIpInput = document.getElementById('server-ip');
|
||||
const serverPortInput = document.getElementById('server-port');
|
||||
|
||||
const connectBtn = document.getElementById('connect');
|
||||
const uploadBtn = document.getElementById('file');
|
||||
|
||||
// Open a WebSocket to the ASR server named by the ip/port inputs. The
// upload button is enabled only while the connection is alive, and the
// single result message from the server is shown in the results box.
function initWebSocket() {
  const scheme = window.location.protocol == 'https:' ? 'wss://' : 'ws://';
  const host = serverIpInput.value;
  const port = serverPortInput.value;
  console.log('protocol: ', scheme);
  console.log('server_ip: ', host);
  console.log('server_port: ', port);

  const uri = scheme + host + ':' + port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: uploading becomes possible.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    uploadBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable uploading until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    uploadBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server replies once with the full recognition result; show it,
  // acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
|
||||
|
||||
// Pre-fill the server address inputs from the page's own location and
// display wss:// when the page is served over https.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  const secure = window.location.protocol == 'https:';
  if (secure) {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

// Connect on demand rather than at page load.
connectBtn.onclick = () => {
  initWebSocket();
};
|
||||
|
||||
// Tell the server how many payload bytes follow by sending a
// 4-byte little-endian int32 length header over the websocket.
function send_header(n) {
  const headerBuf = new ArrayBuffer(4);
  const dv = new DataView(headerBuf);
  dv.setInt32(0, n, true /* littleEndian */);
  socket.send(new Int32Array(headerBuf, 0, 1));
}
|
||||
|
||||
// Read the selected file, convert its int16 samples to float32 in
// [-1, 1), and send them to the server in 4 KiB slices preceded by a
// length header.
//
// NOTE(review): the file is assumed to be a 16-bit WAV with a plain
// 44-byte header (22 int16 slots are skipped); files with extra header
// chunks would be mis-parsed — the original carries the same TODO.
function onFileChange() {
  var chosen = document.getElementById('file').files;

  if (chosen.length == 0) {
    console.log('No file selected');
    return;
  }

  console.log('files: ' + chosen);

  const wavFile = chosen[0];
  console.log(wavFile);
  console.log('file.name ' + wavFile.name);
  console.log('file.type ' + wavFile.type);
  console.log('file.size ' + wavFile.size);

  const reader = new FileReader();
  reader.onload = () => {
    console.log('reading file!');
    const wavView = new Int16Array(reader.result);
    // we assume the input file is a wav file.
    // TODO: add some checks here.
    const int16_samples = wavView.subarray(22);  // header has 44 bytes == 22 shorts
    const num_samples = int16_samples.length;
    const float32_samples = new Float32Array(num_samples);
    console.log('num_samples ' + num_samples)

    for (let k = 0; k < num_samples; ++k) {
      float32_samples[k] = int16_samples[k] / 32768.
    }

    // Send 1024 audio samples per request.
    //
    // It has two purposes:
    // (1) Simulate streaming
    // (2) There is a limit on the number of bytes in the payload that can be
    //     sent by websocket, which is 1MB, I think. We can send a large
    //     audio file for decoding in this approach.
    const payload = float32_samples.buffer;
    const chunkBytes = 1024 * 4;  // send this number of bytes per request.
    console.log('buf length, ' + payload.byteLength);
    send_header(payload.byteLength);
    for (let start = 0; start < payload.byteLength; start += chunkBytes) {
      socket.send(payload.slice(start, start + chunkBytes));
    }
  };

  reader.readAsArrayBuffer(wavFile);
}
|
||||
|
||||
// Wire up the "Clear results" button: clicking it empties the results box.
const clearBtn = document.getElementById('clear');
clearBtn.onclick = () => {
  console.log('clicked');
  document.getElementById('results').value = '';
};
|
||||
BIN
python-api-examples/web/k2-logo.png
Normal file
BIN
python-api-examples/web/k2-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
26
python-api-examples/web/nav-partial.html
Normal file
26
python-api-examples/web/nav-partial.html
Normal file
@@ -0,0 +1,26 @@
|
||||
<nav class="navbar navbar-expand-lg navbar-light bg-light">
|
||||
<a class="navbar-brand" href="index.html">Next-gen Kaldi demo</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item active">
|
||||
<a class="nav-link" href="index.html">Home <span class="sr-only">(current)</span></a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="upload.html">Upload</a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="streaming_record.html">Streaming-Record</a>
|
||||
</li>
|
||||
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="offline_record.html">Offline-Record</a>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
81
python-api-examples/web/offline_record.html
Normal file
81
python-api-examples/web/offline_record.html
Normal file
@@ -0,0 +1,81 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Offline record for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from offline recordings</h3>
|
||||
<div class="container">
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12">
|
||||
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="offline_record">Offline-Record</button>
|
||||
</div>
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="offline_stop">Offline-Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
|
||||
<section flex="1" overflow="auto" id="sound-clips">
|
||||
</section>
|
||||
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/offline_record.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
BIN
python-api-examples/web/pic/web-ui.png
Normal file
BIN
python-api-examples/web/pic/web-ui.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
75
python-api-examples/web/start-https-server.py
Executable file
75
python-api-examples/web/start-https-server.py
Executable file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Code in this file is modified from
|
||||
# https://stackoverflow.com/questions/19705785/python-3-simple-https-server
|
||||
|
||||
import argparse
|
||||
import http.server
|
||||
import ssl
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
"""
|
||||
Usage:
|
||||
|
||||
./start-https-server.py \
|
||||
--server-address 0.0.0.0 \
|
||||
--server-port 6007 \
|
||||
--cert ./cert.pem
|
||||
"""
|
||||
|
||||
|
||||
def get_args():
    """Parse and return command-line options for the HTTPS static-file server.

    Returns:
      An ``argparse.Namespace`` with ``server_address``, ``server_port``,
      and ``certificate`` attributes.
    """
    ap = argparse.ArgumentParser()

    ap.add_argument(
        "--server-address",
        type=str,
        default="0.0.0.0",
        help="""IP address which this server will bind to""",
    )
    ap.add_argument(
        "--server-port",
        type=int,
        default=6007,
        help="""Port number on which this server will listen""",
    )
    ap.add_argument(
        "--certificate",
        type=str,
        default="cert.pem",
        help="""Path to the X.509 certificate. You can use
        ./generate-certificate.py to generate it""",
    )

    return ap.parse_args()
|
||||
|
||||
|
||||
def main():
    """Start an HTTPS server serving files from the current directory.

    Reads the bind address, port, and certificate path from the command
    line (see ``get_args``), wraps the listening socket in TLS, and
    serves forever. Exits with a non-zero status if the certificate
    file does not exist.
    """
    args = get_args()
    print(f"{vars(args)}")

    # Fail fast (before binding the port) if the certificate is missing.
    if not Path(args.certificate).is_file():
        print("Please run ./generate-certificate.py to generate a certificate")
        sys.exit(-1)

    server_address = (args.server_address, args.server_port)
    httpd = http.server.HTTPServer(
        server_address, http.server.SimpleHTTPRequestHandler
    )

    # ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12;
    # use an SSLContext configured for the server side of a TLS connection.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    context.load_cert_chain(certfile=args.certificate)
    httpd.socket = context.wrap_socket(httpd.socket, server_side=True)

    print(
        "The server is listening at the following address:\n"
        f"https://{args.server_address}:{args.server_port}"
    )
    httpd.serve_forever()
|
||||
|
||||
|
||||
# Run the HTTPS server only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||
81
python-api-examples/web/streaming_record.html
Normal file
81
python-api-examples/web/streaming_record.html
Normal file
@@ -0,0 +1,81 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Streaming record for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from real-time recordings</h3>
|
||||
<div class="container">
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12">
|
||||
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="streaming_record">Streaming-Record</button>
|
||||
</div>
|
||||
<div class="col">
|
||||
<button class="btn btn-primary btn-block" id="streaming_stop">Streaming-Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
|
||||
<section flex="1" overflow="auto" id="sound-clips">
|
||||
</section>
|
||||
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/streaming_record.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
68
python-api-examples/web/upload.html
Normal file
68
python-api-examples/web/upload.html
Normal file
@@ -0,0 +1,68 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<!-- Required meta tags -->
|
||||
<meta charset="utf-8"></meta>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>
|
||||
|
||||
<!-- Bootstrap CSS -->
|
||||
<link rel="stylesheet"
|
||||
href="./css/bootstrap.min.css"
|
||||
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
|
||||
crossorigin="anonymous">
|
||||
</link>
|
||||
|
||||
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
|
||||
|
||||
<title>Next-gen Kaldi demo (Upload file for recognition)</title>
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="nav"></div>
|
||||
<script>
|
||||
$(function(){
|
||||
$("#nav").load("nav-partial.html");
|
||||
});
|
||||
</script>
|
||||
|
||||
<h3>Recognition from a selected file</h3>
|
||||
<div class="input-group mb-1">
|
||||
<div class="input-group-prepend">
|
||||
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
|
||||
</div>
|
||||
<span class="input-group-text" id="ws-protocol">ws://</span>
|
||||
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
|
||||
<span class="input-group-text">:</span>
|
||||
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
|
||||
</div>
|
||||
|
||||
<form>
|
||||
<div class="mb-3">
|
||||
<label for="file" class="form-label">Select file</label>
|
||||
<input class="form-control" type="file" id="file" accept=".wav" onchange="onFileChange()" disabled="true"></input>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="results" class="form-label">Recognition results</label>
|
||||
<textarea class="form-control" id="results" rows="8"></textarea>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
|
||||
</form>
|
||||
|
||||
<!-- Optional JavaScript -->
|
||||
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
|
||||
<script src="./js/popper.min.js"
|
||||
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/bootstrap.min.js"
|
||||
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
|
||||
crossorigin="anonymous">
|
||||
</script>
|
||||
|
||||
<script src="./js/upload.js"> </script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,4 +1,5 @@
|
||||
from _sherpa_onnx import Display
|
||||
|
||||
from .online_recognizer import OnlineRecognizer
|
||||
from .online_recognizer import OnlineStream
|
||||
from .offline_recognizer import OfflineRecognizer
|
||||
|
||||
@@ -127,6 +127,7 @@ class OnlineRecognizer(object):
|
||||
)
|
||||
|
||||
self.recognizer = _Recognizer(recognizer_config)
|
||||
self.config = recognizer_config
|
||||
|
||||
    def create_stream(self):
        """Create and return a new stream from the underlying recognizer."""
        return self.recognizer.create_stream()
|
||||
|
||||
Reference in New Issue
Block a user