Support streaming paraformer (#263)

This commit is contained in:
Fangjun Kuang
2023-08-14 10:32:14 +08:00
committed by GitHub
parent a4bff28e21
commit 6038e2aa62
38 changed files with 1488 additions and 112 deletions

View File

@@ -16,9 +16,9 @@ Example:
(1) Without a certificate
python3 ./python-api-examples/streaming_server.py \
--encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
--decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
--encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt
(2) With a certificate
@@ -32,9 +32,9 @@ python3 ./python-api-examples/streaming_server.py \
(b) Start the server
python3 ./python-api-examples/streaming_server.py \
--encoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
--decoder-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner-model ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
--encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
--certificate ./python-api-examples/web/cert.pem
@@ -113,24 +113,33 @@ def setup_logger(
def add_model_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--encoder-model",
"--encoder",
type=str,
required=True,
help="Path to the encoder model",
help="Path to the transducer encoder model",
)
parser.add_argument(
"--decoder-model",
"--decoder",
type=str,
required=True,
help="Path to the decoder model.",
help="Path to the transducer decoder model.",
)
parser.add_argument(
"--joiner-model",
"--joiner",
type=str,
required=True,
help="Path to the joiner model.",
help="Path to the transducer joiner model.",
)
parser.add_argument(
"--paraformer-encoder",
type=str,
help="Path to the paraformer encoder model",
)
parser.add_argument(
"--paraformer-decoder",
type=str,
help="Path to the paraformer decoder model.",
)
parser.add_argument(
@@ -323,22 +332,40 @@ def get_args():
def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
    """Build the streaming recognizer selected by the command-line arguments.

    A transducer recognizer is created when ``args.encoder`` is set;
    otherwise a paraformer recognizer is created when
    ``args.paraformer_encoder`` is set. The transducer takes precedence
    if both are given.

    Args:
      args:
        The parsed command-line arguments. Model paths are read from
        ``encoder``/``decoder``/``joiner`` (transducer) or from
        ``paraformer_encoder``/``paraformer_decoder`` (paraformer).

    Returns:
      The recognizer returned by ``OnlineRecognizer.from_transducer`` or
      ``OnlineRecognizer.from_paraformer``.

    Raises:
      ValueError: If neither a transducer encoder nor a paraformer encoder
        is provided.
    """
    # Fail early, before touching any other argument, when no model is given.
    if not args.encoder and not args.paraformer_encoder:
        raise ValueError("Please provide a model")

    # Keyword arguments passed unchanged to both model families.
    shared = dict(
        tokens=args.tokens,
        num_threads=args.num_threads,
        sample_rate=args.sample_rate,
        feature_dim=args.feat_dim,
        decoding_method=args.decoding_method,
        enable_endpoint_detection=args.use_endpoint != 0,
        rule1_min_trailing_silence=args.rule1_min_trailing_silence,
        rule2_min_trailing_silence=args.rule2_min_trailing_silence,
        rule3_min_utterance_length=args.rule3_min_utterance_length,
        provider=args.provider,
    )

    if args.encoder:
        # Only the transducer takes a joiner and max_active_paths.
        return sherpa_onnx.OnlineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            max_active_paths=args.num_active_paths,
            **shared,
        )

    return sherpa_onnx.OnlineRecognizer.from_paraformer(
        encoder=args.paraformer_encoder,
        decoder=args.paraformer_decoder,
        **shared,
    )
@@ -654,11 +681,25 @@ Go back to <a href="/streaming_record.html">/streaming_record.html</a>
def check_args(args):
assert Path(args.encoder_model).is_file(), f"{args.encoder_model} does not exist"
if args.encoder:
assert Path(args.encoder).is_file(), f"{args.encoder} does not exist"
assert Path(args.decoder_model).is_file(), f"{args.decoder_model} does not exist"
assert Path(args.decoder).is_file(), f"{args.decoder} does not exist"
assert Path(args.joiner_model).is_file(), f"{args.joiner_model} does not exist"
assert Path(args.joiner).is_file(), f"{args.joiner} does not exist"
assert args.paraformer_encoder is None, args.paraformer_encoder
assert args.paraformer_decoder is None, args.paraformer_decoder
elif args.paraformer_encoder:
assert Path(
args.paraformer_encoder
).is_file(), f"{args.paraformer_encoder} does not exist"
assert Path(
args.paraformer_decoder
).is_file(), f"{args.paraformer_decoder} does not exist"
else:
raise ValueError("Please provide a model")
if not Path(args.tokens).is_file():
raise ValueError(f"{args.tokens} does not exist")