Fix Python two-pass ASR examples (#1230)
This commit is contained in:
@@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
||||
|
||||
def run_second_pass(
|
||||
recognizer: sherpa_onnx.OfflineRecognizer,
|
||||
sample_buffers: List[np.ndarray],
|
||||
samples: np.ndarray,
|
||||
sample_rate: int,
|
||||
):
|
||||
stream = recognizer.create_stream()
|
||||
samples = np.concatenate(sample_buffers)
|
||||
stream.accept_waveform(sample_rate, samples)
|
||||
|
||||
recognizer.decode_stream(stream)
|
||||
@@ -407,14 +406,20 @@ def main():
|
||||
|
||||
if is_endpoint:
|
||||
if result:
|
||||
samples = np.concatenate(sample_buffers)
|
||||
# There are internal sample buffers inside the streaming
|
||||
# feature extractor, so we cannot send all samples to
|
||||
# the 2nd pass. Here 8000 is just an empirical value
|
||||
# that should work for most streaming models in sherpa-onnx
|
||||
sample_buffers = [samples[-8000:]]
|
||||
samples = samples[:-8000]
|
||||
result = run_second_pass(
|
||||
recognizer=second_recognizer,
|
||||
sample_buffers=sample_buffers,
|
||||
samples=samples,
|
||||
sample_rate=sample_rate,
|
||||
)
|
||||
result = result.lower().strip()
|
||||
|
||||
sample_buffers = []
|
||||
print(
|
||||
"\r{}:{}".format(segment_id, " " * len(last_result)),
|
||||
end="",
|
||||
|
||||
@@ -18,8 +18,8 @@ The input text can contain English words.
|
||||
|
||||
Usage:
|
||||
|
||||
Please download the model from:
|
||||
https://huggingface.co/frankyoujian/Edge-Punct-Casing/resolve/main/sherpa-onnx-cnn-bilstm-unigram-bpe-en.7z
|
||||
Please download the model from:
|
||||
https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
|
||||
./bin/Release/sherpa-onnx-online-punctuation \
|
||||
--cnn-bilstm=/path/to/model.onnx \
|
||||
|
||||
Reference in New Issue
Block a user