Support resampling (#77)

This commit is contained in:
Fangjun Kuang
2023-03-03 16:42:33 +08:00
committed by GitHub
parent 5f31b22c12
commit 9d8fddef01
10 changed files with 96 additions and 26 deletions

View File

@@ -78,8 +78,6 @@ def get_args():
def main():
sample_rate = 16000
args = get_args()
assert_file_exists(args.encoder)
assert_file_exists(args.decoder)
@@ -95,12 +93,16 @@ def main():
decoder=args.decoder,
joiner=args.joiner,
num_threads=args.num_threads,
sample_rate=sample_rate,
sample_rate=16000,
feature_dim=80,
decoding_method=args.decoding_method,
)
with wave.open(args.wave_filename) as f:
assert f.getframerate() == sample_rate, f.getframerate()
# If the wave file has a different sampling rate from the one
# expected by the model (16 kHz in our case), we will do
# resampling inside sherpa-onnx
wave_file_sample_rate = f.getframerate()
assert f.getnchannels() == 1, f.getnchannels()
assert f.getsampwidth() == 2, f.getsampwidth() # it is in bytes
num_samples = f.getnframes()
@@ -110,17 +112,17 @@ def main():
samples_float32 = samples_float32 / 32768
duration = len(samples_float32) / sample_rate
duration = len(samples_float32) / wave_file_sample_rate
start_time = time.time()
print("Started!")
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, samples_float32)
stream.accept_waveform(wave_file_sample_rate, samples_float32)
tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
stream.accept_waveform(sample_rate, tail_paddings)
tail_paddings = np.zeros(int(0.2 * wave_file_sample_rate), dtype=np.float32)
stream.accept_waveform(wave_file_sample_rate, tail_paddings)
stream.input_finished()

View File

@@ -100,7 +100,9 @@ def main():
recognizer = create_recognizer()
print("Started! Please speak")
sample_rate = 16000
# The model is using 16 kHz, we use 48 kHz here to demonstrate that
# sherpa-onnx will do resampling inside.
sample_rate = 48000
samples_per_read = int(0.1 * sample_rate) # 0.1 second = 100 ms
last_result = ""
stream = recognizer.create_stream()

View File

@@ -92,9 +92,12 @@ def create_recognizer():
def main():
print("Started! Please speak")
recognizer = create_recognizer()
sample_rate = 16000
print("Started! Please speak")
# The model is using 16 kHz, we use 48 kHz here to demonstrate that
# sherpa-onnx will do resampling inside.
sample_rate = 48000
samples_per_read = int(0.1 * sample_rate) # 0.1 second = 100 ms
last_result = ""
stream = recognizer.create_stream()