diff --git a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py index 36f2b548..4ed67602 100755 --- a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py +++ b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py @@ -82,8 +82,7 @@ def get_args(): return parser.parse_args() -def create_recognizer(): - args = get_args() +def create_recognizer(args): assert_file_exists(args.encoder) assert_file_exists(args.decoder) assert_file_exists(args.joiner) @@ -110,7 +109,18 @@ def create_recognizer(): def main(): - recognizer = create_recognizer() + args = get_args() + + devices = sd.query_devices() + if len(devices) == 0: + print("No microphone devices found") + sys.exit(0) + + print(devices) + default_input_device_idx = sd.default.device[0] + print(f'Use default device: {devices[default_input_device_idx]["name"]}') + + recognizer = create_recognizer(args) print("Started! Please speak") # The model is using 16 kHz, we use 48 kHz here to demonstrate that @@ -145,10 +155,6 @@ def main(): if __name__ == "__main__": - devices = sd.query_devices() - print(devices) - default_input_device_idx = sd.default.device[0] - print(f'Use default device: {devices[default_input_device_idx]["name"]}') try: main() diff --git a/python-api-examples/speech-recognition-from-microphone.py b/python-api-examples/speech-recognition-from-microphone.py index 9723230c..9f6be910 100755 --- a/python-api-examples/speech-recognition-from-microphone.py +++ b/python-api-examples/speech-recognition-from-microphone.py @@ -10,7 +10,7 @@ import argparse import sys from pathlib import Path -from typing import List, Tuple +from typing import List import sentencepiece as spm try: @@ -136,8 +136,7 @@ def get_args(): return parser.parse_args() -def create_recognizer(): - args = get_args() +def create_recognizer(args): assert_file_exists(args.encoder) assert_file_exists(args.decoder) assert_file_exists(args.joiner) @@ -185,13 +184,22 @@ def encode_contexts(args, contexts: List[str]) -> List[List[int]]: def main(): args = get_args() + devices = sd.query_devices() + if len(devices) == 0: + print("No microphone devices found") + sys.exit(0) + + print(devices) + default_input_device_idx = sd.default.device[0] + print(f'Use default device: {devices[default_input_device_idx]["name"]}') + contexts_list = [] contexts = [x.strip().upper() for x in args.contexts.split("/") if x.strip()] if contexts: print(f"Contexts list: {contexts}") contexts_list = encode_contexts(args, contexts) - recognizer = create_recognizer() + recognizer = create_recognizer(args) print("Started! Please speak") # The model is using 16 kHz, we use 48 kHz here to demonstrate that @@ -217,11 +225,6 @@ def main(): if __name__ == "__main__": - devices = sd.query_devices() - print(devices) - default_input_device_idx = sd.default.device[0] - print(f'Use default device: {devices[default_input_device_idx]["name"]}') - try: main() except KeyboardInterrupt: