"""Minimal Flask service exposing a speech-recognition pipeline over HTTP.

Endpoints:
    POST /recognition  -- transcribe the audio bytes sent as the request body.
    GET  /ready, /health, /health_check  -- liveness probes.
"""

import json
from threading import Lock

import torch
from flask import Flask, jsonify, request
from transformers import pipeline

# The model is loaded once at import time and shared by every request.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="/model",
    torch_dtype=torch.float16,
    device="cuda:0",
)

app = Flask(__name__)

# Serializes calls into the shared pipeline so only one request at a time
# runs inference.
pipeline_lock = Lock()


def _error(message, status=400):
    """Build a JSON error response with the given HTTP status code."""
    return jsonify({'RecognitionStatus': 'Error', 'Message': message}), status


@app.route('/recognition', methods=['POST'])
def predict():
    """Transcribe the audio in the request body and return the text as JSON.

    The raw request body is passed to the ASR pipeline as the audio input.
    Optional settings are read from a JSON-encoded ``params`` query-string
    argument, e.g. ``?params={"language": "de"}``; ``language`` defaults to
    ``'en'`` when it is not supplied.

    Returns:
        On success, ``{'RecognitionStatus': 'Success', 'DisplayText': <text>}``.
        A 400 response with ``RecognitionStatus == 'Error'`` when the body is
        empty or ``params`` is not a valid JSON object.
    """
    audio_data = request.data
    if not audio_data:
        return _error('Request body must contain audio data.')

    # Flask's request object has no ``params`` attribute; the JSON blob is
    # read from the query string instead.
    # NOTE(review): assumes clients send ``params`` as a query argument --
    # confirm against the actual callers.
    raw_params = request.args.get('params') or '{}'
    try:
        params_json = json.loads(raw_params)
    except json.JSONDecodeError:
        return _error("'params' must be valid JSON.")
    if not isinstance(params_json, dict):
        return _error("'params' must be a JSON object.")

    lang = params_json.get('language', 'en')

    with pipeline_lock:
        res = asr_pipeline(
            inputs=audio_data,
            generate_kwargs={"language": lang},
        )

    text = res['text'].strip()
    return jsonify({'RecognitionStatus': 'Success', "DisplayText": text})


@app.route('/ready', methods=['GET'])
@app.route('/health', methods=['GET'])
@app.route('/health_check', methods=['GET'])
def health():
    """Return a static OK payload for readiness and health probes."""
    return jsonify({'status': 'ok'})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=80)