# enginex-bi_series-asr-backup/bi_v100-whisper/asr_server_whisper.py

import json
from threading import Lock

import torch
from flask import Flask, request, jsonify
from transformers import pipeline

# Build the speech-recognition pipeline once, at import time. The model is
# loaded from `/model` and placed on CUDA device 0 with float16 weights.
asr_pipeline = pipeline(
    model="/model",
    task="automatic-speech-recognition",
    device="cuda:0",
    torch_dtype=torch.float16,
)

# Mutex taken around every call into the shared ASR pipeline.
pipeline_lock = Lock()
# Flask application object on which the HTTP routes below are registered.
app = Flask(__name__)

def _recognition_error(message, status_code=400):
    """Build an error reply that mirrors the shape of a successful one."""
    payload = {
        'RecognitionStatus': 'Error',
        'DisplayText': '',
        'Message': message,
    }
    return jsonify(payload), status_code


@app.route('/recognition', methods=['POST'])
def predict():
    """Transcribe the audio carried in the request body.

    The raw request body is handed to the ASR pipeline as the audio input.
    Optional settings come from a ``params`` request value (query string or
    form field) containing a JSON object; its ``language`` key is forwarded
    to generation and defaults to ``'en'`` when absent.

    Returns:
        A JSON response with ``RecognitionStatus`` and ``DisplayText``.
        An empty body or a malformed ``params`` value produces HTTP 400
        with ``RecognitionStatus`` set to ``'Error'``.
    """
    audio_data = request.data
    if not audio_data:
        return _recognition_error('request body contains no audio data')

    # Flask's request object has no ``params`` attribute; ``values`` merges
    # the query-string arguments and the form fields.
    raw_params = request.values.get('params')
    if raw_params:
        try:
            params_json = json.loads(raw_params)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return _recognition_error('params is not valid JSON')
        if not isinstance(params_json, dict):
            return _recognition_error('params must be a JSON object')
    else:
        params_json = {}
    lang = params_json.get('language', 'en')

    # Only one request at a time may run inference on the shared pipeline.
    with pipeline_lock:
        res = asr_pipeline(inputs=audio_data, generate_kwargs={"language": lang})
    text = res['text'].strip()
    return jsonify({'RecognitionStatus': 'Success', "DisplayText": text})

@app.route('/ready', methods=['GET'])
@app.route('/health', methods=['GET'])
@app.route('/health_check', methods=['GET'])
def health():
    """Answer the probe endpoints with a constant ``{"status": "ok"}`` body."""
    return jsonify(status='ok')

if __name__ == '__main__':
    # Direct execution: start Flask's built-in server on all interfaces, port 80.
    app.run(port=80, host='0.0.0.0')