commit 8c4e98688a0f39fbf2a22a019e9ea2bdc97cc852 Author: zhousha <736730048@qq.com> Date: Tue Aug 12 15:03:58 2025 +0800 merge code repo for sensevoice and whisper diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..4dbf4df Binary files /dev/null and b/.DS_Store differ diff --git a/Dockerfile_sensevoice b/Dockerfile_sensevoice new file mode 100644 index 0000000..eb4c345 --- /dev/null +++ b/Dockerfile_sensevoice @@ -0,0 +1,7 @@ +FROM corex:3.2.1 + +WORKDIR /workspace +COPY requirements_sensevoice.txt constraints_sensevoice.txt server_sensevoice.py launch_sensevoice.sh /workspace/ +RUN pip install -r requirements_sensevoice.txt -c constraints_sensevoice.txt + +ENTRYPOINT ["/bin/bash", "launch_sensevoice.sh"] \ No newline at end of file diff --git a/Dockerfile_whisper b/Dockerfile_whisper new file mode 100644 index 0000000..ab82865 --- /dev/null +++ b/Dockerfile_whisper @@ -0,0 +1,10 @@ +FROM zibo.harbor.iluvatar.com.cn:30000/saas/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.2 + +RUN mkdir /workspace +WORKDIR /workspace/ + +COPY asr_server_whisper.py /workspace/asr_server_whisper.py +COPY ./launch_service /workspace/launch_service +COPY whisper-tiny /model + +ENTRYPOINT ["./launch_service"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..a3c0803 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# tiangai100-sensevoice-funasr +# tiangai100-whisper + +【语音识别】 + +funasr可正常照nvidia方式使用。 diff --git a/asr_server_whisper.py b/asr_server_whisper.py new file mode 100644 index 0000000..73b2531 --- /dev/null +++ b/asr_server_whisper.py @@ -0,0 +1,34 @@ +import torch +from transformers import pipeline +from flask import Flask, request, jsonify +from threading import Lock + +asr_pipeline = pipeline( + task="automatic-speech-recognition", + model="/model", + torch_dtype=torch.float16, + device="cuda:0" +) + +app = Flask(__name__) +pipeline_lock = Lock() + +@app.route('/recognition', methods=['POST']) +def predict(): + audio_data = 
request.data +    params = request.args +    # Flask exposes query parameters via request.args; request.params does not exist +    lang = params.get('language', 'en') +    with pipeline_lock: +        res = asr_pipeline(inputs=audio_data, generate_kwargs={"language": lang}) +        text = res['text'].strip() +    return jsonify({'RecognitionStatus': 'Success', "DisplayText": text}) + +@app.route('/ready', methods=['GET']) +@app.route('/health', methods=['GET']) +@app.route('/health_check', methods=['GET']) +def health(): +    return jsonify({'status': 'ok'}) + +if __name__ == '__main__': +    app.run(host='0.0.0.0', port=80) diff --git a/constraints_sensevoice.txt b/constraints_sensevoice.txt new file mode 100644 index 0000000..65d66ac --- /dev/null +++ b/constraints_sensevoice.txt @@ -0,0 +1 @@ +torch==2.1.0+corex.3.2.1 \ No newline at end of file diff --git a/launch_sensevoice.sh b/launch_sensevoice.sh new file mode 100755 index 0000000..e75b371 --- /dev/null +++ b/launch_sensevoice.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +python3 server_sensevoice.py + diff --git a/launch_service b/launch_service new file mode 100755 index 0000000..3ad80ba --- /dev/null +++ b/launch_service @@ -0,0 +1,9 @@ +#!/bin/bash + +date +cat /proc/cpuinfo +ixsmi +export +date + +python3 asr_server_whisper.py diff --git a/requirements_sensevoice.txt b/requirements_sensevoice.txt new file mode 100644 index 0000000..8c9e4a6 --- /dev/null +++ b/requirements_sensevoice.txt @@ -0,0 +1,3 @@ +funasr +fastapi +uvicorn[standard] \ No newline at end of file diff --git a/server_sensevoice.py b/server_sensevoice.py new file mode 100644 index 0000000..a87e43c --- /dev/null +++ b/server_sensevoice.py @@ -0,0 +1,41 @@ +from fastapi import FastAPI, Request +from contextlib import asynccontextmanager +from funasr import AutoModel +from funasr.utils.postprocess_utils import rich_transcription_postprocess +import os +import torch + +model_dir = os.getenv("MODEL_DIR", "/model/iic/SenseVoiceSmall") +model = None + +@asynccontextmanager +async def lifespan(app: FastAPI): +    global model +    if model is None: +        
device = "cuda" if torch.cuda.is_available() else "cpu" +        model = AutoModel(model=model_dir, disable_update=True, device=device) +    yield +    pass + +app = FastAPI(lifespan=lifespan) + +@app.post("/recognition") +async def asr(request: Request, language: str = "auto"): +    audio_data = await request.body() +    res = model.generate(input=audio_data, language=language, use_itn=True, ban_emo_unk=True) +    text = rich_transcription_postprocess(res[0]["text"]) +    return { +        "RecognitionStatus": "Success", +        "DisplayText": text +    } + +@app.get("/health") +@app.get("/ready") +def ready(): +    return {"status": "ok"} + + +if __name__ == "__main__": +    import uvicorn +    uvicorn.run(app, host="0.0.0.0", port=80) + diff --git a/whisper-tiny/.msc b/whisper-tiny/.msc new file mode 100644 index 0000000..850289a Binary files /dev/null and b/whisper-tiny/.msc differ diff --git a/whisper-tiny/.mv b/whisper-tiny/.mv new file mode 100644 index 0000000..2b25642 --- /dev/null +++ b/whisper-tiny/.mv @@ -0,0 +1 @@ +Revision:master,CreatedAt:1736300537 \ No newline at end of file