Fix paraformer English word split
This commit is contained in:
18
README.md
18
README.md
@@ -7,12 +7,28 @@ docker build -f ./Dockerfile.funasr-mr100 -t <your_image> .
|
|||||||
其中,基础镜像 corex:4.3.0 通过联系天数智芯智铠100厂商技术支持可获取
|
其中,基础镜像 corex:4.3.0 通过联系天数智芯智铠100厂商技术支持可获取
|
||||||
|
|
||||||
## 使用说明
|
## 使用说明
|
||||||
|
|
||||||
|
### 使用 FastAPI 测试 ASR 服务
|
||||||
|
例如:
|
||||||
|
```shell
|
||||||
|
docker run -it --rm --name iluvatar_test_asr -p 23333:1111 \
|
||||||
|
--privileged \
|
||||||
|
-v /lib/modules:/lib/modules \
|
||||||
|
-v /dev:/dev \
|
||||||
|
-v /usr/src:/usr/src \
|
||||||
|
-v /mnt/gpfs/leaderboard/modelHubXC/iic/SenseVoiceSmall:/model \
|
||||||
|
-e CUDA_VISIBLE_DEVICES=0 \
|
||||||
|
--entrypoint python3 <IMAGE_NAME> main.py \
|
||||||
|
--port 1111 --model_dir /model --model_type sensevoice
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### 快速镜像测试
|
### 快速镜像测试
|
||||||
对funasr的测试需要在以上构造好的镜像容器内测试,测试步骤
|
对funasr的测试需要在以上构造好的镜像容器内测试,测试步骤
|
||||||
1. 本项目中附带上了示例测试数据,音频文件为`lei-jun-test.wav`,音频的识别准确内容文件为`lei-jun.txt`,用户需要准备好相应的ASR模型路径,本例中假设我们已经下载好了SenseVoiceSmall模型存放于/model/SenseVoiceSmall
|
1. 本项目中附带上了示例测试数据,音频文件为`lei-jun-test.wav`,音频的识别准确内容文件为`lei-jun.txt`,用户需要准备好相应的ASR模型路径,本例中假设我们已经下载好了SenseVoiceSmall模型存放于/model/SenseVoiceSmall
|
||||||
2. 在本项目路径下执行以下快速测试命令
|
2. 在本项目路径下执行以下快速测试命令
|
||||||
```shell
|
```shell
|
||||||
docker run -it \
|
docker run -it \
|
||||||
-v /usr/src:/usr/src \
|
-v /usr/src:/usr/src \
|
||||||
-v /lib/modules:/lib/modules --device=/dev/iluvatar0:/dev/iluvatar0 \
|
-v /lib/modules:/lib/modules --device=/dev/iluvatar0:/dev/iluvatar0 \
|
||||||
-v $PWD:/tmp/workspace \
|
-v $PWD:/tmp/workspace \
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ def split_audio(waveform, sample_rate, segment_seconds=20):
|
|||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
def load_model():
|
def load_model():
|
||||||
global status, model, device
|
global status, model, device
|
||||||
|
|
||||||
config = app.state.config
|
config = app.state.config
|
||||||
use_gpu = config.get("use_gpu", True)
|
use_gpu = config.get("use_gpu", True)
|
||||||
model_dir = config.get("model_dir", "/model")
|
model_dir = config.get("model_dir", "/model")
|
||||||
@@ -85,7 +85,7 @@ def load_model():
|
|||||||
print(" model_type =", model_type, flush=True)
|
print(" model_type =", model_type, flush=True)
|
||||||
print(" use_gpu =", use_gpu, flush=True)
|
print(" use_gpu =", use_gpu, flush=True)
|
||||||
print(" warmup =", warmup, flush=True)
|
print(" warmup =", warmup, flush=True)
|
||||||
|
|
||||||
device = "cpu"
|
device = "cpu"
|
||||||
if use_gpu:
|
if use_gpu:
|
||||||
if CUSTOM_DEVICE.startswith("mlu"):
|
if CUSTOM_DEVICE.startswith("mlu"):
|
||||||
@@ -94,7 +94,7 @@ def load_model():
|
|||||||
device = "npu:1"
|
device = "npu:1"
|
||||||
else:
|
else:
|
||||||
device = "cuda:0"
|
device = "cuda:0"
|
||||||
|
|
||||||
# 针对加速卡的特殊处理部分
|
# 针对加速卡的特殊处理部分
|
||||||
if device == "cuda:0" and torch.cuda.get_device_name() == "Iluvatar BI-V100" and model_type == "whisper":
|
if device == "cuda:0" and torch.cuda.get_device_name() == "Iluvatar BI-V100" and model_type == "whisper":
|
||||||
# 天垓100情况下的Whisper需要绕过不支持算子
|
# 天垓100情况下的Whisper需要绕过不支持算子
|
||||||
@@ -102,14 +102,14 @@ def load_model():
|
|||||||
torch.backends.cuda.enable_mem_efficient_sdp(False)
|
torch.backends.cuda.enable_mem_efficient_sdp(False)
|
||||||
torch.backends.cuda.enable_math_sdp(True)
|
torch.backends.cuda.enable_math_sdp(True)
|
||||||
print(f"device: {device}", flush=True)
|
print(f"device: {device}", flush=True)
|
||||||
|
|
||||||
dense_convert = False
|
dense_convert = False
|
||||||
if device == "cuda:0" and CUSTOM_DEVICE.startswith("pt") and model_type == "whisper":
|
if device == "cuda:0" and CUSTOM_DEVICE.startswith("pt") and model_type == "whisper":
|
||||||
dense_convert = True
|
dense_convert = True
|
||||||
if device.startswith("npu") and model_type == "whisper":
|
if device.startswith("npu") and model_type == "whisper":
|
||||||
# Ascend NPU 加载whisper的部分会有Sparse部分device不匹配
|
# Ascend NPU 加载whisper的部分会有Sparse部分device不匹配
|
||||||
dense_convert = True
|
dense_convert = True
|
||||||
|
|
||||||
print(f"dense_convert: {dense_convert}", flush=True)
|
print(f"dense_convert: {dense_convert}", flush=True)
|
||||||
if dense_convert:
|
if dense_convert:
|
||||||
model = AutoModel(
|
model = AutoModel(
|
||||||
@@ -132,23 +132,23 @@ def load_model():
|
|||||||
device=device,
|
device=device,
|
||||||
disable_update=True
|
disable_update=True
|
||||||
)
|
)
|
||||||
|
|
||||||
if device.startswith("npu") or warmup:
|
if device.startswith("npu") or warmup:
|
||||||
# Ascend NPU由于底层设计的不同,初始化卡的调度比其他卡更复杂,要先进行warmup
|
# Ascend NPU由于底层设计的不同,初始化卡的调度比其他卡更复杂,要先进行warmup
|
||||||
print("Start warmup...", flush=True)
|
print("Start warmup...", flush=True)
|
||||||
res = model.generate(input="warmup.wav")
|
res = model.generate(input="warmup.wav")
|
||||||
print("warmup complete.", flush=True)
|
print("warmup complete.", flush=True)
|
||||||
|
|
||||||
status = "Success"
|
status = "Success"
|
||||||
|
|
||||||
|
|
||||||
def test_funasr(audio_file, lang):
|
def test_funasr(audio_file, lang):
|
||||||
# 推理部分
|
# 推理部分
|
||||||
waveform, sample_rate = torchaudio.load(audio_file)
|
waveform, sample_rate = torchaudio.load(audio_file)
|
||||||
# print(waveform.shape)
|
# print(waveform.shape)
|
||||||
duration = waveform.shape[1] / sample_rate
|
duration = waveform.shape[1] / sample_rate
|
||||||
segments = split_audio(waveform, sample_rate, segment_seconds=20)
|
segments = split_audio(waveform, sample_rate, segment_seconds=20)
|
||||||
|
|
||||||
generated_text = ""
|
generated_text = ""
|
||||||
processing_time = 0
|
processing_time = 0
|
||||||
model_type = app.state.config.get("model_type", "sensevoice")
|
model_type = app.state.config.get("model_type", "sensevoice")
|
||||||
@@ -201,7 +201,8 @@ def test_funasr(audio_file, lang):
|
|||||||
)
|
)
|
||||||
text = res[0]["text"]
|
text = res[0]["text"]
|
||||||
# paraformer模型会一个字一个字输出,中间夹太多空格会影响1-cer的结果
|
# paraformer模型会一个字一个字输出,中间夹太多空格会影响1-cer的结果
|
||||||
text = text.replace(" ", "")
|
if lang == "zh":
|
||||||
|
text = text.replace(" ", "")
|
||||||
elif model_type == "conformer":
|
elif model_type == "conformer":
|
||||||
res = model.generate(
|
res = model.generate(
|
||||||
input=segment_path,
|
input=segment_path,
|
||||||
@@ -222,7 +223,7 @@ def test_funasr(audio_file, lang):
|
|||||||
generated_text += text
|
generated_text += text
|
||||||
processing_time += (ts2 - ts1)
|
processing_time += (ts2 - ts1)
|
||||||
os.remove(segment_path)
|
os.remove(segment_path)
|
||||||
|
|
||||||
rtf = processing_time / duration
|
rtf = processing_time / duration
|
||||||
print("Text:", generated_text, flush=True)
|
print("Text:", generated_text, flush=True)
|
||||||
print(f"Audio duration:\t{duration:.3f} s", flush=True)
|
print(f"Audio duration:\t{duration:.3f} s", flush=True)
|
||||||
@@ -255,11 +256,11 @@ def transduce(
|
|||||||
f.write(audio.file.read())
|
f.write(audio.file.read())
|
||||||
background_tasks.add_task(os.remove, file_path)
|
background_tasks.add_task(os.remove, file_path)
|
||||||
generated_text = test_funasr(file_path, lang)
|
generated_text = test_funasr(file_path, lang)
|
||||||
|
|
||||||
return {"generated_text": generated_text}
|
return {"generated_text": generated_text}
|
||||||
except Exception:
|
except Exception:
|
||||||
raise HTTPException(status_code=500, detail=f"Processing failed: \n{traceback.format_exc()}")
|
raise HTTPException(status_code=500, detail=f"Processing failed: \n{traceback.format_exc()}")
|
||||||
|
|
||||||
# if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
|
|
||||||
# uvicorn.run("fastapi_funasr:app", host="0.0.0.0", port=1111, workers=1)
|
# uvicorn.run("fastapi_funasr:app", host="0.0.0.0", port=1111, workers=1)
|
||||||
|
|||||||
Reference in New Issue
Block a user