diff --git a/README.md b/README.md
index 7ffcb40..eb98089 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,28 @@ docker build -f ./Dockerfile.funasr-mr100 -t <your_image> .
 其中，基础镜像 corex:4.3.0 通过联系天数智芯智铠100厂商技术支持可获取
 
 ## 使用说明
+
+### 使用 FastAPI 测试 ASR 服务
+例如，以下命令以 `main.py` 启动服务，将容器内的 1111 端口映射到宿主机的 23333 端口，并把宿主机上的 SenseVoiceSmall 模型目录挂载到容器内的 `/model`：
+```shell
+docker run -it --rm --name iluvatar_test_asr -p 23333:1111 \
+    --privileged \
+    -v /lib/modules:/lib/modules \
+    -v /dev:/dev \
+    -v /usr/src:/usr/src \
+    -v /mnt/gpfs/leaderboard/modelHubXC/iic/SenseVoiceSmall:/model \
+    -e CUDA_VISIBLE_DEVICES=0 \
+    --entrypoint python3 <your_image> main.py \
+    --port 1111 --model_dir /model --model_type sensevoice
+```
+
+
 ### 快速镜像测试
 对funasr的测试需要在以上构造好的镜像容器内测试，测试步骤
 1. 本项目中附带上了示例测试数据，音频文件为`lei-jun-test.wav`，音频的识别准确内容文件为`lei-jun.txt`，用户需要准备好相应的ASR模型路径，本例中假设我们已经下载好了SenseVoiceSmall模型存放于/model/SenseVoiceSmall
 2. 在本项目路径下执行以下快速测试命令
 ```shell
-docker run -it \ 
+docker run -it \
     -v /usr/src:/usr/src \
     -v /lib/modules:/lib/modules --device=/dev/iluvatar0:/dev/iluvatar0 \
     -v $PWD:/tmp/workspace \
diff --git a/fastapi_funasr.py b/fastapi_funasr.py
index a26363b..4a15382 100644
--- a/fastapi_funasr.py
+++ b/fastapi_funasr.py
@@ -74,7 +74,7 @@ def split_audio(waveform, sample_rate, segment_seconds=20):
 @app.on_event("startup")
 def load_model():
     global status, model, device
-    
+
     config = app.state.config
     use_gpu = config.get("use_gpu", True)
     model_dir = config.get("model_dir", "/model")
@@ -85,7 +85,7 @@ def load_model():
     print("   model_type =", model_type, flush=True)
     print("   use_gpu =", use_gpu, flush=True)
     print("   warmup =", warmup, flush=True)
-    
+
     device = "cpu"
     if use_gpu:
         if CUSTOM_DEVICE.startswith("mlu"):
@@ -94,7 +94,7 @@ def load_model():
             device = "npu:1"
         else:
             device = "cuda:0"
-    
+
     # 针对加速卡的特殊处理部分
     if device == "cuda:0" and torch.cuda.get_device_name() == "Iluvatar BI-V100" and model_type == "whisper":
         # 天垓100情况下的Whisper需要绕过不支持算子
@@ -102,14 +102,14 @@ def load_model():
         torch.backends.cuda.enable_mem_efficient_sdp(False)
         torch.backends.cuda.enable_math_sdp(True)
     print(f"device: {device}", flush=True)
-    
+
     dense_convert = False
     if device == "cuda:0" and CUSTOM_DEVICE.startswith("pt") and model_type == "whisper":
         dense_convert = True
     if device.startswith("npu") and model_type == "whisper":
         # Ascend NPU 加载whisper的部分会有Sparse部分device不匹配
         dense_convert = True
-    
+
     print(f"dense_convert: {dense_convert}", flush=True)
     if dense_convert:
         model = AutoModel(
@@ -132,23 +132,23 @@ def load_model():
             device=device,
             disable_update=True
         )
-        
+
     if device.startswith("npu") or warmup:
         # Ascend NPU由于底层设计的不同，初始化卡的调度比其他卡更复杂，要先进行warmup
         print("Start warmup...", flush=True)
         res = model.generate(input="warmup.wav")
         print("warmup complete.", flush=True)
-    
+
     status = "Success"
-    
-    
+
+
 def test_funasr(audio_file, lang):
     # 推理部分
     waveform, sample_rate = torchaudio.load(audio_file)
     # print(waveform.shape)
     duration = waveform.shape[1] / sample_rate
     segments = split_audio(waveform, sample_rate, segment_seconds=20)
-    
+
     generated_text = ""
     processing_time = 0
     model_type = app.state.config.get("model_type", "sensevoice")
@@ -201,7 +201,8 @@ def test_funasr(audio_file, lang):
                 )
                 text = res[0]["text"]
                 # paraformer模型会一个字一个字输出，中间夹太多空格会影响1-cer的结果
-                text = text.replace(" ", "")
+                if lang == "zh":
+                    text = text.replace(" ", "")
             elif model_type == "conformer":
                 res = model.generate(
                     input=segment_path,
@@ -222,7 +223,7 @@ def test_funasr(audio_file, lang):
             generated_text += text
             processing_time += (ts2 - ts1)
             os.remove(segment_path)
-    
+
     rtf = processing_time / duration
     print("Text:", generated_text, flush=True)
     print(f"Audio duration:\t{duration:.3f} s", flush=True)
@@ -255,11 +256,11 @@ def transduce(
             f.write(audio.file.read())
         background_tasks.add_task(os.remove, file_path)
         generated_text = test_funasr(file_path, lang)
-        
+
         return {"generated_text": generated_text}
     except Exception:
-        raise HTTPException(status_code=500, detail=f"Processing failed: \n{traceback.format_exc()}")    
+        raise HTTPException(status_code=500, detail=f"Processing failed: \n{traceback.format_exc()}")
 
 # if __name__ == "__main__":
-    
+
 #     uvicorn.run("fastapi_funasr:app", host="0.0.0.0", port=1111, workers=1)