Fix handling of the Unicode U+2581 ("▁") word separator in model output
This commit is contained in:
@@ -168,6 +168,7 @@ def test_funasr(audio_file, lang):
|
|||||||
segment_path = f"temp_seg_{i}.wav"
|
segment_path = f"temp_seg_{i}.wav"
|
||||||
torchaudio.save(segment_path, segment, sample_rate)
|
torchaudio.save(segment_path, segment, sample_rate)
|
||||||
ts1 = time.time()
|
ts1 = time.time()
|
||||||
|
text = None
|
||||||
if model_type == "sensevoice":
|
if model_type == "sensevoice":
|
||||||
res = model.generate(
|
res = model.generate(
|
||||||
input=segment_path,
|
input=segment_path,
|
||||||
@@ -203,14 +204,12 @@ def test_funasr(audio_file, lang):
|
|||||||
# paraformer模型会一个字一个字输出,中间夹太多空格会影响1-cer的结果
|
# paraformer模型会一个字一个字输出,中间夹太多空格会影响1-cer的结果
|
||||||
if lang == "zh":
|
if lang == "zh":
|
||||||
text = text.replace(" ", "")
|
text = text.replace(" ", "")
|
||||||
text = text.replace("_", " ")
|
|
||||||
elif model_type == "conformer":
|
elif model_type == "conformer":
|
||||||
res = model.generate(
|
res = model.generate(
|
||||||
input=segment_path,
|
input=segment_path,
|
||||||
batch_size_s=300
|
batch_size_s=300
|
||||||
)
|
)
|
||||||
text = res[0]["text"]
|
text = res[0]["text"]
|
||||||
text = text.replace("_", " ")
|
|
||||||
# elif model_type == "uni_asr":
|
# elif model_type == "uni_asr":
|
||||||
# if i == 0:
|
# if i == 0:
|
||||||
# os.remove(segment_path)
|
# os.remove(segment_path)
|
||||||
@@ -221,6 +220,10 @@ def test_funasr(audio_file, lang):
|
|||||||
# text = res[0]["text"]
|
# text = res[0]["text"]
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("unknown model type")
|
raise RuntimeError("unknown model type")
|
||||||
|
if text is not None:
|
||||||
|
# Some models output "▁" (U+2581, code point 9601) as a separator between words; replace it (and "_") with a space for better readability.
|
||||||
|
text = text.replace("_", " ")
|
||||||
|
text = text.replace(chr(9601), " ")
|
||||||
ts2 = time.time()
|
ts2 = time.time()
|
||||||
generated_text += text
|
generated_text += text
|
||||||
processing_time += (ts2 - ts1)
|
processing_time += (ts2 - ts1)
|
||||||
|
|||||||
Reference in New Issue
Block a user