Files
enginex-bi_series-vc-cnn/utils/metrics_plus.py

51 lines
1.2 KiB
Python
Raw Normal View History

2025-08-06 15:38:55 +08:00
from typing import List
from utils.tokenizer import TokenizerType
def replace_general_punc(
    sentences: List[str], tokenizer: TokenizerType
) -> List[str]:
    """Neutralize general punctuation in each sentence and normalize it.

    Replacement for the original function ``utils.metrics.cut_sentence``.

    Every character that occurs in the punctuation list below is mapped to
    a single space when ``tokenizer`` equals ``TokenizerType.whitespace``
    and is deleted otherwise. Each sentence is then stripped of leading and
    trailing whitespace and lower-cased.

    Args:
        sentences: The sentences to clean up.
        tokenizer: Selects whether punctuation becomes a space or vanishes.

    Returns:
        A new list with one cleaned sentence per input sentence, in the
        same order.
    """
    general_puncs = [
        "······",
        "......",
        "",
        "",
        "",
        "",
        "",
        "",
        "...",
        ".",
        ",",
        "?",
        "!",
        ";",
        ":",
    ]
    # NOTE(review): the six empty entries contribute no characters to the
    # translation table; they may be punctuation marks lost to an encoding
    # problem -- confirm against the upstream file.

    substitute = " " if tokenizer == TokenizerType.whitespace else ""

    # str.translate works per character, so the entries are flattened into
    # their individual characters before the table is built.
    table = str.maketrans(
        {char: substitute for char in "".join(general_puncs)}
    )
    return [
        sentence.translate(table).strip().lower() for sentence in sentences
    ]
def distance_point_line(
    point: float, line_start: float, line_end: float
) -> float:
    """Return the distance from a point to a closed 1-D interval.

    The "line" is the span between ``line_start`` and ``line_end`` on the
    number line. The endpoints may be given in either order.

    Args:
        point: The coordinate to measure from.
        line_start: One end of the interval.
        line_end: The other end of the interval.

    Returns:
        ``0`` when ``point`` lies inside the interval (endpoints included),
        otherwise the absolute gap between ``point`` and the nearest
        endpoint.
    """
    lower, upper = line_start, line_end
    # Accept reversed endpoints; without this, a point inside a reversed
    # span would be reported as lying outside of it.
    if lower > upper:
        lower, upper = upper, lower

    if lower <= point <= upper:
        return 0
    if point < lower:
        return lower - point
    return point - upper