51 lines
1.2 KiB
Python
51 lines
1.2 KiB
Python
from typing import List
|
||
|
||
from utils.tokenizer import TokenizerType
|
||
|
||
|
||
def replace_general_punc(
    sentences: List[str], tokenizer: TokenizerType
) -> List[str]:
    """Strip common CJK and ASCII punctuation from every sentence.

    Replacement for the old ``utils.metrics.cut_sentence``.  Each punctuation
    character is replaced (with a single space for whitespace tokenizers, with
    nothing otherwise), then the sentence is whitespace-stripped and lowered.

    Args:
        sentences: sentences to normalize.
        tokenizer: tokenizer kind; controls whether punctuation becomes a
            space or is deleted outright.

    Returns:
        A new list with one normalized string per input sentence.
    """
    # Concatenated character pool; maketrans/fromkeys operate per character,
    # so multi-character sequences like "......" contribute their chars only.
    general_puncs = (
        "······"
        "......"
        "。"
        ","
        "?"
        "!"
        ";"
        ":"
        "..."
        "."
        ","
        "?"
        "!"
        ";"
        ":"
    )
    # Whitespace tokenizers need a separator to survive; others drop the char.
    replacer = " " if tokenizer == TokenizerType.whitespace else ""
    table = str.maketrans(dict.fromkeys(general_puncs, replacer))
    # translate does one C-level pass per sentence; strip+lower normalize.
    return [s.translate(table).strip().lower() for s in sentences]
|
||
|
||
|
||
def distance_point_line(
    point: float, line_start: float, line_end: float
) -> float:
    """Return the 1-D distance from *point* to the interval [line_start, line_end].

    Despite the name, this is a one-dimensional point-to-segment distance:
    0.0 when the point lies inside the closed interval, otherwise the
    distance to the nearest endpoint.  Assumes line_start <= line_end.

    Args:
        point: coordinate to measure from.
        line_start: left endpoint of the interval.
        line_end: right endpoint of the interval.

    Returns:
        Non-negative distance as a float.
    """
    if line_start <= point <= line_end:
        # Fix: return 0.0 (not int 0) to honor the declared float return type.
        return 0.0
    if point < line_start:
        # Left of the interval: difference is already positive, abs() redundant.
        return line_start - point
    # Right of the interval (point > line_end under the assumed ordering).
    return point - line_end
|