init ascend tts

2025-09-05 11:27:43 +08:00
parent d53ac91bb6
commit b92a65b0fa
602 changed files with 590901 additions and 1 deletions
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/README.md
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/README.md
@@ -0,0 +1,16 @@
+## Supported NSW (Non-Standard-Word) Normalization
+
+|NSW type|raw|normalized|
+|:--|:-|:-|
+|serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
+|cardinal|这块黄金重达324.75克<br>我们班的最高总分为583分|这块黄金重达三百二十四点七五克<br>我们班的最高总分为五百八十三分|
+|numeric range |12\~23<br>-1.5\~2|十二到二十三<br>负一点五到二|
+|date|她出生于86年8月18日，她弟弟出生于1995年3月1日|她出生于八六年八月十八日， 她弟弟出生于一九九五年三月一日|
+|time|等会请在12:05请通知我|等会请在十二点零五分请通知我|
+|temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度|
+|fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
+|percentage|明天有62％的概率降雨|明天有百分之六十二的概率降雨|
+|money|随便来几个价格12块5，34.5元，20.1万|随便来几个价格十二块五，三十四点五元，二十点一万|
+|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
+## References
+[Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files)
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/init.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/init.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from text.zh_normalization.text_normlization import *
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/char_convert.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/char_convert.py
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/chronology.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/chronology.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+from .num import DIGITS
+from .num import num2str
+from .num import verbalize_cardinal
+from .num import verbalize_digit
+
+
+def _time_num2str(num_string: str) -> str:
+    """A special case for verbalizing number in time."""
+    result = num2str(num_string.lstrip("0"))
+    if num_string.startswith("0"):
+        result = DIGITS["0"] + result
+    return result
+
+
+# Clock-time expression: hour 0-23 (one or two digits), ":MM", optional ":SS".
+RE_TIME = re.compile(
+    r"([0-1]?[0-9]|2[0-3])"  # group 1: hour
+    r":([0-5][0-9])"  # group 2: minute
+    r"(:([0-5][0-9]))?"  # group 4: optional second
+)
+
+# Time range such as 8:30-12:30: two clock times joined by "~" or "-".
+RE_TIME_RANGE = re.compile(
+    r"([0-1]?[0-9]|2[0-3])"  # group 1: start hour
+    r":([0-5][0-9])"  # group 2: start minute
+    r"(:([0-5][0-9]))?"  # group 4: optional start second
+    r"(~|-)"  # group 5: range separator
+    r"([0-1]?[0-9]|2[0-3])"  # group 6: end hour
+    r":([0-5][0-9])"  # group 7: end minute
+    r"(:([0-5][0-9]))?"  # group 9: optional end second
+)
+
+
+def replace_time(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+
+    is_range = len(match.groups()) > 5
+
+    hour = match.group(1)
+    minute = match.group(2)
+    second = match.group(4)
+
+    if is_range:
+        hour_2 = match.group(6)
+        minute_2 = match.group(7)
+        second_2 = match.group(9)
+
+    result = f"{num2str(hour)}点"
+    if minute.lstrip("0"):
+        if int(minute) == 30:
+            result += "半"
+        else:
+            result += f"{_time_num2str(minute)}分"
+    if second and second.lstrip("0"):
+        result += f"{_time_num2str(second)}秒"
+
+    if is_range:
+        result += "至"
+        result += f"{num2str(hour_2)}点"
+        if minute_2.lstrip("0"):
+            if int(minute) == 30:
+                result += "半"
+            else:
+                result += f"{_time_num2str(minute_2)}分"
+        if second_2 and second_2.lstrip("0"):
+            result += f"{_time_num2str(second_2)}秒"
+
+    return result
+
+
+RE_DATE = re.compile(
+    r"(\d{4}|\d{2})年"
+    r"((0?[1-9]|1[0-2])月)?"
+    r"(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?"
+)
+
+
+def replace_date(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    year = match.group(1)
+    month = match.group(3)
+    day = match.group(5)
+    result = ""
+    if year:
+        result += f"{verbalize_digit(year)}年"
+    if month:
+        result += f"{verbalize_cardinal(month)}月"
+    if day:
+        result += f"{verbalize_cardinal(day)}{match.group(9)}"
+    return result
+
+
+# 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期
+RE_DATE2 = re.compile(r"(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])")
+
+
+def replace_date2(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    year = match.group(1)
+    month = match.group(3)
+    day = match.group(4)
+    result = ""
+    if year:
+        result += f"{verbalize_digit(year)}年"
+    if month:
+        result += f"{verbalize_cardinal(month)}月"
+    if day:
+        result += f"{verbalize_cardinal(day)}日"
+    return result
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/constants.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/constants.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+import string
+
+from pypinyin.constants import SUPPORT_UCS4
+
+# Full-width <-> half-width conversion tables, keyed by code point so they
+# can be handed to str.translate. 65248 (0xFEE0) is the offset added to an
+# ASCII code point to obtain the full-width key.
+# ASCII letters, full-width -> half-width (52 entries)
+F2H_ASCII_LETTERS = {ord(char) + 65248: ord(char) for char in string.ascii_letters}
+
+# ASCII letters, half-width -> full-width
+H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}
+
+# Digits, full-width -> half-width (10 entries)
+F2H_DIGITS = {ord(char) + 65248: ord(char) for char in string.digits}
+# Digits, half-width -> full-width
+H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
+
+# Punctuation, full-width -> half-width (32 entries)
+F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
+# Punctuation, half-width -> full-width
+H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
+
+# 空格 (num: 1)
+F2H_SPACE = {"\u3000": " "}
+H2F_SPACE = {" ": "\u3000"}
+
+# Matches runs of characters that fall outside the CJK ranges listed below,
+# i.e. text that is not a Chinese character with a pinyin reading; usable for
+# extracting non-standard words (NSW). The supplementary-plane ranges are only
+# included when pypinyin reports UCS-4 support.
+if SUPPORT_UCS4:
+    RE_NSW = re.compile(
+        r"(?:[^"
+        r"\u3007"  # 〇 (ideographic number zero)
+        r"\u3400-\u4dbf"  # CJK Extension A: [3400-4DBF]
+        r"\u4e00-\u9fff"  # CJK Unified Ideographs: [4E00-9FFF]
+        r"\uf900-\ufaff"  # CJK Compatibility Ideographs: [F900-FAFF]
+        r"\U00020000-\U0002A6DF"  # CJK Extension B: [20000-2A6DF]
+        r"\U0002A703-\U0002B73F"  # CJK Extension C: [2A700-2B73F] (range here starts at 2A703)
+        r"\U0002B740-\U0002B81D"  # CJK Extension D: [2B740-2B81D]
+        r"\U0002F80A-\U0002FA1F"  # CJK Compatibility Supplement: [2F800-2FA1F] (range here starts at 2F80A)
+        r"])+"
+    )
+else:
+    RE_NSW = re.compile(  # pragma: no cover
+        r"(?:[^"
+        r"\u3007"  # 〇 (ideographic number zero)
+        r"\u3400-\u4dbf"  # CJK Extension A: [3400-4DBF]
+        r"\u4e00-\u9fff"  # CJK Unified Ideographs: [4E00-9FFF]
+        r"\uf900-\ufaff"  # CJK Compatibility Ideographs: [F900-FAFF]
+        r"])+"
+    )
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/num.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/num.py
@@ -0,0 +1,339 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Rules to verbalize numbers into Chinese characters.
+https://zh.wikipedia.org/wiki/中文数字#現代中文
+"""
+
+import re
+from collections import OrderedDict
+from typing import List
+
+# ASCII digit character -> Chinese numeral character ("0" -> "零" ... "9" -> "九").
+DIGITS = {str(i): tran for i, tran in enumerate("零一二三四五六七八九")}
+# Power of ten -> Chinese unit character, in ascending order (_get_value walks
+# the keys in reverse to find the largest unit that fits).
+UNITS = OrderedDict(
+    {
+        1: "十",
+        2: "百",
+        3: "千",
+        4: "万",
+        8: "亿",
+    }
+)
+
+# Regex alternation (one outer capture group) of measure words / quantifiers
+# that may follow a number; concatenated into RE_POSITIVE_QUANTIFIERS.
+COM_QUANTIFIERS = "(处|台|架|枚|趟|幅|平|方|堵|间|床|株|批|项|例|列|篇|栋|注|亩|封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分)"
+
+# 分数表达式
+RE_FRAC = re.compile(r"(-?)(\d+)/(\d+)")
+
+
+def replace_frac(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    sign = match.group(1)
+    nominator = match.group(2)
+    denominator = match.group(3)
+    sign: str = "负" if sign else ""
+    nominator: str = num2str(nominator)
+    denominator: str = num2str(denominator)
+    result = f"{sign}{denominator}分之{nominator}"
+    return result
+
+
+# 百分数表达式
+RE_PERCENTAGE = re.compile(r"(-?)(\d+(\.\d+)?)%")
+
+
+def replace_percentage(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    sign = match.group(1)
+    percent = match.group(2)
+    sign: str = "负" if sign else ""
+    percent: str = num2str(percent)
+    result = f"{sign}百分之{percent}"
+    return result
+
+
+# 整数表达式
+# 带负号的整数 -10
+RE_INTEGER = re.compile(r"(-)" r"(\d+)")
+
+
+def replace_negative_num(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    sign = match.group(1)
+    number = match.group(2)
+    sign: str = "负" if sign else ""
+    number: str = num2str(number)
+    result = f"{sign}{number}"
+    return result
+
+
+# 编号-无符号整形
+# 00078
+RE_DEFAULT_NUM = re.compile(r"\d{3}\d*")
+
+
+def replace_default_num(match):
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    number = match.group(0)
+    return verbalize_digit(number, alt_one=True)
+
+
+# 加减乘除
+# RE_ASMD = re.compile(
+#     r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))([\+\-\×÷=])((-?)((\d+)(\.\d+)?)|(\.(\d+)))')
+RE_ASMD = re.compile(
+    r"((-?)((\d+)(\.\d+)?[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|(\.\d+[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|([A-Za-z][⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*))([\+\-\×÷=])((-?)((\d+)(\.\d+)?[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|(\.\d+[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|([A-Za-z][⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*))"
+)
+
+asmd_map = {"+": "加", "-": "减", "×": "乘", "÷": "除", "=": "等于"}
+
+
+def replace_asmd(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    result = match.group(1) + asmd_map[match.group(8)] + match.group(9)
+    return result
+
+
+# 次方专项
+RE_POWER = re.compile(r"[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]+")
+
+power_map = {
+    "⁰": "0",
+    "¹": "1",
+    "²": "2",
+    "³": "3",
+    "⁴": "4",
+    "⁵": "5",
+    "⁶": "6",
+    "⁷": "7",
+    "⁸": "8",
+    "⁹": "9",
+    "ˣ": "x",
+    "ʸ": "y",
+    "ⁿ": "n",
+}
+
+
+def replace_power(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    power_num = ""
+    for m in match.group(0):
+        power_num += power_map[m]
+    result = "的" + power_num + "次方"
+    return result
+
+
+# Number expressions
+# Decimal number: optionally signed "digits.digits", or a bare ".digits"
+RE_DECIMAL_NUM = re.compile(r"(-?)((\d+)(\.\d+))" r"|(\.(\d+))")
+# Positive integer (optionally followed by 多/余/几/+) plus a quantifier
+RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
+# General number: optionally signed integer or decimal, or a bare ".digits"
+RE_NUMBER = re.compile(r"(-?)((\d+)(\.\d+)?)" r"|(\.(\d+))")
+
+
+def replace_positive_quantifier(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    number = match.group(1)
+    match_2 = match.group(2)
+    if match_2 == "+":
+        match_2 = "多"
+    match_2: str = match_2 if match_2 else ""
+    quantifiers: str = match.group(3)
+    number: str = num2str(number)
+    number = "两" if number == "二" else number
+    result = f"{number}{match_2}{quantifiers}"
+    return result
+
+
+def replace_number(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    sign = match.group(1)
+    number = match.group(2)
+    pure_decimal = match.group(5)
+    if pure_decimal:
+        result = num2str(pure_decimal)
+    else:
+        sign: str = "负" if sign else ""
+        number: str = num2str(number)
+        result = f"{sign}{number}"
+    return result
+
+
+# 范围表达式
+# match.group(1) and match.group(8) are copy from RE_NUMBER
+
+RE_RANGE = re.compile(
+    r"""
+    (?<![\d\+\-\×÷=])      # 使用反向前瞻以确保数字范围之前没有其他数字和操作符
+    ((-?)((\d+)(\.\d+)?))  # 匹配范围起始的负数或正数（整数或小数）
+    [-~]                   # 匹配范围分隔符
+    ((-?)((\d+)(\.\d+)?))  # 匹配范围结束的负数或正数（整数或小数）
+    (?![\d\+\-\×÷=])       # 使用正向前瞻以确保数字范围之后没有其他数字和操作符
+    """,
+    re.VERBOSE,
+)
+
+
+def replace_range(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    first, second = match.group(1), match.group(6)
+    first = RE_NUMBER.sub(replace_number, first)
+    second = RE_NUMBER.sub(replace_number, second)
+    result = f"{first}到{second}"
+    return result
+
+
+# ~至表达式
+RE_TO_RANGE = re.compile(
+    r"((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)[~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)"
+)
+
+
+def replace_to_range(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    result = match.group(0).replace("~", "至")
+    return result
+
+
+RE_VERSION_NUM = re.compile(r"((\d+)(\.\d+)(\.\d+)?(\.\d+)+)")
+def replace_vrsion_num(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    result = ""
+    for c in match.group(1):
+        if c == ".":
+            result += "点"
+        else:
+            result += num2str(c)
+    return result
+
+
+
+def _get_value(value_string: str, use_zero: bool = True) -> List[str]:
+    stripped = value_string.lstrip("0")
+    if len(stripped) == 0:
+        return []
+    elif len(stripped) == 1:
+        if use_zero and len(stripped) < len(value_string):
+            return [DIGITS["0"], DIGITS[stripped]]
+        else:
+            return [DIGITS[stripped]]
+    else:
+        largest_unit = next(power for power in reversed(UNITS.keys()) if power < len(stripped))
+        first_part = value_string[:-largest_unit]
+        second_part = value_string[-largest_unit:]
+        return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(second_part)
+
+
+def verbalize_cardinal(value_string: str) -> str:
+    if not value_string:
+        return ""
+
+    # 000 -> '零' , 0 -> '零'
+    value_string = value_string.lstrip("0")
+    if len(value_string) == 0:
+        return DIGITS["0"]
+
+    result_symbols = _get_value(value_string)
+    # verbalized number starting with '一十*' is abbreviated as `十*`
+    if len(result_symbols) >= 2 and result_symbols[0] == DIGITS["1"] and result_symbols[1] == UNITS[1]:
+        result_symbols = result_symbols[1:]
+    return "".join(result_symbols)
+
+
+def verbalize_digit(value_string: str, alt_one=False) -> str:
+    result_symbols = [DIGITS[digit] for digit in value_string]
+    result = "".join(result_symbols)
+    if alt_one:
+        result = result.replace("一", "幺")
+    return result
+
+
+def num2str(value_string: str) -> str:
+    integer_decimal = value_string.split(".")
+    if len(integer_decimal) == 1:
+        integer = integer_decimal[0]
+        decimal = ""
+    elif len(integer_decimal) == 2:
+        integer, decimal = integer_decimal
+    else:
+        raise ValueError(f"The value string: '${value_string}' has more than one point in it.")
+
+    result = verbalize_cardinal(integer)
+
+    if decimal.endswith("0"):
+        decimal = decimal.rstrip("0") + "0"
+    else:
+        decimal = decimal.rstrip("0")
+
+    if decimal:
+        # '.22' is verbalized as '零点二二'
+        # '3.20' is verbalized as '三点二
+        result = result if result else "零"
+        result += "点" + verbalize_digit(decimal)
+    return result
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/phonecode.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/phonecode.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+from .num import verbalize_digit
+
+# Normalization of landline / mobile phone numbers
+# Mobile
+# http://www.jihaoba.com/news/show/13680
+# China Mobile: 139, 138, 137, 136, 135, 134, 159, 158, 157, 150, 151, 152, 188, 187, 182, 183, 184, 178, 198
+# China Unicom: 130, 131, 132, 156, 155, 186, 185, 176
+# China Telecom: 133, 153, 189, 180, 181, 177
+RE_MOBILE_PHONE = re.compile(r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
+# Landline: optional area code (with optional "-") followed by 7 or 8 digits
+RE_TELEPHONE = re.compile(r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")
+
+# Nationwide uniform numbers beginning with 400
+RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")
+
+
+def phone2str(phone_string: str, mobile=True) -> str:
+    if mobile:
+        sp_parts = phone_string.strip("+").split()
+        result = "，".join([verbalize_digit(part, alt_one=True) for part in sp_parts])
+        return result
+    else:
+        sil_parts = phone_string.split("-")
+        result = "，".join([verbalize_digit(part, alt_one=True) for part in sil_parts])
+        return result
+
+
+def replace_phone(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    return phone2str(match.group(0), mobile=False)
+
+
+def replace_mobile(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    return phone2str(match.group(0))
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/quantifier.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/quantifier.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+from .num import num2str
+
+# Temperature expression; being a temperature changes how the minus sign is read
+# -3°C is read as 零下三度
+# Groups: 1 = sign, 2 = number, 3 = decimal part, 4 = unit.
+RE_TEMPERATURE = re.compile(r"(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)")
+# Unit notation -> spoken Chinese name, consumed by replace_measure.
+measure_dict = {
+    "cm2": "平方厘米",
+    "cm²": "平方厘米",
+    "cm3": "立方厘米",
+    "cm³": "立方厘米",
+    "cm": "厘米",
+    "db": "分贝",
+    "ds": "毫秒",
+    "kg": "千克",
+    "km": "千米",
+    "m2": "平方米",
+    "m²": "平方米",
+    "m³": "立方米",
+    "m3": "立方米",
+    "ml": "毫升",
+    "m": "米",
+    "mm": "毫米",
+    "s": "秒",
+}
+
+
+def replace_temperature(match) -> str:
+    """
+    Args:
+        match (re.Match)
+    Returns:
+        str
+    """
+    sign = match.group(1)
+    temperature = match.group(2)
+    unit = match.group(3)
+    sign: str = "零下" if sign else ""
+    temperature: str = num2str(temperature)
+    unit: str = "摄氏度" if unit == "摄氏度" else "度"
+    result = f"{sign}{temperature}{unit}"
+    return result
+
+
+def replace_measure(sentence) -> str:
+    for q_notation in measure_dict:
+        if q_notation in sentence:
+            sentence = sentence.replace(q_notation, measure_dict[q_notation])
+    return sentence
--- a/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/text_normlization.py
+++ b/ascend_910-gpt-sovits/GPT-SoVITS/GPT_SoVITS/text/zh_normalization/text_normlization.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import List
+
+from .char_convert import tranditional_to_simplified
+from .chronology import RE_DATE
+from .chronology import RE_DATE2
+from .chronology import RE_TIME
+from .chronology import RE_TIME_RANGE
+from .chronology import replace_date
+from .chronology import replace_date2
+from .chronology import replace_time
+from .constants import F2H_ASCII_LETTERS
+from .constants import F2H_DIGITS
+from .constants import F2H_SPACE
+from .num import RE_VERSION_NUM
+from .num import RE_DECIMAL_NUM
+from .num import RE_DEFAULT_NUM
+from .num import RE_FRAC
+from .num import RE_INTEGER
+from .num import RE_NUMBER
+from .num import RE_PERCENTAGE
+from .num import RE_POSITIVE_QUANTIFIERS
+from .num import RE_RANGE
+from .num import RE_TO_RANGE
+from .num import RE_ASMD
+from .num import RE_POWER
+from .num import replace_vrsion_num
+from .num import replace_default_num
+from .num import replace_frac
+from .num import replace_negative_num
+from .num import replace_number
+from .num import replace_percentage
+from .num import replace_positive_quantifier
+from .num import replace_range
+from .num import replace_to_range
+from .num import replace_asmd
+from .num import replace_power
+from .phonecode import RE_MOBILE_PHONE
+from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
+from .phonecode import RE_TELEPHONE
+from .phonecode import replace_mobile
+from .phonecode import replace_phone
+from .quantifier import RE_TEMPERATURE
+from .quantifier import replace_measure
+from .quantifier import replace_temperature
+
+
+class TextNormalizer:
+    def __init__(self):
+        self.SENTENCE_SPLITOR = re.compile(r"([：、，；。？！,;?!][”’]?)")
+
+    def _split(self, text: str, lang="zh") -> List[str]:
+        """Split long text into sentences with sentence-splitting punctuations.
+        Args:
+            text (str): The input text.
+        Returns:
+            List[str]: Sentences.
+        """
+        # Only for pure Chinese here
+        if lang == "zh":
+            text = text.replace(" ", "")
+            # 过滤掉特殊字符
+            text = re.sub(r"[——《》【】<>{}()（）#&@“”^_|\\]", "", text)
+        text = self.SENTENCE_SPLITOR.sub(r"\1\n", text)
+        text = text.strip()
+        sentences = [sentence.strip() for sentence in re.split(r"\n+", text)]
+        return sentences
+
+    def _post_replace(self, sentence: str) -> str:
+        sentence = sentence.replace("/", "每")
+        # sentence = sentence.replace('~', '至')
+        # sentence = sentence.replace('～', '至')
+        sentence = sentence.replace("①", "一")
+        sentence = sentence.replace("②", "二")
+        sentence = sentence.replace("③", "三")
+        sentence = sentence.replace("④", "四")
+        sentence = sentence.replace("⑤", "五")
+        sentence = sentence.replace("⑥", "六")
+        sentence = sentence.replace("⑦", "七")
+        sentence = sentence.replace("⑧", "八")
+        sentence = sentence.replace("⑨", "九")
+        sentence = sentence.replace("⑩", "十")
+        sentence = sentence.replace("α", "阿尔法")
+        sentence = sentence.replace("β", "贝塔")
+        sentence = sentence.replace("γ", "伽玛").replace("Γ", "伽玛")
+        sentence = sentence.replace("δ", "德尔塔").replace("Δ", "德尔塔")
+        sentence = sentence.replace("ε", "艾普西龙")
+        sentence = sentence.replace("ζ", "捷塔")
+        sentence = sentence.replace("η", "依塔")
+        sentence = sentence.replace("θ", "西塔").replace("Θ", "西塔")
+        sentence = sentence.replace("ι", "艾欧塔")
+        sentence = sentence.replace("κ", "喀帕")
+        sentence = sentence.replace("λ", "拉姆达").replace("Λ", "拉姆达")
+        sentence = sentence.replace("μ", "缪")
+        sentence = sentence.replace("ν", "拗")
+        sentence = sentence.replace("ξ", "克西").replace("Ξ", "克西")
+        sentence = sentence.replace("ο", "欧米克伦")
+        sentence = sentence.replace("π", "派").replace("Π", "派")
+        sentence = sentence.replace("ρ", "肉")
+        sentence = sentence.replace("ς", "西格玛").replace("Σ", "西格玛").replace("σ", "西格玛")
+        sentence = sentence.replace("τ", "套")
+        sentence = sentence.replace("υ", "宇普西龙")
+        sentence = sentence.replace("φ", "服艾").replace("Φ", "服艾")
+        sentence = sentence.replace("χ", "器")
+        sentence = sentence.replace("ψ", "普赛").replace("Ψ", "普赛")
+        sentence = sentence.replace("ω", "欧米伽").replace("Ω", "欧米伽")
+        # 兜底数学运算，顺便兼容懒人用语
+        sentence = sentence.replace("+", "加")
+        sentence = sentence.replace("-", "减")
+        sentence = sentence.replace("×", "乘")
+        sentence = sentence.replace("÷", "除")
+        sentence = sentence.replace("=", "等")
+        # re filter special characters, have one more character "-" than line 68
+        sentence = re.sub(r"[-——《》【】<=>{}()（）#&@“”^_|\\]", "", sentence)
+        return sentence
+
+    def normalize_sentence(self, sentence: str) -> str:
+        # basic character conversions
+        sentence = tranditional_to_simplified(sentence)
+        sentence = sentence.translate(F2H_ASCII_LETTERS).translate(F2H_DIGITS).translate(F2H_SPACE)
+
+        # number related NSW verbalization
+        sentence = RE_DATE.sub(replace_date, sentence)
+        sentence = RE_DATE2.sub(replace_date2, sentence)
+
+        # range first
+        sentence = RE_TIME_RANGE.sub(replace_time, sentence)
+        sentence = RE_TIME.sub(replace_time, sentence)
+
+        # 处理~波浪号作为至的替换
+        sentence = RE_TO_RANGE.sub(replace_to_range, sentence)
+        sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
+        sentence = replace_measure(sentence)
+
+        # 处理数学运算
+        while RE_ASMD.search(sentence):
+            sentence = RE_ASMD.sub(replace_asmd, sentence)
+        sentence = RE_POWER.sub(replace_power, sentence)
+
+        sentence = RE_FRAC.sub(replace_frac, sentence)
+        sentence = RE_PERCENTAGE.sub(replace_percentage, sentence)
+        sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence)
+
+        sentence = RE_TELEPHONE.sub(replace_phone, sentence)
+        sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
+
+        sentence = RE_RANGE.sub(replace_range, sentence)
+
+        sentence = RE_INTEGER.sub(replace_negative_num, sentence)
+        sentence = RE_VERSION_NUM.sub(replace_vrsion_num, sentence)
+        sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
+        sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier, sentence)
+        sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence)
+        sentence = RE_NUMBER.sub(replace_number, sentence)
+        sentence = self._post_replace(sentence)
+
+        return sentence
+
+    def normalize(self, text: str) -> List[str]:
+        sentences = self._split(text)
+        sentences = [self.normalize_sentence(sent) for sent in sentences]
+        return sentences