Support heteronyms in Chinese TTS (#738)

This commit is contained in:
Fangjun Kuang
2024-04-08 11:01:30 +08:00
committed by GitHub
parent c1c0f5bafd
commit a5f8fbc83f
49 changed files with 308 additions and 143 deletions

View File

@@ -26,6 +26,7 @@ func main() {
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size")
flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")

View File

@@ -6,21 +6,32 @@
for sid in 10 33 99; do
./non-streaming-tts \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--sid=$sid \
--debug=1 \
--output-filename=./liliana-$sid.wav \
"林美丽最美丽、最漂亮、最可爱!"
./non-streaming-tts \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--sid=$sid \
--debug=1 \
--output-filename=./numbers-$sid.wav \
"数字12345.6789怎么念"
./non-streaming-tts \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=$sid \
--debug=1 \
--output-filename=./heteronym-$sid.wav \
"万古长存长沙长大长白山长孙长安街"
done