Add Dart API for MatchaTTS models (#1687)
This commit is contained in:
41
.github/scripts/test-dart.sh
vendored
41
.github/scripts/test-dart.sh
vendored
@@ -4,6 +4,31 @@ set -ex
|
|||||||
|
|
||||||
cd dart-api-examples
|
cd dart-api-examples
|
||||||
|
|
||||||
|
pushd tts
|
||||||
|
|
||||||
|
echo '----------matcha tts----------'
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
./run-matcha-en.sh
|
||||||
|
ls -lh *.wav
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
rm *.onnx
|
||||||
|
|
||||||
|
echo '----------piper tts----------'
|
||||||
|
./run-piper.sh
|
||||||
|
rm -rf vits-piper-*
|
||||||
|
|
||||||
|
echo '----------coqui tts----------'
|
||||||
|
./run-coqui.sh
|
||||||
|
rm -rf vits-coqui-*
|
||||||
|
|
||||||
|
echo '----------zh tts----------'
|
||||||
|
./run-vits-zh.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
|
||||||
|
ls -lh *.wav
|
||||||
|
|
||||||
|
popd # tts
|
||||||
|
|
||||||
pushd speaker-diarization
|
pushd speaker-diarization
|
||||||
echo '----------speaker diarization----------'
|
echo '----------speaker diarization----------'
|
||||||
./run.sh
|
./run.sh
|
||||||
@@ -106,22 +131,6 @@ rm -rf sherpa-onnx-*
|
|||||||
|
|
||||||
popd # non-streaming-asr
|
popd # non-streaming-asr
|
||||||
|
|
||||||
pushd tts
|
|
||||||
|
|
||||||
echo '----------piper tts----------'
|
|
||||||
./run-piper.sh
|
|
||||||
rm -rf vits-piper-*
|
|
||||||
|
|
||||||
echo '----------coqui tts----------'
|
|
||||||
./run-coqui.sh
|
|
||||||
rm -rf vits-coqui-*
|
|
||||||
|
|
||||||
echo '----------zh tts----------'
|
|
||||||
./run-zh.sh
|
|
||||||
rm -rf sherpa-onnx-*
|
|
||||||
|
|
||||||
popd # tts
|
|
||||||
|
|
||||||
pushd streaming-asr
|
pushd streaming-asr
|
||||||
|
|
||||||
echo '----------streaming zipformer ctc HLG----------'
|
echo '----------streaming zipformer ctc HLG----------'
|
||||||
|
|||||||
1
.github/workflows/checksum.yaml
vendored
1
.github/workflows/checksum.yaml
vendored
@@ -7,6 +7,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
checksum:
|
checksum:
|
||||||
|
if: github.repository_owner == 'k2-fsa'
|
||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
|||||||
86
dart-api-examples/tts/bin/matcha-en.dart
Normal file
86
dart-api-examples/tts/bin/matcha-en.dart
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
import 'dart:io';
|
||||||
|
|
||||||
|
import 'package:args/args.dart';
|
||||||
|
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||||
|
|
||||||
|
import './init.dart';
|
||||||
|
|
||||||
|
/// Generates speech with a Matcha TTS acoustic model plus a vocoder and saves
/// the result as a wave file.
///
/// The model paths, the text to speak, and the output filename are read from
/// [arguments]. When a required option is absent, the usage text is printed
/// and the process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('acoustic-model', help: 'Path to the acoustic model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);
  if (res['acoustic-model'] == null ||
      res['vocoder'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final acousticModel = res['acoustic-model'] as String;
  final vocoder = res['vocoder'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // `1 / speed` is used as the length scale below, so anything that is not a
  // positive finite number (0, negative values, NaN, infinity) falls back to
  // the normal speed instead of producing a nonsensical scale.
  if (!speed.isFinite || speed <= 0) {
    speed = 1.0;
  }

  final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    matcha: matcha,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }

  print('Saved to $outputWav');
}
|
||||||
90
dart-api-examples/tts/bin/matcha-zh.dart
Normal file
90
dart-api-examples/tts/bin/matcha-zh.dart
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
import 'dart:io';
|
||||||
|
|
||||||
|
import 'package:args/args.dart';
|
||||||
|
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||||
|
|
||||||
|
import './init.dart';
|
||||||
|
|
||||||
|
/// Generates speech with a Matcha TTS acoustic model plus a vocoder, using a
/// lexicon and a jieba dict directory, and saves the result as a wave file.
///
/// The model paths, the text to speak, and the output filename are read from
/// [arguments]. When a required option is absent, the usage text is printed
/// and the process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('acoustic-model', help: 'Path to the acoustic model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption(
      'dict-dir',
      help: 'Path to jieba dict directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);
  if (res['acoustic-model'] == null ||
      res['vocoder'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['dict-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final acousticModel = res['acoustic-model'] as String;
  final vocoder = res['vocoder'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final dictDir = res['dict-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // `1 / speed` is used as the length scale below, so anything that is not a
  // positive finite number (0, negative values, NaN, infinity) falls back to
  // the normal speed instead of producing a nonsensical scale.
  if (!speed.isFinite || speed <= 0) {
    speed = 1.0;
  }

  final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    matcha: matcha,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }

  print('Saved to $outputWav');
}
|
||||||
32
dart-api-examples/tts/run-matcha-en.sh
Executable file
32
dart-api-examples/tts/run-matcha-en.sh
Executable file
@@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Fetches the matcha-icefall-en_US-ljspeech model and the hifigan_v2 vocoder
# when they are not already present, then runs ./bin/matcha-en.dart to
# synthesize one sentence into matcha-en-1.wav.

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page under the requested filename.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  # Without -f a failed download would leave a bogus hifigan_v2.onnx behind,
  # and the existence check above would then skip the download on later runs.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The last argument carries no trailing backslash, so the command ends here
# regardless of what follows it.
dart run \
  ./bin/matcha-en.dart \
  --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --vocoder ./hifigan_v2.onnx \
  --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \
  --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-en-1.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav
|
||||||
45
dart-api-examples/tts/run-matcha-zh.sh
Executable file
45
dart-api-examples/tts/run-matcha-zh.sh
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Fetches the matcha-icefall-zh-baker model and the hifigan_v2 vocoder when
# they are not already present, then runs ./bin/matcha-zh.dart twice: once with
# rule FSTs (matcha-zh-1.wav) and once without (matcha-zh-2.wav).

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page under the requested filename.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  # Without -f a failed download would leave a bogus hifigan_v2.onnx behind,
  # and the existence check above would then skip the download on later runs.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The last argument of each invocation carries no trailing backslash, so the
# command ends there regardless of what follows it.
dart run \
  ./bin/matcha-zh.dart \
  --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
  --vocoder ./hifigan_v2.onnx \
  --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
  --tokens ./matcha-icefall-zh-baker/tokens.txt \
  --dict-dir ./matcha-icefall-zh-baker/dict \
  --rule-fsts ./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-zh-1.wav \
  --text "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"

dart run \
  ./bin/matcha-zh.dart \
  --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
  --vocoder ./hifigan_v2.onnx \
  --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
  --tokens ./matcha-icefall-zh-baker/tokens.txt \
  --dict-dir ./matcha-icefall-zh-baker/dict \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-zh-2.wav \
  --text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔."

ls -lh *.wav
|
||||||
@@ -16,7 +16,7 @@ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
dart run \
|
dart run \
|
||||||
./bin/zh.dart \
|
./bin/vits-zh.dart \
|
||||||
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
|
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
|
||||||
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
|
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
|
||||||
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
|
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
|
||||||
@@ -24,10 +24,10 @@ dart run \
|
|||||||
--sid 2 \
|
--sid 2 \
|
||||||
--speed 1.0 \
|
--speed 1.0 \
|
||||||
--text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
|
--text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
|
||||||
--output-wav zh-jieba-2.wav
|
--output-wav vits-zh-jieba-2.wav
|
||||||
|
|
||||||
dart run \
|
dart run \
|
||||||
./bin/zh.dart \
|
./bin/vits-zh.dart \
|
||||||
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
|
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
|
||||||
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
|
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
|
||||||
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
|
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
|
||||||
@@ -36,6 +36,6 @@ dart run \
|
|||||||
--sid 3 \
|
--sid 3 \
|
||||||
--speed 1.0 \
|
--speed 1.0 \
|
||||||
--text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
|
--text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
|
||||||
--output-wav zh-jieba-3.wav
|
--output-wav vits-zh-jieba-3.wav
|
||||||
|
|
||||||
ls -lh *.wav
|
ls -lh *.wav
|
||||||
@@ -131,6 +131,22 @@ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
|
|||||||
external Pointer<Utf8> dictDir;
|
external Pointer<Utf8> dictDir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// FFI struct holding the Matcha TTS model settings: six string pointers and
/// two 32-bit float scales.
///
/// NOTE(review): the field order and types presumably mirror the native struct
/// of the same name — confirm against the C header before reordering or
/// inserting fields.
final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
  external Pointer<Utf8> acousticModel;
  external Pointer<Utf8> vocoder;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double noiseScale;

  @Float()
  external double lengthScale;

  external Pointer<Utf8> dictDir;
}
|
||||||
|
|
||||||
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
|
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
|
||||||
external SherpaOnnxOfflineTtsVitsModelConfig vits;
|
external SherpaOnnxOfflineTtsVitsModelConfig vits;
|
||||||
@Int32()
|
@Int32()
|
||||||
@@ -140,6 +156,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
|
|||||||
external int debug;
|
external int debug;
|
||||||
|
|
||||||
external Pointer<Utf8> provider;
|
external Pointer<Utf8> provider;
|
||||||
|
external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
|
||||||
}
|
}
|
||||||
|
|
||||||
final class SherpaOnnxOfflineTtsConfig extends Struct {
|
final class SherpaOnnxOfflineTtsConfig extends Struct {
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ import './sherpa_onnx_bindings.dart';
|
|||||||
|
|
||||||
class OfflineTtsVitsModelConfig {
|
class OfflineTtsVitsModelConfig {
|
||||||
const OfflineTtsVitsModelConfig({
|
const OfflineTtsVitsModelConfig({
|
||||||
required this.model,
|
this.model = '',
|
||||||
this.lexicon = '',
|
this.lexicon = '',
|
||||||
required this.tokens,
|
this.tokens = '',
|
||||||
this.dataDir = '',
|
this.dataDir = '',
|
||||||
this.noiseScale = 0.667,
|
this.noiseScale = 0.667,
|
||||||
this.noiseScaleW = 0.8,
|
this.noiseScaleW = 0.8,
|
||||||
@@ -33,9 +33,37 @@ class OfflineTtsVitsModelConfig {
|
|||||||
final String dictDir;
|
final String dictDir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Immutable settings for a Matcha TTS model.
///
/// Every field is optional: string fields default to the empty string,
/// [noiseScale] to 0.667 and [lengthScale] to 1.0.
class OfflineTtsMatchaModelConfig {
  const OfflineTtsMatchaModelConfig({
    this.acousticModel = '',
    this.vocoder = '',
    this.lexicon = '',
    this.tokens = '',
    this.dataDir = '',
    this.noiseScale = 0.667,
    this.lengthScale = 1.0,
    this.dictDir = '',
  });

  final String acousticModel;
  final String vocoder;
  final String lexicon;
  final String tokens;
  final String dataDir;
  final double noiseScale;
  final double lengthScale;
  final String dictDir;

  /// Returns a single-line listing of every field, in declaration order.
  @override
  String toString() => 'OfflineTtsMatchaModelConfig('
      'acousticModel: $acousticModel, '
      'vocoder: $vocoder, '
      'lexicon: $lexicon, '
      'tokens: $tokens, '
      'dataDir: $dataDir, '
      'noiseScale: $noiseScale, '
      'lengthScale: $lengthScale, '
      'dictDir: $dictDir)';
}
|
||||||
|
|
||||||
class OfflineTtsModelConfig {
|
class OfflineTtsModelConfig {
|
||||||
const OfflineTtsModelConfig({
|
const OfflineTtsModelConfig({
|
||||||
required this.vits,
|
this.vits = const OfflineTtsVitsModelConfig(),
|
||||||
|
this.matcha = const OfflineTtsMatchaModelConfig(),
|
||||||
this.numThreads = 1,
|
this.numThreads = 1,
|
||||||
this.debug = true,
|
this.debug = true,
|
||||||
this.provider = 'cpu',
|
this.provider = 'cpu',
|
||||||
@@ -43,10 +71,11 @@ class OfflineTtsModelConfig {
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
String toString() {
|
String toString() {
|
||||||
return 'OfflineTtsModelConfig(vits: $vits, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
||||||
}
|
}
|
||||||
|
|
||||||
final OfflineTtsVitsModelConfig vits;
|
final OfflineTtsVitsModelConfig vits;
|
||||||
|
final OfflineTtsMatchaModelConfig matcha;
|
||||||
final int numThreads;
|
final int numThreads;
|
||||||
final bool debug;
|
final bool debug;
|
||||||
final String provider;
|
final String provider;
|
||||||
@@ -99,6 +128,16 @@ class OfflineTts {
|
|||||||
c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
|
c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
|
||||||
c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();
|
c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();
|
||||||
|
|
||||||
|
c.ref.model.matcha.acousticModel =
|
||||||
|
config.model.matcha.acousticModel.toNativeUtf8();
|
||||||
|
c.ref.model.matcha.vocoder = config.model.matcha.vocoder.toNativeUtf8();
|
||||||
|
c.ref.model.matcha.lexicon = config.model.matcha.lexicon.toNativeUtf8();
|
||||||
|
c.ref.model.matcha.tokens = config.model.matcha.tokens.toNativeUtf8();
|
||||||
|
c.ref.model.matcha.dataDir = config.model.matcha.dataDir.toNativeUtf8();
|
||||||
|
c.ref.model.matcha.noiseScale = config.model.matcha.noiseScale;
|
||||||
|
c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;
|
||||||
|
c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8();
|
||||||
|
|
||||||
c.ref.model.numThreads = config.model.numThreads;
|
c.ref.model.numThreads = config.model.numThreads;
|
||||||
c.ref.model.debug = config.model.debug ? 1 : 0;
|
c.ref.model.debug = config.model.debug ? 1 : 0;
|
||||||
c.ref.model.provider = config.model.provider.toNativeUtf8();
|
c.ref.model.provider = config.model.provider.toNativeUtf8();
|
||||||
@@ -112,6 +151,12 @@ class OfflineTts {
|
|||||||
calloc.free(c.ref.ruleFars);
|
calloc.free(c.ref.ruleFars);
|
||||||
calloc.free(c.ref.ruleFsts);
|
calloc.free(c.ref.ruleFsts);
|
||||||
calloc.free(c.ref.model.provider);
|
calloc.free(c.ref.model.provider);
|
||||||
|
calloc.free(c.ref.model.matcha.dictDir);
|
||||||
|
calloc.free(c.ref.model.matcha.dataDir);
|
||||||
|
calloc.free(c.ref.model.matcha.tokens);
|
||||||
|
calloc.free(c.ref.model.matcha.lexicon);
|
||||||
|
calloc.free(c.ref.model.matcha.vocoder);
|
||||||
|
calloc.free(c.ref.model.matcha.acousticModel);
|
||||||
calloc.free(c.ref.model.vits.dictDir);
|
calloc.free(c.ref.model.vits.dictDir);
|
||||||
calloc.free(c.ref.model.vits.dataDir);
|
calloc.free(c.ref.model.vits.dataDir);
|
||||||
calloc.free(c.ref.model.vits.tokens);
|
calloc.free(c.ref.model.vits.tokens);
|
||||||
|
|||||||
Reference in New Issue
Block a user