Add Dart API for Kokoro TTS models (#1723)
This commit is contained in:
1
.github/scripts/test-dart.sh
vendored
1
.github/scripts/test-dart.sh
vendored
@@ -7,6 +7,7 @@ cd dart-api-examples
|
||||
pushd tts
|
||||
|
||||
echo '----------matcha tts----------'
|
||||
./run-kokoro-en.sh
|
||||
./run-matcha-zh.sh
|
||||
./run-matcha-en.sh
|
||||
ls -lh *.wav
|
||||
|
||||
86
dart-api-examples/tts/bin/kokoro-en.dart
Normal file
86
dart-api-examples/tts/bin/kokoro-en.dart
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:args/args.dart';
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
|
||||
import './init.dart';
|
||||
|
||||
/// Generates speech from text with a Kokoro TTS model and writes it to a
/// wave file.
///
/// Command-line options (see `parser.usage` for the full list):
///  * `--model`, `--voices`, `--tokens`, `--text`, `--output-wav` are
///    required; if any is missing the usage text is printed and the process
///    exits with code 1.
///  * `--data-dir`, `--rule-fsts`, `--rule-fars` default to the empty string.
///  * `--speed` defaults to `1.0`; unparsable, non-positive, or non-finite
///    values fall back to `1.0`.
///  * `--sid` defaults to `0`; unparsable values fall back to `0`.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);

  // Only options without a default can be null after parsing. 'data-dir' is
  // declared with `defaultsTo: ''`, so it is intentionally not checked here.
  if (res['model'] == null ||
      res['voices'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // `1 / speed` below is only meaningful for a positive, finite speed.
  // Zero, negative, NaN and infinite values all fall back to the default.
  if (speed <= 0 || !speed.isFinite) {
    speed = 1.0;
  }

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // NOTE(review): spelling kept exactly as in the original call;
    // presumably this is the parameter name declared by the package.
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }

  print('Saved to $outputWav');
}
|
||||
27
dart-api-examples/tts/run-kokoro-en.sh
Executable file
27
dart-api-examples/tts/run-kokoro-en.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the kokoro-en-v0_19 model if it is not already present, then runs
# ./bin/kokoro-en.dart to synthesize one sentence into kokoro-en-9.wav.

# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

# The last argument line deliberately has no trailing backslash, so the
# command ends here regardless of what follows it.
dart run \
  ./bin/kokoro-en.dart \
  --model ./kokoro-en-v0_19/model.onnx \
  --voices ./kokoro-en-v0_19/voices.bin \
  --tokens ./kokoro-en-v0_19/tokens.txt \
  --data-dir ./kokoro-en-v0_19/espeak-ng-data \
  --sid 9 \
  --speed 1.0 \
  --output-wav kokoro-en-9.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav
|
||||
@@ -147,6 +147,16 @@ final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
|
||||
external Pointer<Utf8> dictDir;
|
||||
}
|
||||
|
||||
/// FFI struct holding the Kokoro TTS model configuration.
///
/// NOTE(review): presumably mirrors a native struct of the same name, in
/// which case the field order below must not be changed - confirm against
/// the C API header.
final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
  /// Model path as a native UTF-8 string.
  external Pointer<Utf8> model;

  /// Voices file path as a native UTF-8 string.
  external Pointer<Utf8> voices;

  /// Tokens file path as a native UTF-8 string.
  external Pointer<Utf8> tokens;

  /// Data directory path as a native UTF-8 string.
  external Pointer<Utf8> dataDir;

  /// Length scale, stored as a native `float`.
  @Float()
  external double lengthScale;
}
|
||||
|
||||
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
|
||||
external SherpaOnnxOfflineTtsVitsModelConfig vits;
|
||||
@Int32()
|
||||
@@ -157,6 +167,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
|
||||
|
||||
external Pointer<Utf8> provider;
|
||||
external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
|
||||
external SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineTtsConfig extends Struct {
|
||||
|
||||
@@ -60,10 +60,32 @@ class OfflineTtsMatchaModelConfig {
|
||||
final String dictDir;
|
||||
}
|
||||
|
||||
/// Immutable configuration for a Kokoro TTS model.
///
/// Every string field defaults to the empty string and [lengthScale]
/// defaults to `1.0`.
class OfflineTtsKokoroModelConfig {
  /// Path to the model file.
  final String model;

  /// Path to the voices file.
  final String voices;

  /// Path to the tokens file.
  final String tokens;

  /// Path to the data directory.
  final String dataDir;

  /// Length scale passed through to the native configuration.
  final double lengthScale;

  const OfflineTtsKokoroModelConfig({
    this.model = '',
    this.voices = '',
    this.tokens = '',
    this.dataDir = '',
    this.lengthScale = 1.0,
  });

  /// Returns a single-line description listing every field.
  @override
  String toString() => 'OfflineTtsKokoroModelConfig('
      'model: $model, '
      'voices: $voices, '
      'tokens: $tokens, '
      'dataDir: $dataDir, '
      'lengthScale: $lengthScale)';
}
|
||||
|
||||
class OfflineTtsModelConfig {
|
||||
const OfflineTtsModelConfig({
|
||||
this.vits = const OfflineTtsVitsModelConfig(),
|
||||
this.matcha = const OfflineTtsMatchaModelConfig(),
|
||||
this.kokoro = const OfflineTtsKokoroModelConfig(),
|
||||
this.numThreads = 1,
|
||||
this.debug = true,
|
||||
this.provider = 'cpu',
|
||||
@@ -71,11 +93,12 @@ class OfflineTtsModelConfig {
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
||||
return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, numThreads: $numThreads, debug: $debug, provider: $provider)';
|
||||
}
|
||||
|
||||
final OfflineTtsVitsModelConfig vits;
|
||||
final OfflineTtsMatchaModelConfig matcha;
|
||||
final OfflineTtsKokoroModelConfig kokoro;
|
||||
final int numThreads;
|
||||
final bool debug;
|
||||
final String provider;
|
||||
@@ -138,6 +161,12 @@ class OfflineTts {
|
||||
c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;
|
||||
c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8();
|
||||
|
||||
c.ref.model.kokoro.model = config.model.kokoro.model.toNativeUtf8();
|
||||
c.ref.model.kokoro.voices = config.model.kokoro.voices.toNativeUtf8();
|
||||
c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
|
||||
c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
|
||||
c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
|
||||
|
||||
c.ref.model.numThreads = config.model.numThreads;
|
||||
c.ref.model.debug = config.model.debug ? 1 : 0;
|
||||
c.ref.model.provider = config.model.provider.toNativeUtf8();
|
||||
@@ -151,12 +180,19 @@ class OfflineTts {
|
||||
calloc.free(c.ref.ruleFars);
|
||||
calloc.free(c.ref.ruleFsts);
|
||||
calloc.free(c.ref.model.provider);
|
||||
|
||||
calloc.free(c.ref.model.kokoro.dataDir);
|
||||
calloc.free(c.ref.model.kokoro.tokens);
|
||||
calloc.free(c.ref.model.kokoro.voices);
|
||||
calloc.free(c.ref.model.kokoro.model);
|
||||
|
||||
calloc.free(c.ref.model.matcha.dictDir);
|
||||
calloc.free(c.ref.model.matcha.dataDir);
|
||||
calloc.free(c.ref.model.matcha.tokens);
|
||||
calloc.free(c.ref.model.matcha.lexicon);
|
||||
calloc.free(c.ref.model.matcha.vocoder);
|
||||
calloc.free(c.ref.model.matcha.acousticModel);
|
||||
|
||||
calloc.free(c.ref.model.vits.dictDir);
|
||||
calloc.free(c.ref.model.vits.dataDir);
|
||||
calloc.free(c.ref.model.vits.tokens);
|
||||
|
||||
Reference in New Issue
Block a user