Add TTS API and examples for Dart (#1010)

This commit is contained in:
Fangjun Kuang
2024-06-15 14:30:36 +08:00
committed by GitHub
parent e3077670c6
commit e52d32b95b
20 changed files with 874 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
# Introduction
This folder contains examples for streaming ASR with Dart API.
| File | Description|
|------|------------|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|

3
dart-api-examples/tts/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/

View File

@@ -0,0 +1,3 @@
## 1.0.0
- Initial version.

View File

@@ -0,0 +1,10 @@
# Introduction
This folder contains examples for text-to-speech (TTS) with the Dart API.
| File | Description|
|------|------------|
|[./bin/piper.dart](./bin/piper.dart)| Use a Piper TTS model for text-to-speech. See [./run-piper.sh](./run-piper.sh)|
|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui TTS model for text-to-speech. See [./run-coqui.sh](./run-coqui.sh)|
|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS TTS model for text-to-speech. See [./run-zh.sh](./run-zh.sh)|

View File

@@ -0,0 +1,30 @@
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include: package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options

View File

@@ -0,0 +1,69 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Generates speech from text with a Coqui VITS model and saves it to a wave
/// file.
///
/// Required options: `--model`, `--tokens`, `--text` and `--output-wav`.
/// Optional options: `--speed` (defaults to 1.0) and `--sid` (defaults to 0).
///
/// Prints the usage and exits with status 1 if a required option is missing.
void main(List<String> arguments) async {
  // NOTE(review): initSherpaOnnx() is defined in ./init.dart, which is not
  // visible here; presumably it loads the native library - confirm.
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // The speed is inverted below to obtain the length scale, so it must be a
  // finite, strictly positive number. Anything else (unparsable text, 0,
  // negative values, NaN, Infinity) falls back to the default of 1.0.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      requestedSpeed.isFinite && requestedSpeed > 0 ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // The misspelling is the parameter name used by the sherpa_onnx API.
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
    print('Saved to $outputWav');
  } finally {
    // Release the TTS object even if generating or saving the audio throws.
    tts.free();
  }
}

View File

@@ -0,0 +1 @@
../../vad/bin/init.dart

View File

@@ -0,0 +1,80 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Generates speech from text with a Piper VITS model and saves it to a wave
/// file, reporting the number of samples passed to the callback as they are
/// received.
///
/// Required options: `--model`, `--tokens`, `--data-dir`, `--text` and
/// `--output-wav`. Optional options: `--speed` (defaults to 1.0) and `--sid`
/// (defaults to 0).
///
/// Prints the usage and exits with status 1 if a required option is missing.
void main(List<String> arguments) async {
  // NOTE(review): initSherpaOnnx() is defined in ./init.dart, which is not
  // visible here; presumably it loads the native library - confirm.
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // The speed is inverted below to obtain the length scale, so it must be a
  // finite, strictly positive number. Anything else (unparsable text, 0,
  // negative values, NaN, Infinity) falls back to the default of 1.0.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      requestedSpeed.isFinite && requestedSpeed > 0 ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // The misspelling is the parameter name used by the sherpa_onnx API.
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generateWithCallback(
      text: text,
      sid: sid,
      speed: speed,
      callback: (Float32List samples) {
        print('${samples.length} samples received');
        // You can play samples in a separate thread/isolate
      },
    );

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
    print('Saved to $outputWav');
  } finally {
    // Release the TTS object even if generating or saving the audio throws.
    tts.free();
  }
}

View File

@@ -0,0 +1,86 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Generates speech from text with a Chinese VITS model and saves it to a wave
/// file.
///
/// Required options: `--model`, `--lexicon`, `--tokens`, `--text` and
/// `--output-wav`. Optional options: `--dict-dir`, `--rule-fsts` and
/// `--rule-fars` (all default to the empty string), `--speed` (defaults to
/// 1.0) and `--sid` (defaults to 0).
///
/// Prints the usage and exits with status 1 if a required option is missing.
void main(List<String> arguments) async {
  // NOTE(review): initSherpaOnnx() is defined in ./init.dart, which is not
  // visible here; presumably it loads the native library - confirm.
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption(
      'dict-dir',
      help: 'Path to jieba dict directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final dictDir = res['dict-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // The speed is inverted below to obtain the length scale, so it must be a
  // finite, strictly positive number. Anything else (unparsable text, 0,
  // negative values, NaN, Infinity) falls back to the default of 1.0.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      requestedSpeed.isFinite && requestedSpeed > 0 ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    // The misspelling is the parameter name used by the sherpa_onnx API.
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
    print('Saved to $outputWav');
  } finally {
    // Release the TTS object even if generating or saving the audio throws.
    tts.free();
  }
}

View File

@@ -0,0 +1,96 @@
# Generated by pub
# See https://dart.dev/tools/pub/glossary#lockfile
packages:
args:
dependency: "direct main"
description:
name: args
sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a"
url: "https://pub.dev"
source: hosted
version: "2.5.0"
characters:
dependency: transitive
description:
name: characters
sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
collection:
dependency: transitive
description:
name: collection
sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a
url: "https://pub.dev"
source: hosted
version: "1.18.0"
ffi:
dependency: transitive
description:
name: ffi
sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21"
url: "https://pub.dev"
source: hosted
version: "2.1.2"
flutter:
dependency: transitive
description: flutter
source: sdk
version: "0.0.0"
lints:
dependency: "direct dev"
description:
name: lints
sha256: cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290
url: "https://pub.dev"
source: hosted
version: "3.0.0"
material_color_utilities:
dependency: transitive
description:
name: material_color_utilities
sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a"
url: "https://pub.dev"
source: hosted
version: "0.8.0"
meta:
dependency: transitive
description:
name: meta
sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
path:
dependency: "direct main"
description:
name: path
sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
url: "https://pub.dev"
source: hosted
version: "1.9.0"
sherpa_onnx:
dependency: "direct main"
description:
name: sherpa_onnx
sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8
url: "https://pub.dev"
source: hosted
version: "1.9.29"
sky_engine:
dependency: transitive
description: flutter
source: sdk
version: "0.0.99"
vector_math:
dependency: transitive
description:
name: vector_math
sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803"
url: "https://pub.dev"
source: hosted
version: "2.1.4"
sdks:
dart: ">=3.4.0 <4.0.0"
flutter: ">=3.3.0"

View File

@@ -0,0 +1,16 @@
name: tts
description: A sample command-line application.
version: 1.0.0
# repository: https://github.com/my_org/my_repo
environment:
sdk: ^3.4.0
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.9.29
path: ^1.9.0
args: ^2.5.0
dev_dependencies:
lints: ^3.0.0

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Runs the Coqui TTS example (./bin/coqui.dart) with the vits-coqui-de-css10
# model, downloading and unpacking the model first if it is not present.
# The generated audio is written to coqui-0.wav in the current directory.

# -e: abort on the first failing command; -x: print each command as it runs.
set -ex
# Fetch the Dart package dependencies.
dart pub get
# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
# The presence of tokens.txt is used as the marker that the model has already
# been downloaded and extracted.
if [[ ! -f ./vits-coqui-de-css10/tokens.txt ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
# The archive is no longer needed once it has been extracted.
rm vits-coqui-de-css10.tar.bz2
fi
# It is a character-based TTS model, so there is no need to use a lexicon
dart run \
./bin/coqui.dart \
--model ./vits-coqui-de-css10/model.onnx \
--tokens ./vits-coqui-de-css10/tokens.txt \
--sid 0 \
--speed 0.7 \
--text 'Alles hat ein Ende, nur die Wurst hat zwei.' \
--output-wav coqui-0.wav
# List the wave files in the current directory, including the one just created.
ls -lh *.wav

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Runs the Piper TTS example (./bin/piper.dart) with the
# vits-piper-en_US-libritts_r-medium model, downloading and unpacking the
# model first if it is not present.
# The generated audio is written to piper-351.wav in the current directory.

# -e: abort on the first failing command; -x: print each command as it runs.
set -ex
# Fetch the Dart package dependencies.
dart pub get
# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
# The presence of tokens.txt is used as the marker that the model has already
# been downloaded and extracted.
if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
# The archive is no longer needed once it has been extracted.
rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi
# Besides the model and tokens, this example passes the espeak-ng-data
# directory shipped inside the model archive via --data-dir.
dart run \
./bin/piper.dart \
--model ./vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx \
--tokens ./vits-piper-en_US-libritts_r-medium/tokens.txt \
--data-dir ./vits-piper-en_US-libritts_r-medium/espeak-ng-data \
--sid 351 \
--speed 1.0 \
--text 'How are you doing? This is a speech to text example, using next generation kaldi with piper.' \
--output-wav piper-351.wav
# List the wave files in the current directory, including the one just created.
ls -lh *.wav

41
dart-api-examples/tts/run-zh.sh Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Runs the Chinese TTS example (./bin/zh.dart) twice with the
# sherpa-onnx-vits-zh-ll model, downloading and unpacking the model first if
# it is not present.
# The generated audio is written to zh-jieba-2.wav and zh-jieba-3.wav in the
# current directory.

# -e: abort on the first failing command; -x: print each command as it runs.
set -ex
# Fetch the Dart package dependencies.
dart pub get
# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
# The presence of tokens.txt is used as the marker that the model has already
# been downloaded and extracted.
if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
# The archive is no longer needed once it has been extracted.
rm sherpa-onnx-vits-zh-ll.tar.bz2
fi
# First run: speaker 2, with the lexicon and the dict directory but without
# any rule FSTs.
dart run \
./bin/zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
--dict-dir ./sherpa-onnx-vits-zh-ll/dict \
--sid 2 \
--speed 1.0 \
--text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
--output-wav zh-jieba-2.wav
# Second run: speaker 3, additionally passing the phone, date and number FSTs
# via --rule-fsts. The input text contains a date, a time, phone numbers and
# an amount of money.
# NOTE(review): the rule FSTs are presumably what normalizes these into
# speakable text - confirm against the sherpa-onnx documentation.
dart run \
./bin/zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
--dict-dir ./sherpa-onnx-vits-zh-ll/dict \
--rule-fsts "./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst" \
--sid 3 \
--speed 1.0 \
--text '今天是2024年6月15号13点23分。如果有困难请拨打110或者18920240511。123456块钱。' \
--output-wav zh-jieba-3.wav
# List the wave files in the current directory, including the ones just created.
ls -lh *.wav