Add TTS API and examples for Dart (#1010)
This commit is contained in:
@@ -8,6 +8,7 @@ export 'src/offline_stream.dart';
|
||||
export 'src/online_recognizer.dart';
|
||||
export 'src/online_stream.dart';
|
||||
export 'src/speaker_identification.dart';
|
||||
export 'src/tts.dart';
|
||||
export 'src/vad.dart';
|
||||
export 'src/wave_reader.dart';
|
||||
export 'src/wave_writer.dart';
|
||||
|
||||
@@ -2,6 +2,55 @@
|
||||
import 'dart:ffi';
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
/// FFI mirror of the native VITS model configuration.
///
/// This is a `dart:ffi` [Struct]: the declaration order and the native type
/// annotations of the members determine the memory layout, so members must
/// not be reordered or retyped.
final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
  /// Native UTF-8 string for the `model` setting.
  external Pointer<Utf8> model;

  /// Native UTF-8 string for the `lexicon` setting.
  external Pointer<Utf8> lexicon;

  /// Native UTF-8 string for the `tokens` setting.
  external Pointer<Utf8> tokens;

  /// Native UTF-8 string for the `dataDir` setting.
  external Pointer<Utf8> dataDir;

  /// Stored as a 32-bit float on the native side.
  @Float()
  external double noiseScale;

  /// Stored as a 32-bit float on the native side.
  @Float()
  external double noiseScaleW;

  /// Stored as a 32-bit float on the native side.
  @Float()
  external double lengthScale;

  /// Native UTF-8 string for the `dictDir` setting.
  external Pointer<Utf8> dictDir;
}
|
||||
|
||||
/// FFI mirror of the native TTS model configuration.
///
/// This is a `dart:ffi` [Struct]: member order and native type annotations
/// determine the memory layout and must be kept as declared.
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
  /// Nested (inline, not pointed-to) VITS configuration.
  external SherpaOnnxOfflineTtsVitsModelConfig vits;

  /// Stored as a 32-bit signed integer on the native side.
  @Int32()
  external int numThreads;

  /// Integer flag stored as a 32-bit signed integer on the native side.
  @Int32()
  external int debug;

  /// Native UTF-8 string for the `provider` setting.
  external Pointer<Utf8> provider;
}
|
||||
|
||||
/// FFI mirror of the top-level native TTS configuration.
///
/// This is a `dart:ffi` [Struct]: member order and native type annotations
/// determine the memory layout and must be kept as declared.
final class SherpaOnnxOfflineTtsConfig extends Struct {
  /// Nested (inline, not pointed-to) model configuration.
  external SherpaOnnxOfflineTtsModelConfig model;

  /// Native UTF-8 string for the `ruleFsts` setting.
  external Pointer<Utf8> ruleFsts;

  /// Stored as a 32-bit signed integer on the native side.
  ///
  /// The spelling of this member is kept as-is because other code refers to
  /// it by this name.
  @Int32()
  external int maxNumSenetences;

  /// Native UTF-8 string for the `ruleFars` setting.
  external Pointer<Utf8> ruleFars;
}
|
||||
|
||||
/// FFI mirror of the native generated-audio result.
///
/// This is a `dart:ffi` [Struct]: member order and native type annotations
/// determine the memory layout and must be kept as declared.
final class SherpaOnnxGeneratedAudio extends Struct {
  /// Pointer to the first 32-bit float sample.
  external Pointer<Float> samples;

  /// Number of samples readable through [samples]; a 32-bit signed integer
  /// on the native side.
  @Int32()
  external int n;

  /// Sample rate of the audio; a 32-bit signed integer on the native side.
  @Int32()
  external int sampleRate;
}
|
||||
|
||||
final class SherpaOnnxFeatureConfig extends Struct {
|
||||
@Int32()
|
||||
external int sampleRate;
|
||||
@@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
|
||||
external Pointer<Utf8> provider;
|
||||
}
|
||||
|
||||
/// Opaque native handle type for an offline TTS engine; it has no members
/// visible to Dart and is only ever used behind a `Pointer`.
final class SherpaOnnxOfflineTts extends Opaque {}
|
||||
|
||||
final class SherpaOnnxCircularBuffer extends Opaque {}
|
||||
|
||||
final class SherpaOnnxVoiceActivityDetector extends Opaque {}
|
||||
@@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
|
||||
|
||||
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
||||
|
||||
// Function types for the offline TTS entry points.
//
// Each entry point has a `...Native` typedef written with `dart:ffi` native
// types and a Dart-side typedef in which `Int32`/`Float`/`Void` are replaced
// by `int`/`double`/`void`. The parameter names are documentation only and
// do not affect the function types.

/// Native signature: creates an engine from a configuration struct.
typedef SherpaOnnxCreateOfflineTtsNative = Pointer<SherpaOnnxOfflineTts>
    Function(Pointer<SherpaOnnxOfflineTtsConfig> config);

/// Dart signature of the create call; it uses only pointer types, so it is
/// the same function type as [SherpaOnnxCreateOfflineTtsNative].
typedef SherpaOnnxCreateOfflineTts = Pointer<SherpaOnnxOfflineTts> Function(
    Pointer<SherpaOnnxOfflineTtsConfig> config);

/// Native signature: destroys an engine handle.
typedef SherpaOnnxDestroyOfflineTtsNative = Void Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Dart signature matching [SherpaOnnxDestroyOfflineTtsNative].
typedef SherpaOnnxDestroyOfflineTts = void Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Native signature: returns the engine's sample rate as a 32-bit integer.
typedef SherpaOnnxOfflineTtsSampleRateNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Dart signature matching [SherpaOnnxOfflineTtsSampleRateNative].
typedef SherpaOnnxOfflineTtsSampleRate = int Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Native signature: returns the engine's speaker count as a 32-bit integer.
typedef SherpaOnnxOfflineTtsNumSpeakersNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Dart signature matching [SherpaOnnxOfflineTtsNumSpeakersNative].
typedef SherpaOnnxOfflineTtsNumSpeakers = int Function(
    Pointer<SherpaOnnxOfflineTts> tts);

/// Native signature: generates audio for a UTF-8 text, a speaker id and a
/// speed, returning a pointer to the generated-audio struct.
typedef SherpaOnnxOfflineTtsGenerateNative = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts> tts, Pointer<Utf8> text, Int32 sid,
        Float speed);

/// Dart signature matching [SherpaOnnxOfflineTtsGenerateNative].
typedef SherpaOnnxOfflineTtsGenerate = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts> tts, Pointer<Utf8> text, int sid,
        double speed);

/// Native signature: releases a generated-audio struct.
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
    Pointer<SherpaOnnxGeneratedAudio> audio);

/// Dart signature matching
/// [SherpaOnnxDestroyOfflineTtsGeneratedAudioNative].
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
    Pointer<SherpaOnnxGeneratedAudio> audio);

/// Native signature of the callback handed to the generate-with-callback
/// call: a pointer to float samples and their count.
typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
    Pointer<Float> samples, Int32 n);

/// Native signature: like [SherpaOnnxOfflineTtsGenerateNative] with an
/// additional native callback pointer.
typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts> tts,
        Pointer<Utf8> text,
        Int32 sid,
        Float speed,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>
            callback);

/// Dart signature matching [SherpaOnnxOfflineTtsGenerateWithCallbackNative].
typedef SherpaOnnxOfflineTtsGenerateWithCallback
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts> tts,
        Pointer<Utf8> text,
        int sid,
        double speed,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>
            callback);
|
||||
|
||||
typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
|
||||
Function(Pointer<SherpaOnnxOfflineRecognizerConfig>);
|
||||
|
||||
@@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
|
||||
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
||||
|
||||
class SherpaOnnxBindings {
|
||||
static SherpaOnnxCreateOfflineTts? createOfflineTts;
|
||||
static SherpaOnnxDestroyOfflineTts? destroyOfflineTts;
|
||||
static SherpaOnnxOfflineTtsSampleRate? offlineTtsSampleRate;
|
||||
static SherpaOnnxOfflineTtsNumSpeakers? offlineTtsNumSpeakers;
|
||||
static SherpaOnnxOfflineTtsGenerate? offlineTtsGenerate;
|
||||
static SherpaOnnxDestroyOfflineTtsGeneratedAudio?
|
||||
destroyOfflineTtsGeneratedAudio;
|
||||
static SherpaOnnxOfflineTtsGenerateWithCallback?
|
||||
offlineTtsGenerateWithCallback;
|
||||
|
||||
static CreateOfflineRecognizer? createOfflineRecognizer;
|
||||
static DestroyOfflineRecognizer? destroyOfflineRecognizer;
|
||||
static CreateOfflineStream? createOfflineStream;
|
||||
@@ -740,6 +855,43 @@ class SherpaOnnxBindings {
|
||||
static SherpaOnnxFreeWave? freeWave;
|
||||
|
||||
static void init(DynamicLibrary dynamicLibrary) {
|
||||
createOfflineTts ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateOfflineTtsNative>>(
|
||||
'SherpaOnnxCreateOfflineTts')
|
||||
.asFunction();
|
||||
|
||||
destroyOfflineTts ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxDestroyOfflineTtsNative>>(
|
||||
'SherpaOnnxDestroyOfflineTts')
|
||||
.asFunction();
|
||||
|
||||
offlineTtsSampleRate ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxOfflineTtsSampleRateNative>>(
|
||||
'SherpaOnnxOfflineTtsSampleRate')
|
||||
.asFunction();
|
||||
|
||||
offlineTtsNumSpeakers ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxOfflineTtsNumSpeakersNative>>(
|
||||
'SherpaOnnxOfflineTtsNumSpeakers')
|
||||
.asFunction();
|
||||
|
||||
offlineTtsGenerate ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateNative>>(
|
||||
'SherpaOnnxOfflineTtsGenerate')
|
||||
.asFunction();
|
||||
|
||||
destroyOfflineTtsGeneratedAudio ??= dynamicLibrary
|
||||
.lookup<
|
||||
NativeFunction<
|
||||
SherpaOnnxDestroyOfflineTtsGeneratedAudioNative>>(
|
||||
'SherpaOnnxDestroyOfflineTtsGeneratedAudio')
|
||||
.asFunction();
|
||||
|
||||
offlineTtsGenerateWithCallback ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithCallbackNative>>(
|
||||
'SherpaOnnxOfflineTtsGenerateWithCallback')
|
||||
.asFunction();
|
||||
|
||||
createOfflineRecognizer ??= dynamicLibrary
|
||||
.lookup<NativeFunction<CreateOfflineRecognizerNative>>(
|
||||
'CreateOfflineRecognizer')
|
||||
|
||||
195
sherpa-onnx/flutter/lib/src/tts.dart
Normal file
195
sherpa-onnx/flutter/lib/src/tts.dart
Normal file
@@ -0,0 +1,195 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:convert';
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
/// Immutable Dart-side settings for a VITS text-to-speech model.
///
/// Only [model] and [tokens] are required. String settings default to the
/// empty string, and the three scale values default to `0.667`, `0.8` and
/// `1.0` respectively.
class OfflineTtsVitsModelConfig {
  final String model;
  final String lexicon;
  final String tokens;
  final String dataDir;
  final double noiseScale;
  final double noiseScaleW;
  final double lengthScale;
  final String dictDir;

  const OfflineTtsVitsModelConfig({
    required this.model,
    required this.tokens,
    this.lexicon = '',
    this.dataDir = '',
    this.dictDir = '',
    this.noiseScale = 0.667,
    this.noiseScaleW = 0.8,
    this.lengthScale = 1.0,
  });

  /// Returns a single-line, human-readable listing of every field.
  @override
  String toString() => 'OfflineTtsVitsModelConfig('
      'model: $model, '
      'lexicon: $lexicon, '
      'tokens: $tokens, '
      'dataDir: $dataDir, '
      'noiseScale: $noiseScale, '
      'noiseScaleW: $noiseScaleW, '
      'lengthScale: $lengthScale, '
      'dictDir: $dictDir)';
}
|
||||
|
||||
/// Immutable Dart-side model settings for offline text-to-speech.
///
/// Only [vits] is required; [numThreads] defaults to `1`, [debug] to `true`
/// and [provider] to `'cpu'`.
class OfflineTtsModelConfig {
  final OfflineTtsVitsModelConfig vits;
  final int numThreads;
  final bool debug;
  final String provider;

  const OfflineTtsModelConfig({
    required this.vits,
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// Returns a single-line, human-readable listing of every field.
  @override
  String toString() => 'OfflineTtsModelConfig('
      'vits: $vits, '
      'numThreads: $numThreads, '
      'debug: $debug, '
      'provider: $provider)';
}
|
||||
|
||||
/// Immutable Dart-side top-level settings for offline text-to-speech.
///
/// Only [model] is required; [ruleFsts] and [ruleFars] default to the empty
/// string and [maxNumSenetences] defaults to `1`.
class OfflineTtsConfig {
  final OfflineTtsModelConfig model;
  final String ruleFsts;

  /// The spelling is part of the public API and is kept for compatibility.
  final int maxNumSenetences;

  final String ruleFars;

  const OfflineTtsConfig({
    required this.model,
    this.ruleFsts = '',
    this.maxNumSenetences = 1,
    this.ruleFars = '',
  });

  /// Returns a single-line, human-readable listing of every field.
  @override
  String toString() => 'OfflineTtsConfig('
      'model: $model, '
      'ruleFsts: $ruleFsts, '
      'maxNumSenetences: $maxNumSenetences, '
      'ruleFars: $ruleFars)';
}
|
||||
|
||||
/// Audio produced by a text-to-speech call: the float samples together with
/// the sample rate they were generated at.
class GeneratedAudio {
  /// The generated samples as 32-bit floats.
  final Float32List samples;

  /// The sample rate that belongs to [samples].
  final int sampleRate;

  /// Bundles [samples] with their [sampleRate]; both are required.
  GeneratedAudio({
    required this.samples,
    required this.sampleRate,
  });
}
|
||||
|
||||
/// Offline text-to-speech engine backed by a native `SherpaOnnxOfflineTts`
/// handle.
///
/// Every native entry point is reached through [SherpaOnnxBindings]. When a
/// binding has not been initialised, or when this instance holds no native
/// handle ([ptr] is `nullptr`), the members fall back to a neutral value: an
/// empty [GeneratedAudio] or `0`.
class OfflineTts {
  OfflineTts._({required this.ptr, required this.config});

  /// Creates the native engine described by [config].
  ///
  /// The user is responsible for calling [free] on the returned instance to
  /// avoid a memory leak. [ptr] of the returned instance is `nullptr` when
  /// the create binding has not been initialised.
  factory OfflineTts(OfflineTtsConfig config) {
    final vits = config.model.vits;
    final c = calloc<SherpaOnnxOfflineTtsConfig>();

    c.ref.model.vits.model = vits.model.toNativeUtf8();
    c.ref.model.vits.lexicon = vits.lexicon.toNativeUtf8();
    c.ref.model.vits.tokens = vits.tokens.toNativeUtf8();
    c.ref.model.vits.dataDir = vits.dataDir.toNativeUtf8();
    c.ref.model.vits.noiseScale = vits.noiseScale;
    c.ref.model.vits.noiseScaleW = vits.noiseScaleW;
    c.ref.model.vits.lengthScale = vits.lengthScale;
    c.ref.model.vits.dictDir = vits.dictDir.toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    // The native struct carries the flag as a 32-bit integer.
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.maxNumSenetences = config.maxNumSenetences;
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();

    try {
      final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr;
      return OfflineTts._(ptr: ptr, config: config);
    } finally {
      // Release every native string first (they are reached through `c`),
      // then the struct itself. The struct was previously never freed and
      // leaked on each construction.
      calloc.free(c.ref.ruleFars);
      calloc.free(c.ref.ruleFsts);
      calloc.free(c.ref.model.provider);
      calloc.free(c.ref.model.vits.dictDir);
      calloc.free(c.ref.model.vits.dataDir);
      calloc.free(c.ref.model.vits.tokens);
      calloc.free(c.ref.model.vits.lexicon);
      calloc.free(c.ref.model.vits.model);
      calloc.free(c);
    }
  }

  /// Releases the native engine and resets [ptr] to `nullptr`.
  ///
  /// Calling this more than once, or on an instance that never obtained a
  /// native handle, is a no-op.
  void free() {
    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOfflineTts?.call(ptr);
    ptr = nullptr;
  }

  /// Synthesises [text] with speaker id [sid] at the given [speed].
  ///
  /// Returns an empty [GeneratedAudio] with sample rate `0` when there is no
  /// native handle, when the binding is not initialised, or when the native
  /// call returns `nullptr`.
  GeneratedAudio generate(
      {required String text, int sid = 0, double speed = 1.0}) {
    if (ptr == nullptr) {
      // Never hand a null engine handle to native code.
      return _emptyAudio();
    }

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerate
              ?.call(ptr, textPtr, sid, speed) ??
          nullptr;
      return _takeAudio(p);
    } finally {
      calloc.free(textPtr);
    }
  }

  /// Like [generate], but additionally passes a native callback that hands
  /// a Dart-owned copy of each sample buffer it receives to [callback].
  ///
  /// Returns an empty [GeneratedAudio] with sample rate `0` under the same
  /// conditions as [generate].
  GeneratedAudio generateWithCallback(
      {required String text,
      int sid = 0,
      double speed = 1.0,
      required void Function(Float32List samples) callback}) {
    if (ptr == nullptr) {
      // Never hand a null engine handle to native code.
      return _emptyAudio();
    }

    // see
    // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
    // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
    // https://github.com/dart-lang/sdk/blob/main/tests/ffi/isolate_local_function_callbacks_test.dart#L46
    final wrapper =
        NativeCallable<SherpaOnnxGeneratedAudioCallbackNative>.isolateLocal(
            (Pointer<Float> samples, int n) {
      // Copy so that the callback receives memory owned by Dart.
      callback(Float32List.fromList(samples.asTypedList(n)));
    });

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
              ?.call(ptr, textPtr, sid, speed, wrapper.nativeFunction) ??
          nullptr;
      return _takeAudio(p);
    } finally {
      calloc.free(textPtr);
      wrapper.close();
    }
  }

  /// The sample rate reported by the native engine, or `0` when there is no
  /// native handle or the binding is not initialised.
  int get sampleRate => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsSampleRate?.call(ptr) ?? 0;

  /// The number of speakers reported by the native engine, or `0` when there
  /// is no native handle or the binding is not initialised.
  int get numSpeakers => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsNumSpeakers?.call(ptr) ?? 0;

  /// The result returned whenever no audio could be produced.
  static GeneratedAudio _emptyAudio() =>
      GeneratedAudio(samples: Float32List(0), sampleRate: 0);

  /// Copies the native result [p] into Dart-owned memory and releases [p].
  ///
  /// Returns the empty result when [p] is `nullptr`.
  static GeneratedAudio _takeAudio(Pointer<SherpaOnnxGeneratedAudio> p) {
    if (p == nullptr) {
      return _emptyAudio();
    }

    try {
      // `asTypedList` is only a view onto native memory; copy it before the
      // native struct is destroyed.
      final samples = Float32List.fromList(p.ref.samples.asTypedList(p.ref.n));
      return GeneratedAudio(samples: samples, sampleRate: p.ref.sampleRate);
    } finally {
      SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);
    }
  }

  /// The native engine handle; `nullptr` after [free] or when creation did
  /// not yield a handle.
  Pointer<SherpaOnnxOfflineTts> ptr;

  /// The configuration this instance was created with.
  OfflineTtsConfig config;
}
|
||||
Reference in New Issue
Block a user