Add Pascal API for MatchaTTS models. (#1686)
This commit is contained in:
13
.github/workflows/pascal.yaml
vendored
13
.github/workflows/pascal.yaml
vendored
@@ -152,6 +152,19 @@ jobs:
|
|||||||
|
|
||||||
./run-piper.sh
|
./run-piper.sh
|
||||||
rm -rf vits-piper-*
|
rm -rf vits-piper-*
|
||||||
|
rm piper
|
||||||
|
ls -lh
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
rm matcha-zh
|
||||||
|
ls -lh
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-matcha-en.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
rm matcha-en
|
||||||
ls -lh
|
ls -lh
|
||||||
echo "---"
|
echo "---"
|
||||||
|
|
||||||
|
|||||||
4
pascal-api-examples/tts/.gitignore
vendored
4
pascal-api-examples/tts/.gitignore
vendored
@@ -2,3 +2,7 @@
|
|||||||
piper
|
piper
|
||||||
piper-playback
|
piper-playback
|
||||||
link*.res
|
link*.res
|
||||||
|
matcha-zh
|
||||||
|
matcha-en
|
||||||
|
matcha-zh-playback
|
||||||
|
matcha-en-playback
|
||||||
|
|||||||
239
pascal-api-examples/tts/matcha-en-playback.pas
Normal file
239
pascal-api-examples/tts/matcha-en-playback.pas
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
program matcha_en_playback;
|
||||||
|
{
|
||||||
|
This file shows how to use the text to speech API of sherpa-onnx
|
||||||
|
with MatchaTTS models.
|
||||||
|
|
||||||
|
It generates speech from text and saves it to a wave file.
|
||||||
|
|
||||||
|
Note that it plays the audio back as it is still generating.
|
||||||
|
}
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
{$ifdef unix}
|
||||||
|
cthreads,
|
||||||
|
{$endif}
|
||||||
|
SysUtils,
|
||||||
|
dos,
|
||||||
|
ctypes,
|
||||||
|
portaudio,
|
||||||
|
sherpa_onnx;
|
||||||
|
|
||||||
|
var
|
||||||
|
CriticalSection: TRTLCriticalSection;
|
||||||
|
|
||||||
|
Tts: TSherpaOnnxOfflineTts;
|
||||||
|
Audio: TSherpaOnnxGeneratedAudio;
|
||||||
|
Resampler: TSherpaOnnxLinearResampler;
|
||||||
|
|
||||||
|
Text: AnsiString;
|
||||||
|
Speed: Single = 1.0; {Use a larger value to speak faster}
|
||||||
|
SpeakerId: Integer = 0;
|
||||||
|
Buffer: TSherpaOnnxCircularBuffer;
|
||||||
|
FinishedGeneration: Boolean = False;
|
||||||
|
FinishedPlaying: Boolean = False;
|
||||||
|
|
||||||
|
Version: String;
|
||||||
|
EnvStr: String;
|
||||||
|
Status: Integer;
|
||||||
|
NumDevices: Integer;
|
||||||
|
DeviceIndex: Integer;
|
||||||
|
DeviceInfo: PPaDeviceInfo;
|
||||||
|
|
||||||
|
{ If you get EDivByZero: Division by zero error, please change the sample rate
|
||||||
|
to the one supported by your output device (e.g., your speakers).
|
||||||
|
}
|
||||||
|
DeviceSampleRate: Integer = 48000;
|
||||||
|
I: Integer;
|
||||||
|
Param: TPaStreamParameters;
|
||||||
|
Stream: PPaStream;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
|
||||||
|
{ Callback passed to Tts.Generate. It receives N newly generated samples and
  appends them to the shared circular Buffer, resampling them first when a
  Resampler has been created. The buffer is shared with PlayCallback, so all
  access happens while holding CriticalSection.

  Arg is unused.

  Returns 1 to continue generating; returning 0 would stop generation. }
function GenerateCallback(
  Samples: pcfloat; N: cint32;
  Arg: Pointer): cint; cdecl;
begin
  { Always ask the generator to keep going. }
  Result := 1;

  EnterCriticalSection(CriticalSection);
  try
    if Resampler = nil then
      Buffer.Push(Samples, N)
    else
      Buffer.Push(Resampler.Resample(Samples, N, False));
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
|
||||||
|
|
||||||
|
{ PortAudio stream callback. It copies frameCount samples from the shared
  circular Buffer into the output buffer, padding with the extra elements
  that SetLength appends when fewer samples are available.

  Returns paContinue while the buffer still holds samples or generation is
  still running; otherwise returns paComplete and sets FinishedPlaying.

  The input, timeInfo, statusFlags and userData parameters are unused. }
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  Dst: pcfloat;
  Idx: Integer;
begin
  Dst := pcfloat(output);

  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
    begin
      { Enough audio is queued: take exactly one callback's worth. }
      Samples := Buffer.Get(Buffer.Head, frameCount);
      Buffer.Pop(frameCount);
    end
    else
    begin
      { Drain whatever is left (possibly nothing) ... }
      if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
      end;

      { ... and grow the array to the requested length; the elements added
        by SetLength are expected to be zero, i.e. silence. }
      SetLength(Samples, frameCount);
    end;

    for Idx := 0 to frameCount - 1 do
      Dst[Idx] := Samples[Idx];

    if (Buffer.Size <= 0) and FinishedGeneration then
    begin
      { Nothing left to play and nothing more will arrive. }
      Result := paComplete;
      FinishedPlaying := True;
    end
    else
      Result := paContinue;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
|
||||||
|
|
||||||
|
{ Creates the offline TTS engine for the English MatchaTTS model
  (matcha-icefall-en_US-ljspeech) together with the HiFi-GAN vocoder.

  All paths are relative to the current working directory. Fields that are
  not assigned here keep whatever values the config record starts with
  (presumably defaults set by its Initialize operator; see sherpa_onnx.pas).

  The caller owns the returned object and must free it. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  { Settings that apply to the engine as a whole. }
  Config.MaxNumSentences := 1;
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  { Matcha-specific model files. }
  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
    DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
|
||||||
|
|
||||||
|
{ Main program.

  1. Creates the TTS engine and, if the model sample rate differs from
     DeviceSampleRate, a resampler.
  2. Initializes PortAudio, selects the output device (the default one, or
     the index given in the SHERPA_ONNX_MIC_DEVICE environment variable) and
     opens an output stream driven by PlayCallback.
  3. Generates speech; GenerateCallback feeds samples to the playback buffer
     while generation is still running.
  4. Saves the complete audio to ./matcha-en-playback.wav, waits for playback
     to finish and releases all resources. }
begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { The index may come from the environment; an out-of-range value would make
    Pa_GetDeviceInfo return nil, which is dereferenced below. }
  if (DeviceIndex < 0) or (DeviceIndex >= NumDevices) then
  begin
    WriteLn('Invalid output device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  { Room for 30 seconds of audio at the device sample rate. }
  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);

  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    Exit;
  end;

  { Must exist before the stream starts, because both callbacks lock it. }
  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(stream);
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  Audio := Tts.Generate(Text, SpeakerId, Speed,
    PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-en-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-en-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100); {sleep for 0.1 second }
  {TODO(fangjun): Use an event to indicate the play is finished}

  { Close the stream first so PortAudio can no longer invoke PlayCallback;
    only then destroy the lock and the objects the callback uses. }
  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);
  FreeAndNil(Buffer);

  { Terminate PortAudio even if closing the stream failed. }
  Status := Pa_Terminate;
  if Status <> paNoError then
  begin
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;
end.
|
||||||
|
|
||||||
55
pascal-api-examples/tts/matcha-en.pas
Normal file
55
pascal-api-examples/tts/matcha-en.pas
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
program matcha_en;
|
||||||
|
{
|
||||||
|
This file shows how to use the text to speech API of sherpa-onnx
|
||||||
|
with MatchaTTS models.
|
||||||
|
|
||||||
|
It generates speech from text and saves it to a wave file.
|
||||||
|
|
||||||
|
If you want to play it while it is generating, please see
|
||||||
|
./matcha-en-playback.pas
|
||||||
|
}
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
SysUtils,
|
||||||
|
sherpa_onnx;
|
||||||
|
|
||||||
|
{ Creates the offline TTS engine for the English MatchaTTS model
  (matcha-icefall-en_US-ljspeech) together with the HiFi-GAN vocoder.

  All paths are relative to the current working directory. Fields that are
  not assigned here keep whatever values the config record starts with
  (presumably defaults set by its Initialize operator; see sherpa_onnx.pas).

  The caller owns the returned object and must free it. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  { Settings that apply to the engine as a whole. }
  Config.MaxNumSentences := 1;
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  { Matcha-specific model files. }
  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
    DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
|
||||||
|
|
||||||
|
var
|
||||||
|
Tts: TSherpaOnnxOfflineTts;
|
||||||
|
Audio: TSherpaOnnxGeneratedAudio;
|
||||||
|
|
||||||
|
Text: AnsiString;
|
||||||
|
Speed: Single = 1.0; {Use a larger value to speak faster}
|
||||||
|
SpeakerId: Integer = 0;
|
||||||
|
|
||||||
|
{ Main program: synthesizes one English sentence with the MatchaTTS model and
  saves the result to ./matcha-en.wav. The engine is released in a finally
  block so it is freed even if generation or writing raises an exception. }
begin
  Tts := GetOfflineTts;
  try
    WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

    Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

    Audio := Tts.Generate(Text, SpeakerId, Speed);
    SherpaOnnxWriteWave('./matcha-en.wav', Audio.Samples, Audio.SampleRate);
    WriteLn('Saved to ./matcha-en.wav');
  finally
    FreeAndNil(Tts);
  end;
end.
|
||||||
|
|
||||||
241
pascal-api-examples/tts/matcha-zh-playback.pas
Normal file
241
pascal-api-examples/tts/matcha-zh-playback.pas
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
program matcha_zh_playback;
|
||||||
|
{
|
||||||
|
This file shows how to use the text to speech API of sherpa-onnx
|
||||||
|
with MatchaTTS models.
|
||||||
|
|
||||||
|
It generates speech from text and saves it to a wave file.
|
||||||
|
|
||||||
|
Note that it plays the audio back as it is still generating.
|
||||||
|
}
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
{$ifdef unix}
|
||||||
|
cthreads,
|
||||||
|
{$endif}
|
||||||
|
SysUtils,
|
||||||
|
dos,
|
||||||
|
ctypes,
|
||||||
|
portaudio,
|
||||||
|
sherpa_onnx;
|
||||||
|
|
||||||
|
var
|
||||||
|
CriticalSection: TRTLCriticalSection;
|
||||||
|
|
||||||
|
Tts: TSherpaOnnxOfflineTts;
|
||||||
|
Audio: TSherpaOnnxGeneratedAudio;
|
||||||
|
Resampler: TSherpaOnnxLinearResampler;
|
||||||
|
|
||||||
|
Text: AnsiString;
|
||||||
|
Speed: Single = 1.0; {Use a larger value to speak faster}
|
||||||
|
SpeakerId: Integer = 0;
|
||||||
|
Buffer: TSherpaOnnxCircularBuffer;
|
||||||
|
FinishedGeneration: Boolean = False;
|
||||||
|
FinishedPlaying: Boolean = False;
|
||||||
|
|
||||||
|
Version: String;
|
||||||
|
EnvStr: String;
|
||||||
|
Status: Integer;
|
||||||
|
NumDevices: Integer;
|
||||||
|
DeviceIndex: Integer;
|
||||||
|
DeviceInfo: PPaDeviceInfo;
|
||||||
|
|
||||||
|
{ If you get EDivByZero: Division by zero error, please change the sample rate
|
||||||
|
to the one supported by your output device (e.g., your speakers).
|
||||||
|
}
|
||||||
|
DeviceSampleRate: Integer = 48000;
|
||||||
|
I: Integer;
|
||||||
|
Param: TPaStreamParameters;
|
||||||
|
Stream: PPaStream;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
|
||||||
|
{ Callback passed to Tts.Generate. It receives N newly generated samples and
  appends them to the shared circular Buffer, resampling them first when a
  Resampler has been created. The buffer is shared with PlayCallback, so all
  access happens while holding CriticalSection.

  Arg is unused.

  Returns 1 to continue generating; returning 0 would stop generation. }
function GenerateCallback(
  Samples: pcfloat; N: cint32;
  Arg: Pointer): cint; cdecl;
begin
  { Always ask the generator to keep going. }
  Result := 1;

  EnterCriticalSection(CriticalSection);
  try
    if Resampler = nil then
      Buffer.Push(Samples, N)
    else
      Buffer.Push(Resampler.Resample(Samples, N, False));
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
|
||||||
|
|
||||||
|
{ PortAudio stream callback. It copies frameCount samples from the shared
  circular Buffer into the output buffer, padding with the extra elements
  that SetLength appends when fewer samples are available.

  Returns paContinue while the buffer still holds samples or generation is
  still running; otherwise returns paComplete and sets FinishedPlaying.

  The input, timeInfo, statusFlags and userData parameters are unused. }
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  Dst: pcfloat;
  Idx: Integer;
begin
  Dst := pcfloat(output);

  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
    begin
      { Enough audio is queued: take exactly one callback's worth. }
      Samples := Buffer.Get(Buffer.Head, frameCount);
      Buffer.Pop(frameCount);
    end
    else
    begin
      { Drain whatever is left (possibly nothing) ... }
      if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
      end;

      { ... and grow the array to the requested length; the elements added
        by SetLength are expected to be zero, i.e. silence. }
      SetLength(Samples, frameCount);
    end;

    for Idx := 0 to frameCount - 1 do
      Dst[Idx] := Samples[Idx];

    if (Buffer.Size <= 0) and FinishedGeneration then
    begin
      { Nothing left to play and nothing more will arrive. }
      Result := paComplete;
      FinishedPlaying := True;
    end
    else
      Result := paContinue;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
|
||||||
|
|
||||||
|
{ Creates the offline TTS engine for the Chinese MatchaTTS model
  (matcha-icefall-zh-baker) together with the HiFi-GAN vocoder, including the
  lexicon, dictionary directory and the phone/date/number rule FSTs.

  All paths are relative to the current working directory. Fields that are
  not assigned here keep whatever values the config record starts with
  (presumably defaults set by its Initialize operator; see sherpa_onnx.pas).

  The caller owns the returned object and must free it. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  { Settings that apply to the engine as a whole. }
  Config.MaxNumSentences := 1;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  { Matcha-specific model files. }
  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
    Tokens := './matcha-icefall-zh-baker/tokens.txt';
    DictDir := './matcha-icefall-zh-baker/dict';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
|
||||||
|
|
||||||
|
{ Main program.

  1. Creates the TTS engine and, if the model sample rate differs from
     DeviceSampleRate, a resampler.
  2. Initializes PortAudio, selects the output device (the default one, or
     the index given in the SHERPA_ONNX_MIC_DEVICE environment variable) and
     opens an output stream driven by PlayCallback.
  3. Generates speech; GenerateCallback feeds samples to the playback buffer
     while generation is still running.
  4. Saves the complete audio to ./matcha-zh-playback.wav, waits for playback
     to finish and releases all resources. }
begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { The index may come from the environment; an out-of-range value would make
    Pa_GetDeviceInfo return nil, which is dereferenced below. }
  if (DeviceIndex < 0) or (DeviceIndex >= NumDevices) then
  begin
    WriteLn('Invalid output device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  { Room for 30 seconds of audio at the device sample rate. }
  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);

  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    Exit;
  end;

  { Must exist before the stream starts, because both callbacks lock it. }
  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(stream);
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

  Audio := Tts.Generate(Text, SpeakerId, Speed,
    PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-zh-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-zh-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100); {sleep for 0.1 second }
  {TODO(fangjun): Use an event to indicate the play is finished}

  { Close the stream first so PortAudio can no longer invoke PlayCallback;
    only then destroy the lock and the objects the callback uses. }
  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);
  FreeAndNil(Buffer);

  { Terminate PortAudio even if closing the stream failed. }
  Status := Pa_Terminate;
  if Status <> paNoError then
  begin
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;
end.
|
||||||
|
|
||||||
57
pascal-api-examples/tts/matcha-zh.pas
Normal file
57
pascal-api-examples/tts/matcha-zh.pas
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
program matcha_zh;
|
||||||
|
{
|
||||||
|
This file shows how to use the text to speech API of sherpa-onnx
|
||||||
|
with MatchaTTS models.
|
||||||
|
|
||||||
|
It generates speech from text and saves it to a wave file.
|
||||||
|
|
||||||
|
If you want to play it while it is generating, please see
|
||||||
|
./matcha-zh-playback.pas
|
||||||
|
}
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
SysUtils,
|
||||||
|
sherpa_onnx;
|
||||||
|
|
||||||
|
{ Creates the offline TTS engine for the Chinese MatchaTTS model
  (matcha-icefall-zh-baker) together with the HiFi-GAN vocoder, including the
  lexicon, dictionary directory and the phone/date/number rule FSTs.

  All paths are relative to the current working directory. Fields that are
  not assigned here keep whatever values the config record starts with
  (presumably defaults set by its Initialize operator; see sherpa_onnx.pas).

  The caller owns the returned object and must free it. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  { Settings that apply to the engine as a whole. }
  Config.MaxNumSentences := 1;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  { Matcha-specific model files. }
  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
    Tokens := './matcha-icefall-zh-baker/tokens.txt';
    DictDir := './matcha-icefall-zh-baker/dict';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
|
||||||
|
|
||||||
|
var
|
||||||
|
Tts: TSherpaOnnxOfflineTts;
|
||||||
|
Audio: TSherpaOnnxGeneratedAudio;
|
||||||
|
|
||||||
|
Text: AnsiString;
|
||||||
|
Speed: Single = 1.0; {Use a larger value to speak faster}
|
||||||
|
SpeakerId: Integer = 0;
|
||||||
|
|
||||||
|
{ Main program: synthesizes one Chinese sentence with the MatchaTTS model and
  saves the result to ./matcha-zh.wav. The engine is released in a finally
  block so it is freed even if generation or writing raises an exception. }
begin
  Tts := GetOfflineTts;
  try
    WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

    Text := '某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

    Audio := Tts.Generate(Text, SpeakerId, Speed);
    SherpaOnnxWriteWave('./matcha-zh.wav', Audio.Samples, Audio.SampleRate);
    WriteLn('Saved to ./matcha-zh.wav');
  finally
    FreeAndNil(Tts);
  end;
end.
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
{ Copyright (c) 2024 Xiaomi Corporation }
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
program piper;
|
program piper_playback;
|
||||||
{
|
{
|
||||||
This file shows how to use the text to speech API of sherpa-onnx
|
This file shows how to use the text to speech API of sherpa-onnx
|
||||||
with Piper models.
|
with Piper models.
|
||||||
|
|||||||
53
pascal-api-examples/tts/run-matcha-en-playback.sh
Executable file
53
pascal-api-examples/tts/run-matcha-en-playback.sh
Executable file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries if they are not installed yet,
# downloads the English MatchaTTS model and the HiFi-GAN vocoder, compiles
# matcha-en-playback.pas with Free Pascal and runs the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Model files, the .pas source and the compiled binary are all addressed
# relative to this script's directory, so run from there regardless of where
# the script was invoked.
cd "$SCRIPT_DIR"

LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The /usr/local/Cellar path below is a fixed PortAudio 19.7.0 install
# location; adjust it if PortAudio lives elsewhere on your machine.
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./matcha-en-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./matcha-en-playback
|
||||||
49
pascal-api-examples/tts/run-matcha-en.sh
Executable file
49
pascal-api-examples/tts/run-matcha-en.sh
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries if they are not installed yet,
# downloads the English MatchaTTS model and the HiFi-GAN vocoder, compiles
# matcha-en.pas with Free Pascal and runs the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Model files, the .pas source and the compiled binary are all addressed
# relative to this script's directory, so run from there regardless of where
# the script was invoked.
cd "$SCRIPT_DIR"

LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./matcha-en.pas

export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./matcha-en
|
||||||
52
pascal-api-examples/tts/run-matcha-zh-playback.sh
Executable file
52
pascal-api-examples/tts/run-matcha-zh-playback.sh
Executable file
@@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries if they are not installed yet,
# downloads the Chinese MatchaTTS model and the HiFi-GAN vocoder, compiles
# matcha-zh-playback.pas with Free Pascal and runs the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Model files, the .pas source and the compiled binary are all addressed
# relative to this script's directory, so run from there regardless of where
# the script was invoked.
cd "$SCRIPT_DIR"

LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# The /usr/local/Cellar path below is a fixed PortAudio 19.7.0 install
# location; adjust it if PortAudio lives elsewhere on your machine.
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./matcha-zh-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./matcha-zh-playback
|
||||||
48
pascal-api-examples/tts/run-matcha-zh.sh
Executable file
48
pascal-api-examples/tts/run-matcha-zh.sh
Executable file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries if they are not installed yet,
# downloads the Chinese MatchaTTS model and the HiFi-GAN vocoder, compiles
# matcha-zh.pas with Free Pascal and runs the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Model files, the .pas source and the compiled binary are all addressed
# relative to this script's directory, so run from there regardless of where
# the script was invoked.
cd "$SCRIPT_DIR"

LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./matcha-zh.pas

export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./matcha-zh
|
||||||
@@ -62,11 +62,26 @@ type
|
|||||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig);
|
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Pascal-side configuration for a Matcha TTS model.
  The string fields are handed to the C API as PAnsiChar values when the
  TTS object is created. Field order is kept as declared originally. }
TSherpaOnnxOfflineTtsMatchaModelConfig = record
  { NOTE(review): presumably file/directory paths of the model assets;
    confirm against the C API documentation. }
  AcousticModel, Vocoder, Lexicon, Tokens, DataDir: AnsiString;
  { Set to 0.667 and 1.0 respectively by the Initialize operator. }
  NoiseScale, LengthScale: Single;
  DictDir: AnsiString;

  { Returns a printable dump of all fields. }
  function ToString: AnsiString;
  { Management operator that installs the default scale values. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig);
end;
|
||||||
|
|
||||||
TSherpaOnnxOfflineTtsModelConfig = record
|
TSherpaOnnxOfflineTtsModelConfig = record
|
||||||
Vits: TSherpaOnnxOfflineTtsVitsModelConfig;
|
Vits: TSherpaOnnxOfflineTtsVitsModelConfig;
|
||||||
NumThreads: Integer;
|
NumThreads: Integer;
|
||||||
Debug: Boolean;
|
Debug: Boolean;
|
||||||
Provider: AnsiString;
|
Provider: AnsiString;
|
||||||
|
Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig;
|
||||||
|
|
||||||
function ToString: AnsiString;
|
function ToString: AnsiString;
|
||||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig);
|
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig);
|
||||||
@@ -713,11 +728,23 @@ type
|
|||||||
DictDir: PAnsiChar;
|
DictDir: PAnsiChar;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ C-side view of the Matcha model configuration, filled in from
  TSherpaOnnxOfflineTtsMatchaModelConfig before calling into the library.
  NOTE(review): the field order presumably has to match the corresponding
  struct of the sherpa-onnx C API - do not reorder without checking c-api.h. }
SherpaOnnxOfflineTtsMatchaModelConfig = record
  AcousticModel, Vocoder, Lexicon, Tokens, DataDir: PAnsiChar;
  NoiseScale, LengthScale: cfloat;
  DictDir: PAnsiChar;
end;
|
||||||
|
|
||||||
SherpaOnnxOfflineTtsModelConfig = record
|
SherpaOnnxOfflineTtsModelConfig = record
|
||||||
Vits: SherpaOnnxOfflineTtsVitsModelConfig;
|
Vits: SherpaOnnxOfflineTtsVitsModelConfig;
|
||||||
NumThreads: cint32;
|
NumThreads: cint32;
|
||||||
Debug: cint32;
|
Debug: cint32;
|
||||||
Provider: PAnsiChar;
|
Provider: PAnsiChar;
|
||||||
|
Matcha: SherpaOnnxOfflineTtsMatchaModelConfig;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
SherpaOnnxOfflineTtsConfig = record
|
SherpaOnnxOfflineTtsConfig = record
|
||||||
@@ -1853,15 +1880,40 @@ begin
|
|||||||
Dest.LengthScale := 1.0;
|
Dest.LengthScale := 1.0;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Renders every field of the Matcha model configuration as
  'TSherpaOnnxOfflineTtsMatchaModelConfig(Name := value, ...)'.
  The two scale values are printed with two decimals. }
function TSherpaOnnxOfflineTtsMatchaModelConfig.ToString: AnsiString;
const
  Layout =
    'TSherpaOnnxOfflineTtsMatchaModelConfig(' +
    'AcousticModel := %s, ' +
    'Vocoder := %s, ' +
    'Lexicon := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'NoiseScale := %.2f, ' +
    'LengthScale := %.2f, ' +
    'DictDir := %s' +
    ')';
begin
  { Argument order must follow the placeholders in Layout. }
  Result := Format(Layout,
    [AcousticModel, Vocoder, Lexicon, Tokens, DataDir,
     NoiseScale, LengthScale, DictDir]);
end;
|
||||||
|
|
||||||
|
{ Management operator: gives a freshly created Matcha configuration its
  default noise scale (0.667) and length scale (1.0). No other field is
  assigned here. }
class operator TSherpaOnnxOfflineTtsMatchaModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig);
begin
  with Dest do
  begin
    LengthScale := 1.0;
    NoiseScale := 0.667;
  end;
end;
|
||||||
|
|
||||||
function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString;
|
function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString;
|
||||||
begin
|
begin
|
||||||
Result := Format('TSherpaOnnxOfflineTtsModelConfig(' +
|
Result := Format('TSherpaOnnxOfflineTtsModelConfig(' +
|
||||||
'Vits := %s, ' +
|
'Vits := %s, ' +
|
||||||
'NumThreads := %d, ' +
|
'NumThreads := %d, ' +
|
||||||
'Debug := %s, ' +
|
'Debug := %s, ' +
|
||||||
'Provider := %s' +
|
'Provider := %s, ' +
|
||||||
|
'Matcha := %s' +
|
||||||
')',
|
')',
|
||||||
[Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider
|
[Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider,
|
||||||
|
Self.Matcha.ToString
|
||||||
]);
|
]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@@ -1905,6 +1957,15 @@ begin
|
|||||||
C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale;
|
C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale;
|
||||||
C.Model.Vits.DictDir := PAnsiChar(Config.Model.Vits.DictDir);
|
C.Model.Vits.DictDir := PAnsiChar(Config.Model.Vits.DictDir);
|
||||||
|
|
||||||
|
C.Model.Matcha.AcousticModel := PAnsiChar(Config.Model.Matcha.AcousticModel);
|
||||||
|
C.Model.Matcha.Vocoder := PAnsiChar(Config.Model.Matcha.Vocoder);
|
||||||
|
C.Model.Matcha.Lexicon := PAnsiChar(Config.Model.Matcha.Lexicon);
|
||||||
|
C.Model.Matcha.Tokens := PAnsiChar(Config.Model.Matcha.Tokens);
|
||||||
|
C.Model.Matcha.DataDir := PAnsiChar(Config.Model.Matcha.DataDir);
|
||||||
|
C.Model.Matcha.NoiseScale := Config.Model.Matcha.NoiseScale;
|
||||||
|
C.Model.Matcha.LengthScale := Config.Model.Matcha.LengthScale;
|
||||||
|
C.Model.Matcha.DictDir := PAnsiChar(Config.Model.Matcha.DictDir);
|
||||||
|
|
||||||
C.Model.NumThreads := Config.Model.NumThreads;
|
C.Model.NumThreads := Config.Model.NumThreads;
|
||||||
C.Model.Provider := PAnsiChar(Config.Model.Provider);
|
C.Model.Provider := PAnsiChar(Config.Model.Provider);
|
||||||
C.Model.Debug := Ord(Config.Model.Debug);
|
C.Model.Debug := Ord(Config.Model.Debug);
|
||||||
|
|||||||
Reference in New Issue
Block a user