Add TTS for node-addon-api (#871)

This commit is contained in:
Fangjun Kuang
2024-05-13 19:24:09 +08:00
committed by GitHub
parent 740d7ae9d6
commit 031134b4d4
21 changed files with 691 additions and 10 deletions

View File

@@ -143,3 +143,43 @@ node ./test_asr_non_streaming_paraformer.js
npm install naudiodon2
node ./test_vad_asr_non_streaming_paraformer_microphone.js
```
## Text-to-speech with piper VITS models (TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2
node ./test_tts_non_streaming_vits_piper_en.js
```
## Text-to-speech with Coqui-ai/TTS models (TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
rm vits-coqui-de-css10.tar.bz2
node ./test_tts_non_streaming_vits_coqui_de.js
```
## Text-to-speech with vits Chinese models (1/2)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2
node ./test_tts_non_streaming_vits_zh_ll.js
```
## Text-to-speech with vits Chinese models (2/2)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
node ./test_tts_non_streaming_vits_zh_aishell3.js
```

View File

@@ -0,0 +1,43 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The VITS model and token files are read from ./vits-coqui-de-css10, so the
 * archive must be downloaded and extracted before running the script.
 *
 * @returns {sherpa_onnx.OfflineTts} TTS instance built from the config below.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-coqui-de-css10/model.onnx',
        tokens: './vits-coqui-de-css10/tokens.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; confirm the native binding reads `maxNumSentences`.
    maxNumSentences: 1,
  };

  return new sherpa_onnx.OfflineTts(config);
}
// Synthesize one German sentence, report timing statistics, and save the
// generated audio to a wave file.
const tts = createOfflineTts();

const text = 'Alles hat ein Ende, nur die Wurst hat zwei.';

// Time only the generate() call.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-coqui-de.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);

View File

@@ -0,0 +1,46 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The VITS model, token file and espeak-ng data directory are read from
 * ./vits-piper-en_GB-cori-medium, so the archive must be downloaded and
 * extracted before running the script.
 *
 * @returns {sherpa_onnx.OfflineTts} TTS instance built from the config below.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx',
        tokens: './vits-piper-en_GB-cori-medium/tokens.txt',
        dataDir: './vits-piper-en_GB-cori-medium/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; confirm the native binding reads `maxNumSentences`.
    maxNumSentences: 1,
  };

  return new sherpa_onnx.OfflineTts(config);
}
// Synthesize an English passage, report timing statistics, and save the
// generated audio to a wave file.
const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

// Time only the generate() call.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-piper-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);

View File

@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The VITS model, token file, lexicon, rule FSTs and rule FAR are read from
 * ./vits-icefall-zh-aishell3, so the archive must be downloaded and extracted
 * before running the script.
 *
 * @returns {sherpa_onnx.OfflineTts} TTS instance built from the config below.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-icefall-zh-aishell3/model.onnx',
        tokens: './vits-icefall-zh-aishell3/tokens.txt',
        lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; confirm the native binding reads `maxNumSentences`.
    maxNumSentences: 1,
    // Comma-separated list of rule FST files.
    ruleFsts:
        './vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
    ruleFars: './vits-icefall-zh-aishell3/rule.far',
  };

  return new sherpa_onnx.OfflineTts(config);
}
// Synthesize a Chinese passage (containing dates, phone numbers and amounts),
// report timing statistics, and save the generated audio to a wave file.
const tts = createOfflineTts();

const text =
    '他在长沙出生长白山长大去过长江现在他是一个银行的行长主管行政工作。有困难请拨110或者13020240513。今天是2024年5月13号, 他上个月的工资是12345块钱。';

// Time only the generate() call.
const start = performance.now();
const audio = tts.generate({text, sid: 88, speed: 1.0});
const stop = performance.now();

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-aishell3.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);

View File

@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The VITS model, token file, lexicon, dictionary directory and rule FSTs are
 * read from ./sherpa-onnx-vits-zh-ll, so the archive must be downloaded and
 * extracted before running the script.
 *
 * @returns {sherpa_onnx.OfflineTts} TTS instance built from the config below.
 */
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './sherpa-onnx-vits-zh-ll/model.onnx',
        tokens: './sherpa-onnx-vits-zh-ll/tokens.txt',
        lexicon: './sherpa-onnx-vits-zh-ll/lexicon.txt',
        dictDir: './sherpa-onnx-vits-zh-ll/dict',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    // NOTE(review): this key was previously spelled `maxNumStences`, which
    // looks like a typo; confirm the native binding reads `maxNumSentences`.
    maxNumSentences: 1,
    // Comma-separated list of rule FST files.
    ruleFsts:
        './sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/number.fst',
  };

  return new sherpa_onnx.OfflineTts(config);
}
// Synthesize a Chinese passage (containing a date, phone numbers and an
// amount), report timing statistics, and save the audio to a wave file.
const tts = createOfflineTts();

const text =
    '当夜幕降临星光点点伴随着微风拂面我在静谧中感受着时光的流转思念如涟漪荡漾梦境如画卷展开我与自然融为一体沉静在这片宁静的美丽之中感受着生命的奇迹与温柔。2024年5月13号拨打110或者18920240513。123456块钱。';

// Time only the generate() call.
const start = performance.now();
const audio = tts.generate({text, sid: 2, speed: 1.0});
const stop = performance.now();

// performance.now() returns milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds.
const duration = audio.samples.length / audio.sampleRate;
// Real-time factor: processing time divided by audio duration.
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-ll.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);

View File

@@ -99,7 +99,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
index += 1;
}

View File

@@ -97,7 +97,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
index += 1;
}

View File

@@ -102,7 +102,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
index += 1;
}

View File

@@ -98,7 +98,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
index += 1;
}

View File

@@ -71,7 +71,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
const duration = segment.samples.length / vad.config.sampleRate;
console.log(`${index} End of speech. Duration: ${duration} seconds`);
console.log(`Saved to ${filename}`);