Add JavaScript API for Moonshine models (#1480)
This commit is contained in:
20
.github/scripts/test-nodejs-addon-npm.sh
vendored
20
.github/scripts/test-nodejs-addon-npm.sh
vendored
@@ -10,6 +10,19 @@ arch=$(node -p "require('os').arch()")
|
||||
platform=$(node -p "require('os').platform()")
|
||||
node_version=$(node -p "process.versions.node.split('.')[0]")
|
||||
|
||||
echo "----------non-streaming asr moonshine + vad----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
node ./test_vad_with_non_streaming_asr_moonshine.js
|
||||
rm -rf sherpa-onnx-*
|
||||
rm *.wav
|
||||
rm *.onnx
|
||||
|
||||
echo "----------non-streaming speaker diarization----------"
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
@@ -24,7 +37,7 @@ node ./test_offline_speaker_diarization.js
|
||||
|
||||
rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
|
||||
|
||||
echo "----------non-streaming asr + vad----------"
|
||||
echo "----------non-streaming asr whisper + vad----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
@@ -218,6 +231,11 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
node ./test_asr_non_streaming_whisper.js
|
||||
rm -rf sherpa-onnx-whisper-tiny.en
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_moonshine.js
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
ls -lh
|
||||
|
||||
24
.github/scripts/test-nodejs-npm.sh
vendored
24
.github/scripts/test-nodejs-npm.sh
vendored
@@ -21,6 +21,23 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segm
|
||||
node ./test-offline-speaker-diarization.js
|
||||
rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*
|
||||
|
||||
echo '-----vad+moonshine----------'
|
||||
|
||||
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
node ./test-vad-with-non-streaming-asr-whisper.js
|
||||
rm Obama.wav
|
||||
rm silero_vad.onnx
|
||||
rm -rf sherpa-onnx-moonshine-*
|
||||
|
||||
echo '-----vad+whisper----------'
|
||||
|
||||
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
@@ -90,6 +107,13 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
node ./test-offline-whisper.js
|
||||
rm -rf sherpa-onnx-whisper-tiny.en
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
node ./test-offline-moonshine.js
|
||||
rm -rf sherpa-onnx-moonshine-*
|
||||
|
||||
# online asr
|
||||
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||
|
||||
@@ -112,6 +112,8 @@ The following tables list the examples in this folder.
|
||||
|[./test_asr_non_streaming_transducer.js](./test_asr_non_streaming_transducer.js)|Non-streaming speech recognition from a file with a Zipformer transducer model|
|
||||
|[./test_asr_non_streaming_whisper.js](./test_asr_non_streaming_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper)|
|
||||
|[./test_vad_with_non_streaming_asr_whisper.js](./test_vad_with_non_streaming_asr_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|
||||
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|
||||
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|
||||
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|
||||
|[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
|
||||
@@ -122,6 +124,7 @@ The following tables list the examples in this folder.
|
||||
|---|---|
|
||||
|[./test_vad_asr_non_streaming_transducer_microphone.js](./test_vad_asr_non_streaming_transducer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer transducer model|
|
||||
|[./test_vad_asr_non_streaming_whisper_microphone.js](./test_vad_asr_non_streaming_whisper_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Whisper](https://github.com/openai/whisper)|
|
||||
|[./test_vad_asr_non_streaming_moonshine_microphone.js](./test_vad_asr_non_streaming_moonshine_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Moonshine](https://github.com/usefulsensors/moonshine)|
|
||||
|[./test_vad_asr_non_streaming_nemo_ctc_microphone.js](./test_vad_asr_non_streaming_nemo_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|
||||
|[./test_vad_asr_non_streaming_paraformer_microphone.js](./test_vad_asr_non_streaming_paraformer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|
||||
|[./test_vad_asr_non_streaming_sense_voice_microphone.js](./test_vad_asr_non_streaming_sense_voice_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
|
||||
@@ -260,6 +263,33 @@ npm install naudiodon2
|
||||
node ./test_vad_asr_non_streaming_whisper_microphone.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with Moonshine
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_moonshine.js
|
||||
|
||||
# To run VAD + non-streaming ASR with Moonshine using a microphone
|
||||
npm install naudiodon2
|
||||
node ./test_vad_asr_non_streaming_moonshine_microphone.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with Moonshine + VAD
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
node ./test_vad_with_non_streaming_asr_moonshine.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with Whisper + VAD
|
||||
|
||||
```bash
|
||||
|
||||
50
nodejs-addon-examples/test_asr_non_streaming_moonshine.js
Normal file
50
nodejs-addon-examples/test_asr_non_streaming_moonshine.js
Normal file
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation

// Non-streaming speech recognition from a wave file with a Moonshine model.
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'moonshine': {
      'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
      'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
      'uncachedDecoder':
          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
      'cachedDecoder':
          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
    },
    'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started')
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
// Declared with `const`; the original assigned to an undeclared name,
// creating an implicit global (a ReferenceError in strict mode).
const result = recognizer.getResult(stream)
let stop = Date.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Typo fixed in the two messages below: 'secodns' -> 'seconds'.
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))
console.log(waveFilename)
console.log('result\n', result)
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// VAD + non-streaming speech recognition from a microphone with a
// Moonshine model. Each detected speech segment is decoded and saved
// to a wave file.
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Build an offline (non-streaming) recognizer using a Moonshine model.
function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

// Build a silero VAD instance for 16 kHz audio.
function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

// Circular buffer that accumulates microphone samples until the VAD can
// consume them in fixed-size windows.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// Counter used both for console output and for the saved wave filenames.
// (An unused `printed` flag from the original has been removed.)
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));

  // Feed the VAD one window at a time.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  // Decode every speech segment the VAD has detected so far.
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      // Save the segment as <index>-<text>-<HH:MM:SS>.wav
      const filename = `${index}-${text}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak')
|
||||
@@ -0,0 +1,132 @@
|
||||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// VAD + non-streaming speech recognition with a Moonshine model,
// decoding a long wave file segment by segment.

const sherpa_onnx = require('sherpa-onnx-node');

// Build an offline (non-streaming) recognizer using a Moonshine model.
function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

// Build a silero VAD instance for 16 kHz audio.
function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  // Fixed: the original used single quotes here, so `${...}` was printed
  // verbatim instead of being interpolated; a template literal is required.
  throw new Error(
      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${
          wave.sampleRate}`);
}

console.log('Started')
let start = Date.now();

// Decode one VAD segment and print its time range and recognized text.
// Shared by the main loop and the post-flush drain loop below.
function decodeSegment(segment) {
  const start_time = (segment.start / wave.sampleRate).toFixed(2);
  const end_time =
      ((segment.start + segment.samples.length) / wave.sampleRate).toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(
      {samples: segment.samples, sampleRate: wave.sampleRate});

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
}

const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    decodeSegment(vad.front());
    vad.pop();
  }
}

// Flush the VAD so the trailing partial segment (if any) is emitted too.
vad.flush();

while (!vad.isEmpty()) {
  decodeSegment(vad.front());
  vad.pop();
}

let stop = Date.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))
|
||||
@@ -133,7 +133,25 @@ tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
||||
node ./test-offline-transducer.js
|
||||
```
|
||||
|
||||
## ./test-vad-with-non-streaming-asr-whisper.js
|
||||
|
||||
[./test-vad-with-non-streaming-asr-whisper.js](./test-vad-with-non-streaming-asr-whisper.js)
|
||||
shows how to use VAD + whisper to decode a very long file.
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
node ./test-vad-with-non-streaming-asr-whisper.js
|
||||
```
|
||||
|
||||
## ./test-offline-whisper.js
|
||||
|
||||
[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
|
||||
how to decode a file with a Whisper model. In the code we use
|
||||
[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).
|
||||
@@ -146,7 +164,40 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
node ./test-offline-whisper.js
|
||||
```
|
||||
|
||||
## ./test-offline-moonshine.js
|
||||
|
||||
[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
|
||||
how to decode a file with a Moonshine model. In the code we use
|
||||
[sherpa-onnx-moonshine-tiny-en-int8](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2).
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
node ./test-offline-moonshine.js
|
||||
```
|
||||
|
||||
## ./test-vad-with-non-streaming-asr-moonshine.js
|
||||
|
||||
[./test-vad-with-non-streaming-asr-moonshine.js](./test-vad-with-non-streaming-asr-moonshine.js)
|
||||
shows how to use VAD + Moonshine to decode a very long file.
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
node ./test-vad-with-non-streaming-asr-moonshine.js
|
||||
```
|
||||
|
||||
## ./test-online-paraformer-microphone.js
|
||||
|
||||
[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
|
||||
demonstrates how to do real-time speech recognition from microphone
|
||||
with a streaming Paraformer model. In the code we use
|
||||
|
||||
37
nodejs-examples/test-offline-moonshine.js
Normal file
37
nodejs-examples/test-offline-moonshine.js
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Non-streaming speech recognition from a wave file with a Moonshine model.
const sherpa_onnx = require('sherpa-onnx');

// Build an offline recognizer configured with the Moonshine tiny int8 model.
function createOfflineRecognizer() {
  let modelConfig = {
    moonshine: {
      preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
      encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
      uncachedDecoder:
          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
      cachedDecoder:
          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
    },
    tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

// Declared with `const`; the original assigned to undeclared names,
// creating implicit globals (a ReferenceError in strict mode).
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// This API requires explicit cleanup of native resources.
stream.free();
recognizer.free();
|
||||
128
nodejs-examples/test-vad-with-non-streaming-asr-moonshine.js
Normal file
128
nodejs-examples/test-vad-with-non-streaming-asr-moonshine.js
Normal file
@@ -0,0 +1,128 @@
|
||||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// VAD + non-streaming speech recognition with a Moonshine model,
// decoding a long wave file segment by segment.

const sherpa_onnx = require('sherpa-onnx');

// Build an offline (non-streaming) recognizer using a Moonshine model.
function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'debug': 0,
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

// Build a silero VAD instance for 16 kHz audio.
function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  };

  return sherpa_onnx.createVad(config);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  // Fixed: the original used single quotes here, so `${...}` was printed
  // verbatim instead of being interpolated; a template literal is required.
  throw new Error(
      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${
          wave.sampleRate}`);
}

console.log('Started')
let start = Date.now();

// Decode one VAD segment and print its time range and recognized text.
// The stream is freed on every path: the original's post-flush loop
// leaked the stream (only the first loop called stream.free()).
function decodeSegment(segment) {
  const start_time = (segment.start / wave.sampleRate).toFixed(2);
  const end_time =
      ((segment.start + segment.samples.length) / wave.sampleRate).toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave.sampleRate, segment.samples);

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }

  stream.free();
}

const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    decodeSegment(vad.front());
    vad.pop();
  }
}

// Flush the VAD so the trailing partial segment (if any) is emitted too.
vad.flush();

while (!vad.isEmpty()) {
  decodeSegment(vad.front());
  vad.pop();
}

let stop = Date.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))

// This API requires explicit cleanup of native resources.
vad.free();
recognizer.free();
|
||||
@@ -41,4 +41,11 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define SHERPA_ONNX_DELETE_C_STR(p) \
|
||||
do { \
|
||||
if (p) { \
|
||||
delete[] p; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif // SCRIPTS_NODE_ADDON_API_SRC_MACROS_H_
|
||||
|
||||
@@ -80,6 +80,25 @@ static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
|
||||
return c;
|
||||
}
|
||||
|
||||
static SherpaOnnxOfflineMoonshineModelConfig GetOfflineMoonshineModelConfig(
|
||||
Napi::Object obj) {
|
||||
SherpaOnnxOfflineMoonshineModelConfig c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
|
||||
if (!obj.Has("moonshine") || !obj.Get("moonshine").IsObject()) {
|
||||
return c;
|
||||
}
|
||||
|
||||
Napi::Object o = obj.Get("moonshine").As<Napi::Object>();
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(preprocessor, preprocessor);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(uncached_decoder, uncachedDecoder);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(cached_decoder, cachedDecoder);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static SherpaOnnxOfflineTdnnModelConfig GetOfflineTdnnModelConfig(
|
||||
Napi::Object obj) {
|
||||
SherpaOnnxOfflineTdnnModelConfig c;
|
||||
@@ -130,6 +149,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
|
||||
c.whisper = GetOfflineWhisperModelConfig(o);
|
||||
c.tdnn = GetOfflineTdnnModelConfig(o);
|
||||
c.sense_voice = GetOfflineSenseVoiceModelConfig(o);
|
||||
c.moonshine = GetOfflineMoonshineModelConfig(o);
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
|
||||
@@ -206,97 +226,42 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
|
||||
const SherpaOnnxOfflineRecognizer *recognizer =
|
||||
SherpaOnnxCreateOfflineRecognizer(&c);
|
||||
|
||||
if (c.model_config.transducer.encoder) {
|
||||
delete[] c.model_config.transducer.encoder;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
|
||||
|
||||
if (c.model_config.transducer.decoder) {
|
||||
delete[] c.model_config.transducer.decoder;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
|
||||
|
||||
if (c.model_config.transducer.joiner) {
|
||||
delete[] c.model_config.transducer.joiner;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
|
||||
|
||||
if (c.model_config.paraformer.model) {
|
||||
delete[] c.model_config.paraformer.model;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
|
||||
|
||||
if (c.model_config.nemo_ctc.model) {
|
||||
delete[] c.model_config.nemo_ctc.model;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
|
||||
|
||||
if (c.model_config.whisper.encoder) {
|
||||
delete[] c.model_config.whisper.encoder;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
|
||||
|
||||
if (c.model_config.whisper.decoder) {
|
||||
delete[] c.model_config.whisper.decoder;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
|
||||
|
||||
if (c.model_config.whisper.language) {
|
||||
delete[] c.model_config.whisper.language;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
|
||||
|
||||
if (c.model_config.whisper.task) {
|
||||
delete[] c.model_config.whisper.task;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
|
||||
|
||||
if (c.model_config.tdnn.model) {
|
||||
delete[] c.model_config.tdnn.model;
|
||||
}
|
||||
|
||||
if (c.model_config.sense_voice.model) {
|
||||
delete[] c.model_config.sense_voice.model;
|
||||
}
|
||||
|
||||
if (c.model_config.sense_voice.language) {
|
||||
delete[] c.model_config.sense_voice.language;
|
||||
}
|
||||
|
||||
if (c.model_config.tokens) {
|
||||
delete[] c.model_config.tokens;
|
||||
}
|
||||
|
||||
if (c.model_config.provider) {
|
||||
delete[] c.model_config.provider;
|
||||
}
|
||||
|
||||
if (c.model_config.model_type) {
|
||||
delete[] c.model_config.model_type;
|
||||
}
|
||||
|
||||
if (c.model_config.modeling_unit) {
|
||||
delete[] c.model_config.modeling_unit;
|
||||
}
|
||||
|
||||
if (c.model_config.bpe_vocab) {
|
||||
delete[] c.model_config.bpe_vocab;
|
||||
}
|
||||
|
||||
if (c.model_config.telespeech_ctc) {
|
||||
delete[] c.model_config.telespeech_ctc;
|
||||
}
|
||||
|
||||
if (c.lm_config.model) {
|
||||
delete[] c.lm_config.model;
|
||||
}
|
||||
|
||||
if (c.decoding_method) {
|
||||
delete[] c.decoding_method;
|
||||
}
|
||||
|
||||
if (c.hotwords_file) {
|
||||
delete[] c.hotwords_file;
|
||||
}
|
||||
|
||||
if (c.rule_fsts) {
|
||||
delete[] c.rule_fsts;
|
||||
}
|
||||
|
||||
if (c.rule_fars) {
|
||||
delete[] c.rule_fars;
|
||||
}
|
||||
SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
|
||||
|
||||
if (!recognizer) {
|
||||
Napi::TypeError::New(env, "Please check your config!")
|
||||
|
||||
@@ -35,6 +35,10 @@ function freeConfig(config, Module) {
|
||||
freeConfig(config.whisper, Module)
|
||||
}
|
||||
|
||||
if ('moonshine' in config) {
|
||||
freeConfig(config.moonshine, Module)
|
||||
}
|
||||
|
||||
if ('tdnn' in config) {
|
||||
freeConfig(config.tdnn, Module)
|
||||
}
|
||||
@@ -563,7 +567,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||
const n = encoderLen + decoderLen + languageLen + taskLen;
|
||||
const buffer = Module._malloc(n);
|
||||
|
||||
const len = 5 * 4; // 4 pointers
|
||||
const len = 5 * 4; // 4 pointers + 1 int32
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
@@ -598,6 +602,55 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
|
||||
const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1;
|
||||
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
||||
const uncachedDecoderLen =
|
||||
Module.lengthBytesUTF8(config.uncachedDecoder || '') + 1;
|
||||
const cachedDecoderLen =
|
||||
Module.lengthBytesUTF8(config.cachedDecoder || '') + 1;
|
||||
|
||||
const n =
|
||||
preprocessorLen + encoderLen + uncachedDecoderLen + cachedDecoderLen;
|
||||
const buffer = Module._malloc(n);
|
||||
|
||||
const len = 4 * 4; // 4 pointers
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
Module.stringToUTF8(
|
||||
config.preprocessor || '', buffer + offset, preprocessorLen);
|
||||
offset += preprocessorLen;
|
||||
|
||||
Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
|
||||
offset += encoderLen;
|
||||
|
||||
Module.stringToUTF8(
|
||||
config.uncachedDecoder || '', buffer + offset, uncachedDecoderLen);
|
||||
offset += uncachedDecoderLen;
|
||||
|
||||
Module.stringToUTF8(
|
||||
config.cachedDecoder || '', buffer + offset, cachedDecoderLen);
|
||||
offset += cachedDecoderLen;
|
||||
|
||||
offset = 0;
|
||||
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||
offset += preprocessorLen;
|
||||
|
||||
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||
offset += encoderLen;
|
||||
|
||||
Module.setValue(ptr + 8, buffer + offset, 'i8*');
|
||||
offset += uncachedDecoderLen;
|
||||
|
||||
Module.setValue(ptr + 12, buffer + offset, 'i8*');
|
||||
offset += cachedDecoderLen;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len,
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
|
||||
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
||||
const buffer = Module._malloc(n);
|
||||
@@ -693,6 +746,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
};
|
||||
}
|
||||
|
||||
if (!('moonshine' in config)) {
|
||||
config.moonshine = {
|
||||
preprocessor: '',
|
||||
encoder: '',
|
||||
uncachedDecoder: '',
|
||||
cachedDecoder: '',
|
||||
};
|
||||
}
|
||||
|
||||
if (!('tdnn' in config)) {
|
||||
config.tdnn = {
|
||||
model: '',
|
||||
@@ -724,8 +786,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
const senseVoice =
|
||||
initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);
|
||||
|
||||
const moonshine =
|
||||
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
|
||||
|
||||
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||
tdnn.len + 8 * 4 + senseVoice.len;
|
||||
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;
|
||||
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
@@ -745,7 +810,6 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
|
||||
offset += tdnn.len;
|
||||
|
||||
|
||||
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
|
||||
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
|
||||
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
|
||||
@@ -817,11 +881,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
offset += 4;
|
||||
|
||||
Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
|
||||
offset += senseVoice.len;
|
||||
|
||||
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
||||
senseVoice: senseVoice,
|
||||
senseVoice: senseVoice, moonshine: moonshine,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
|
||||
@@ -25,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
|
||||
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig),
|
||||
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineMoonshineModelConfig),
|
||||
"");
|
||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
|
||||
@@ -66,6 +68,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
auto whisper = &model_config->whisper;
|
||||
auto tdnn = &model_config->tdnn;
|
||||
auto sense_voice = &model_config->sense_voice;
|
||||
auto moonshine = &model_config->moonshine;
|
||||
|
||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||
@@ -93,6 +96,12 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
fprintf(stdout, "language: %s\n", sense_voice->language);
|
||||
fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);
|
||||
|
||||
fprintf(stdout, "----------offline moonshine model config----------\n");
|
||||
fprintf(stdout, "preprocessor: %s\n", moonshine->preprocessor);
|
||||
fprintf(stdout, "encoder: %s\n", moonshine->encoder);
|
||||
fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
|
||||
fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
|
||||
|
||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||
|
||||
Reference in New Issue
Block a user