Add VAD and keyword spotting for the Node package with WebAssembly (#1286)
This commit is contained in:
22
.github/scripts/test-nodejs-npm.sh
vendored
22
.github/scripts/test-nodejs-npm.sh
vendored
@@ -9,6 +9,28 @@ git status
|
|||||||
ls -lh
|
ls -lh
|
||||||
ls -lh node_modules
|
ls -lh node_modules
|
||||||
|
|
||||||
|
echo '-----vad+whisper----------'
|
||||||
|
|
||||||
|
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
node ./test-vad-with-non-streaming-asr-whisper.js
|
||||||
|
rm Obama.wav
|
||||||
|
rm silero_vad.onnx
|
||||||
|
rm -rf sherpa-onnx-whisper-tiny.en
|
||||||
|
|
||||||
|
echo "----------keyword spotting----------"
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
|
||||||
|
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
|
||||||
|
|
||||||
|
node ./test-keyword-spotter-transducer.js
|
||||||
|
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
|
||||||
|
|
||||||
# offline asr
|
# offline asr
|
||||||
#
|
#
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
|||||||
31
.github/workflows/npm.yaml
vendored
31
.github/workflows/npm.yaml
vendored
@@ -1,6 +1,9 @@
|
|||||||
name: npm
|
name: npm
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- npm
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
@@ -27,6 +30,9 @@ jobs:
|
|||||||
|
|
||||||
- name: Install emsdk
|
- name: Install emsdk
|
||||||
uses: mymindstorm/setup-emsdk@v14
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
|
with:
|
||||||
|
version: 3.1.51
|
||||||
|
actions-cache-folder: 'emsdk-cache'
|
||||||
|
|
||||||
- name: View emsdk version
|
- name: View emsdk version
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -51,8 +57,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Build nodejs package
|
- name: Build nodejs package
|
||||||
shell: bash
|
shell: bash
|
||||||
env:
|
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
||||||
run: |
|
run: |
|
||||||
./build-wasm-simd-nodejs.sh
|
./build-wasm-simd-nodejs.sh
|
||||||
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
|
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
|
||||||
@@ -71,6 +75,29 @@ jobs:
|
|||||||
|
|
||||||
rm package.json.bak
|
rm package.json.bak
|
||||||
|
|
||||||
|
- name: Collect files
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
dst=sherpa-onnx-wasm-nodejs
|
||||||
|
mkdir $dst
|
||||||
|
cp -v scripts/nodejs/* $dst
|
||||||
|
tar cvjf $dst.tar.bz2 $dst
|
||||||
|
|
||||||
|
echo "---"
|
||||||
|
ls -h $dst
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: sherpa-onnx-wasm-nodejs
|
||||||
|
path: ./*.tar.bz2
|
||||||
|
|
||||||
|
- name: Build nodejs package
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
run: |
|
||||||
|
cd scripts/nodejs
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
|
|
||||||
npm install
|
npm install
|
||||||
|
|||||||
4
.github/workflows/test-nodejs.yaml
vendored
4
.github/workflows/test-nodejs.yaml
vendored
@@ -55,6 +55,9 @@ jobs:
|
|||||||
|
|
||||||
- name: Install emsdk
|
- name: Install emsdk
|
||||||
uses: mymindstorm/setup-emsdk@v14
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
|
with:
|
||||||
|
version: 3.1.51
|
||||||
|
actions-cache-folder: 'emsdk-cache'
|
||||||
|
|
||||||
- name: View emsdk version
|
- name: View emsdk version
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -109,6 +112,7 @@ jobs:
|
|||||||
node --version
|
node --version
|
||||||
npm --version
|
npm --version
|
||||||
export d=scripts/nodejs
|
export d=scripts/nodejs
|
||||||
|
cat $d/index.js
|
||||||
|
|
||||||
pushd $d
|
pushd $d
|
||||||
npm install
|
npm install
|
||||||
|
|||||||
11
CHANGELOG.md
11
CHANGELOG.md
@@ -1,3 +1,14 @@
|
|||||||
|
## 1.10.23
|
||||||
|
|
||||||
|
* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
|
||||||
|
* Use a separate thread to initialize models for lazarus examples. (#1270)
|
||||||
|
* Object pascal examples for recording and playing audio with portaudio. (#1271)
|
||||||
|
* Text to speech API for Object Pascal. (#1273)
|
||||||
|
* update kotlin api for better release native object and add user-friendly apis. (#1275)
|
||||||
|
* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
|
||||||
|
* Add WebAssembly for VAD (#1281)
|
||||||
|
* WebAssembly example for VAD + Non-streaming ASR (#1284)
|
||||||
|
|
||||||
## 1.10.22
|
## 1.10.22
|
||||||
|
|
||||||
* Add Pascal API for reading wave files (#1243)
|
* Add Pascal API for reading wave files (#1243)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ project(sherpa-onnx)
|
|||||||
# ./nodejs-addon-examples
|
# ./nodejs-addon-examples
|
||||||
# ./dart-api-examples/
|
# ./dart-api-examples/
|
||||||
# ./CHANGELOG.md
|
# ./CHANGELOG.md
|
||||||
set(SHERPA_ONNX_VERSION "1.10.22")
|
set(SHERPA_ONNX_VERSION "1.10.23")
|
||||||
|
|
||||||
# Disable warning about
|
# Disable warning about
|
||||||
#
|
#
|
||||||
@@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
|
|||||||
if(NOT SHERPA_ONNX_ENABLE_WASM)
|
if(NOT SHERPA_ONNX_ENABLE_WASM)
|
||||||
message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
|
message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
|
||||||
endif()
|
endif()
|
||||||
|
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_WASM)
|
if(SHERPA_ONNX_ENABLE_WASM)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
# sherpa_onnx:
|
# sherpa_onnx:
|
||||||
# path: ../../flutter/sherpa_onnx
|
# path: ../../flutter/sherpa_onnx
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description: >
|
|||||||
|
|
||||||
publish_to: 'none'
|
publish_to: 'none'
|
||||||
|
|
||||||
version: 1.10.22
|
version: 1.10.23
|
||||||
|
|
||||||
topics:
|
topics:
|
||||||
- speech-recognition
|
- speech-recognition
|
||||||
@@ -30,7 +30,7 @@ dependencies:
|
|||||||
record: ^5.1.0
|
record: ^5.1.0
|
||||||
url_launcher: ^6.2.6
|
url_launcher: ^6.2.6
|
||||||
|
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
# sherpa_onnx:
|
# sherpa_onnx:
|
||||||
# path: ../../flutter/sherpa_onnx
|
# path: ../../flutter/sherpa_onnx
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description: >
|
|||||||
|
|
||||||
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
|
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
|
||||||
|
|
||||||
version: 1.10.22
|
version: 1.10.23
|
||||||
|
|
||||||
environment:
|
environment:
|
||||||
sdk: '>=3.4.0 <4.0.0'
|
sdk: '>=3.4.0 <4.0.0'
|
||||||
@@ -17,7 +17,7 @@ dependencies:
|
|||||||
cupertino_icons: ^1.0.6
|
cupertino_icons: ^1.0.6
|
||||||
path_provider: ^2.1.3
|
path_provider: ^2.1.3
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
sherpa_onnx: ^1.10.22
|
sherpa_onnx: ^1.10.23
|
||||||
url_launcher: ^6.2.6
|
url_launcher: ^6.2.6
|
||||||
audioplayers: ^5.0.0
|
audioplayers: ^5.0.0
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ topics:
|
|||||||
- voice-activity-detection
|
- voice-activity-detection
|
||||||
|
|
||||||
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
|
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
|
||||||
version: 1.10.22
|
version: 1.10.23
|
||||||
|
|
||||||
homepage: https://github.com/k2-fsa/sherpa-onnx
|
homepage: https://github.com/k2-fsa/sherpa-onnx
|
||||||
|
|
||||||
@@ -30,23 +30,23 @@ dependencies:
|
|||||||
flutter:
|
flutter:
|
||||||
sdk: flutter
|
sdk: flutter
|
||||||
|
|
||||||
sherpa_onnx_android: ^1.10.22
|
sherpa_onnx_android: ^1.10.23
|
||||||
# sherpa_onnx_android:
|
# sherpa_onnx_android:
|
||||||
# path: ../sherpa_onnx_android
|
# path: ../sherpa_onnx_android
|
||||||
|
|
||||||
sherpa_onnx_macos: ^1.10.22
|
sherpa_onnx_macos: ^1.10.23
|
||||||
# sherpa_onnx_macos:
|
# sherpa_onnx_macos:
|
||||||
# path: ../sherpa_onnx_macos
|
# path: ../sherpa_onnx_macos
|
||||||
|
|
||||||
sherpa_onnx_linux: ^1.10.22
|
sherpa_onnx_linux: ^1.10.23
|
||||||
# sherpa_onnx_linux:
|
# sherpa_onnx_linux:
|
||||||
# path: ../sherpa_onnx_linux
|
# path: ../sherpa_onnx_linux
|
||||||
#
|
#
|
||||||
sherpa_onnx_windows: ^1.10.22
|
sherpa_onnx_windows: ^1.10.23
|
||||||
# sherpa_onnx_windows:
|
# sherpa_onnx_windows:
|
||||||
# path: ../sherpa_onnx_windows
|
# path: ../sherpa_onnx_windows
|
||||||
|
|
||||||
sherpa_onnx_ios: ^1.10.22
|
sherpa_onnx_ios: ^1.10.23
|
||||||
# sherpa_onnx_ios:
|
# sherpa_onnx_ios:
|
||||||
# path: ../sherpa_onnx_ios
|
# path: ../sherpa_onnx_ios
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
|
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
|
||||||
Pod::Spec.new do |s|
|
Pod::Spec.new do |s|
|
||||||
s.name = 'sherpa_onnx_ios'
|
s.name = 'sherpa_onnx_ios'
|
||||||
s.version = '1.10.22'
|
s.version = '1.10.23'
|
||||||
s.summary = 'A new Flutter FFI plugin project.'
|
s.summary = 'A new Flutter FFI plugin project.'
|
||||||
s.description = <<-DESC
|
s.description = <<-DESC
|
||||||
A new Flutter FFI plugin project.
|
A new Flutter FFI plugin project.
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#
|
#
|
||||||
Pod::Spec.new do |s|
|
Pod::Spec.new do |s|
|
||||||
s.name = 'sherpa_onnx_macos'
|
s.name = 'sherpa_onnx_macos'
|
||||||
s.version = '1.10.22'
|
s.version = '1.10.23'
|
||||||
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
|
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
|
||||||
s.description = <<-DESC
|
s.description = <<-DESC
|
||||||
sherpa-onnx Flutter FFI plugin project.
|
sherpa-onnx Flutter FFI plugin project.
|
||||||
|
|||||||
7
new-release.sh
Executable file
7
new-release.sh
Executable file
@@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
find flutter -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
|
||||||
|
find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
|
||||||
|
find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
|
||||||
|
find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
|
||||||
|
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"sherpa-onnx-node": "^1.10.22"
|
"sherpa-onnx-node": "^1.10.23"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
|
|||||||
|
|
||||||
const detectedKeywords = [];
|
const detectedKeywords = [];
|
||||||
while (kws.isReady(stream)) {
|
while (kws.isReady(stream)) {
|
||||||
|
kws.decode(stream);
|
||||||
const keyword = kws.getResult(stream).keyword;
|
const keyword = kws.getResult(stream).keyword;
|
||||||
if (keyword != '') {
|
if (keyword != '') {
|
||||||
detectedKeywords.push(keyword);
|
detectedKeywords.push(keyword);
|
||||||
}
|
}
|
||||||
kws.decode(stream);
|
|
||||||
}
|
}
|
||||||
let stop = Date.now();
|
let stop = Date.now();
|
||||||
|
|
||||||
|
|||||||
@@ -120,8 +120,8 @@ console.log('Done')
|
|||||||
const elapsed_seconds = (stop - start) / 1000;
|
const elapsed_seconds = (stop - start) / 1000;
|
||||||
const duration = wave.samples.length / wave.sampleRate;
|
const duration = wave.samples.length / wave.sampleRate;
|
||||||
const real_time_factor = elapsed_seconds / duration;
|
const real_time_factor = elapsed_seconds / duration;
|
||||||
console.log('Wave duration', duration.toFixed(3), 'secodns')
|
console.log('Wave duration', duration.toFixed(3), 'seconds')
|
||||||
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
|
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
|
||||||
console.log(
|
console.log(
|
||||||
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
||||||
real_time_factor.toFixed(3))
|
real_time_factor.toFixed(3))
|
||||||
|
|||||||
49
nodejs-examples/test-keyword-spotter-transducer.js
Normal file
49
nodejs-examples/test-keyword-spotter-transducer.js
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
function createKeywordSpotter() {
|
||||||
|
// Please download test files from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
|
||||||
|
const config = {
|
||||||
|
'modelConfig': {
|
||||||
|
'transducer': {
|
||||||
|
'encoder':
|
||||||
|
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
|
||||||
|
'decoder':
|
||||||
|
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
|
||||||
|
'joiner':
|
||||||
|
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
|
||||||
|
},
|
||||||
|
'tokens':
|
||||||
|
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
|
||||||
|
},
|
||||||
|
keywords: 'w én s ēn t è k ǎ s uǒ @文森特卡索\n' +
|
||||||
|
'f ǎ g uó @法国'
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createKws(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
const kws = createKeywordSpotter();
|
||||||
|
const stream = kws.createStream();
|
||||||
|
const waveFilename =
|
||||||
|
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
|
||||||
|
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
|
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
|
||||||
|
stream.acceptWaveform(kws.config.featConfig.sampleRate, tailPadding);
|
||||||
|
|
||||||
|
const detectedKeywords = [];
|
||||||
|
while (kws.isReady(stream)) {
|
||||||
|
kws.decode(stream);
|
||||||
|
const keyword = kws.getResult(stream).keyword;
|
||||||
|
if (keyword != '') {
|
||||||
|
detectedKeywords.push(keyword);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
console.log(detectedKeywords);
|
||||||
|
|
||||||
|
stream.free();
|
||||||
|
kws.free();
|
||||||
@@ -7,27 +7,13 @@ const wav = require('wav');
|
|||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
|
||||||
nemoCtc: {
|
|
||||||
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
|
|
||||||
},
|
|
||||||
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
|
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
modelType: 'nemo_ctc',
|
|
||||||
};
|
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
modelConfig: {
|
||||||
modelConfig: modelConfig,
|
nemoCtc: {
|
||||||
decodingMethod: 'greedy_search',
|
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
|
||||||
maxActivePaths: 4,
|
},
|
||||||
|
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOfflineRecognizer(config);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
@@ -38,63 +24,12 @@ const stream = recognizer.createStream();
|
|||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
|
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {highWaterMark: 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -7,27 +7,15 @@ const wav = require('wav');
|
|||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
let modelConfig = {
|
||||||
paraformer: {
|
paraformer: {
|
||||||
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
|
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
|
||||||
},
|
},
|
||||||
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
|
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
modelType: 'paraformer',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
ruleFsts: './itn_zh_number.fst',
|
ruleFsts: './itn_zh_number.fst',
|
||||||
};
|
};
|
||||||
@@ -41,62 +29,12 @@ const stream = recognizer.createStream();
|
|||||||
|
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
const waveFilename = './itn-zh-number.wav';
|
const waveFilename = './itn-zh-number.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,98 +1,32 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
const fs = require('fs');
|
|
||||||
const {Readable} = require('stream');
|
|
||||||
const wav = require('wav');
|
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
let modelConfig = {
|
||||||
paraformer: {
|
paraformer: {
|
||||||
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
|
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
|
||||||
},
|
},
|
||||||
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
|
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
modelType: 'paraformer',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOfflineRecognizer(config);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const recognizer = createOfflineRecognizer();
|
const recognizer = createOfflineRecognizer();
|
||||||
const stream = recognizer.createStream();
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,17 +1,8 @@
|
|||||||
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
const fs = require('fs');
|
|
||||||
const {Readable} = require('stream');
|
|
||||||
const wav = require('wav');
|
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
let modelConfig = {
|
||||||
senseVoice: {
|
senseVoice: {
|
||||||
model:
|
model:
|
||||||
@@ -20,82 +11,26 @@ function createOfflineRecognizer() {
|
|||||||
useInverseTextNormalization: 1,
|
useInverseTextNormalization: 1,
|
||||||
},
|
},
|
||||||
tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
|
tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOfflineRecognizer(config);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const recognizer = createOfflineRecognizer();
|
const recognizer = createOfflineRecognizer();
|
||||||
const stream = recognizer.createStream();
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
|
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,17 +1,8 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
//
|
//
|
||||||
const fs = require('fs');
|
|
||||||
const {Readable} = require('stream');
|
|
||||||
const wav = require('wav');
|
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
let modelConfig = {
|
||||||
transducer: {
|
transducer: {
|
||||||
encoder:
|
encoder:
|
||||||
@@ -22,19 +13,11 @@ function createOfflineRecognizer() {
|
|||||||
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
|
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
|
||||||
},
|
},
|
||||||
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
|
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
modelType: 'transducer',
|
modelType: 'transducer',
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
maxActivePaths: 4,
|
|
||||||
hotwordsFile: '',
|
|
||||||
hotwordsScore: 1.5,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOfflineRecognizer(config);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
@@ -43,62 +26,12 @@ const recognizer = createOfflineRecognizer();
|
|||||||
const stream = recognizer.createStream();
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,17 +1,8 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
//
|
//
|
||||||
const fs = require('fs');
|
|
||||||
const {Readable} = require('stream');
|
|
||||||
const wav = require('wav');
|
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
let featConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
|
||||||
|
|
||||||
let modelConfig = {
|
let modelConfig = {
|
||||||
whisper: {
|
whisper: {
|
||||||
encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
|
encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
|
||||||
@@ -21,83 +12,25 @@ function createOfflineRecognizer() {
|
|||||||
tailPaddings: -1,
|
tailPaddings: -1,
|
||||||
},
|
},
|
||||||
tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
|
tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
debug: 0,
|
|
||||||
provider: 'cpu',
|
|
||||||
modelType: 'whisper',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
featConfig: featConfig,
|
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOfflineRecognizer(config);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
recognizer = createOfflineRecognizer();
|
recognizer = createOfflineRecognizer();
|
||||||
stream = recognizer.createStream();
|
stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||||
|
|
||||||
const reader = new wav.Reader();
|
recognizer.decode(stream);
|
||||||
const readable = new Readable().wrap(reader);
|
const text = recognizer.getResult(stream).text;
|
||||||
const buf = [];
|
console.log(text);
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
stream.free();
|
||||||
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
recognizer.free();
|
||||||
throw new Error(`Only support sampleRate ${
|
|
||||||
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (audioFormat != 1) {
|
|
||||||
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (channels != 1) {
|
|
||||||
throw new Error(`Only a single channel. Given ${channel}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bitDepth != 16) {
|
|
||||||
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|
||||||
.pipe(reader)
|
|
||||||
.on('finish', function(err) {
|
|
||||||
// tail padding
|
|
||||||
const floatSamples =
|
|
||||||
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
const flattened =
|
|
||||||
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
|
|
||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const text = recognizer.getResult(stream).text;
|
|
||||||
console.log(text);
|
|
||||||
|
|
||||||
stream.free();
|
|
||||||
recognizer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
readable.on('readable', function() {
|
|
||||||
let chunk;
|
|
||||||
while ((chunk = readable.read()) != null) {
|
|
||||||
const int16Samples = new Int16Array(
|
|
||||||
chunk.buffer, chunk.byteOffset,
|
|
||||||
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
|
||||||
|
|
||||||
const floatSamples = new Float32Array(int16Samples.length);
|
|
||||||
|
|
||||||
for (let i = 0; i < floatSamples.length; i++) {
|
|
||||||
floatSamples[i] = int16Samples[i] / 32768.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.push(floatSamples);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -16,22 +16,10 @@ function createOnlineRecognizer() {
|
|||||||
let onlineModelConfig = {
|
let onlineModelConfig = {
|
||||||
paraformer: onlineParaformerModelConfig,
|
paraformer: onlineParaformerModelConfig,
|
||||||
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
provider: 'cpu',
|
|
||||||
debug: 1,
|
|
||||||
modelType: 'paraformer',
|
|
||||||
};
|
|
||||||
|
|
||||||
let featureConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let recognizerConfig = {
|
let recognizerConfig = {
|
||||||
featConfig: featureConfig,
|
|
||||||
modelConfig: onlineModelConfig,
|
modelConfig: onlineModelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
maxActivePaths: 4,
|
|
||||||
enableEndpoint: 1,
|
enableEndpoint: 1,
|
||||||
rule1MinTrailingSilence: 2.4,
|
rule1MinTrailingSilence: 2.4,
|
||||||
rule2MinTrailingSilence: 1.2,
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
|||||||
@@ -17,26 +17,10 @@ function createOnlineRecognizer() {
|
|||||||
let onlineModelConfig = {
|
let onlineModelConfig = {
|
||||||
paraformer: onlineParaformerModelConfig,
|
paraformer: onlineParaformerModelConfig,
|
||||||
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
provider: 'cpu',
|
|
||||||
debug: 1,
|
|
||||||
modelType: 'paraformer',
|
|
||||||
};
|
|
||||||
|
|
||||||
let featureConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let recognizerConfig = {
|
let recognizerConfig = {
|
||||||
featConfig: featureConfig,
|
|
||||||
modelConfig: onlineModelConfig,
|
modelConfig: onlineModelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
maxActivePaths: 4,
|
|
||||||
enableEndpoint: 1,
|
|
||||||
rule1MinTrailingSilence: 2.4,
|
|
||||||
rule2MinTrailingSilence: 1.2,
|
|
||||||
rule3MinUtteranceLength: 20,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
|
|||||||
@@ -20,26 +20,10 @@ function createOnlineRecognizer() {
|
|||||||
transducer: onlineTransducerModelConfig,
|
transducer: onlineTransducerModelConfig,
|
||||||
tokens:
|
tokens:
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
numThreads: 1,
|
|
||||||
provider: 'cpu',
|
|
||||||
debug: 1,
|
|
||||||
modelType: 'zipformer',
|
|
||||||
};
|
|
||||||
|
|
||||||
let featureConfig = {
|
|
||||||
sampleRate: 16000,
|
|
||||||
featureDim: 80,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let recognizerConfig = {
|
let recognizerConfig = {
|
||||||
featConfig: featureConfig,
|
|
||||||
modelConfig: onlineModelConfig,
|
modelConfig: onlineModelConfig,
|
||||||
decodingMethod: 'greedy_search',
|
|
||||||
maxActivePaths: 4,
|
|
||||||
enableEndpoint: 1,
|
|
||||||
rule1MinTrailingSilence: 2.4,
|
|
||||||
rule2MinTrailingSilence: 1.2,
|
|
||||||
rule3MinUtteranceLength: 20,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
|
|||||||
124
nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
Normal file
124
nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Creates the non-streaming (offline) recognizer that transcribes each
// speech segment, using the int8 Whisper tiny.en model.
//
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createRecognizer() {
  const whisperConfig = {
    'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
    'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
    'tailPaddings': 2000,
  };

  const modelConfig = {
    'whisper': whisperConfig,
    'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    'debug': 0,
  };

  return sherpa_onnx.createOfflineRecognizer({'modelConfig': modelConfig});
}
|
||||||
|
|
||||||
|
// Creates the voice activity detector backed by the silero VAD model.
//
// please download silero_vad.onnx from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
function createVad() {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    // Number of samples fed to the detector per call; the main loop reads
    // this value back through vad.config.sileroVad.windowSize.
    windowSize: 512,
  };

  const vadConfig = {
    sileroVad: sileroVad,
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  };

  return sherpa_onnx.createVad(vadConfig);
}
|
||||||
|
|
||||||
|
const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate !== recognizer.config.featConfig.sampleRate) {
  // A template literal (backticks) is required here; with ordinary quotes
  // the ${...} placeholders would be printed verbatim.
  throw new Error(`Expected sample rate: ${
      recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
}

// Pops every speech segment currently queued in the detector, decodes it
// with the recognizer and prints "<start> -- <end>: <text>" for segments
// that produced non-empty text.
//
// detector:   the VAD object (isEmpty/front/pop are used)
// asr:        the offline recognizer (createStream/decode/getResult are used)
// sampleRate: sample rate of the audio, used to convert sample offsets and
//             counts into seconds
function transcribeQueuedSegments(detector, asr, sampleRate) {
  while (!detector.isEmpty()) {
    const segment = detector.front();
    detector.pop();

    const startSeconds = segment.start / sampleRate;
    const endSeconds = startSeconds + segment.samples.length / sampleRate;

    const stream = asr.createStream();
    try {
      stream.acceptWaveform(sampleRate, segment.samples);
      asr.decode(stream);

      const r = asr.getResult(stream);
      if (r.text.length > 0) {
        const text = r.text.toLowerCase().trim();
        console.log(
            `${startSeconds.toFixed(2)} -- ${endSeconds.toFixed(2)}: ${text}`);
      }
    } finally {
      // Every stream created above must be released, including the ones
      // created for segments that only become available after vad.flush().
      stream.free();
    }
  }
}

console.log('Started');
const start = Date.now();

// Feed the audio to the detector one window at a time and transcribe the
// segments as soon as the detector reports them.
const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  transcribeQueuedSegments(vad, recognizer, wave.sampleRate);
}

// Flush the detector, then transcribe whatever segments it still reports.
vad.flush();
transcribeQueuedSegments(vad, recognizer, wave.sampleRate);

const stop = Date.now();
console.log('Done');

// Real-time factor: processing time divided by audio duration.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

vad.free();
recognizer.free();
|
||||||
2
scripts/nodejs/.gitignore
vendored
2
scripts/nodejs/.gitignore
vendored
@@ -1,2 +1,4 @@
|
|||||||
node_modules
|
node_modules
|
||||||
jslint.mjs
|
jslint.mjs
|
||||||
|
sherpa-onnx-*.js
|
||||||
|
sherpa-onnx-*.wasm
|
||||||
|
|||||||
@@ -4,6 +4,9 @@
|
|||||||
const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
|
const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
|
||||||
const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
|
const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
|
||||||
const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
|
const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
|
||||||
|
const sherpa_onnx_kws = require('./sherpa-onnx-kws.js');
|
||||||
|
const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
|
||||||
|
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
|
||||||
|
|
||||||
function createOnlineRecognizer(config) {
|
function createOnlineRecognizer(config) {
|
||||||
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
||||||
@@ -17,10 +20,35 @@ function createOfflineTts(config) {
|
|||||||
return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
|
return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a keyword spotter from `config`, bound to the shared wasm module.
function createKws(config) {
  const spotter = sherpa_onnx_kws.createKws(wasmModule, config);
  return spotter;
}
|
||||||
|
|
||||||
|
// Creates a CircularBuffer with the given capacity, bound to the shared
// wasm module.
function createCircularBuffer(capacity) {
  const buffer = new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule);
  return buffer;
}
|
||||||
|
|
||||||
|
// Creates a voice activity detector from `config`, bound to the shared wasm
// module.
function createVad(config) {
  const vad = sherpa_onnx_vad.createVad(wasmModule, config);
  return vad;
}
|
||||||
|
|
||||||
|
// Reads the wave file `filename` through the shared wasm module and returns
// whatever sherpa_onnx_wave.readWave returns.
function readWave(filename) {
  const wave = sherpa_onnx_wave.readWave(filename, wasmModule);
  return wave;
}
|
||||||
|
|
||||||
|
// Writes `data` to the wave file `filename` through the shared wasm module.
//
// data is an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
//
// The value returned by sherpa_onnx_wave.writeWave (if any) is passed
// through to the caller instead of being dropped.
function writeWave(filename, data) {
  return sherpa_onnx_wave.writeWave(filename, data, wasmModule);
}
|
||||||
|
|
||||||
// Note: online means streaming and offline means non-streaming here.
|
// Note: online means streaming and offline means non-streaming here.
|
||||||
// Both of them don't require internet connection.
|
// Both of them don't require internet connection.
|
||||||
module.exports = {
|
module.exports = {
|
||||||
createOnlineRecognizer,
|
createOnlineRecognizer,
|
||||||
createOfflineRecognizer,
|
createOfflineRecognizer,
|
||||||
createOfflineTts,
|
createOfflineTts,
|
||||||
|
createKws,
|
||||||
|
readWave,
|
||||||
|
writeWave,
|
||||||
|
createCircularBuffer,
|
||||||
|
createVad,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
|||||||
Module.setValue(ptr + 12, buffer + offset, 'i8*');
|
Module.setValue(ptr + 12, buffer + offset, 'i8*');
|
||||||
offset += taskLen;
|
offset += taskLen;
|
||||||
|
|
||||||
Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32');
|
Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
|||||||
@@ -69,13 +69,14 @@ function initModelConfig(config, Module) {
|
|||||||
|
|
||||||
const len = transducer.len + paraformer_len + ctc_len + 7 * 4;
|
const len = transducer.len + paraformer_len + ctc_len + 7 * 4;
|
||||||
const ptr = Module._malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
Module.HEAPU8.fill(0, ptr, ptr + len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
||||||
|
|
||||||
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||||
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
|
||||||
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
|
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
|
||||||
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
|
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
|
||||||
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
|
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
|
||||||
const bufferLen =
|
const bufferLen =
|
||||||
@@ -86,10 +87,10 @@ function initModelConfig(config, Module) {
|
|||||||
Module.stringToUTF8(config.tokens, buffer, tokensLen);
|
Module.stringToUTF8(config.tokens, buffer, tokensLen);
|
||||||
offset += tokensLen;
|
offset += tokensLen;
|
||||||
|
|
||||||
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
|
Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
|
||||||
offset += providerLen;
|
offset += providerLen;
|
||||||
|
|
||||||
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
|
||||||
offset += modelTypeLen;
|
offset += modelTypeLen;
|
||||||
|
|
||||||
Module.stringToUTF8(
|
Module.stringToUTF8(
|
||||||
@@ -103,7 +104,7 @@ function initModelConfig(config, Module) {
|
|||||||
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
|
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
||||||
@@ -134,14 +135,21 @@ function initModelConfig(config, Module) {
|
|||||||
|
|
||||||
// Allocates an 8-byte block on the wasm heap and writes the feature
// extractor settings into it: the sample rate as an i32 at offset 0 and the
// feature dimension as an i32 at offset 4.
//
// config: an object; the sample rate is read from `samplingRate` (legacy
//         key) or `sampleRate` (the key used by the default featConfig),
//         falling back to 16000. `featureDim` falls back to 80.
// Module: the wasm module (its _malloc and setValue are used)
//
// Returns {ptr, len}: the address of the block and its size in bytes. The
// block is not freed here.
function initFeatureExtractorConfig(config, Module) {
  const len = 4 * 2;
  const ptr = Module._malloc(len);

  // `samplingRate` is checked first so that configs that already worked keep
  // their exact behavior; `sampleRate` was previously ignored.
  const sampleRate = config.samplingRate || config.sampleRate || 16000;

  Module.setValue(ptr, sampleRate, 'i32');
  Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
  return {
    ptr: ptr, len: len,
  };
}
|
||||||
|
|
||||||
function initKwsConfig(config, Module) {
|
function initKwsConfig(config, Module) {
|
||||||
|
if (!('featConfig' in config)) {
|
||||||
|
config.featConfig = {
|
||||||
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
let featConfig = initFeatureExtractorConfig(config.featConfig, Module);
|
let featConfig = initFeatureExtractorConfig(config.featConfig, Module);
|
||||||
|
|
||||||
let modelConfig = initModelConfig(config.modelConfig, Module);
|
let modelConfig = initModelConfig(config.modelConfig, Module);
|
||||||
@@ -155,16 +163,16 @@ function initKwsConfig(config, Module) {
|
|||||||
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
|
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
|
||||||
offset += modelConfig.len;
|
offset += modelConfig.len;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
|
Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, config.numTrailingBlanks, 'i32');
|
Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, config.keywordsScore, 'float');
|
Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module.setValue(ptr + offset, config.keywordsThreshold, 'float');
|
Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1;
|
let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1;
|
||||||
|
|||||||
@@ -49,6 +49,32 @@ set(exported_functions
|
|||||||
SherpaOnnxDestroyKeywordSpotter
|
SherpaOnnxDestroyKeywordSpotter
|
||||||
SherpaOnnxGetKeywordResult
|
SherpaOnnxGetKeywordResult
|
||||||
SherpaOnnxIsKeywordStreamReady
|
SherpaOnnxIsKeywordStreamReady
|
||||||
|
# VAD
|
||||||
|
SherpaOnnxCreateCircularBuffer
|
||||||
|
SherpaOnnxDestroyCircularBuffer
|
||||||
|
SherpaOnnxCircularBufferPush
|
||||||
|
SherpaOnnxCircularBufferGet
|
||||||
|
SherpaOnnxCircularBufferFree
|
||||||
|
SherpaOnnxCircularBufferPop
|
||||||
|
SherpaOnnxCircularBufferSize
|
||||||
|
SherpaOnnxCircularBufferHead
|
||||||
|
SherpaOnnxCircularBufferReset
|
||||||
|
SherpaOnnxCreateVoiceActivityDetector
|
||||||
|
SherpaOnnxDestroyVoiceActivityDetector
|
||||||
|
SherpaOnnxVoiceActivityDetectorAcceptWaveform
|
||||||
|
SherpaOnnxVoiceActivityDetectorEmpty
|
||||||
|
SherpaOnnxVoiceActivityDetectorDetected
|
||||||
|
SherpaOnnxVoiceActivityDetectorPop
|
||||||
|
SherpaOnnxVoiceActivityDetectorClear
|
||||||
|
SherpaOnnxVoiceActivityDetectorFront
|
||||||
|
SherpaOnnxDestroySpeechSegment
|
||||||
|
SherpaOnnxVoiceActivityDetectorReset
|
||||||
|
SherpaOnnxVoiceActivityDetectorFlush
|
||||||
|
#
|
||||||
|
SherpaOnnxFileExists
|
||||||
|
SherpaOnnxReadWave
|
||||||
|
SherpaOnnxFreeWave
|
||||||
|
SherpaOnnxWriteWave
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -82,6 +108,8 @@ install(
|
|||||||
${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
|
${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
|
||||||
${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
|
${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
|
||||||
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
|
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
|
||||||
|
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
|
||||||
|
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
|
||||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
||||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
||||||
DESTINATION
|
DESTINATION
|
||||||
|
|||||||
57
wasm/nodejs/sherpa-onnx-wave.js
Normal file
57
wasm/nodejs/sherpa-onnx-wave.js
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
// Reads the wave file `filename` using the wasm module `Module`.
//
// return an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
//
// The returned samples are a copy, so they stay valid after the wasm-side
// wave has been freed.
//
// Throws an Error if SherpaOnnxReadWave returns a null pointer.
function readWave(filename, Module) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, pFilename, filenameLen);

  const w = Module._SherpaOnnxReadWave(pFilename);
  Module._free(pFilename);

  // Without this check the fields below would be read from address 0 and
  // the caller would silently receive meaningless data.
  if (!w) {
    throw new Error(`Failed to read wave file: ${filename}`);
  }

  // w is a byte address; the three 4-byte fields read from it are the
  // address of the samples, the sample rate and the number of samples.
  // Dividing a byte address by 4 turns it into an index into HEAP32/HEAPF32.
  const samplesPtr = Module.HEAP32[w / 4] / 4;
  const sampleRate = Module.HEAP32[w / 4 + 1];
  const numSamples = Module.HEAP32[w / 4 + 2];

  // slice() copies the samples out of the wasm heap.
  const samples = Module.HEAPF32.slice(samplesPtr, samplesPtr + numSamples);

  Module._SherpaOnnxFreeWave(w);

  return {samples: samples, sampleRate: sampleRate};
}
|
||||||
|
|
||||||
|
// Writes `data` to the wave file `filename` using the wasm module `Module`.
//
// data is an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
//
// Returns true if SherpaOnnxWriteWave returned a non-zero status and false
// if it returned 0.
function writeWave(filename, data, Module) {
  const pSamples =
      Module._malloc(data.samples.length * data.samples.BYTES_PER_ELEMENT);
  let pFilename = 0;

  try {
    // pSamples is a byte address; HEAPF32 is indexed in 4-byte elements.
    Module.HEAPF32.set(data.samples, pSamples / data.samples.BYTES_PER_ELEMENT);

    const filenameLen = Module.lengthBytesUTF8(filename) + 1;
    pFilename = Module._malloc(filenameLen);
    Module.stringToUTF8(filename, pFilename, filenameLen);

    const status = Module._SherpaOnnxWriteWave(
        pSamples, data.samples.length, data.sampleRate, pFilename);

    return status !== 0;
  } finally {
    // Release both heap buffers even if one of the calls above throws.
    if (pFilename) {
      Module._free(pFilename);
    }
    Module._free(pSamples);
  }
}
|
||||||
|
|
||||||
|
// Export the helpers only when running under Node.js, which is detected by
// the presence of process.versions.node.
if (typeof process === 'object' && typeof process.versions === 'object' &&
    typeof process.versions.node === 'string') {
  module.exports = {readWave, writeWave};
}
|
||||||
Reference in New Issue
Block a user