Add WebAssembly for NodeJS. (#628)
This commit is contained in:
41
.github/workflows/npm.yaml
vendored
41
.github/workflows/npm.yaml
vendored
@@ -9,6 +9,7 @@ concurrency:
|
|||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
|
id-token: write
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
nodejs:
|
nodejs:
|
||||||
@@ -20,10 +21,20 @@ jobs:
|
|||||||
python-version: ["3.8"]
|
python-version: ["3.8"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Install emsdk
|
||||||
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
|
|
||||||
|
- name: View emsdk version
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
emcc -v
|
||||||
|
echo "--------------------"
|
||||||
|
emcc --check
|
||||||
|
|
||||||
- name: Setup Python ${{ matrix.python-version }}
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -31,28 +42,38 @@ jobs:
|
|||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 13
|
|
||||||
registry-url: 'https://registry.npmjs.org'
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
- name: Display node version
|
- name: Display node version
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
node --version
|
node --version
|
||||||
npm --version
|
|
||||||
cd nodejs-examples
|
|
||||||
|
|
||||||
npm install npm@6.14.4 -g
|
|
||||||
npm install npm@6.14.4
|
|
||||||
npm --version
|
|
||||||
|
|
||||||
- name: Build nodejs package
|
- name: Build nodejs package
|
||||||
shell: bash
|
shell: bash
|
||||||
env:
|
env:
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
|
./build-wasm-simd-nodejs.sh
|
||||||
|
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
|
||||||
|
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
|
||||||
|
|
||||||
|
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
|
||||||
|
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
|
||||||
|
|
||||||
cd scripts/nodejs
|
cd scripts/nodejs
|
||||||
./run.sh
|
|
||||||
|
owner=${{ github.repository_owner }}
|
||||||
|
echo "owner: $owner"
|
||||||
|
|
||||||
|
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json
|
||||||
|
sed -i.bak s/k2-fsa/$owner/g ./package.json
|
||||||
|
|
||||||
|
rm package.json.bak
|
||||||
|
|
||||||
|
git diff
|
||||||
|
|
||||||
npm install
|
npm install
|
||||||
rm run.sh
|
|
||||||
npm ci
|
npm ci
|
||||||
|
# see https://docs.npmjs.com/generating-provenance-statements
|
||||||
npm publish --provenance --access public
|
npm publish --provenance --access public
|
||||||
|
|||||||
1
.github/workflows/test-nodejs-npm.yaml
vendored
1
.github/workflows/test-nodejs-npm.yaml
vendored
@@ -40,7 +40,6 @@ jobs:
|
|||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 13
|
|
||||||
registry-url: 'https://registry.npmjs.org'
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
- name: Display node version
|
- name: Display node version
|
||||||
|
|||||||
64
.github/workflows/test-nodejs.yaml
vendored
64
.github/workflows/test-nodejs.yaml
vendored
@@ -24,7 +24,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest] #, windows-2019]
|
os: [ubuntu-latest] #, macos-latest] #, windows-2019]
|
||||||
python-version: ["3.8"]
|
python-version: ["3.8"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -32,49 +32,38 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: ccache
|
- name: Install emsdk
|
||||||
uses: hendrikmuhs/ccache-action@v1.2
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
with:
|
|
||||||
key: ${{ matrix.os }}-Release-ON
|
|
||||||
|
|
||||||
- name: Configure CMake
|
- name: View emsdk version
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
emcc -v
|
||||||
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
|
echo "--------------------"
|
||||||
cmake --version
|
emcc --check
|
||||||
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
|
|
||||||
cmake --build . --target install --config Release
|
|
||||||
|
|
||||||
ls -lh install/lib
|
|
||||||
|
|
||||||
- name: Setup Python ${{ matrix.python-version }}
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
- name: Copy files
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
|
- name: Display node version
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
os=${{ matrix.os }}
|
node --version
|
||||||
if [[ $os == 'ubuntu-latest' ]]; then
|
|
||||||
mkdir -p scripts/nodejs/lib/linux-x64
|
|
||||||
dst=scripts/nodejs/lib/linux-x64
|
|
||||||
elif [[ $os == 'macos-latest' ]]; then
|
|
||||||
mkdir -p scripts/nodejs/lib/osx-x64
|
|
||||||
dst=scripts/nodejs/lib/osx-x64
|
|
||||||
elif [[ $os == 'windows-2019' ]]; then
|
|
||||||
mkdir -p scripts/nodejs/lib/win-x64
|
|
||||||
dst=scripts/nodejs/lib/win-x64
|
|
||||||
fi
|
|
||||||
ls -lh build/install/lib/
|
|
||||||
|
|
||||||
rm -rf build/install/lib/pkgconfig
|
- name: Build nodejs package
|
||||||
|
shell: bash
|
||||||
cp -v build/install/lib/* $dst/
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
run: |
|
||||||
|
./build-wasm-simd-nodejs.sh
|
||||||
|
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
|
||||||
|
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/
|
||||||
|
|
||||||
- name: replace files
|
- name: replace files
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -89,17 +78,6 @@ jobs:
|
|||||||
git diff
|
git diff
|
||||||
cp *.js ../scripts/nodejs
|
cp *.js ../scripts/nodejs
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: 13
|
|
||||||
registry-url: 'https://registry.npmjs.org'
|
|
||||||
|
|
||||||
- name: Display node version
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
node --version
|
|
||||||
npm --version
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
|
|||||||
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
|
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
|
||||||
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
|
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
|
||||||
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
|
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
|
||||||
|
option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
|
||||||
option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
|
option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
|
||||||
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
|
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
|
||||||
|
|
||||||
@@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
|
|||||||
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
|
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
|
||||||
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
|
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
|
||||||
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
|
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
|
||||||
|
message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_WASM_TTS)
|
if(SHERPA_ONNX_ENABLE_WASM_TTS)
|
||||||
if(NOT SHERPA_ONNX_ENABLE_WASM)
|
if(NOT SHERPA_ONNX_ENABLE_WASM)
|
||||||
@@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
|
||||||
|
if(NOT SHERPA_ONNX_ENABLE_WASM)
|
||||||
|
message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_WASM)
|
if(SHERPA_ONNX_ENABLE_WASM)
|
||||||
add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
|
add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
63
build-wasm-simd-nodejs.sh
Executable file
63
build-wasm-simd-nodejs.sh
Executable file
@@ -0,0 +1,63 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
#
|
||||||
|
# This script is to build sherpa-onnx for WebAssembly (NodeJS)
|
||||||
|
#
|
||||||
|
# Please use NodeJS >= 18
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
if [ x"$EMSCRIPTEN" == x"" ]; then
|
||||||
|
if ! command -v emcc &> /dev/null; then
|
||||||
|
echo "Please install emscripten first"
|
||||||
|
echo ""
|
||||||
|
echo "You can use the following commands to install it:"
|
||||||
|
echo ""
|
||||||
|
echo "git clone https://github.com/emscripten-core/emsdk.git"
|
||||||
|
echo "cd emsdk"
|
||||||
|
echo "git pull"
|
||||||
|
echo "./emsdk install latest"
|
||||||
|
echo "./emsdk activate latest"
|
||||||
|
echo "source ./emsdk_env.sh"
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
export EMSCRIPTEN=$EMSCRIPTEN
|
||||||
|
echo "EMSCRIPTEN: $EMSCRIPTEN"
|
||||||
|
if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
|
||||||
|
echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
|
||||||
|
echo "Please make sure you have installed emsdk correctly"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p build-wasm-simd-nodejs
|
||||||
|
pushd build-wasm-simd-nodejs
|
||||||
|
|
||||||
|
export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
|
||||||
|
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
|
||||||
|
\
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_JNI=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_C_API=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_GPU=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_WASM=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_WASM_NODEJS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
|
||||||
|
-DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
|
||||||
|
..
|
||||||
|
make -j10
|
||||||
|
make install
|
||||||
|
|
||||||
|
ls -lh install/bin/wasm/nodejs
|
||||||
1
nodejs-examples/.gitignore
vendored
1
nodejs-examples/.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
node_modules
|
node_modules
|
||||||
lib
|
lib
|
||||||
package-lock.json
|
package-lock.json
|
||||||
|
*.tar.bz2
|
||||||
|
|||||||
@@ -2,38 +2,18 @@
|
|||||||
|
|
||||||
This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|
This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|
||||||
|
|
||||||
Before you continue, please first install the npm package `sherpa-onnx` by
|
Before you continue, please first run
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm install sherpa-onnx
|
cd ./nodejs-examples
|
||||||
|
|
||||||
|
npm i
|
||||||
```
|
```
|
||||||
|
|
||||||
In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
|
In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
|
||||||
for text-to-speech and speech-to-text.
|
for text-to-speech and speech-to-text.
|
||||||
|
|
||||||
**Caution**: If you get the following error:
|
Note: You need `Node >= 18`.
|
||||||
```
|
|
||||||
/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67
|
|
||||||
if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {
|
|
||||||
^
|
|
||||||
|
|
||||||
TypeError: Cannot read properties of null (reading 'match')
|
|
||||||
at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)
|
|
||||||
at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)
|
|
||||||
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)
|
|
||||||
at Module._compile (node:internal/modules/cjs/loader:1376:14)
|
|
||||||
at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)
|
|
||||||
at Module.load (node:internal/modules/cjs/loader:1207:32)
|
|
||||||
at Module._load (node:internal/modules/cjs/loader:1023:12)
|
|
||||||
at Module.require (node:internal/modules/cjs/loader:1235:19)
|
|
||||||
at require (node:internal/modules/helpers:176:18)
|
|
||||||
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)
|
|
||||||
```
|
|
||||||
|
|
||||||
Please downgrade your node to version v13.14.0. See also
|
|
||||||
https://github.com/node-ffi-napi/node-ffi-napi/issues/244
|
|
||||||
and
|
|
||||||
https://github.com/node-ffi-napi/node-ffi-napi/issues/97 .
|
|
||||||
|
|
||||||
# Text-to-speech
|
# Text-to-speech
|
||||||
|
|
||||||
@@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js
|
|||||||
# Speech-to-text
|
# Speech-to-text
|
||||||
|
|
||||||
In the following, we demonstrate how to decode files and how to perform
|
In the following, we demonstrate how to decode files and how to perform
|
||||||
speech recognition with a microphone with `nodejs`. We need to install two additional
|
speech recognition with a microphone with `nodejs`.
|
||||||
npm packages:
|
|
||||||
|
|
||||||
|
|
||||||
```bash
|
|
||||||
npm install wav naudiodon2
|
|
||||||
```
|
|
||||||
|
|
||||||
## ./test-offline-nemo-ctc.js
|
## ./test-offline-nemo-ctc.js
|
||||||
|
|
||||||
@@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp
|
|||||||
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
node ./test-online-zipformer2-ctc.js
|
node ./test-online-zipformer2-ctc.js
|
||||||
```
|
```
|
||||||
|
|
||||||
## ./test-vad-microphone-offline-paraformer.js
|
|
||||||
|
|
||||||
[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)
|
|
||||||
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
|
||||||
with non-streaming Paraformer for speech recognition from microphone.
|
|
||||||
|
|
||||||
You can use the following command to run it:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
|
||||||
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
|
||||||
node ./test-vad-microphone-offline-paraformer.js
|
|
||||||
```
|
|
||||||
|
|
||||||
## ./test-vad-microphone-offline-transducer.js
|
|
||||||
|
|
||||||
[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)
|
|
||||||
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
|
||||||
with a non-streaming transducer model for speech recognition from microphone.
|
|
||||||
|
|
||||||
You can use the following command to run it:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
|
||||||
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
|
||||||
node ./test-vad-microphone-offline-transducer.js
|
|
||||||
```
|
|
||||||
|
|
||||||
## ./test-vad-microphone-offline-whisper.js
|
|
||||||
|
|
||||||
[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)
|
|
||||||
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
|
||||||
with whisper for speech recognition from microphone.
|
|
||||||
|
|
||||||
You can use the following command to run it:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
|
||||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
|
||||||
node ./test-vad-microphone-offline-whisper.js
|
|
||||||
```
|
|
||||||
|
|
||||||
## ./test-vad-microphone.js
|
|
||||||
|
|
||||||
[./test-vad-microphone.js](./test-vad-microphone.js)
|
|
||||||
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).
|
|
||||||
|
|
||||||
You can use the following command to run it:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
|
||||||
node ./test-vad-microphone.js
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"naudiodon2": "^2.4.0",
|
"naudiodon2": "^2.4.0",
|
||||||
"sherpa-onnx": "^1.8.12",
|
"sherpa-onnx": "*",
|
||||||
"wav": "^1.0.2"
|
"wav": "^1.0.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
//
|
//
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const {Readable} = require('stream');
|
const {Readable} = require('stream');
|
||||||
@@ -6,32 +6,58 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let featConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
sampleRate: 16000,
|
||||||
featConfig.featureDim = 80;
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let modelConfig = {
|
||||||
const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();
|
transducer: {
|
||||||
nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';
|
encoder: '',
|
||||||
const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';
|
decoder: '',
|
||||||
|
joiner: '',
|
||||||
|
},
|
||||||
|
paraformer: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
nemoCtc: {
|
||||||
|
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
|
||||||
|
},
|
||||||
|
whisper: {
|
||||||
|
encoder: '',
|
||||||
|
decoder: '',
|
||||||
|
},
|
||||||
|
tdnn: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 0,
|
||||||
|
provider: 'cpu',
|
||||||
|
modelType: 'nemo_ctc',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
let lmConfig = {
|
||||||
modelConfig.nemoCtc = nemoCtc;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
scale: 1.0,
|
||||||
modelConfig.modelType = 'nemo_ctc';
|
};
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
let config = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
featConfig: featConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
modelConfig: modelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
lmConfig: lmConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
return recognizer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
recognizer = createRecognizer();
|
const recognizer = createOfflineRecognizer();
|
||||||
stream = recognizer.createStream();
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
|
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
|
||||||
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096})
|
|||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
|
|
||||||
stream.free();
|
stream.free();
|
||||||
recognizer.free();
|
recognizer.free();
|
||||||
|
|||||||
@@ -6,32 +6,59 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let featConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
sampleRate: 16000,
|
||||||
featConfig.featureDim = 80;
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let modelConfig = {
|
||||||
const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
|
transducer: {
|
||||||
paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';
|
encoder: '',
|
||||||
const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
|
decoder: '',
|
||||||
|
joiner: '',
|
||||||
|
},
|
||||||
|
paraformer: {
|
||||||
|
model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
|
||||||
|
},
|
||||||
|
nemoCtc: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
whisper: {
|
||||||
|
encoder: '',
|
||||||
|
decoder: '',
|
||||||
|
},
|
||||||
|
tdnn: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 0,
|
||||||
|
provider: 'cpu',
|
||||||
|
modelType: 'paraformer',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
let lmConfig = {
|
||||||
modelConfig.paraformer = paraformer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
scale: 1.0,
|
||||||
modelConfig.modelType = 'paraformer';
|
};
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
let config = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
featConfig: featConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
modelConfig: modelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
lmConfig: lmConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
return recognizer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
const recognizer = createOfflineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';
|
||||||
|
|
||||||
@@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
|
|
||||||
stream.free();
|
stream.free();
|
||||||
recognizer.free();
|
recognizer.free();
|
||||||
|
|||||||
@@ -6,37 +6,60 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let featConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
sampleRate: 16000,
|
||||||
featConfig.featureDim = 80;
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let modelConfig = {
|
||||||
const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
|
transducer: {
|
||||||
transducer.encoder =
|
encoder:
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
|
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx',
|
||||||
transducer.decoder =
|
decoder:
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
|
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
|
||||||
transducer.joiner =
|
joiner:
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
|
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
|
||||||
const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
|
},
|
||||||
|
paraformer: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
nemoCtc: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
whisper: {
|
||||||
|
encoder: '',
|
||||||
|
decoder: '',
|
||||||
|
},
|
||||||
|
tdnn: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 0,
|
||||||
|
provider: 'cpu',
|
||||||
|
modelType: 'transducer',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
let lmConfig = {
|
||||||
modelConfig.transducer = transducer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
scale: 1.0,
|
||||||
modelConfig.modelType = 'transducer';
|
};
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
let config = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
featConfig: featConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
modelConfig: modelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
lmConfig: lmConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
return recognizer;
|
|
||||||
}
|
}
|
||||||
|
const recognizer = createOfflineRecognizer();
|
||||||
recognizer = createRecognizer();
|
const stream = recognizer.createStream();
|
||||||
stream = recognizer.createStream();
|
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
|
||||||
|
|
||||||
@@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
|
|
||||||
stream.free();
|
stream.free();
|
||||||
recognizer.free();
|
recognizer.free();
|
||||||
|
|||||||
@@ -1,28 +1,45 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineTts() {
|
function createOfflineTts() {
|
||||||
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
|
let offlineTtsVitsModelConfig = {
|
||||||
vits.model = 'vits-piper-en_US-amy-low/en_US-amy-low.onnx'
|
model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
|
||||||
vits.tokens = './vits-piper-en_US-amy-low/tokens.txt';
|
lexicon: '',
|
||||||
vits.dataDir = './vits-piper-en_US-amy-low/espeak-ng-data'
|
tokens: './vits-piper-en_US-amy-low/tokens.txt',
|
||||||
|
dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
|
||||||
|
noiseScale: 0.667,
|
||||||
|
noiseScaleW: 0.8,
|
||||||
|
lengthScale: 1.0,
|
||||||
|
};
|
||||||
|
let offlineTtsModelConfig = {
|
||||||
|
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
|
let offlineTtsConfig = {
|
||||||
modelConfig.vits = vits;
|
offlineTtsModelConfig: offlineTtsModelConfig,
|
||||||
|
ruleFsts: '',
|
||||||
|
maxNumSentences: 1,
|
||||||
|
};
|
||||||
|
|
||||||
const config = new sherpa_onnx.OfflineTtsConfig();
|
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
|
||||||
config.model = modelConfig;
|
|
||||||
|
|
||||||
return new sherpa_onnx.OfflineTts(config);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const tts = createOfflineTts();
|
const tts = createOfflineTts();
|
||||||
const speakerId = 0;
|
const speakerId = 0;
|
||||||
const speed = 1.0;
|
const speed = 1.0;
|
||||||
const audio = tts.generate(
|
const audio = tts.generate({
|
||||||
'“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
|
text:
|
||||||
speakerId, speed);
|
'“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
|
||||||
audio.save('./test-en.wav');
|
sid: speakerId,
|
||||||
|
speed: speed
|
||||||
|
});
|
||||||
|
|
||||||
|
tts.save('./test-en.wav', audio);
|
||||||
console.log('Saved to test-en.wav successfully.');
|
console.log('Saved to test-en.wav successfully.');
|
||||||
|
|
||||||
tts.free();
|
tts.free();
|
||||||
|
|||||||
@@ -3,25 +3,37 @@
|
|||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createOfflineTts() {
|
function createOfflineTts() {
|
||||||
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
|
let offlineTtsVitsModelConfig = {
|
||||||
vits.model = './vits-zh-aishell3/vits-aishell3.onnx';
|
model: './vits-zh-aishell3/vits-aishell3.onnx',
|
||||||
vits.lexicon = './vits-zh-aishell3/lexicon.txt';
|
lexicon: './vits-zh-aishell3/lexicon.txt',
|
||||||
vits.tokens = './vits-zh-aishell3/tokens.txt';
|
tokens: './vits-zh-aishell3/tokens.txt',
|
||||||
|
dataDir: '',
|
||||||
|
noiseScale: 0.667,
|
||||||
|
noiseScaleW: 0.8,
|
||||||
|
lengthScale: 1.0,
|
||||||
|
};
|
||||||
|
let offlineTtsModelConfig = {
|
||||||
|
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
|
let offlineTtsConfig = {
|
||||||
modelConfig.vits = vits;
|
offlineTtsModelConfig: offlineTtsModelConfig,
|
||||||
|
ruleFsts: './vits-zh-aishell3/rule.fst',
|
||||||
|
maxNumSentences: 1,
|
||||||
|
};
|
||||||
|
|
||||||
const config = new sherpa_onnx.OfflineTtsConfig();
|
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
|
||||||
config.model = modelConfig;
|
|
||||||
config.ruleFsts = './vits-zh-aishell3/rule.fst';
|
|
||||||
|
|
||||||
return new sherpa_onnx.OfflineTts(config);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const tts = createOfflineTts();
|
const tts = createOfflineTts();
|
||||||
const speakerId = 66;
|
const speakerId = 66;
|
||||||
const speed = 1.0;
|
const speed = 1.0;
|
||||||
const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed);
|
const audio = tts.generate(
|
||||||
audio.save('./test-zh.wav');
|
{text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
|
||||||
|
tts.save('./test-zh.wav', audio);
|
||||||
console.log('Saved to test-zh.wav successfully.');
|
console.log('Saved to test-zh.wav successfully.');
|
||||||
tts.free();
|
tts.free();
|
||||||
|
|||||||
@@ -6,32 +6,58 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOfflineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let featConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
sampleRate: 16000,
|
||||||
featConfig.featureDim = 80;
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let modelConfig = {
|
||||||
const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
|
transducer: {
|
||||||
whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
|
encoder: '',
|
||||||
whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
|
decoder: '',
|
||||||
const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
|
joiner: '',
|
||||||
|
},
|
||||||
|
paraformer: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
nemoCtc: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
whisper: {
|
||||||
|
encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
|
||||||
|
decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
|
||||||
|
},
|
||||||
|
tdnn: {
|
||||||
|
model: '',
|
||||||
|
},
|
||||||
|
tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 0,
|
||||||
|
provider: 'cpu',
|
||||||
|
modelType: 'whisper',
|
||||||
|
};
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
let lmConfig = {
|
||||||
modelConfig.whisper = whisper;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
scale: 1.0,
|
||||||
modelConfig.modelType = 'whisper';
|
};
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
let config = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
featConfig: featConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
modelConfig: modelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
lmConfig: lmConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
return sherpa_onnx.createOfflineRecognizer(config);
|
||||||
return recognizer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
recognizer = createRecognizer();
|
|
||||||
|
recognizer = createOfflineRecognizer();
|
||||||
stream = recognizer.createStream();
|
stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
|
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
|
||||||
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
|||||||
|
|
||||||
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
|
|
||||||
stream.free();
|
stream.free();
|
||||||
recognizer.free();
|
recognizer.free();
|
||||||
|
|||||||
@@ -5,37 +5,58 @@ console.log(portAudio.getDevices());
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let onlineTransducerModelConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
encoder: '',
|
||||||
featConfig.featureDim = 80;
|
decoder: '',
|
||||||
|
joiner: '',
|
||||||
|
};
|
||||||
|
|
||||||
const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
|
let onlineParaformerModelConfig = {
|
||||||
paraformer.encoder =
|
encoder:
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
|
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
|
||||||
paraformer.decoder =
|
decoder:
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
|
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
|
||||||
const tokens =
|
};
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OnlineModelConfig();
|
let onlineZipformer2CtcModelConfig = {
|
||||||
modelConfig.paraformer = paraformer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
};
|
||||||
modelConfig.modelType = 'paraformer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
|
let onlineModelConfig = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
transducer: onlineTransducerModelConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
paraformer: onlineParaformerModelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
recognizerConfig.enableEndpoint = 1;
|
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: 'paraformer',
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
|
let featureConfig = {
|
||||||
return recognizer;
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
}
|
}
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
|
||||||
|
|
||||||
display = new sherpa_onnx.Display(50);
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
let lastText = '';
|
let lastText = '';
|
||||||
let segmentIndex = 0;
|
let segmentIndex = 0;
|
||||||
@@ -61,11 +82,11 @@ ai.on('data', data => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const isEndpoint = recognizer.isEndpoint(stream);
|
const isEndpoint = recognizer.isEndpoint(stream);
|
||||||
const text = recognizer.getResult(stream).text;
|
const text = recognizer.getResult(stream);
|
||||||
|
|
||||||
if (text.length > 0 && lastText != text) {
|
if (text.length > 0 && lastText != text) {
|
||||||
lastText = text;
|
lastText = text;
|
||||||
display.print(segmentIndex, lastText);
|
console.log(segmentIndex, lastText);
|
||||||
}
|
}
|
||||||
if (isEndpoint) {
|
if (isEndpoint) {
|
||||||
if (text.length > 0) {
|
if (text.length > 0) {
|
||||||
|
|||||||
@@ -6,34 +6,58 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let onlineTransducerModelConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
encoder: '',
|
||||||
featConfig.featureDim = 80;
|
decoder: '',
|
||||||
|
joiner: '',
|
||||||
|
};
|
||||||
|
|
||||||
const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
|
let onlineParaformerModelConfig = {
|
||||||
paraformer.encoder =
|
encoder:
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx';
|
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
|
||||||
paraformer.decoder =
|
decoder:
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx';
|
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
|
||||||
const tokens =
|
};
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OnlineModelConfig();
|
let onlineZipformer2CtcModelConfig = {
|
||||||
modelConfig.paraformer = paraformer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
};
|
||||||
modelConfig.modelType = 'paraformer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
|
let onlineModelConfig = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
transducer: onlineTransducerModelConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
paraformer: onlineParaformerModelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
|
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: 'paraformer',
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
|
let featureConfig = {
|
||||||
return recognizer;
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
}
|
}
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';
|
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';
|
||||||
@@ -47,8 +71,8 @@ function decode(samples) {
|
|||||||
while (recognizer.isReady(stream)) {
|
while (recognizer.isReady(stream)) {
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
}
|
}
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
||||||
|
|||||||
@@ -5,39 +5,60 @@ const portAudio = require('naudiodon2');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let onlineTransducerModelConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
encoder:
|
||||||
featConfig.featureDim = 80;
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
|
||||||
|
decoder:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
|
||||||
|
joiner:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let onlineParaformerModelConfig = {
|
||||||
const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
|
encoder: '',
|
||||||
transducer.encoder =
|
decoder: '',
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
|
};
|
||||||
transducer.decoder =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
|
|
||||||
transducer.joiner =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
|
|
||||||
const tokens =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OnlineModelConfig();
|
let onlineZipformer2CtcModelConfig = {
|
||||||
modelConfig.transducer = transducer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
};
|
||||||
modelConfig.modelType = 'zipformer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
|
let onlineModelConfig = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
transducer: onlineTransducerModelConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
paraformer: onlineParaformerModelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
recognizerConfig.enableEndpoint = 1;
|
tokens:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: 'zipformer',
|
||||||
|
};
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
|
let featureConfig = {
|
||||||
return recognizer;
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
}
|
}
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
const recognizer = createOnlineRecognizer();
|
||||||
display = new sherpa_onnx.Display(50);
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
let lastText = '';
|
let lastText = '';
|
||||||
let segmentIndex = 0;
|
let segmentIndex = 0;
|
||||||
@@ -63,11 +84,11 @@ ai.on('data', data => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const isEndpoint = recognizer.isEndpoint(stream);
|
const isEndpoint = recognizer.isEndpoint(stream);
|
||||||
const text = recognizer.getResult(stream).text;
|
const text = recognizer.getResult(stream);
|
||||||
|
|
||||||
if (text.length > 0 && lastText != text) {
|
if (text.length > 0 && lastText != text) {
|
||||||
lastText = text;
|
lastText = text;
|
||||||
display.print(segmentIndex, lastText);
|
console.log(segmentIndex, lastText);
|
||||||
}
|
}
|
||||||
if (isEndpoint) {
|
if (isEndpoint) {
|
||||||
if (text.length > 0) {
|
if (text.length > 0) {
|
||||||
|
|||||||
@@ -6,37 +6,60 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let onlineTransducerModelConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
encoder:
|
||||||
featConfig.featureDim = 80;
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
|
||||||
|
decoder:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
|
||||||
|
joiner:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let onlineParaformerModelConfig = {
|
||||||
const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
|
encoder: '',
|
||||||
transducer.encoder =
|
decoder: '',
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
|
};
|
||||||
transducer.decoder =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
|
|
||||||
transducer.joiner =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
|
|
||||||
const tokens =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OnlineModelConfig();
|
let onlineZipformer2CtcModelConfig = {
|
||||||
modelConfig.transducer = transducer;
|
model: '',
|
||||||
modelConfig.tokens = tokens;
|
};
|
||||||
modelConfig.modelType = 'zipformer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
|
let onlineModelConfig = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
transducer: onlineTransducerModelConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
paraformer: onlineParaformerModelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
|
tokens:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: 'zipformer',
|
||||||
|
};
|
||||||
|
|
||||||
recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
|
let featureConfig = {
|
||||||
return recognizer;
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
}
|
}
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
|
||||||
@@ -50,8 +73,8 @@ function decode(samples) {
|
|||||||
while (recognizer.isReady(stream)) {
|
while (recognizer.isReady(stream)) {
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
}
|
}
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
||||||
|
|||||||
@@ -6,32 +6,58 @@ const wav = require('wav');
|
|||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer() {
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
let onlineTransducerModelConfig = {
|
||||||
featConfig.sampleRate = 16000;
|
encoder: '',
|
||||||
featConfig.featureDim = 80;
|
decoder: '',
|
||||||
|
joiner: '',
|
||||||
|
};
|
||||||
|
|
||||||
// test online recognizer
|
let onlineParaformerModelConfig = {
|
||||||
const zipformer2Ctc = new sherpa_onnx.OnlineZipformer2CtcModelConfig();
|
encoder: '',
|
||||||
zipformer2Ctc.model =
|
decoder: '',
|
||||||
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx';
|
};
|
||||||
const tokens =
|
|
||||||
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OnlineModelConfig();
|
let onlineZipformer2CtcModelConfig = {
|
||||||
modelConfig.zipformer2Ctc = zipformer2Ctc;
|
model:
|
||||||
modelConfig.tokens = tokens;
|
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx',
|
||||||
|
};
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
|
let onlineModelConfig = {
|
||||||
recognizerConfig.featConfig = featConfig;
|
transducer: onlineTransducerModelConfig,
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
paraformer: onlineParaformerModelConfig,
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
|
tokens:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: '',
|
||||||
|
};
|
||||||
|
|
||||||
recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
|
let featureConfig = {
|
||||||
return recognizer;
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
}
|
}
|
||||||
recognizer = createRecognizer();
|
|
||||||
stream = recognizer.createStream();
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
const waveFilename =
|
const waveFilename =
|
||||||
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav';
|
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav';
|
||||||
@@ -45,8 +71,8 @@ function decode(samples) {
|
|||||||
while (recognizer.isReady(stream)) {
|
while (recognizer.isReady(stream)) {
|
||||||
recognizer.decode(stream);
|
recognizer.decode(stream);
|
||||||
}
|
}
|
||||||
const r = recognizer.getResult(stream);
|
const text = recognizer.getResult(stream);
|
||||||
console.log(r.text);
|
console.log(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
||||||
|
|||||||
@@ -1,101 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
|
||||||
//
|
|
||||||
const sherpa_onnx = require('sherpa-onnx3');
|
|
||||||
const portAudio = require('naudiodon2');
|
|
||||||
console.log(portAudio.getDevices());
|
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
|
||||||
featConfig.sampleRate = 16000;
|
|
||||||
featConfig.featureDim = 80;
|
|
||||||
|
|
||||||
// test online recognizer
|
|
||||||
const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
|
|
||||||
paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx';
|
|
||||||
const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
|
||||||
modelConfig.paraformer = paraformer;
|
|
||||||
modelConfig.tokens = tokens;
|
|
||||||
modelConfig.modelType = 'paraformer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
|
||||||
recognizerConfig.featConfig = featConfig;
|
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
|
||||||
return recognizer
|
|
||||||
}
|
|
||||||
|
|
||||||
function createVad() {
|
|
||||||
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
|
|
||||||
sileroVadModelConfig.model = './silero_vad.onnx';
|
|
||||||
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
|
|
||||||
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
|
|
||||||
sileroVadModelConfig.windowSize = 512;
|
|
||||||
|
|
||||||
const vadModelConfig = new sherpa_onnx.VadModelConfig();
|
|
||||||
vadModelConfig.sileroVad = sileroVadModelConfig;
|
|
||||||
vadModelConfig.sampleRate = 16000;
|
|
||||||
|
|
||||||
const bufferSizeInSeconds = 60;
|
|
||||||
const vad = new sherpa_onnx.VoiceActivityDetector(
|
|
||||||
vadModelConfig, bufferSizeInSeconds);
|
|
||||||
return vad;
|
|
||||||
}
|
|
||||||
|
|
||||||
const recognizer = createOfflineRecognizer();
|
|
||||||
const vad = createVad();
|
|
||||||
|
|
||||||
const bufferSizeInSeconds = 30;
|
|
||||||
const buffer =
|
|
||||||
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
|
|
||||||
|
|
||||||
var ai = new portAudio.AudioIO({
|
|
||||||
inOptions: {
|
|
||||||
channelCount: 1,
|
|
||||||
sampleFormat: portAudio.SampleFormatFloat32,
|
|
||||||
sampleRate: vad.config.sampleRate,
|
|
||||||
deviceId: -1, // Use -1 or omit the deviceId to select the default device
|
|
||||||
closeOnError: true // Close the stream if an audio error is detected, if
|
|
||||||
// set false then just log the error
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let printed = false;
|
|
||||||
let index = 0;
|
|
||||||
ai.on('data', data => {
|
|
||||||
const windowSize = vad.config.sileroVad.windowSize;
|
|
||||||
buffer.push(new Float32Array(data.buffer));
|
|
||||||
while (buffer.size() > windowSize) {
|
|
||||||
const samples = buffer.get(buffer.head(), windowSize);
|
|
||||||
buffer.pop(windowSize);
|
|
||||||
vad.acceptWaveform(samples)
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!vad.isEmpty()) {
|
|
||||||
const segment = vad.front();
|
|
||||||
vad.pop();
|
|
||||||
const stream = recognizer.createStream();
|
|
||||||
stream.acceptWaveform(
|
|
||||||
recognizer.config.featConfig.sampleRate, segment.samples);
|
|
||||||
recognizer.decode(stream);
|
|
||||||
const r = recognizer.getResult(stream);
|
|
||||||
stream.free();
|
|
||||||
if (r.text.length > 0) {
|
|
||||||
console.log(`${index}: ${r.text}`);
|
|
||||||
index += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
ai.on('close', () => {
|
|
||||||
console.log('Free resources');
|
|
||||||
recognizer.free();
|
|
||||||
vad.free();
|
|
||||||
buffer.free();
|
|
||||||
});
|
|
||||||
|
|
||||||
ai.start();
|
|
||||||
console.log('Started! Please speak')
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
|
||||||
//
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
|
||||||
const portAudio = require('naudiodon2');
|
|
||||||
console.log(portAudio.getDevices());
|
|
||||||
|
|
||||||
function createOfflineRecognizer() {
|
|
||||||
const featConfig = new sherpa_onnx.FeatureConfig();
|
|
||||||
featConfig.sampleRate = 16000;
|
|
||||||
featConfig.featureDim = 80;
|
|
||||||
|
|
||||||
// test online recognizer
|
|
||||||
const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
|
|
||||||
transducer.encoder =
|
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
|
|
||||||
transducer.decoder =
|
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
|
|
||||||
transducer.joiner =
|
|
||||||
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
|
|
||||||
const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
|
|
||||||
|
|
||||||
const modelConfig = new sherpa_onnx.OfflineModelConfig();
|
|
||||||
modelConfig.transducer = transducer;
|
|
||||||
modelConfig.tokens = tokens;
|
|
||||||
modelConfig.modelType = 'transducer';
|
|
||||||
|
|
||||||
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
|
|
||||||
recognizerConfig.featConfig = featConfig;
|
|
||||||
recognizerConfig.modelConfig = modelConfig;
|
|
||||||
recognizerConfig.decodingMethod = 'greedy_search';
|
|
||||||
|
|
||||||
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
|
|
||||||
return recognizer;
|
|
||||||
}
|
|
||||||
|
|
||||||
function createVad() {
|
|
||||||
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
|
|
||||||
sileroVadModelConfig.model = './silero_vad.onnx';
|
|
||||||
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
|
|
||||||
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
|
|
||||||
sileroVadModelConfig.windowSize = 512;
|
|
||||||
|
|
||||||
const vadModelConfig = new sherpa_onnx.VadModelConfig();
|
|
||||||
vadModelConfig.sileroVad = sileroVadModelConfig;
|
|
||||||
vadModelConfig.sampleRate = 16000;
|
|
||||||
|
|
||||||
const bufferSizeInSeconds = 60;
|
|
||||||
const vad = new sherpa_onnx.VoiceActivityDetector(
|
|
||||||
vadModelConfig, bufferSizeInSeconds);
|
|
||||||
return vad;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recognizer and voice activity detector shared by the audio callbacks below.
const recognizer = createOfflineRecognizer();
const vad = createVad();

// Microphone samples are staged here and handed to the VAD one window at a
// time.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// Running number printed in front of every non-empty recognition result.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // View only the bytes that belong to this chunk. A Node.js Buffer can be a
  // slice of a larger ArrayBuffer, so data.buffer on its own may also cover
  // unrelated memory.
  buffer.push(new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT)));

  // Feed the VAD in chunks of exactly windowSize samples.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  // Decode every speech segment the VAD has finished.
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const stream = recognizer.createStream();
    let r;
    try {
      stream.acceptWaveform(
          recognizer.config.featConfig.sampleRate, segment.samples);
      recognizer.decode(stream);
      r = recognizer.getResult(stream);
    } finally {
      // Release the stream even when decoding throws.
      stream.free();
    }

    if (r.text.length > 0) {
      console.log(`${index}: ${r.text}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  recognizer.free();
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
|
||||||
@@ -1,102 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
|
||||||
//
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
|
||||||
const portAudio = require('naudiodon2');
|
|
||||||
console.log(portAudio.getDevices());
|
|
||||||
|
|
||||||
/**
 * Build the offline (non-streaming) recognizer used by this example.
 *
 * It loads the int8 whisper tiny.en encoder and decoder together with the
 * token table from ./sherpa-onnx-whisper-tiny.en/ and decodes with greedy
 * search.
 *
 * @returns {sherpa_onnx.OfflineRecognizer} The configured recognizer.
 */
function createOfflineRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  // Whisper model files used by the offline recognizer.
  const whisperModel =
      Object.assign(new sherpa_onnx.OfflineWhisperModelConfig(), {
        encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
        decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OfflineModelConfig(), {
    whisper: whisperModel,
    tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    modelType: 'whisper',
  });

  const config = Object.assign(new sherpa_onnx.OfflineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
|
||||||
|
|
||||||
/**
 * Build the voice activity detector used by this example.
 *
 * The silero VAD model is read from ./silero_vad.onnx and the detector is
 * configured for 16000 Hz input.
 *
 * @returns {sherpa_onnx.VoiceActivityDetector} Detector created with a
 *     buffer-size argument of 60 (seconds).
 */
function createVad() {
  const silero = Object.assign(new sherpa_onnx.SileroVadModelConfig(), {
    model: './silero_vad.onnx',
    minSpeechDuration: 0.3,   // seconds
    minSilenceDuration: 0.3,  // seconds
    windowSize: 512,
  });

  const modelConfig = Object.assign(new sherpa_onnx.VadModelConfig(), {
    sileroVad: silero,
    sampleRate: 16000,
  });

  const bufferSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(modelConfig, bufferSeconds);
}
|
|
||||||
|
|
||||||
// Recognizer and voice activity detector shared by the audio callbacks below.
const recognizer = createOfflineRecognizer();
const vad = createVad();

// Microphone samples are staged here and handed to the VAD one window at a
// time.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// Running number printed in front of every non-empty recognition result.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // View only the bytes that belong to this chunk. A Node.js Buffer can be a
  // slice of a larger ArrayBuffer, so data.buffer on its own may also cover
  // unrelated memory.
  buffer.push(new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT)));

  // Feed the VAD in chunks of exactly windowSize samples.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  // Decode every speech segment the VAD has finished.
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const stream = recognizer.createStream();
    let r;
    try {
      stream.acceptWaveform(
          recognizer.config.featConfig.sampleRate, segment.samples);
      recognizer.decode(stream);
      r = recognizer.getResult(stream);
    } finally {
      // Release the stream even when decoding throws.
      stream.free();
    }

    if (r.text.length > 0) {
      console.log(`${index}: ${r.text}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  recognizer.free();
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
|
||||||
|
|
||||||
const sherpa_onnx = require('sherpa-onnx');
|
|
||||||
const portAudio = require('naudiodon2');
|
|
||||||
console.log(portAudio.getDevices());
|
|
||||||
|
|
||||||
/**
 * Build the voice activity detector used by this example.
 *
 * The silero VAD model is read from ./silero_vad.onnx and the detector is
 * configured for 16000 Hz input.
 *
 * @returns {sherpa_onnx.VoiceActivityDetector} Detector created with a
 *     buffer-size argument of 60 (seconds).
 */
function createVad() {
  const silero = Object.assign(new sherpa_onnx.SileroVadModelConfig(), {
    model: './silero_vad.onnx',
    minSpeechDuration: 0.3,   // seconds
    minSilenceDuration: 0.3,  // seconds
    windowSize: 512,
  });

  const modelConfig = Object.assign(new sherpa_onnx.VadModelConfig(), {
    sileroVad: silero,
    sampleRate: 16000,
  });

  const bufferSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(modelConfig, bufferSeconds);
}
|
|
||||||
// Declared with const: a bare assignment would create an implicit global and
// throws a ReferenceError in strict mode.
const vad = createVad();

// Microphone samples are staged here and handed to the VAD one window at a
// time.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// True once "Detected speech" has been printed for the current stretch of
// speech; cleared again when the VAD stops reporting speech.
let printed = false;
// Running number of the speech segment being reported.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // View only the bytes that belong to this chunk. A Node.js Buffer can be a
  // slice of a larger ArrayBuffer, so data.buffer on its own may also cover
  // unrelated memory.
  buffer.push(new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT)));

  // Feed the VAD in chunks of exactly windowSize samples and report state
  // changes after every chunk.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);

    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every finished segment and print its length.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
Language: JavaScript
|
|
||||||
JavaScriptQuotes: Double
|
|
||||||
|
|
||||||
@@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet.
|
|||||||
Please refer to
|
Please refer to
|
||||||
https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
|
https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
|
||||||
for examples.
|
for examples.
|
||||||
|
|
||||||
|
You need Node >= 18 for this package.
|
||||||
|
|||||||
@@ -1,726 +1,26 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
//
|
'use strict'
|
||||||
// Please use
|
|
||||||
//
|
|
||||||
// npm install ffi-napi ref-struct-napi
|
|
||||||
//
|
|
||||||
// before you use this file
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// Please use Node 13. Node 16, 18, 20, and 21 are known not to work.
|
|
||||||
// See also
|
|
||||||
// https://github.com/node-ffi-napi/node-ffi-napi/issues/244
|
|
||||||
// and
|
|
||||||
// https://github.com/node-ffi-napi/node-ffi-napi/issues/97
|
|
||||||
"use strict"
|
|
||||||
|
|
||||||
const debug = require("debug")("sherpa-onnx");
|
const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
|
||||||
const os = require("os");
|
const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
|
||||||
const path = require("path");
|
const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
|
||||||
const ffi = require("ffi-napi");
|
|
||||||
const ref = require("ref-napi");
|
|
||||||
const fs = require("fs");
|
|
||||||
var ArrayType = require("ref-array-napi");
|
|
||||||
|
|
||||||
const FloatArray = ArrayType(ref.types.float);
|
function createOnlineRecognizer(config) {
|
||||||
const StructType = require("ref-struct-napi");
|
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
||||||
const cstring = ref.types.CString;
|
|
||||||
const cstringPtr = ref.refType(cstring);
|
|
||||||
const int32_t = ref.types.int32;
|
|
||||||
const float = ref.types.float;
|
|
||||||
const floatPtr = ref.refType(float);
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineTransducerModelConfig = StructType({
|
|
||||||
"encoder" : cstring,
|
|
||||||
"decoder" : cstring,
|
|
||||||
"joiner" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineParaformerModelConfig = StructType({
|
|
||||||
"encoder" : cstring,
|
|
||||||
"decoder" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineZipformer2CtcModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineModelConfig = StructType({
|
|
||||||
"transducer" : SherpaOnnxOnlineTransducerModelConfig,
|
|
||||||
"paraformer" : SherpaOnnxOnlineParaformerModelConfig,
|
|
||||||
"zipformer2Ctc" : SherpaOnnxOnlineZipformer2CtcModelConfig,
|
|
||||||
"tokens" : cstring,
|
|
||||||
"numThreads" : int32_t,
|
|
||||||
"provider" : cstring,
|
|
||||||
"debug" : int32_t,
|
|
||||||
"modelType" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxFeatureConfig = StructType({
|
|
||||||
"sampleRate" : int32_t,
|
|
||||||
"featureDim" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineRecognizerConfig = StructType({
|
|
||||||
"featConfig" : SherpaOnnxFeatureConfig,
|
|
||||||
"modelConfig" : SherpaOnnxOnlineModelConfig,
|
|
||||||
"decodingMethod" : cstring,
|
|
||||||
"maxActivePaths" : int32_t,
|
|
||||||
"enableEndpoint" : int32_t,
|
|
||||||
"rule1MinTrailingSilence" : float,
|
|
||||||
"rule2MinTrailingSilence" : float,
|
|
||||||
"rule3MinUtteranceLength" : float,
|
|
||||||
"hotwordsFile" : cstring,
|
|
||||||
"hotwordsScore" : float,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineRecognizerResult = StructType({
|
|
||||||
"text" : cstring,
|
|
||||||
"tokens" : cstring,
|
|
||||||
"tokensArr" : cstringPtr,
|
|
||||||
"timestamps" : floatPtr,
|
|
||||||
"count" : int32_t,
|
|
||||||
"json" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void);
|
|
||||||
const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void);
|
|
||||||
const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr);
|
|
||||||
const SherpaOnnxOnlineRecognizerResultPtr =
|
|
||||||
ref.refType(SherpaOnnxOnlineRecognizerResult);
|
|
||||||
|
|
||||||
const SherpaOnnxOnlineRecognizerConfigPtr =
|
|
||||||
ref.refType(SherpaOnnxOnlineRecognizerConfig);
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineTransducerModelConfig = StructType({
|
|
||||||
"encoder" : cstring,
|
|
||||||
"decoder" : cstring,
|
|
||||||
"joiner" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineParaformerModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineWhisperModelConfig = StructType({
|
|
||||||
"encoder" : cstring,
|
|
||||||
"decoder" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineTdnnModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineLMConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
"scale" : float,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineModelConfig = StructType({
|
|
||||||
"transducer" : SherpaOnnxOfflineTransducerModelConfig,
|
|
||||||
"paraformer" : SherpaOnnxOfflineParaformerModelConfig,
|
|
||||||
"nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig,
|
|
||||||
"whisper" : SherpaOnnxOfflineWhisperModelConfig,
|
|
||||||
"tdnn" : SherpaOnnxOfflineTdnnModelConfig,
|
|
||||||
"tokens" : cstring,
|
|
||||||
"numThreads" : int32_t,
|
|
||||||
"debug" : int32_t,
|
|
||||||
"provider" : cstring,
|
|
||||||
"modelType" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineRecognizerConfig = StructType({
|
|
||||||
"featConfig" : SherpaOnnxFeatureConfig,
|
|
||||||
"modelConfig" : SherpaOnnxOfflineModelConfig,
|
|
||||||
"lmConfig" : SherpaOnnxOfflineLMConfig,
|
|
||||||
"decodingMethod" : cstring,
|
|
||||||
"maxActivePaths" : int32_t,
|
|
||||||
"hotwordsFile" : cstring,
|
|
||||||
"hotwordsScore" : float,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineRecognizerResult = StructType({
|
|
||||||
"text" : cstring,
|
|
||||||
"timestamps" : floatPtr,
|
|
||||||
"count" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void);
|
|
||||||
const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void);
|
|
||||||
const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr);
|
|
||||||
const SherpaOnnxOfflineRecognizerResultPtr =
|
|
||||||
ref.refType(SherpaOnnxOfflineRecognizerResult);
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineRecognizerConfigPtr =
|
|
||||||
ref.refType(SherpaOnnxOfflineRecognizerConfig);
|
|
||||||
|
|
||||||
// vad
|
|
||||||
const SherpaOnnxSileroVadModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
"threshold" : float,
|
|
||||||
"minSilenceDuration" : float,
|
|
||||||
"minSpeechDuration" : float,
|
|
||||||
"windowSize" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxVadModelConfig = StructType({
|
|
||||||
"sileroVad" : SherpaOnnxSileroVadModelConfig,
|
|
||||||
"sampleRate" : int32_t,
|
|
||||||
"numThreads" : int32_t,
|
|
||||||
"provider" : cstring,
|
|
||||||
"debug" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxSpeechSegment = StructType({
|
|
||||||
"start" : int32_t,
|
|
||||||
"samples" : FloatArray,
|
|
||||||
"n" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig);
|
|
||||||
const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment);
|
|
||||||
const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void);
|
|
||||||
const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void);
|
|
||||||
|
|
||||||
// tts
|
|
||||||
const SherpaOnnxOfflineTtsVitsModelConfig = StructType({
|
|
||||||
"model" : cstring,
|
|
||||||
"lexicon" : cstring,
|
|
||||||
"tokens" : cstring,
|
|
||||||
"dataDir" : cstring,
|
|
||||||
"noiseScale" : float,
|
|
||||||
"noiseScaleW" : float,
|
|
||||||
"lengthScale" : float,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineTtsModelConfig = StructType({
|
|
||||||
"vits" : SherpaOnnxOfflineTtsVitsModelConfig,
|
|
||||||
"numThreads" : int32_t,
|
|
||||||
"debug" : int32_t,
|
|
||||||
"provider" : cstring,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineTtsConfig = StructType({
|
|
||||||
"model" : SherpaOnnxOfflineTtsModelConfig,
|
|
||||||
"ruleFsts" : cstring,
|
|
||||||
"maxNumSentences" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxGeneratedAudio = StructType({
|
|
||||||
"samples" : FloatArray,
|
|
||||||
"n" : int32_t,
|
|
||||||
"sampleRate" : int32_t,
|
|
||||||
});
|
|
||||||
|
|
||||||
const SherpaOnnxOfflineTtsVitsModelConfigPtr =
|
|
||||||
ref.refType(SherpaOnnxOfflineTtsVitsModelConfig);
|
|
||||||
const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig);
|
|
||||||
const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio);
|
|
||||||
const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void);
|
|
||||||
|
|
||||||
const SherpaOnnxDisplayPtr = ref.refType(ref.types.void);
|
|
||||||
|
|
||||||
// Path of the sherpa-onnx C API shared library that matches the current
// platform and CPU architecture. Unsupported combinations throw.
let soname;
if (os.platform() === "win32") {
  // see https://nodejs.org/api/process.html#processarch
  if (process.arch === "x64") {
    // Append the directory holding the DLL to the Path environment variable.
    const currentPath = process.env.Path;
    const dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64"));
    process.env.Path = currentPath + path.delimiter + dllDirectory;

    soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll");
  } else if (process.arch === "ia32") {
    // Append the directory holding the DLL to the Path environment variable.
    const currentPath = process.env.Path;
    const dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86"));
    process.env.Path = currentPath + path.delimiter + dllDirectory;

    soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll");
  } else {
    throw new Error(
        `Support only Windows x86 and x64 for now. Given ${process.arch}`);
  }
} else if (os.platform() === "darwin") {
  if (process.arch === "x64") {
    soname =
        path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib");
  } else if (process.arch === "arm64") {
    soname =
        path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib");
  } else {
    throw new Error(
        `Support only macOS x64 and arm64 for now. Given ${process.arch}`);
  }
} else if (os.platform() === "linux") {
  if (process.arch === "x64") {
    soname =
        path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so");
  } else {
    throw new Error(`Support only Linux x64 for now. Given ${process.arch}`);
  }
} else {
  throw new Error(`Unsupported platform ${os.platform()}`);
}
|
}
|
||||||
|
|
||||||
if (!fs.existsSync(soname)) {
|
function createOfflineRecognizer(config) {
|
||||||
throw new Error(`Cannot find file ${soname}. Please make sure you have run
|
return new sherpa_onnx_asr.OfflineRecognizer(config, wasmModule);
|
||||||
./build.sh`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debug("soname ", soname)
|
function createOfflineTts(config) {
|
||||||
|
return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
|
||||||
const libsherpa_onnx = ffi.Library(soname, {
|
}
|
||||||
// online asr
|
|
||||||
"CreateOnlineRecognizer" : [
|
|
||||||
SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ]
|
|
||||||
],
|
|
||||||
"DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ],
|
|
||||||
"CreateOnlineStream" :
|
|
||||||
[ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ],
|
|
||||||
"CreateOnlineStreamWithHotwords" :
|
|
||||||
[ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ],
|
|
||||||
"DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
"AcceptWaveform" :
|
|
||||||
[ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ],
|
|
||||||
"IsOnlineStreamReady" :
|
|
||||||
[ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
"DecodeOnlineStream" :
|
|
||||||
[ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
"DecodeMultipleOnlineStreams" : [
|
|
||||||
"void",
|
|
||||||
[ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ]
|
|
||||||
],
|
|
||||||
"GetOnlineStreamResult" : [
|
|
||||||
SherpaOnnxOnlineRecognizerResultPtr,
|
|
||||||
[ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ]
|
|
||||||
],
|
|
||||||
"DestroyOnlineRecognizerResult" :
|
|
||||||
[ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ],
|
|
||||||
"Reset" :
|
|
||||||
[ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
"InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
"IsEndpoint" :
|
|
||||||
[ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
|
|
||||||
|
|
||||||
// offline asr
|
|
||||||
"CreateOfflineRecognizer" : [
|
|
||||||
SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ]
|
|
||||||
],
|
|
||||||
"DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ],
|
|
||||||
"CreateOfflineStream" :
|
|
||||||
[ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ],
|
|
||||||
"DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ],
|
|
||||||
"AcceptWaveformOffline" :
|
|
||||||
[ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ],
|
|
||||||
"DecodeOfflineStream" : [
|
|
||||||
"void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ]
|
|
||||||
],
|
|
||||||
"DecodeMultipleOfflineStreams" : [
|
|
||||||
"void",
|
|
||||||
[ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ]
|
|
||||||
],
|
|
||||||
"GetOfflineStreamResult" :
|
|
||||||
[ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ],
|
|
||||||
"DestroyOfflineRecognizerResult" :
|
|
||||||
[ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ],
|
|
||||||
|
|
||||||
// vad
|
|
||||||
"SherpaOnnxCreateCircularBuffer" :
|
|
||||||
[ SherpaOnnxCircularBufferPtr, [ int32_t ] ],
|
|
||||||
"SherpaOnnxDestroyCircularBuffer" :
|
|
||||||
[ "void", [ SherpaOnnxCircularBufferPtr ] ],
|
|
||||||
"SherpaOnnxCircularBufferPush" :
|
|
||||||
[ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ],
|
|
||||||
"SherpaOnnxCircularBufferGet" :
|
|
||||||
[ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ],
|
|
||||||
"SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ],
|
|
||||||
"SherpaOnnxCircularBufferPop" :
|
|
||||||
[ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ],
|
|
||||||
"SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
|
|
||||||
"SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
|
|
||||||
"SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ],
|
|
||||||
"SherpaOnnxCreateVoiceActivityDetector" : [
|
|
||||||
SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ]
|
|
||||||
],
|
|
||||||
"SherpaOnnxDestroyVoiceActivityDetector" :
|
|
||||||
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorAcceptWaveform" :
|
|
||||||
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorEmpty" :
|
|
||||||
[ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorDetected" :
|
|
||||||
[ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorPop" :
|
|
||||||
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorClear" :
|
|
||||||
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorFront" :
|
|
||||||
[ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
"SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ],
|
|
||||||
"SherpaOnnxVoiceActivityDetectorReset" :
|
|
||||||
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
|
|
||||||
// tts
|
|
||||||
"SherpaOnnxCreateOfflineTts" :
|
|
||||||
[ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ],
|
|
||||||
"SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ],
|
|
||||||
"SherpaOnnxOfflineTtsGenerate" : [
|
|
||||||
SherpaOnnxGeneratedAudioPtr,
|
|
||||||
[ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ]
|
|
||||||
],
|
|
||||||
"SherpaOnnxDestroyOfflineTtsGeneratedAudio" :
|
|
||||||
[ "void", [ SherpaOnnxGeneratedAudioPtr ] ],
|
|
||||||
"SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ],
|
|
||||||
|
|
||||||
// display
|
|
||||||
"CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ],
|
|
||||||
"DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ],
|
|
||||||
"SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ],
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
 * Wrapper around the display handle returned by
 * libsherpa_onnx.CreateDisplay().
 */
class Display {
  /**
   * @param maxWordPerLine {Number} Forwarded unchanged to CreateDisplay().
   */
  constructor(maxWordPerLine) {
    this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine);
  }

  /** Destroy the native handle. Calling it again afterwards does nothing. */
  free() {
    if (!this.handle) {
      return;
    }
    libsherpa_onnx.DestroyDisplay(this.handle);
    this.handle = null;
  }

  /**
   * Forward an index and a string to SherpaOnnxPrint().
   *
   * @param idx {Number}
   * @param s {String}
   */
  print(idx, s) {
    libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s);
  }
}
|
|
||||||
|
|
||||||
/** Recognition result of an online stream; exposes the text as `text`. */
class OnlineResult {
  /**
   * @param text The recognized text. It is passed through Buffer.from() with
   *             the "utf-8" encoding and converted back to a string.
   */
  constructor(text) {
    const utf8Bytes = Buffer.from(text, "utf-8");
    this.text = utf8Bytes.toString();
  }
}
|
|
||||||
|
|
||||||
/** Wrapper around a native online stream handle. */
class OnlineStream {
  /**
   * @param handle The native stream handle to wrap.
   */
  constructor(handle) {
    this.handle = handle;
  }

  /** Destroy the native stream. Calling it again afterwards does nothing. */
  free() {
    if (!this.handle) {
      return;
    }
    libsherpa_onnx.DestroyOnlineStream(this.handle);
    this.handle = null;
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const numSamples = samples.length;
    libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples, numSamples);
  }
}
|
|
||||||
|
|
||||||
/**
 * Wrapper around the recognizer handle returned by
 * libsherpa_onnx.CreateOnlineRecognizer().
 */
class OnlineRecognizer {
  /**
   * @param config Recognizer configuration. Its ref() result is passed to
   *               CreateOnlineRecognizer() and the object itself is kept as
   *               this.config.
   */
  constructor(config) {
    this.config = config;
    this.recognizer_handle =
        libsherpa_onnx.CreateOnlineRecognizer(config.ref());
  }

  /** Destroy the native recognizer. Calling it again does nothing. */
  free() {
    if (this.recognizer_handle) {
      libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle);
      this.recognizer_handle = null;
    }
  }

  /**
   * @returns {OnlineStream} A new stream created from this recognizer.
   */
  createStream() {
    const handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle);
    return new OnlineStream(handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns The value returned by IsOnlineStreamReady() (declared as int32).
   */
  isReady(stream) {
    return libsherpa_onnx.IsOnlineStreamReady(
        this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns The value returned by IsEndpoint() (declared as int32).
   */
  isEndpoint(stream) {
    return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle);
  }

  /**
   * Forward the stream to the native Reset() call.
   *
   * @param stream {OnlineStream}
   */
  reset(stream) {
    libsherpa_onnx.Reset(this.recognizer_handle, stream.handle);
  }

  /**
   * Forward the stream to the native DecodeOnlineStream() call.
   *
   * @param stream {OnlineStream}
   */
  decode(stream) {
    libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {OnlineResult} A JavaScript copy of the current result text.
   */
  getResult(stream) {
    const handle = libsherpa_onnx.GetOnlineStreamResult(
        this.recognizer_handle, stream.handle);
    try {
      return new OnlineResult(handle.deref().text);
    } finally {
      // Always release the native result, even when building the JavaScript
      // copy throws.
      libsherpa_onnx.DestroyOnlineRecognizerResult(handle);
    }
  }
}
|
|
||||||
|
|
||||||
/** Recognition result of an offline stream; exposes the text as `text`. */
class OfflineResult {
  /**
   * @param text The recognized text. It is passed through Buffer.from() with
   *             the "utf-8" encoding and converted back to a string.
   */
  constructor(text) {
    const utf8Bytes = Buffer.from(text, "utf-8");
    this.text = utf8Bytes.toString();
  }
}
|
|
||||||
|
|
||||||
/** Wrapper around a native offline stream handle. */
class OfflineStream {
  /**
   * @param handle The native stream handle to wrap.
   */
  constructor(handle) {
    this.handle = handle;
  }

  /** Destroy the native stream. Calling it again afterwards does nothing. */
  free() {
    if (!this.handle) {
      return;
    }
    libsherpa_onnx.DestroyOfflineStream(this.handle);
    this.handle = null;
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const numSamples = samples.length;
    libsherpa_onnx.AcceptWaveformOffline(
        this.handle, sampleRate, samples, numSamples);
  }
}
|
|
||||||
|
|
||||||
/**
 * Wrapper around the recognizer handle returned by
 * libsherpa_onnx.CreateOfflineRecognizer().
 */
class OfflineRecognizer {
  /**
   * @param config Recognizer configuration. Its ref() result is passed to
   *               CreateOfflineRecognizer() and the object itself is kept as
   *               this.config.
   */
  constructor(config) {
    this.config = config;
    this.recognizer_handle =
        libsherpa_onnx.CreateOfflineRecognizer(config.ref());
  }

  /** Destroy the native recognizer. Calling it again does nothing. */
  free() {
    if (this.recognizer_handle) {
      libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle);
      this.recognizer_handle = null;
    }
  }

  /**
   * @returns {OfflineStream} A new stream created from this recognizer.
   */
  createStream() {
    const handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle);
    return new OfflineStream(handle);
  }

  /**
   * Forward the stream to the native DecodeOfflineStream() call.
   *
   * @param stream {OfflineStream}
   */
  decode(stream) {
    libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OfflineStream}
   * @returns {OfflineResult} A JavaScript copy of the stream's result text.
   */
  getResult(stream) {
    const handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle);
    try {
      return new OfflineResult(handle.deref().text);
    } finally {
      // Always release the native result, even when building the JavaScript
      // copy throws.
      libsherpa_onnx.DestroyOfflineRecognizerResult(handle);
    }
  }
}
|
|
||||||
|
|
||||||
/** Plain record pairing a start value with its samples. */
class SpeechSegment {
  /**
   * @param start Stored unchanged as this.start.
   * @param samples Stored unchanged as this.samples.
   */
  constructor(start, samples) {
    Object.assign(this, {start, samples});
  }
}
|
|
||||||
|
|
||||||
// this buffer holds only float entries.
|
|
||||||
/**
 * Wrapper around the handle returned by
 * libsherpa_onnx.SherpaOnnxCreateCircularBuffer().
 */
class CircularBuffer {
  /**
   * @param capacity {int} The capacity of the circular buffer.
   */
  constructor(capacity) {
    this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity);
  }

  /** Destroy the native buffer. Calling it again afterwards does nothing. */
  free() {
    if (!this.handle) {
      return;
    }
    libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle);
    this.handle = null;
  }

  /**
   * @param samples {Float32Array}
   */
  push(samples) {
    const numSamples = samples.length;
    libsherpa_onnx.SherpaOnnxCircularBufferPush(
        this.handle, samples, numSamples);
  }

  /**
   * Read n entries starting at startIndex.
   *
   * @param startIndex {int}
   * @param n {int}
   * @returns {Float32Array} A JavaScript-owned copy of the requested entries.
   */
  get(startIndex, n) {
    const nativeArray =
        libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n);

    // https://tootallnate.github.io/ref/#exports-reinterpret
    const byteLength = n * ref.sizeof.float;
    const nativeView =
        new Float32Array(nativeArray.buffer.reinterpret(byteLength).buffer);

    // Copy before releasing: the native array is freed on the next line.
    const copy = nativeView.slice(0);
    libsherpa_onnx.SherpaOnnxCircularBufferFree(nativeArray);
    return copy;
  }

  /**
   * Forward n to the native pop call.
   *
   * @param n {int}
   */
  pop(n) {
    libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n);
  }

  /** @returns The value reported by SherpaOnnxCircularBufferSize(). */
  size() {
    return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle);
  }

  /** @returns The value reported by SherpaOnnxCircularBufferHead(). */
  head() {
    return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle);
  }

  /** Forward to the native reset call. */
  reset() {
    libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle);
  }
}
|
|
||||||
|
|
||||||
/**
 * Wrapper around the native voice activity detector.
 *
 * Every method forwards to the matching `SherpaOnnxVoiceActivityDetector*`
 * function of `libsherpa_onnx`, passing `this.handle`.
 */
class VoiceActivityDetector {
  /**
   * @param config Configuration object; `config.ref()` is handed to the
   *     native constructor.
   * @param bufferSizeInSeconds Forwarded unchanged to
   *     `SherpaOnnxCreateVoiceActivityDetector`.
   */
  constructor(config, bufferSizeInSeconds) {
    // NOTE(review): the config is kept on the instance, presumably so the
    // struct passed by reference stays alive with the detector -- confirm.
    this.config = config;
    this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector(
        config.ref(), bufferSizeInSeconds);
  }

  /**
   * Destroy the native detector. Safe to call more than once.
   */
  free() {
    if (this.handle) {
      libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle);
      // Clear the handle so that a repeated free() does not destroy the
      // same native object twice (same convention as the other wrappers
      // in this file).
      this.handle = null;
    }
  }

  /**
   * @param samples Array-like of samples; it is passed to the native side
   *     together with `samples.length`.
   */
  acceptWaveform(samples) {
    libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform(
        this.handle, samples, samples.length);
  }

  isEmpty() {
    return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle);
  }

  isDetected() {
    return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle);
  }

  pop() {
    libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle);
  }

  clear() {
    libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle);
  }

  reset() {
    libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle);
  }

  /**
   * Return the segment reported by `SherpaOnnxVoiceActivityDetectorFront`.
   *
   * @returns {SpeechSegment} Segment whose samples are a JS-owned copy.
   */
  front() {
    const segment =
        libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle);

    // Dereference once and reuse the struct for all three fields.
    const {start, n, samples: nativeSamples} = segment.deref();

    const buffer =
        nativeSamples.buffer.reinterpret(n * ref.sizeof.float).buffer;

    // slice(0) copies the floats, since the native segment is destroyed
    // before returning.
    const samples = new Float32Array(buffer).slice(0);
    const ans = new SpeechSegment(start, samples);

    libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment);
    return ans;
  }
};
|
|
||||||
|
|
||||||
/**
 * Audio samples together with their sample rate.
 */
class GeneratedAudio {
  /**
   * @param sampleRate Stored as-is in `this.sampleRate`.
   * @param samples Array-like of samples; stored as-is in `this.samples`.
   */
  constructor(sampleRate, samples) {
    Object.assign(this, {sampleRate, samples});
  }

  /**
   * Write the samples to `filename` through `SherpaOnnxWriteWave`.
   *
   * @param filename Passed unchanged to the native writer.
   */
  save(filename) {
    const {samples, sampleRate} = this;
    libsherpa_onnx.SherpaOnnxWriteWave(
        samples, samples.length, sampleRate, filename);
  }
};
|
|
||||||
|
|
||||||
/**
 * Wrapper around the native offline text-to-speech engine.
 */
class OfflineTts {
  /**
   * @param config Configuration object; `config.ref()` is handed to
   *     `SherpaOnnxCreateOfflineTts`.
   */
  constructor(config) {
    this.config = config;
    this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref());
  }

  /**
   * Destroy the native engine and clear the handle; does nothing when the
   * handle is already falsy.
   */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle);
    this.handle = null;
  }

  /**
   * Generate audio for `text`.
   *
   * `text`, `sid` and `speed` are forwarded unchanged to
   * `SherpaOnnxOfflineTtsGenerate`.
   *
   * @returns {GeneratedAudio} Audio whose samples are a JS-owned copy.
   */
  generate(text, sid, speed) {
    const nativeAudio = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(
        this.handle, text, sid, speed);
    const audio = nativeAudio.deref();

    const byteLength = audio.n * ref.sizeof.float;
    const view =
        new Float32Array(audio.samples.buffer.reinterpret(byteLength).buffer);

    // Copy the floats out before the native audio is destroyed below.
    const generatedAudio = new GeneratedAudio(audio.sampleRate, view.slice(0));

    libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(nativeAudio);

    return generatedAudio;
  }
};
|
|
||||||
|
|
||||||
// online asr
|
|
||||||
const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig;
|
|
||||||
const OnlineModelConfig = SherpaOnnxOnlineModelConfig;
|
|
||||||
const FeatureConfig = SherpaOnnxFeatureConfig;
|
|
||||||
const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig;
|
|
||||||
const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig;
|
|
||||||
const OnlineZipformer2CtcModelConfig = SherpaOnnxOnlineZipformer2CtcModelConfig;
|
|
||||||
|
|
||||||
// offline asr
|
|
||||||
const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig;
|
|
||||||
const OfflineModelConfig = SherpaOnnxOfflineModelConfig;
|
|
||||||
const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig;
|
|
||||||
const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig;
|
|
||||||
const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig;
|
|
||||||
const OfflineNemoEncDecCtcModelConfig =
|
|
||||||
SherpaOnnxOfflineNemoEncDecCtcModelConfig;
|
|
||||||
const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig;
|
|
||||||
|
|
||||||
// vad
|
|
||||||
const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig;
|
|
||||||
const VadModelConfig = SherpaOnnxVadModelConfig;
|
|
||||||
|
|
||||||
// tts
|
|
||||||
const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig;
|
|
||||||
const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig;
|
|
||||||
const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig;
|
|
||||||
|
|
||||||
|
// Note: online means streaming and offline means non-streaming here.
|
||||||
|
// Neither of them requires an internet connection.
|
||||||
module.exports = {
|
module.exports = {
|
||||||
// online asr
|
createOnlineRecognizer,
|
||||||
OnlineTransducerModelConfig,
|
createOfflineRecognizer,
|
||||||
OnlineModelConfig,
|
createOfflineTts,
|
||||||
FeatureConfig,
|
|
||||||
OnlineRecognizerConfig,
|
|
||||||
OnlineRecognizer,
|
|
||||||
OnlineStream,
|
|
||||||
OnlineParaformerModelConfig,
|
|
||||||
OnlineZipformer2CtcModelConfig,
|
|
||||||
|
|
||||||
// offline asr
|
|
||||||
OfflineRecognizer,
|
|
||||||
OfflineStream,
|
|
||||||
OfflineTransducerModelConfig,
|
|
||||||
OfflineModelConfig,
|
|
||||||
OfflineRecognizerConfig,
|
|
||||||
OfflineParaformerModelConfig,
|
|
||||||
OfflineWhisperModelConfig,
|
|
||||||
OfflineNemoEncDecCtcModelConfig,
|
|
||||||
OfflineTdnnModelConfig,
|
|
||||||
// vad
|
|
||||||
SileroVadModelConfig,
|
|
||||||
VadModelConfig,
|
|
||||||
CircularBuffer,
|
|
||||||
VoiceActivityDetector,
|
|
||||||
// tts
|
|
||||||
OfflineTtsVitsModelConfig,
|
|
||||||
OfflineTtsModelConfig,
|
|
||||||
OfflineTtsConfig,
|
|
||||||
OfflineTts,
|
|
||||||
|
|
||||||
//
|
|
||||||
Display,
|
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "sherpa-onnx2",
|
"name": "sherpa-onnx",
|
||||||
"version": "1.8.10",
|
"version": "SHERPA_ONNX_VERSION",
|
||||||
"description": "Real-time speech recognition with Next-gen Kaldi",
|
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
@@ -11,15 +11,30 @@
|
|||||||
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
|
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
|
||||||
},
|
},
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"speech-to-text",
|
"speech to text",
|
||||||
"text-to-speech",
|
"text to speech",
|
||||||
|
"transcription",
|
||||||
"real-time speech recognition",
|
"real-time speech recognition",
|
||||||
"without internet connect",
|
"without internet connection",
|
||||||
"embedded systems",
|
"embedded systems",
|
||||||
"open source",
|
"open source",
|
||||||
"zipformer",
|
"zipformer",
|
||||||
"asr",
|
"asr",
|
||||||
"speech"
|
"tts",
|
||||||
|
"stt",
|
||||||
|
"c++",
|
||||||
|
"onnxruntime",
|
||||||
|
"onnx",
|
||||||
|
"ai",
|
||||||
|
"next-gen kaldi",
|
||||||
|
"offline",
|
||||||
|
"privacy",
|
||||||
|
"open source",
|
||||||
|
"streaming speech recognition",
|
||||||
|
"speech",
|
||||||
|
"recognition",
|
||||||
|
"WebAssembly",
|
||||||
|
"wasm"
|
||||||
],
|
],
|
||||||
"author": "The next-gen Kaldi team",
|
"author": "The next-gen Kaldi team",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
@@ -28,10 +43,5 @@
|
|||||||
},
|
},
|
||||||
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
|
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"ffi-napi": "^4.0.3",
|
|
||||||
"npm": "^6.14.18",
|
|
||||||
"ref-array-napi": "^1.2.2",
|
|
||||||
"ref-napi": "^3.0.3",
|
|
||||||
"ref-struct-napi": "^1.1.1"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "sherpa-onnx",
|
|
||||||
"version": "SHERPA_ONNX_VERSION",
|
|
||||||
"description": "Real-time speech recognition with Next-gen Kaldi",
|
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
|
||||||
},
|
|
||||||
"repository": {
|
|
||||||
"type": "git",
|
|
||||||
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
|
|
||||||
},
|
|
||||||
"keywords": [
|
|
||||||
"speech to text",
|
|
||||||
"text to speech",
|
|
||||||
"transcription",
|
|
||||||
"real-time speech recognition",
|
|
||||||
"without internet connect",
|
|
||||||
"embedded systems",
|
|
||||||
"open source",
|
|
||||||
"zipformer",
|
|
||||||
"asr",
|
|
||||||
"tts",
|
|
||||||
"stt",
|
|
||||||
"c++",
|
|
||||||
"onnxruntime",
|
|
||||||
"onnx",
|
|
||||||
"ai",
|
|
||||||
"next-gen kaldi",
|
|
||||||
"offline",
|
|
||||||
"privacy",
|
|
||||||
"open source",
|
|
||||||
"streaming speech recognition",
|
|
||||||
"speech",
|
|
||||||
"recognition"
|
|
||||||
],
|
|
||||||
"author": "The next-gen Kaldi team",
|
|
||||||
"license": "Apache-2.0",
|
|
||||||
"bugs": {
|
|
||||||
"url": "https://github.com/k2-fsa/sherpa-onnx/issues"
|
|
||||||
},
|
|
||||||
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
|
|
||||||
"dependencies": {
|
|
||||||
"ffi-napi": "^4.0.3",
|
|
||||||
"npm": "^6.14.18",
|
|
||||||
"ref-array-napi": "^1.2.2",
|
|
||||||
"ref-napi": "^3.0.3",
|
|
||||||
"ref-struct-napi": "^1.1.1"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,123 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# Prepare the npm package directory: stamp the library version into
# package.json and remove files that should not be published.
set -ex

# Directory of this script, and the directory two levels above it.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(realpath "$SCRIPT_DIR/../..")
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Take the second space-separated field of the matching CMakeLists.txt line,
# then the second field delimited by double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" "$SHERPA_ONNX_DIR/CMakeLists.txt" | cut -d " " -f 2 | cut -d '"' -f 2)

echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

# Quote the sed program so the version string is never word-split or globbed.
sed -i.bak "s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g" ./package.json.in

cp package.json.in package.json
rm package.json.in
rm package.json.in.bak
rm .clang-format
|
|
||||||
|
|
||||||
# Download the Windows x64 wheel for $SHERPA_ONNX_VERSION and copy its *.dll
# and *.lib files into lib/win-x64, dropping the portaudio DLL.
# Uses ./t as a scratch directory and removes it afterwards.
function windows_x64() {
  echo "Process Windows (x64)"
  mkdir -p lib/win-x64
  dst=$(realpath lib/win-x64)

  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl"
  local bin_dir="sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path from realpath; quote it so a checkout path with
  # spaces is not word-split.
  cp -v "${bin_dir}"/*.dll "$dst"
  cp -v "${bin_dir}"/*.lib "$dst"
  rm -fv "$dst/sherpa-onnx-portaudio.dll"

  cd ..
  rm -rf t
}
|
|
||||||
|
|
||||||
# Download the Windows x86 wheel for $SHERPA_ONNX_VERSION and copy its *.dll
# and *.lib files into lib/win-x86, dropping the portaudio DLL.
# Uses ./t as a scratch directory and removes it afterwards.
function windows_x86() {
  echo "Process Windows (x86)"
  mkdir -p lib/win-x86
  dst=$(realpath lib/win-x86)

  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl"
  local bin_dir="sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path from realpath; quote it so a checkout path with
  # spaces is not word-split.
  cp -v "${bin_dir}"/*.dll "$dst"
  cp -v "${bin_dir}"/*.lib "$dst"
  rm -fv "$dst/sherpa-onnx-portaudio.dll"

  cd ..
  rm -rf t
}
|
|
||||||
|
|
||||||
# Download the Linux x86_64 wheel for $SHERPA_ONNX_VERSION and copy its
# shared libraries into lib/linux-x64, then delete the ones listed below.
# Uses ./t as a scratch directory and removes it afterwards.
function linux_x64() {
  echo "Process Linux (x64)"
  mkdir -p lib/linux-x64
  dst=$(realpath lib/linux-x64)

  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path from realpath; quote it so a checkout path with
  # spaces is not word-split.
  cp -v sherpa_onnx/lib/*.so* "$dst"
  rm -v "$dst/libcargs.so"
  rm -v "$dst/libsherpa-onnx-portaudio.so"
  rm -v "$dst/libsherpa-onnx-fst.so"
  rm -v "$dst/libonnxruntime.so"

  cd ..
  rm -rf t
}
|
|
||||||
|
|
||||||
# Download the macOS x86_64 wheel for $SHERPA_ONNX_VERSION and copy its
# *.dylib files into lib/osx-x64, then delete the ones listed below.
# Uses ./t as a scratch directory and removes it afterwards.
function osx_x64() {
  echo "Process osx-x64"
  mkdir -p lib/osx-x64
  dst=$(realpath lib/osx-x64)

  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path from realpath; quote it so a checkout path with
  # spaces is not word-split.
  cp -v sherpa_onnx/lib/*.dylib "$dst/"
  rm -v "$dst/libonnxruntime.dylib"
  rm -v "$dst/libcargs.dylib"
  rm -v "$dst/libsherpa-onnx-fst.dylib"
  rm -v "$dst/libsherpa-onnx-portaudio.dylib"

  cd ..
  rm -rf t
}
|
|
||||||
|
|
||||||
# Download the macOS arm64 wheel for $SHERPA_ONNX_VERSION and copy its
# *.dylib files into lib/osx-arm64, then delete the ones listed below.
# Uses ./t as a scratch directory and removes it afterwards.
function osx_arm64() {
  echo "Process osx-arm64"
  mkdir -p lib/osx-arm64
  dst=$(realpath lib/osx-arm64)

  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path from realpath; quote it so a checkout path with
  # spaces is not word-split.
  cp -v sherpa_onnx/lib/*.dylib "$dst/"
  rm -v "$dst/libonnxruntime.dylib"
  rm -v "$dst/libcargs.dylib"
  rm -v "$dst/libsherpa-onnx-fst.dylib"
  rm -v "$dst/libsherpa-onnx-portaudio.dylib"

  cd ..
  rm -rf t
}
|
|
||||||
|
|
||||||
# Run each platform step in the original order and list what it produced.
# Each entry is "<function>:<directory under lib/>".
for step in \
    windows_x64:win-x64 \
    windows_x86:win-x86 \
    linux_x64:linux-x64 \
    osx_x64:osx-x64 \
    osx_arm64:osx-arm64; do
  "${step%%:*}"
  ls -lh "lib/${step##*:}"
done
|
|
||||||
@@ -94,6 +94,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
|||||||
SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!recognizer_config.Validate()) {
|
||||||
|
SHERPA_ONNX_LOGE("Errors in config!");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
|
SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
|
||||||
|
|
||||||
recognizer->impl =
|
recognizer->impl =
|
||||||
@@ -324,6 +329,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
|
|||||||
SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!recognizer_config.Validate()) {
|
||||||
|
SHERPA_ONNX_LOGE("Errors in config");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
|
SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
|
||||||
|
|
||||||
recognizer->impl =
|
recognizer->impl =
|
||||||
@@ -480,6 +490,11 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
|
|||||||
SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!vad_config.Validate()) {
|
||||||
|
SHERPA_ONNX_LOGE("Errors in config");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
|
SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
|
||||||
p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
|
p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
|
||||||
vad_config, buffer_size_in_seconds);
|
vad_config, buffer_size_in_seconds);
|
||||||
@@ -570,6 +585,11 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
|||||||
SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!tts_config.Validate()) {
|
||||||
|
SHERPA_ONNX_LOGE("Errors in config");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
|
SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
|
||||||
|
|
||||||
tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
|
tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
|
||||||
|
|||||||
@@ -5,3 +5,7 @@ endif()
|
|||||||
if(SHERPA_ONNX_ENABLE_WASM_ASR)
|
if(SHERPA_ONNX_ENABLE_WASM_ASR)
|
||||||
add_subdirectory(asr)
|
add_subdirectory(asr)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
|
||||||
|
add_subdirectory(nodejs)
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() {
|
|||||||
|
|
||||||
startBtn.disabled = false;
|
startBtn.disabled = false;
|
||||||
|
|
||||||
recognizer = createRecognizer();
|
recognizer = createOnlineRecognizer(Module);
|
||||||
console.log('recognizer is created!', recognizer);
|
console.log('recognizer is created!', recognizer);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,161 +1,181 @@
|
|||||||
function freeConfig(config) {
|
function freeConfig(config, Module) {
|
||||||
if ('buffer' in config) {
|
if ('buffer' in config) {
|
||||||
_free(config.buffer);
|
Module._free(config.buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('config' in config) {
|
if ('config' in config) {
|
||||||
freeConfig(config.config)
|
freeConfig(config.config, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('transducer' in config) {
|
if ('transducer' in config) {
|
||||||
freeConfig(config.transducer)
|
freeConfig(config.transducer, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('paraformer' in config) {
|
if ('paraformer' in config) {
|
||||||
freeConfig(config.paraformer)
|
freeConfig(config.paraformer, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('ctc' in config) {
|
if ('ctc' in config) {
|
||||||
freeConfig(config.ctc)
|
freeConfig(config.ctc, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('feat' in config) {
|
if ('feat' in config) {
|
||||||
freeConfig(config.feat)
|
freeConfig(config.feat, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('model' in config) {
|
if ('model' in config) {
|
||||||
freeConfig(config.model)
|
freeConfig(config.model, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
_free(config.ptr);
|
if ('nemoCtc' in config) {
|
||||||
|
freeConfig(config.nemoCtc, Module)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ('whisper' in config) {
|
||||||
|
freeConfig(config.whisper, Module)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ('tdnn' in config) {
|
||||||
|
freeConfig(config.tdnn, Module)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ('lm' in config) {
|
||||||
|
freeConfig(config.lm, Module)
|
||||||
|
}
|
||||||
|
|
||||||
|
Module._free(config.ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The user should free the returned pointers
|
// The user should free the returned pointers
|
||||||
function initSherpaOnnxOnlineTransducerModelConfig(config) {
|
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
|
||||||
let encoderLen = lengthBytesUTF8(config.encoder) + 1;
|
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
|
||||||
let decoderLen = lengthBytesUTF8(config.decoder) + 1;
|
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
|
||||||
let joinerLen = lengthBytesUTF8(config.joiner) + 1;
|
const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;
|
||||||
|
|
||||||
let n = encoderLen + decoderLen + joinerLen;
|
const n = encoderLen + decoderLen + joinerLen;
|
||||||
|
|
||||||
let buffer = _malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
let len = 3 * 4; // 3 pointers
|
const len = 3 * 4; // 3 pointers
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
||||||
offset += encoderLen;
|
offset += encoderLen;
|
||||||
|
|
||||||
stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
||||||
offset += decoderLen;
|
offset += decoderLen;
|
||||||
|
|
||||||
stringToUTF8(config.joiner, buffer + offset, joinerLen);
|
Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
setValue(ptr, buffer + offset, 'i8*');
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
offset += encoderLen;
|
offset += encoderLen;
|
||||||
|
|
||||||
setValue(ptr + 4, buffer + offset, 'i8*');
|
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||||
offset += decoderLen;
|
offset += decoderLen;
|
||||||
|
|
||||||
setValue(ptr + 8, buffer + offset, 'i8*');
|
Module.setValue(ptr + 8, buffer + offset, 'i8*');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOnlineParaformerModelConfig(config) {
|
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
|
||||||
let encoderLen = lengthBytesUTF8(config.encoder) + 1;
|
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
|
||||||
let decoderLen = lengthBytesUTF8(config.decoder) + 1;
|
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
|
||||||
|
|
||||||
let n = encoderLen + decoderLen;
|
const n = encoderLen + decoderLen;
|
||||||
let buffer = _malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
let len = 2 * 4; // 2 pointers
|
const len = 2 * 4; // 2 pointers
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
||||||
offset += encoderLen;
|
offset += encoderLen;
|
||||||
|
|
||||||
stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
setValue(ptr, buffer + offset, 'i8*');
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
offset += encoderLen;
|
offset += encoderLen;
|
||||||
|
|
||||||
setValue(ptr + 4, buffer + offset, 'i8*');
|
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config) {
|
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
|
||||||
let n = lengthBytesUTF8(config.model) + 1;
|
const n = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
let buffer = _malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
let len = 1 * 4; // 1 pointer
|
const len = 1 * 4; // 1 pointer
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
stringToUTF8(config.model, buffer, n);
|
Module.stringToUTF8(config.model, buffer, n);
|
||||||
|
|
||||||
setValue(ptr, buffer, 'i8*');
|
Module.setValue(ptr, buffer, 'i8*');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOnlineModelConfig(config) {
|
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
||||||
let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer);
|
const transducer =
|
||||||
let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer);
|
initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
|
||||||
let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc);
|
const paraformer =
|
||||||
|
initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
|
||||||
|
const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
|
||||||
|
config.zipformer2Ctc, Module);
|
||||||
|
|
||||||
let len = transducer.len + paraformer.len + ctc.len + 5 * 4;
|
const len = transducer.len + paraformer.len + ctc.len + 5 * 4;
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
_CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
||||||
offset += transducer.len;
|
offset += transducer.len;
|
||||||
|
|
||||||
_CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
|
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
|
||||||
offset += paraformer.len;
|
offset += paraformer.len;
|
||||||
|
|
||||||
_CopyHeap(ctc.ptr, ctc.len, ptr + offset);
|
Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
|
||||||
offset += ctc.len;
|
offset += ctc.len;
|
||||||
|
|
||||||
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
|
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||||
let providerLen = lengthBytesUTF8(config.provider) + 1;
|
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
||||||
let modelTypeLen = lengthBytesUTF8(config.modelType) + 1;
|
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
|
||||||
let bufferLen = tokensLen + providerLen + modelTypeLen;
|
const bufferLen = tokensLen + providerLen + modelTypeLen;
|
||||||
let buffer = _malloc(bufferLen);
|
const buffer = Module._malloc(bufferLen);
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
stringToUTF8(config.tokens, buffer, tokensLen);
|
Module.stringToUTF8(config.tokens, buffer, tokensLen);
|
||||||
offset += tokensLen;
|
offset += tokensLen;
|
||||||
|
|
||||||
stringToUTF8(config.provider, buffer + offset, providerLen);
|
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
|
||||||
offset += providerLen;
|
offset += providerLen;
|
||||||
|
|
||||||
stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
||||||
|
|
||||||
offset = transducer.len + paraformer.len + ctc.len;
|
offset = transducer.len + paraformer.len + ctc.len;
|
||||||
setValue(ptr + offset, buffer, 'i8*'); // tokens
|
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.numThreads, 'i32');
|
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.debug, 'i32');
|
Module.setValue(ptr + offset, config.debug, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
|
Module.setValue(
|
||||||
|
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxFeatureConfig(config) {
|
function initSherpaOnnxFeatureConfig(config, Module) {
|
||||||
let len = 2 * 4; // 2 pointers
|
const len = 2 * 4; // 2 pointers
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
setValue(ptr, config.sampleRate, 'i32');
|
Module.setValue(ptr, config.sampleRate, 'i32');
|
||||||
setValue(ptr + 4, config.featureDim, 'i32');
|
Module.setValue(ptr + 4, config.featureDim, 'i32');
|
||||||
return {ptr: ptr, len: len};
|
return {ptr: ptr, len: len};
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOnlineRecognizerConfig(config) {
|
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||||
let feat = initSherpaOnnxFeatureConfig(config.featConfig);
|
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
|
||||||
let model = initSherpaOnnxOnlineModelConfig(config.modelConfig);
|
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
|
||||||
|
|
||||||
let len = feat.len + model.len + 8 * 4;
|
const len = feat.len + model.len + 8 * 4;
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
_CopyHeap(feat.ptr, feat.len, ptr + offset);
|
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
|
||||||
offset += feat.len;
|
offset += feat.len;
|
||||||
|
|
||||||
_CopyHeap(model.ptr, model.len, ptr + offset);
|
Module._CopyHeap(model.ptr, model.len, ptr + offset);
|
||||||
offset += model.len;
|
offset += model.len;
|
||||||
|
|
||||||
let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1;
|
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
||||||
let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1;
|
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||||
let bufferLen = decodingMethodLen + hotwordsFileLen;
|
const bufferLen = decodingMethodLen + hotwordsFileLen;
|
||||||
let buffer = _malloc(bufferLen);
|
const buffer = Module._malloc(bufferLen);
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
|
Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
|
||||||
offset += decodingMethodLen;
|
offset += decodingMethodLen;
|
||||||
|
|
||||||
stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||||
|
|
||||||
offset = feat.len + model.len;
|
offset = feat.len + model.len;
|
||||||
setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.maxActivePaths, 'i32');
|
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.enableEndpoint, 'i32');
|
Module.setValue(ptr + offset, config.enableEndpoint, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
|
Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
|
Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
|
Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
|
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.hotwordsScore, 'float');
|
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function createRecognizer() {
|
function createOnlineRecognizer(Module, myConfig) {
|
||||||
let onlineTransducerModelConfig = {
|
const onlineTransducerModelConfig = {
|
||||||
encoder: '',
|
encoder: '',
|
||||||
decoder: '',
|
decoder: '',
|
||||||
joiner: '',
|
joiner: '',
|
||||||
}
|
};
|
||||||
|
|
||||||
let onlineParaformerModelConfig = {
|
const onlineParaformerModelConfig = {
|
||||||
encoder: '',
|
encoder: '',
|
||||||
decoder: '',
|
decoder: '',
|
||||||
}
|
};
|
||||||
|
|
||||||
let onlineZipformer2CtcModelConfig = {
|
const onlineZipformer2CtcModelConfig = {
|
||||||
model: '',
|
model: '',
|
||||||
}
|
};
|
||||||
|
|
||||||
let type = 0;
|
let type = 0;
|
||||||
|
|
||||||
@@ -266,7 +286,7 @@ function createRecognizer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let onlineModelConfig = {
|
const onlineModelConfig = {
|
||||||
transducer: onlineTransducerModelConfig,
|
transducer: onlineTransducerModelConfig,
|
||||||
paraformer: onlineParaformerModelConfig,
|
paraformer: onlineParaformerModelConfig,
|
||||||
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
@@ -275,12 +295,12 @@ function createRecognizer() {
|
|||||||
provider: 'cpu',
|
provider: 'cpu',
|
||||||
debug: 1,
|
debug: 1,
|
||||||
modelType: '',
|
modelType: '',
|
||||||
}
|
};
|
||||||
|
|
||||||
let featureConfig = {
|
const featureConfig = {
|
||||||
sampleRate: 16000,
|
sampleRate: 16000,
|
||||||
featureDim: 80,
|
featureDim: 80,
|
||||||
}
|
};
|
||||||
|
|
||||||
let recognizerConfig = {
|
let recognizerConfig = {
|
||||||
featConfig: featureConfig,
|
featConfig: featureConfig,
|
||||||
@@ -293,23 +313,336 @@ function createRecognizer() {
|
|||||||
rule3MinUtteranceLength: 20,
|
rule3MinUtteranceLength: 20,
|
||||||
hotwordsFile: '',
|
hotwordsFile: '',
|
||||||
hotwordsScore: 1.5,
|
hotwordsScore: 1.5,
|
||||||
|
};
|
||||||
|
if (myConfig) {
|
||||||
|
recognizerConfig = myConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new OnlineRecognizer(recognizerConfig);
|
return new OnlineRecognizer(recognizerConfig, Module);
|
||||||
}
|
}
|
||||||
|
|
||||||
class OnlineStream {
|
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
|
||||||
constructor(handle) {
|
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
|
||||||
|
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
|
||||||
|
const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;
|
||||||
|
|
||||||
|
const n = encoderLen + decoderLen + joinerLen;
|
||||||
|
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 3 * 4; // 3 pointers
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
let offset = 0;
|
||||||
|
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
||||||
|
offset += encoderLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
||||||
|
offset += decoderLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
|
offset += encoderLen;
|
||||||
|
|
||||||
|
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||||
|
offset += decoderLen;
|
||||||
|
|
||||||
|
Module.setValue(ptr + 8, buffer + offset, 'i8*');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
|
||||||
|
const n = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
|
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 1 * 4; // 1 pointer
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.model, buffer, n);
|
||||||
|
|
||||||
|
Module.setValue(ptr, buffer, 'i8*');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
|
||||||
|
const n = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
|
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 1 * 4; // 1 pointer
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.model, buffer, n);
|
||||||
|
|
||||||
|
Module.setValue(ptr, buffer, 'i8*');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||||
|
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
|
||||||
|
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
|
||||||
|
|
||||||
|
const n = encoderLen + decoderLen;
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 2 * 4; // 2 pointers
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
let offset = 0;
|
||||||
|
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
|
||||||
|
offset += encoderLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
|
offset += encoderLen;
|
||||||
|
|
||||||
|
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
|
||||||
|
const n = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 1 * 4; // 1 pointer
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.model, buffer, n);
|
||||||
|
|
||||||
|
Module.setValue(ptr, buffer, 'i8*');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineLMConfig(config, Module) {
|
||||||
|
const n = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
|
const len = 2 * 4;
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.model, buffer, n);
|
||||||
|
Module.setValue(ptr, buffer, 'i8*');
|
||||||
|
Module.setValue(ptr + 4, config.scale, 'float');
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||||
|
const transducer =
|
||||||
|
initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
|
||||||
|
const paraformer =
|
||||||
|
initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
|
||||||
|
const nemoCtc =
|
||||||
|
initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
|
||||||
|
const whisper =
|
||||||
|
initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
|
||||||
|
const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
|
||||||
|
|
||||||
|
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||||
|
tdnn.len + 5 * 4;
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
let offset = 0;
|
||||||
|
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
||||||
|
offset += transducer.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
|
||||||
|
offset += paraformer.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
|
||||||
|
offset += nemoCtc.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(whisper.ptr, whisper.len, ptr + offset);
|
||||||
|
offset += whisper.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
|
||||||
|
offset += tdnn.len;
|
||||||
|
|
||||||
|
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||||
|
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
||||||
|
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
|
||||||
|
const bufferLen = tokensLen + providerLen + modelTypeLen;
|
||||||
|
const buffer = Module._malloc(bufferLen);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
Module.stringToUTF8(config.tokens, buffer, tokensLen);
|
||||||
|
offset += tokensLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
|
||||||
|
offset += providerLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
||||||
|
|
||||||
|
offset =
|
||||||
|
transducer.len + paraformer.len + nemoCtc.len + whisper.len + tdnn.len;
|
||||||
|
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, config.debug, 'i32');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(
|
||||||
|
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||||
|
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
|
||||||
|
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
|
||||||
|
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
|
||||||
|
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
|
||||||
|
|
||||||
|
const len = feat.len + model.len + lm.len + 4 * 4;
|
||||||
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
|
let offset = 0;
|
||||||
|
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
|
||||||
|
offset += feat.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(model.ptr, model.len, ptr + offset);
|
||||||
|
offset += model.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(lm.ptr, lm.len, ptr + offset);
|
||||||
|
offset += lm.len;
|
||||||
|
|
||||||
|
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
||||||
|
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||||
|
const bufferLen = decodingMethodLen + hotwordsFileLen;
|
||||||
|
const buffer = Module._malloc(bufferLen);
|
||||||
|
|
||||||
|
offset = 0;
|
||||||
|
Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
|
||||||
|
offset += decodingMethodLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||||
|
|
||||||
|
offset = feat.len + model.len + lm.len;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
return {
|
||||||
|
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class OfflineStream {
|
||||||
|
constructor(handle, Module) {
|
||||||
this.handle = handle;
|
this.handle = handle;
|
||||||
this.pointer = null; // buffer
|
this.Module = Module;
|
||||||
this.n = 0; // buffer size
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free() {
|
free() {
|
||||||
if (this.handle) {
|
if (this.handle) {
|
||||||
_DestroyOnlineStream(this.handle);
|
this.Module._DestroyOfflineStream(this.handle);
|
||||||
this.handle = null;
|
this.handle = null;
|
||||||
_free(this.pointer);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param sampleRate {Number}
|
||||||
|
* @param samples {Float32Array} Containing samples in the range [-1, 1]
|
||||||
|
*/
|
||||||
|
acceptWaveform(sampleRate, samples) {
|
||||||
|
const pointer =
|
||||||
|
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
|
||||||
|
this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
|
||||||
|
this.Module._AcceptWaveformOffline(
|
||||||
|
this.handle, sampleRate, pointer, samples.length);
|
||||||
|
this.Module._free(pointer);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class OfflineRecognizer {
|
||||||
|
constructor(configObj, Module) {
|
||||||
|
this.config = configObj;
|
||||||
|
const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
|
||||||
|
const handle = Module._CreateOfflineRecognizer(config.ptr);
|
||||||
|
freeConfig(config, Module);
|
||||||
|
|
||||||
|
this.handle = handle;
|
||||||
|
this.Module = Module;
|
||||||
|
}
|
||||||
|
|
||||||
|
free() {
|
||||||
|
this.Module._DestroyOfflineRecognizer(this.handle);
|
||||||
|
this.handle = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
createStream() {
|
||||||
|
const handle = this.Module._CreateOfflineStream(this.handle);
|
||||||
|
return new OfflineStream(handle, this.Module);
|
||||||
|
}
|
||||||
|
|
||||||
|
decode(stream) {
|
||||||
|
this.Module._DecodeOfflineStream(this.handle, stream.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
getResult(stream) {
|
||||||
|
const r = this.Module._GetOfflineStreamResult(stream.handle);
|
||||||
|
|
||||||
|
const textPtr = this.Module.getValue(r, 'i8*');
|
||||||
|
const text = this.Module.UTF8ToString(textPtr);
|
||||||
|
|
||||||
|
this.Module._DestroyOfflineRecognizerResult(r);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class OnlineStream {
|
||||||
|
constructor(handle, Module) {
|
||||||
|
this.handle = handle;
|
||||||
|
this.pointer = null; // buffer
|
||||||
|
this.n = 0; // buffer size
|
||||||
|
this.Module = Module;
|
||||||
|
}
|
||||||
|
|
||||||
|
free() {
|
||||||
|
if (this.handle) {
|
||||||
|
this.Module._DestroyOnlineStream(this.handle);
|
||||||
|
this.handle = null;
|
||||||
|
this.Module._free(this.pointer);
|
||||||
this.pointer = null;
|
this.pointer = null;
|
||||||
this.n = 0;
|
this.n = 0;
|
||||||
}
|
}
|
||||||
@@ -321,61 +654,73 @@ class OnlineStream {
|
|||||||
*/
|
*/
|
||||||
acceptWaveform(sampleRate, samples) {
|
acceptWaveform(sampleRate, samples) {
|
||||||
if (this.n < samples.length) {
|
if (this.n < samples.length) {
|
||||||
_free(this.pointer);
|
this.Module._free(this.pointer);
|
||||||
this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT);
|
this.pointer =
|
||||||
|
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
|
||||||
this.n = samples.length
|
this.n = samples.length
|
||||||
}
|
}
|
||||||
|
|
||||||
Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
|
this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
|
||||||
_AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length);
|
this.Module._AcceptWaveform(
|
||||||
|
this.handle, sampleRate, this.pointer, samples.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
inputFinished() {
|
inputFinished() {
|
||||||
_InputFinished(this.handle);
|
this.Module._InputFinished(this.handle);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class OnlineRecognizer {
|
class OnlineRecognizer {
|
||||||
constructor(configObj) {
|
constructor(configObj, Module) {
|
||||||
let config = initSherpaOnnxOnlineRecognizerConfig(configObj)
|
this.config = configObj;
|
||||||
let handle = _CreateOnlineRecognizer(config.ptr);
|
const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module)
|
||||||
|
const handle = Module._CreateOnlineRecognizer(config.ptr);
|
||||||
|
|
||||||
freeConfig(config);
|
freeConfig(config, Module);
|
||||||
|
|
||||||
this.handle = handle;
|
this.handle = handle;
|
||||||
|
this.Module = Module;
|
||||||
}
|
}
|
||||||
|
|
||||||
free() {
|
free() {
|
||||||
_DestroyOnlineRecognizer(this.handle);
|
this.Module._DestroyOnlineRecognizer(this.handle);
|
||||||
this.handle = 0
|
this.handle = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
createStream() {
|
createStream() {
|
||||||
let handle = _CreateOnlineStream(this.handle);
|
const handle = this.Module._CreateOnlineStream(this.handle);
|
||||||
return new OnlineStream(handle);
|
return new OnlineStream(handle, this.Module);
|
||||||
}
|
}
|
||||||
|
|
||||||
isReady(stream) {
|
isReady(stream) {
|
||||||
return _IsOnlineStreamReady(this.handle, stream.handle) == 1;
|
return this.Module._IsOnlineStreamReady(this.handle, stream.handle) == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
decode(stream) {
|
decode(stream) {
|
||||||
return _DecodeOnlineStream(this.handle, stream.handle);
|
this.Module._DecodeOnlineStream(this.handle, stream.handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
isEndpoint(stream) {
|
isEndpoint(stream) {
|
||||||
return _IsEndpoint(this.handle, stream.handle) == 1;
|
return this.Module._IsEndpoint(this.handle, stream.handle) == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
reset(stream) {
|
reset(stream) {
|
||||||
_Reset(this.handle, stream.handle);
|
this.Module._Reset(this.handle, stream.handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
getResult(stream) {
|
getResult(stream) {
|
||||||
let r = _GetOnlineStreamResult(this.handle, stream.handle);
|
const r = this.Module._GetOnlineStreamResult(this.handle, stream.handle);
|
||||||
let textPtr = getValue(r, 'i8*');
|
const textPtr = this.Module.getValue(r, 'i8*');
|
||||||
let text = UTF8ToString(textPtr);
|
const text = this.Module.UTF8ToString(textPtr);
|
||||||
_DestroyOnlineRecognizerResult(r);
|
this.Module._DestroyOnlineRecognizerResult(r);
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (typeof process == 'object' && typeof process.versions == 'object' &&
|
||||||
|
typeof process.versions.node == 'string') {
|
||||||
|
module.exports = {
|
||||||
|
createOnlineRecognizer,
|
||||||
|
OfflineRecognizer,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// wasm/sherpa-onnx-wasm-asr-main.cc
|
// wasm/sherpa-onnx-wasm-main-asr.cc
|
||||||
//
|
//
|
||||||
// Copyright (c) 2024 Xiaomi Corporation
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
76
wasm/nodejs/CMakeLists.txt
Normal file
76
wasm/nodejs/CMakeLists.txt
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
|
||||||
|
message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(exported_functions
|
||||||
|
#tts
|
||||||
|
PrintOfflineTtsConfig
|
||||||
|
SherpaOnnxCreateOfflineTts
|
||||||
|
SherpaOnnxDestroyOfflineTts
|
||||||
|
SherpaOnnxDestroyOfflineTtsGeneratedAudio
|
||||||
|
SherpaOnnxOfflineTtsGenerate
|
||||||
|
SherpaOnnxOfflineTtsGenerateWithCallback
|
||||||
|
SherpaOnnxOfflineTtsNumSpeakers
|
||||||
|
SherpaOnnxOfflineTtsSampleRate
|
||||||
|
SherpaOnnxWriteWave
|
||||||
|
# streaming asr
|
||||||
|
AcceptWaveform
|
||||||
|
CreateOnlineRecognizer
|
||||||
|
CreateOnlineStream
|
||||||
|
DecodeOnlineStream
|
||||||
|
DestroyOnlineRecognizer
|
||||||
|
DestroyOnlineRecognizerResult
|
||||||
|
DestroyOnlineStream
|
||||||
|
GetOnlineStreamResult
|
||||||
|
InputFinished
|
||||||
|
IsEndpoint
|
||||||
|
IsOnlineStreamReady
|
||||||
|
Reset
|
||||||
|
# non-streaming ASR
|
||||||
|
PrintOfflineRecognizerConfig
|
||||||
|
CreateOfflineRecognizer
|
||||||
|
DestroyOfflineRecognizer
|
||||||
|
CreateOfflineStream
|
||||||
|
DestroyOfflineStream
|
||||||
|
AcceptWaveformOffline
|
||||||
|
DecodeOfflineStream
|
||||||
|
DecodeMultipleOfflineStreams
|
||||||
|
GetOfflineStreamResult
|
||||||
|
DestroyOfflineRecognizerResult
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
set(mangled_exported_functions)
|
||||||
|
foreach(x IN LISTS exported_functions)
|
||||||
|
list(APPEND mangled_exported_functions "_${x}")
|
||||||
|
endforeach()
|
||||||
|
list(JOIN mangled_exported_functions "," all_exported_functions)
|
||||||
|
|
||||||
|
include_directories(${CMAKE_SOURCE_DIR})
|
||||||
|
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
|
||||||
|
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
|
||||||
|
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
|
||||||
|
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ")
|
||||||
|
string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
|
||||||
|
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||||
|
string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")
|
||||||
|
|
||||||
|
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||||
|
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
||||||
|
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
|
||||||
|
|
||||||
|
add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc)
|
||||||
|
target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api)
|
||||||
|
install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs)
|
||||||
|
|
||||||
|
install(
|
||||||
|
FILES
|
||||||
|
${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
|
||||||
|
${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
|
||||||
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
||||||
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
||||||
|
DESTINATION
|
||||||
|
bin/wasm/nodejs
|
||||||
|
)
|
||||||
104
wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
Normal file
104
wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
// wasm/sherpa-onnx-wasm-main-nodejs.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/c-api/c-api.h"
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||||
|
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 2 * 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
|
||||||
|
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||||
|
sizeof(SherpaOnnxOfflineTransducerModelConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineParaformerModelConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4,
|
||||||
|
"");
|
||||||
|
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
|
||||||
|
sizeof(SherpaOnnxFeatureConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineLMConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
|
||||||
|
"");
|
||||||
|
|
||||||
|
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
|
||||||
|
auto tts_model_config = &tts_config->model;
|
||||||
|
auto vits_model_config = &tts_model_config->vits;
|
||||||
|
fprintf(stdout, "----------vits model config----------\n");
|
||||||
|
fprintf(stdout, "model: %s\n", vits_model_config->model);
|
||||||
|
fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
|
||||||
|
fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
|
||||||
|
fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
|
||||||
|
fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
|
||||||
|
fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
|
||||||
|
fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------tts model config----------\n");
|
||||||
|
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
||||||
|
fprintf(stdout, "debug: %d\n", tts_model_config->debug);
|
||||||
|
fprintf(stdout, "provider: %s\n", tts_model_config->provider);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------tts config----------\n");
|
||||||
|
fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
|
||||||
|
fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||||
|
auto model_config = &config->model_config;
|
||||||
|
auto feat = &config->feat_config;
|
||||||
|
auto transducer = &model_config->transducer;
|
||||||
|
auto paraformer = &model_config->paraformer;
|
||||||
|
auto nemo_ctc = &model_config->nemo_ctc;
|
||||||
|
auto whisper = &model_config->whisper;
|
||||||
|
auto tdnn = &model_config->tdnn;
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||||
|
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||||
|
fprintf(stdout, "decoder: %s\n", transducer->decoder);
|
||||||
|
fprintf(stdout, "joiner: %s\n", transducer->joiner);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline paraformer model config----------\n");
|
||||||
|
fprintf(stdout, "model: %s\n", paraformer->model);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline nemo_ctc model config----------\n");
|
||||||
|
fprintf(stdout, "model: %s\n", nemo_ctc->model);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline whisper model config----------\n");
|
||||||
|
fprintf(stdout, "encoder: %s\n", whisper->encoder);
|
||||||
|
fprintf(stdout, "decoder: %s\n", whisper->decoder);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline tdnn model config----------\n");
|
||||||
|
fprintf(stdout, "model: %s\n", tdnn->model);
|
||||||
|
|
||||||
|
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||||
|
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||||
|
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||||
|
fprintf(stdout, "debug: %d\n", model_config->debug);
|
||||||
|
fprintf(stdout, "model type: %s\n", model_config->model_type);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------feat config----------\n");
|
||||||
|
fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
|
||||||
|
fprintf(stdout, "feat dim: %d\n", feat->feature_dim);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------recognizer config----------\n");
|
||||||
|
fprintf(stdout, "decoding method: %s\n", config->decoding_method);
|
||||||
|
fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
|
||||||
|
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
|
||||||
|
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
|
||||||
|
std::copy(src, src + num_bytes, dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() {
|
|||||||
console.log('Model files downloaded!');
|
console.log('Model files downloaded!');
|
||||||
|
|
||||||
console.log('Initializing tts ......');
|
console.log('Initializing tts ......');
|
||||||
tts = initSherpaOnnxOfflineTts()
|
tts = createOfflineTts(Module)
|
||||||
if (tts.numSpeakers > 1) {
|
if (tts.numSpeakers > 1) {
|
||||||
speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
|
speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,109 +1,109 @@
|
|||||||
|
|
||||||
function freeConfig(config) {
|
function freeConfig(config, Module) {
|
||||||
if ('buffer' in config) {
|
if ('buffer' in config) {
|
||||||
_free(config.buffer);
|
Module._free(config.buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('config' in config) {
|
if ('config' in config) {
|
||||||
freeConfig(config.config)
|
freeConfig(config.config, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
_free(config.ptr);
|
Module._free(config.ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The user should free the returned pointers
|
// The user should free the returned pointers
|
||||||
function initSherpaOnnxOfflineTtsVitsModelConfig(config) {
|
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
|
||||||
let modelLen = lengthBytesUTF8(config.model) + 1;
|
const modelLen = Module.lengthBytesUTF8(config.model) + 1;
|
||||||
let lexiconLen = lengthBytesUTF8(config.lexicon) + 1;
|
const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1;
|
||||||
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
|
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||||
let dataDirLen = lengthBytesUTF8(config.dataDir) + 1;
|
const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1;
|
||||||
|
|
||||||
let n = modelLen + lexiconLen + tokensLen + dataDirLen;
|
const n = modelLen + lexiconLen + tokensLen + dataDirLen;
|
||||||
|
|
||||||
let buffer = _malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
let len = 7 * 4;
|
const len = 7 * 4;
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
stringToUTF8(config.model, buffer + offset, modelLen);
|
Module.stringToUTF8(config.model, buffer + offset, modelLen);
|
||||||
offset += modelLen;
|
offset += modelLen;
|
||||||
|
|
||||||
stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
|
Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
|
||||||
offset += lexiconLen;
|
offset += lexiconLen;
|
||||||
|
|
||||||
stringToUTF8(config.tokens, buffer + offset, tokensLen);
|
Module.stringToUTF8(config.tokens, buffer + offset, tokensLen);
|
||||||
offset += tokensLen;
|
offset += tokensLen;
|
||||||
|
|
||||||
stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
|
Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
|
||||||
offset += dataDirLen;
|
offset += dataDirLen;
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
setValue(ptr, buffer + offset, 'i8*');
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
offset += modelLen;
|
offset += modelLen;
|
||||||
|
|
||||||
setValue(ptr + 4, buffer + offset, 'i8*');
|
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||||
offset += lexiconLen;
|
offset += lexiconLen;
|
||||||
|
|
||||||
setValue(ptr + 8, buffer + offset, 'i8*');
|
Module.setValue(ptr + 8, buffer + offset, 'i8*');
|
||||||
offset += tokensLen;
|
offset += tokensLen;
|
||||||
|
|
||||||
setValue(ptr + 12, buffer + offset, 'i8*');
|
Module.setValue(ptr + 12, buffer + offset, 'i8*');
|
||||||
offset += dataDirLen;
|
offset += dataDirLen;
|
||||||
|
|
||||||
setValue(ptr + 16, config.noiseScale, 'float');
|
Module.setValue(ptr + 16, config.noiseScale, 'float');
|
||||||
setValue(ptr + 20, config.noiseScaleW, 'float');
|
Module.setValue(ptr + 20, config.noiseScaleW, 'float');
|
||||||
setValue(ptr + 24, config.lengthScale, 'float');
|
Module.setValue(ptr + 24, config.lengthScale, 'float');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOfflineTtsModelConfig(config) {
|
function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
|
||||||
let vitsModelConfig =
|
const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
|
||||||
initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig);
|
config.offlineTtsVitsModelConfig, Module);
|
||||||
|
|
||||||
let len = vitsModelConfig.len + 3 * 4;
|
const len = vitsModelConfig.len + 3 * 4;
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
_CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
|
Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
|
||||||
offset += vitsModelConfig.len;
|
offset += vitsModelConfig.len;
|
||||||
|
|
||||||
setValue(ptr + offset, config.numThreads, 'i32');
|
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.debug, 'i32');
|
Module.setValue(ptr + offset, config.debug, 'i32');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
let providerLen = lengthBytesUTF8(config.provider) + 1;
|
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
||||||
let buffer = _malloc(providerLen);
|
const buffer = Module._malloc(providerLen);
|
||||||
stringToUTF8(config.provider, buffer, providerLen);
|
Module.stringToUTF8(config.provider, buffer, providerLen);
|
||||||
setValue(ptr + offset, buffer, 'i8*');
|
Module.setValue(ptr + offset, buffer, 'i8*');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
|
buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOfflineTtsConfig(config) {
|
function initSherpaOnnxOfflineTtsConfig(config, Module) {
|
||||||
let modelConfig =
|
const modelConfig =
|
||||||
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig);
|
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
|
||||||
let len = modelConfig.len + 2 * 4;
|
const len = modelConfig.len + 2 * 4;
|
||||||
let ptr = _malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
_CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
|
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
|
||||||
offset += modelConfig.len;
|
offset += modelConfig.len;
|
||||||
|
|
||||||
let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1;
|
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
|
||||||
let buffer = _malloc(ruleFstsLen);
|
const buffer = Module._malloc(ruleFstsLen);
|
||||||
stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
|
Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
|
||||||
setValue(ptr + offset, buffer, 'i8*');
|
Module.setValue(ptr + offset, buffer, 'i8*');
|
||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
setValue(ptr + offset, config.maxNumSentences, 'i32');
|
Module.setValue(ptr + offset, config.maxNumSentences, 'i32');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len, config: modelConfig,
|
buffer: buffer, ptr: ptr, len: len, config: modelConfig,
|
||||||
@@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class OfflineTts {
|
class OfflineTts {
|
||||||
constructor(configObj) {
|
constructor(configObj, Module) {
|
||||||
let config = initSherpaOnnxOfflineTtsConfig(configObj)
|
console.log(configObj)
|
||||||
let handle = _SherpaOnnxCreateOfflineTts(config.ptr);
|
const config = initSherpaOnnxOfflineTtsConfig(configObj, Module)
|
||||||
|
const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr);
|
||||||
|
|
||||||
freeConfig(config);
|
freeConfig(config, Module);
|
||||||
|
|
||||||
this.handle = handle;
|
this.handle = handle;
|
||||||
this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle);
|
this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle);
|
||||||
this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle);
|
this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle);
|
||||||
|
this.Module = Module
|
||||||
}
|
}
|
||||||
|
|
||||||
free() {
|
free() {
|
||||||
_SherpaOnnxDestroyOfflineTts(this.handle);
|
this.Module._SherpaOnnxDestroyOfflineTts(this.handle);
|
||||||
this.handle = 0
|
this.handle = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -133,29 +135,44 @@ class OfflineTts {
|
|||||||
// speed: 1.0
|
// speed: 1.0
|
||||||
// }
|
// }
|
||||||
generate(config) {
|
generate(config) {
|
||||||
let textLen = lengthBytesUTF8(config.text) + 1;
|
const textLen = this.Module.lengthBytesUTF8(config.text) + 1;
|
||||||
let textPtr = _malloc(textLen);
|
const textPtr = this.Module._malloc(textLen);
|
||||||
stringToUTF8(config.text, textPtr, textLen);
|
this.Module.stringToUTF8(config.text, textPtr, textLen);
|
||||||
|
|
||||||
let h = _SherpaOnnxOfflineTtsGenerate(
|
const h = this.Module._SherpaOnnxOfflineTtsGenerate(
|
||||||
this.handle, textPtr, config.sid, config.speed);
|
this.handle, textPtr, config.sid, config.speed);
|
||||||
|
|
||||||
let numSamples = HEAP32[h / 4 + 1];
|
const numSamples = this.Module.HEAP32[h / 4 + 1];
|
||||||
let sampleRate = HEAP32[h / 4 + 2];
|
const sampleRate = this.Module.HEAP32[h / 4 + 2];
|
||||||
|
|
||||||
let samplesPtr = HEAP32[h / 4] / 4;
|
const samplesPtr = this.Module.HEAP32[h / 4] / 4;
|
||||||
let samples = new Float32Array(numSamples);
|
const samples = new Float32Array(numSamples);
|
||||||
for (let i = 0; i < numSamples; i++) {
|
for (let i = 0; i < numSamples; i++) {
|
||||||
samples[i] = HEAPF32[samplesPtr + i];
|
samples[i] = this.Module.HEAPF32[samplesPtr + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
_SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
|
this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
|
||||||
return {samples: samples, sampleRate: sampleRate};
|
return {samples: samples, sampleRate: sampleRate};
|
||||||
}
|
}
|
||||||
|
save(filename, audio) {
|
||||||
|
const samples = audio.samples;
|
||||||
|
const sampleRate = audio.sampleRate;
|
||||||
|
const ptr = this.Module._malloc(samples.length * 4);
|
||||||
|
for (let i = 0; i < samples.length; i++) {
|
||||||
|
this.Module.HEAPF32[ptr / 4 + i] = samples[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
|
||||||
|
const buffer = this.Module._malloc(filenameLen);
|
||||||
|
this.Module.stringToUTF8(filename, buffer, filenameLen);
|
||||||
|
this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
|
||||||
|
this.Module._free(buffer);
|
||||||
|
this.Module._free(ptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function initSherpaOnnxOfflineTts() {
|
function createOfflineTts(Module, myConfig) {
|
||||||
let offlineTtsVitsModelConfig = {
|
const offlineTtsVitsModelConfig = {
|
||||||
model: './model.onnx',
|
model: './model.onnx',
|
||||||
lexicon: '',
|
lexicon: '',
|
||||||
tokens: './tokens.txt',
|
tokens: './tokens.txt',
|
||||||
@@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() {
|
|||||||
noiseScaleW: 0.8,
|
noiseScaleW: 0.8,
|
||||||
lengthScale: 1.0,
|
lengthScale: 1.0,
|
||||||
};
|
};
|
||||||
let offlineTtsModelConfig = {
|
const offlineTtsModelConfig = {
|
||||||
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
|
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
|
||||||
numThreads: 1,
|
numThreads: 1,
|
||||||
debug: 1,
|
debug: 1,
|
||||||
@@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() {
|
|||||||
maxNumSentences: 1,
|
maxNumSentences: 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
return new OfflineTts(offlineTtsConfig);
|
if (myConfig) {
|
||||||
|
offlineTtsConfig = myConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new OfflineTts(offlineTtsConfig, Module);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeof process == 'object' && typeof process.versions == 'object' &&
|
||||||
|
typeof process.versions.node == 'string') {
|
||||||
|
module.exports = {
|
||||||
|
createOfflineTts,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// wasm/sherpa-onnx-wasm-main.cc
|
// wasm/sherpa-onnx-wasm-main-tts.cc
|
||||||
//
|
//
|
||||||
// Copyright (c) 2024 Xiaomi Corporation
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
Reference in New Issue
Block a user