support nodejs (#438)
This commit is contained in:
62
.github/scripts/test-nodejs-npm.sh
vendored
Executable file
62
.github/scripts/test-nodejs-npm.sh
vendored
Executable file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Runs the Node.js examples found in the directory named by the environment
# variable `d`. For every example the required model archive is downloaded,
# unpacked, used by the example script and then removed again.
#
# Usage: d=<directory-with-examples> ./.github/scripts/test-nodejs-npm.sh

set -ex

# Fail early if `d` is unset or empty. A bare `cd` with an empty argument
# would otherwise switch to $HOME and run everything below in the wrong place.
: "${d:?environment variable d must name the directory containing the examples}"

echo "dir: $d"
cd "$d"
npm install
git status
ls -lh
ls -lh node_modules

asr_models=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
tts_models=https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models

# Download a model archive, run one example against it and clean up.
#   $1: URL of a .tar.bz2 archive; the directory removed afterwards is the
#       archive file name without its .tar.bz2 suffix
#   $2: example script to run with node
run_example() {
  local url=$1
  local script=$2
  local archive=${url##*/}
  local model_dir=${archive%.tar.bz2}

  wget -q "$url"
  tar xvf "$archive"
  rm "$archive"
  node "$script"
  rm -rf "$model_dir"
}

# offline asr
run_example "$asr_models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2" ./test-offline-nemo-ctc.js
run_example "$asr_models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2" ./test-offline-paraformer.js
run_example "$asr_models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2" ./test-offline-transducer.js
run_example "$asr_models/sherpa-onnx-whisper-tiny.en.tar.bz2" ./test-offline-whisper.js

# online asr
run_example "$asr_models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2" ./test-online-paraformer.js
run_example "$asr_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2" ./test-online-transducer.js

# offline tts
run_example "$tts_models/vits-vctk.tar.bz2" ./test-offline-tts-en.js
run_example "$tts_models/vits-zh-aishell3.tar.bz2" ./test-offline-tts-zh.js
|
||||||
1
.github/workflows/dot-net.yaml
vendored
1
.github/workflows/dot-net.yaml
vendored
@@ -4,6 +4,7 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- dot-net
|
- dot-net
|
||||||
|
- fix-dot-net
|
||||||
tags:
|
tags:
|
||||||
- '*'
|
- '*'
|
||||||
|
|
||||||
|
|||||||
58
.github/workflows/npm.yaml
vendored
Normal file
58
.github/workflows/npm.yaml
vendored
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
name: npm
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: npm-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
nodejs:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: ["3.8"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: 13
|
||||||
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
|
- name: Display node version
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
node --version
|
||||||
|
npm --version
|
||||||
|
cd nodejs-examples
|
||||||
|
|
||||||
|
npm install npm@6.14.4 -g
|
||||||
|
npm install npm@6.14.4
|
||||||
|
npm --version
|
||||||
|
|
||||||
|
- name: Build nodejs package
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
run: |
|
||||||
|
cd scripts/nodejs
|
||||||
|
./run.sh
|
||||||
|
npm install
|
||||||
|
rm run.sh
|
||||||
|
npm ci
|
||||||
|
npm publish --provenance --access public
|
||||||
59
.github/workflows/test-nodejs-npm.yaml
vendored
Normal file
59
.github/workflows/test-nodejs-npm.yaml
vendored
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
name: test-nodejs-npm
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 23:50 UTC time every day
|
||||||
|
- cron: "50 23 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: test-nodejs-npm-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test-nodejs-npm:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, macos-latest] #, windows-latest]
|
||||||
|
python-version: ["3.8"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: 13
|
||||||
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
|
- name: Display node version
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
node --version
|
||||||
|
npm --version
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
node --version
|
||||||
|
npm --version
|
||||||
|
|
||||||
|
export d=nodejs-examples
|
||||||
|
./.github/scripts/test-nodejs-npm.sh
|
||||||
108
.github/workflows/test-nodejs.yaml
vendored
Normal file
108
.github/workflows/test-nodejs.yaml
vendored
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
name: test-nodejs
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: test-nodejs-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test-nodejs:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, macos-latest] #, windows-latest]
|
||||||
|
python-version: ["3.8"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: hendrikmuhs/ccache-action@v1.2
|
||||||
|
with:
|
||||||
|
key: ${{ matrix.os }}-Release-ON
|
||||||
|
|
||||||
|
- name: Configure CMake
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
|
||||||
|
cmake --version
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
|
||||||
|
make -j2
|
||||||
|
make install
|
||||||
|
ls -lh install/lib
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
|
- name: Copy files
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
os=${{ matrix.os }}
|
||||||
|
if [[ $os == 'ubuntu-latest' ]]; then
|
||||||
|
mkdir -p scripts/nodejs/lib/linux-x64
|
||||||
|
dst=scripts/nodejs/lib/linux-x64
|
||||||
|
elif [[ $os == 'macos-latest' ]]; then
|
||||||
|
mkdir -p scripts/nodejs/lib/osx-x64
|
||||||
|
dst=scripts/nodejs/lib/osx-x64
|
||||||
|
fi
|
||||||
|
cp -v build/install/lib/* $dst/
|
||||||
|
|
||||||
|
- name: replace files
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd nodejs-examples
|
||||||
|
files=$(ls -1 *.js)
|
||||||
|
for f in ${files[@]}; do
|
||||||
|
echo $f
|
||||||
|
sed -i.bak s%\'sherpa-onnx\'%\'./index.js\'% $f
|
||||||
|
git status
|
||||||
|
done
|
||||||
|
git diff
|
||||||
|
cp *.js ../scripts/nodejs
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: 13
|
||||||
|
registry-url: 'https://registry.npmjs.org'
|
||||||
|
|
||||||
|
- name: Display node version
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
node --version
|
||||||
|
npm --version
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
node --version
|
||||||
|
npm --version
|
||||||
|
export d=scripts/nodejs
|
||||||
|
|
||||||
|
pushd $d
|
||||||
|
npm install
|
||||||
|
npm install wav
|
||||||
|
popd
|
||||||
|
|
||||||
|
./.github/scripts/test-nodejs-npm.sh
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -67,3 +67,6 @@ swift-api-examples/k2fsa-*
|
|||||||
run-*.sh
|
run-*.sh
|
||||||
two-pass-*.sh
|
two-pass-*.sh
|
||||||
build-*
|
build-*
|
||||||
|
vits-vctk
|
||||||
|
vits-zh-aishell3
|
||||||
|
jslint.mjs
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||||
project(sherpa-onnx)
|
project(sherpa-onnx)
|
||||||
|
|
||||||
set(SHERPA_ONNX_VERSION "1.8.10")
|
set(SHERPA_ONNX_VERSION "1.8.11")
|
||||||
|
|
||||||
# Disable warning about
|
# Disable warning about
|
||||||
#
|
#
|
||||||
|
|||||||
2
nodejs-examples/.gitignore
vendored
Normal file
2
nodejs-examples/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
node_modules
|
||||||
|
package-lock.json
|
||||||
247
nodejs-examples/README.md
Normal file
247
nodejs-examples/README.md
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
# Introduction
|
||||||
|
|
||||||
|
This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|
||||||
|
|
||||||
|
Before you continue, please first install the npm package `sherpa-onnx` by
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install sherpa-onnx
|
||||||
|
```
|
||||||
|
|
||||||
|
In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
|
||||||
|
for text-to-speech and speech-to-text.
|
||||||
|
|
||||||
|
**Caution**: If you get the following error:
|
||||||
|
```
|
||||||
|
/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67
|
||||||
|
if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {
|
||||||
|
^
|
||||||
|
|
||||||
|
TypeError: Cannot read properties of null (reading 'match')
|
||||||
|
at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)
|
||||||
|
at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)
|
||||||
|
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)
|
||||||
|
at Module._compile (node:internal/modules/cjs/loader:1376:14)
|
||||||
|
at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)
|
||||||
|
at Module.load (node:internal/modules/cjs/loader:1207:32)
|
||||||
|
at Module._load (node:internal/modules/cjs/loader:1023:12)
|
||||||
|
at Module.require (node:internal/modules/cjs/loader:1235:19)
|
||||||
|
at require (node:internal/modules/helpers:176:18)
|
||||||
|
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)
|
||||||
|
```
|
||||||
|
|
||||||
|
Please downgrade your node to version v13.14.0. See also
|
||||||
|
https://github.com/node-ffi-napi/node-ffi-napi/issues/244
|
||||||
|
and
|
||||||
|
https://github.com/node-ffi-napi/node-ffi-napi/issues/97 .
|
||||||
|
|
||||||
|
# Text-to-speech
|
||||||
|
|
||||||
|
In the following, we demonstrate how to run text-to-speech.
|
||||||
|
|
||||||
|
## ./test-offline-tts-en.js
|
||||||
|
|
||||||
|
[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
|
||||||
|
a VITS pretrained model
|
||||||
|
[VCTK](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers)
|
||||||
|
for text-to-speech.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
|
||||||
|
tar xvf vits-vctk.tar.bz2
|
||||||
|
node ./test-offline-tts-en.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-offline-tts-zh.js
|
||||||
|
|
||||||
|
[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
|
||||||
|
a VITS pretrained model
|
||||||
|
[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
|
||||||
|
for text-to-speech.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
|
||||||
|
tar xvf vits-zh-aishell3.tar.bz2
|
||||||
|
node ./test-offline-tts-zh.js
|
||||||
|
```
|
||||||
|
|
||||||
|
# Speech-to-text
|
||||||
|
|
||||||
|
In the following, we demonstrate how to decode files and how to perform
|
||||||
|
speech recognition with a microphone with `nodejs`. We need to install two additional
|
||||||
|
npm packages:
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install wav naudiodon2
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-offline-nemo-ctc.js
|
||||||
|
|
||||||
|
[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
|
||||||
|
how to decode a file with a NeMo CTC model. In the code we use
|
||||||
|
[stt_en_conformer_ctc_small](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-small).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
|
||||||
|
node ./test-offline-nemo-ctc.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-offline-paraformer.js
|
||||||
|
|
||||||
|
[./test-offline-paraformer.js](./test-offline-paraformer.js) demonstrates
|
||||||
|
how to decode a file with a non-streaming Paraformer model. In the code we use
|
||||||
|
[sherpa-onnx-paraformer-zh-2023-03-28](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
|
node ./test-offline-paraformer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-offline-transducer.js
|
||||||
|
|
||||||
|
[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
|
||||||
|
how to decode a file with a non-streaming transducer model. In the code we use
|
||||||
|
[sherpa-onnx-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
||||||
|
node ./test-offline-transducer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-offline-whisper.js
|
||||||
|
[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
|
||||||
|
how to decode a file with a Whisper model. In the code we use
|
||||||
|
[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
node ./test-offline-whisper.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-online-paraformer-microphone.js
|
||||||
|
[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
|
||||||
|
demonstrates how to do real-time speech recognition from microphone
|
||||||
|
with a streaming Paraformer model. In the code we use
|
||||||
|
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
node ./test-online-paraformer-microphone.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-online-paraformer.js
|
||||||
|
[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
|
||||||
|
how to decode a file using a streaming Paraformer model. In the code we use
|
||||||
|
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
node ./test-online-paraformer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-online-transducer-microphone.js
|
||||||
|
[./test-online-transducer-microphone.js](./test-online-transducer-microphone.js)
|
||||||
|
demonstrates how to do real-time speech recognition with microphone using a streaming transducer model. In the code
|
||||||
|
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).
|
||||||
|
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
node ./test-online-transducer-microphone.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-online-transducer.js
|
||||||
|
[./test-online-transducer.js](./test-online-transducer.js) demonstrates
|
||||||
|
how to decode a file using a streaming transducer model. In the code
|
||||||
|
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
node ./test-online-transducer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-vad-microphone-offline-paraformer.js
|
||||||
|
|
||||||
|
[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)
|
||||||
|
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
||||||
|
with non-streaming Paraformer for speech recognition from microphone.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
|
node ./test-vad-microphone-offline-paraformer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-vad-microphone-offline-transducer.js
|
||||||
|
|
||||||
|
[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)
|
||||||
|
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
||||||
|
with a non-streaming transducer model for speech recognition from microphone.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
|
||||||
|
node ./test-vad-microphone-offline-transducer.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-vad-microphone-offline-whisper.js
|
||||||
|
|
||||||
|
[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)
|
||||||
|
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
|
||||||
|
with whisper for speech recognition from microphone.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
node ./test-vad-microphone-offline-whisper.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## ./test-vad-microphone.js
|
||||||
|
|
||||||
|
[./test-vad-microphone.js](./test-vad-microphone.js)
|
||||||
|
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
node ./test-vad-microphone.js
|
||||||
|
```
|
||||||
7
nodejs-examples/package.json
Normal file
7
nodejs-examples/package.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"dependencies": {
|
||||||
|
"naudiodon2": "^2.4.0",
|
||||||
|
"sherpa-onnx": "^1.8.11",
|
||||||
|
"wav": "^1.0.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
97
nodejs-examples/test-offline-nemo-ctc.js
Normal file
97
nodejs-examples/test-offline-nemo-ctc.js
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
/**
 * Creates the offline (non-streaming) recognizer used by this example.
 *
 * The recognizer is configured with a NeMo CTC model and its token table,
 * both read from ./sherpa-onnx-nemo-ctc-en-conformer-small/, and decodes
 * with greedy search.
 *
 * @returns {object} a new sherpa_onnx.OfflineRecognizer instance
 */
function createRecognizer() {
  // Feature extraction settings: sample rate 16000, feature dimension 80.
  const features = new sherpa_onnx.FeatureConfig();
  features.sampleRate = 16000;
  features.featureDim = 80;

  // NeMo CTC model file. It is filled in before being attached to the
  // model configuration below, exactly as in the original statement order.
  const ctcModel = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();
  ctcModel.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';

  const model = new sherpa_onnx.OfflineModelConfig();
  model.nemoCtc = ctcModel;
  model.tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';
  model.modelType = 'nemo_ctc';

  const config = new sherpa_onnx.OfflineRecognizerConfig();
  config.featConfig = features;
  config.modelConfig = model;
  config.decodingMethod = 'greedy_search';

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Recognizer and the stream that receives the samples of the test wave.
// Declared with `const` so they are not created as implicit globals, which
// would throw a ReferenceError in strict mode or in an ES module.
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file decoded by this example.
const waveFilename =
    './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';

// `reader` parses the wave file; `readable` exposes what the reader emits
// through the stream.Readable interface.
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

// Collects the Float32Array chunks produced while reading the file.
const buf = [];
|
||||||
|
|
||||||
|
// Validate the wave header as soon as the reader reports it. The example only
// accepts input whose sample rate equals the recognizer's configured rate and
// whose audioFormat is 1, channel count is 1 and bit depth is 16.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // Report the destructured `channels` value. The previous message used the
    // undefined name `channel`, which raised a ReferenceError instead.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Feed the wave file into the reader. When the reader emits 'finish', decode
// everything collected in `buf` and print the recognized text.
fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', function() {
      const sampleRate = recognizer.config.featConfig.sampleRate;

      // tail padding: append sampleRate * 0.5 zero-valued samples
      buf.push(new Float32Array(sampleRate * 0.5));

      // Join all chunks into one array with a single allocation. Repeatedly
      // spreading into a new array copies the accumulated samples again for
      // every chunk, which is quadratic in the length of the audio.
      const totalLength = buf.reduce((n, chunk) => n + chunk.length, 0);
      const flattened = new Float32Array(totalLength);
      let offset = 0;
      for (const chunk of buf) {
        flattened.set(chunk, offset);
        offset += chunk.length;
      }

      stream.acceptWaveform(sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      // Release the stream and the recognizer once the result is printed.
      stream.free();
      recognizer.free();
    });
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit integers, scale each one by 1 / 32768 and store the resulting
// Float32Array in `buf`.
readable.on('readable', function() {
  for (let chunk = readable.read(); chunk != null; chunk = readable.read()) {
    const pcm = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    buf.push(Float32Array.from(pcm, (sample) => sample / 32768.0));
  }
});
|
||||||
95
nodejs-examples/test-offline-paraformer.js
Normal file
95
nodejs-examples/test-offline-paraformer.js
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
/**
 * Creates the offline (non-streaming) recognizer used by this example.
 *
 * The recognizer is configured with a Paraformer model and its token table,
 * both read from ./sherpa-onnx-paraformer-zh-2023-03-28/, and decodes with
 * greedy search.
 *
 * @returns {object} a new sherpa_onnx.OfflineRecognizer instance
 */
function createRecognizer() {
  // Feature extraction settings: sample rate 16000, feature dimension 80.
  const features = new sherpa_onnx.FeatureConfig();
  features.sampleRate = 16000;
  features.featureDim = 80;

  // Paraformer model file. It is filled in before being attached to the
  // model configuration below, exactly as in the original statement order.
  const paraformerModel = new sherpa_onnx.OfflineParaformerModelConfig();
  paraformerModel.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';

  const model = new sherpa_onnx.OfflineModelConfig();
  model.paraformer = paraformerModel;
  model.tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
  model.modelType = 'paraformer';

  const config = new sherpa_onnx.OfflineRecognizerConfig();
  config.featConfig = features;
  config.modelConfig = model;
  config.decodingMethod = 'greedy_search';

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Recognizer and the stream that receives the samples of the test wave.
// Declared with `const` so they are not created as implicit globals, which
// would throw a ReferenceError in strict mode or in an ES module.
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file decoded by this example.
const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';

// `reader` parses the wave file; `readable` exposes what the reader emits
// through the stream.Readable interface.
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

// Collects the Float32Array chunks produced while reading the file.
const buf = [];
|
||||||
|
|
||||||
|
// Validate the wave header as soon as the reader reports it. The example only
// accepts input whose sample rate equals the recognizer's configured rate and
// whose audioFormat is 1, channel count is 1 and bit depth is 16.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // Report the destructured `channels` value. The previous message used the
    // undefined name `channel`, which raised a ReferenceError instead.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Feed the wave file into the reader. When the reader emits 'finish', decode
// everything collected in `buf` and print the recognized text.
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function() {
      const sampleRate = recognizer.config.featConfig.sampleRate;

      // tail padding: append sampleRate * 0.5 zero-valued samples
      buf.push(new Float32Array(sampleRate * 0.5));

      // Join all chunks into one array with a single allocation. Repeatedly
      // spreading into a new array copies the accumulated samples again for
      // every chunk, which is quadratic in the length of the audio.
      const totalLength = buf.reduce((n, chunk) => n + chunk.length, 0);
      const flattened = new Float32Array(totalLength);
      let offset = 0;
      for (const chunk of buf) {
        flattened.set(chunk, offset);
        offset += chunk.length;
      }

      stream.acceptWaveform(sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      // Release the stream and the recognizer once the result is printed.
      stream.free();
      recognizer.free();
    });
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit integers, scale each one by 1 / 32768 and store the resulting
// Float32Array in `buf`.
readable.on('readable', function() {
  for (let chunk = readable.read(); chunk != null; chunk = readable.read()) {
    const pcm = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    buf.push(Float32Array.from(pcm, (sample) => sample / 32768.0));
  }
});
|
||||||
100
nodejs-examples/test-offline-transducer.js
Normal file
100
nodejs-examples/test-offline-transducer.js
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the offline recognizer used by this example: a transducer model
// (encoder/decoder/joiner) loaded from ./sherpa-onnx-zipformer-en-2023-06-26,
// decoded with greedy search on 80-dim features at sampleRate 16000.
function createRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const transducerModel =
      Object.assign(new sherpa_onnx.OfflineTransducerModelConfig(), {
        encoder:
            './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx',
        decoder:
            './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
        joiner:
            './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OfflineModelConfig(), {
    transducer: transducerModel,
    tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
    modelType: 'transducer',
  });

  const config = Object.assign(new sherpa_onnx.OfflineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Recognizer and the stream that audio is fed into. Declared with `const` so
// they are real module-level bindings; the bare assignments used before
// created implicit globals (and throw in strict mode).
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file to decode, located inside the model directory.
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';

// `reader` receives the file bytes and emits the 'format' event; `readable`
// wraps it so chunks can be pulled with read(). `buf` accumulates the
// Float32Array chunks until the whole file has been read.
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];
|
||||||
|
|
||||||
|
// Checks the parsed WAV header before any samples are decoded. Only audio
// that matches the recognizer's sample rate and is PCM (audioFormat 1), mono
// and 16-bit is accepted; anything else aborts with an Error.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  const expectedSampleRate = recognizer.config.featConfig.sampleRate;
  if (sampleRate != expectedSampleRate) {
    throw new Error(
        `Only support sampleRate ${expectedSampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // The message must interpolate `channels`. The previous `${channel}`
    // named an undeclared variable, so this branch raised a ReferenceError
    // instead of reporting the channel count.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Stream the wave file into the WAV reader. When the reader emits 'finish',
// decode everything collected in `buf` in a single pass and print the text.
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function() {
      const sampleRate = recognizer.config.featConfig.sampleRate;

      // tail padding: half a second worth of zero samples
      buf.push(new Float32Array(sampleRate * 0.5));

      // Concatenate the chunks with one allocation plus typed-array copies.
      // The earlier spread-based reduce rebuilt a plain array for every
      // chunk, which is quadratic in the total number of samples.
      const numSamples = buf.reduce((n, chunk) => n + chunk.length, 0);
      const flattened = new Float32Array(numSamples);
      let offset = 0;
      for (const chunk of buf) {
        flattened.set(chunk, offset);
        offset += chunk.length;
      }

      stream.acceptWaveform(sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      // Release the stream before the recognizer that created it.
      stream.free();
      recognizer.free();
    });
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit samples, scale them by 1/32768 and keep the float chunk in `buf`.
readable.on('readable', () => {
  for (let pcm = readable.read(); pcm != null; pcm = readable.read()) {
    const int16View = new Int16Array(
        pcm.buffer, pcm.byteOffset, pcm.length / Int16Array.BYTES_PER_ELEMENT);

    buf.push(Float32Array.from(int16View, (sample) => sample / 32768.0));
  }
});
|
||||||
27
nodejs-examples/test-offline-tts-en.js
Normal file
27
nodejs-examples/test-offline-tts-en.js
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Creates the text-to-speech engine for this example from the VITS model,
// lexicon and token files found under ./vits-vctk.
function createOfflineTts() {
  const vitsModel = Object.assign(new sherpa_onnx.OfflineTtsVitsModelConfig(), {
    model: './vits-vctk/vits-vctk.onnx',
    lexicon: './vits-vctk/lexicon.txt',
    tokens: './vits-vctk/tokens.txt',
  });

  const model = new sherpa_onnx.OfflineTtsModelConfig();
  model.vits = vitsModel;

  const ttsConfig = new sherpa_onnx.OfflineTtsConfig();
  ttsConfig.model = model;

  return new sherpa_onnx.OfflineTts(ttsConfig);
}
|
||||||
|
|
||||||
|
// Synthesize one sentence with the chosen speaker and speed, write it to
// ./test-en.wav, then release the TTS engine.
const tts = createOfflineTts();

const [speakerId, speed] = [99, 1.0];
const sentence = 'Good morning. How are you doing?';

tts.generate(sentence, speakerId, speed).save('./test-en.wav');
console.log('Saved to test-en.wav successfully.');

tts.free();
|
||||||
27
nodejs-examples/test-offline-tts-zh.js
Normal file
27
nodejs-examples/test-offline-tts-zh.js
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Creates the text-to-speech engine for this example from the VITS model,
// lexicon, token and rule FST files found under ./vits-zh-aishell3.
function createOfflineTts() {
  const vitsModel = Object.assign(new sherpa_onnx.OfflineTtsVitsModelConfig(), {
    model: './vits-zh-aishell3/vits-aishell3.onnx',
    lexicon: './vits-zh-aishell3/lexicon.txt',
    tokens: './vits-zh-aishell3/tokens.txt',
  });

  const model = new sherpa_onnx.OfflineTtsModelConfig();
  model.vits = vitsModel;

  const ttsConfig = Object.assign(new sherpa_onnx.OfflineTtsConfig(), {
    model: model,
    ruleFsts: './vits-zh-aishell3/rule.fst',
  });

  return new sherpa_onnx.OfflineTts(ttsConfig);
}
|
||||||
|
|
||||||
|
// Synthesize one sentence with the chosen speaker and speed, write it to
// ./test-zh.wav, then release the TTS engine.
const tts = createOfflineTts();

const [speakerId, speed] = [66, 1.0];
const sentence = '3年前中国总人口是1411778724人';

tts.generate(sentence, speakerId, speed).save('./test-zh.wav');
console.log('Saved to test-zh.wav successfully.');

tts.free();
|
||||||
97
nodejs-examples/test-offline-whisper.js
Normal file
97
nodejs-examples/test-offline-whisper.js
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the offline recognizer used by this example: a whisper model
// (int8 encoder/decoder) loaded from ./sherpa-onnx-whisper-tiny.en, decoded
// with greedy search on 80-dim features at sampleRate 16000.
function createRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const whisperModel =
      Object.assign(new sherpa_onnx.OfflineWhisperModelConfig(), {
        encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
        decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OfflineModelConfig(), {
    whisper: whisperModel,
    tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    modelType: 'whisper',
  });

  const config = Object.assign(new sherpa_onnx.OfflineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Recognizer and the stream that audio is fed into. Declared with `const` so
// they are real module-level bindings; the bare assignments used before
// created implicit globals (and throw in strict mode).
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file to decode, located inside the model directory.
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';

// `reader` receives the file bytes and emits the 'format' event; `readable`
// wraps it so chunks can be pulled with read(). `buf` accumulates the
// Float32Array chunks until the whole file has been read.
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];
|
||||||
|
|
||||||
|
// Checks the parsed WAV header before any samples are decoded. Only audio
// that matches the recognizer's sample rate and is PCM (audioFormat 1), mono
// and 16-bit is accepted; anything else aborts with an Error.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  const expectedSampleRate = recognizer.config.featConfig.sampleRate;
  if (sampleRate != expectedSampleRate) {
    throw new Error(
        `Only support sampleRate ${expectedSampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // The message must interpolate `channels`. The previous `${channel}`
    // named an undeclared variable, so this branch raised a ReferenceError
    // instead of reporting the channel count.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Stream the wave file into the WAV reader. When the reader emits 'finish',
// decode everything collected in `buf` in a single pass and print the text.
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function() {
      const sampleRate = recognizer.config.featConfig.sampleRate;

      // tail padding: half a second worth of zero samples
      buf.push(new Float32Array(sampleRate * 0.5));

      // Concatenate the chunks with one allocation plus typed-array copies.
      // The earlier spread-based reduce rebuilt a plain array for every
      // chunk, which is quadratic in the total number of samples.
      const numSamples = buf.reduce((n, chunk) => n + chunk.length, 0);
      const flattened = new Float32Array(numSamples);
      let offset = 0;
      for (const chunk of buf) {
        flattened.set(chunk, offset);
        offset += chunk.length;
      }

      stream.acceptWaveform(sampleRate, flattened);
      recognizer.decode(stream);
      const r = recognizer.getResult(stream);
      console.log(r.text);

      // Release the stream before the recognizer that created it.
      stream.free();
      recognizer.free();
    });
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit samples, scale them by 1/32768 and keep the float chunk in `buf`.
readable.on('readable', () => {
  for (let pcm = readable.read(); pcm != null; pcm = readable.read()) {
    const int16View = new Int16Array(
        pcm.buffer, pcm.byteOffset, pcm.length / Int16Array.BYTES_PER_ELEMENT);

    buf.push(Float32Array.from(int16View, (sample) => sample / 32768.0));
  }
});
|
||||||
86
nodejs-examples/test-online-paraformer-microphone.js
Normal file
86
nodejs-examples/test-online-paraformer-microphone.js
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the online recognizer used by this example: a paraformer model
// (int8 encoder/decoder) loaded from
// ./sherpa-onnx-streaming-paraformer-bilingual-zh-en, decoded with greedy
// search and with endpoint detection enabled.
function createRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const paraformerModel =
      Object.assign(new sherpa_onnx.OnlineParaformerModelConfig(), {
        encoder:
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OnlineModelConfig(), {
    paraformer: paraformerModel,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
    modelType: 'paraformer',
  });

  const config = Object.assign(new sherpa_onnx.OnlineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
    enableEndpoint: 1,
  });

  return new sherpa_onnx.OnlineRecognizer(config);
}
|
||||||
|
// Recognizer, its stream and the display helper. Declared with `const` so
// they are real module-level bindings; the bare assignments used before
// created implicit globals (and throw in strict mode).
const recognizer = createRecognizer();
const stream = recognizer.createStream();

const display = new sherpa_onnx.Display(50);

// Text most recently shown, and the index of the segment being recognized.
let lastText = '';
let segmentIndex = 0;

// Microphone input: mono float32 samples at the recognizer's sample rate.
const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});
|
||||||
|
|
||||||
|
// Handles one chunk of microphone audio: feed it to the recognizer, decode
// while frames are ready, print the text when it changes, and start a new
// segment whenever an endpoint is detected.
ai.on('data', data => {
  // View only this chunk's bytes. `data.buffer` is the backing ArrayBuffer,
  // which may be larger than the chunk or start at a non-zero offset, so the
  // offset and length are passed explicitly.
  // NOTE(review): assumes `data` is a Node.js Buffer of float32 samples.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      data.length / Float32Array.BYTES_PER_ELEMENT);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  // Only redraw when the recognized text actually changed.
  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // Advance to the next segment only if this one produced text.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});
|
||||||
|
|
||||||
|
// When the audio input closes, free the stream and then the recognizer.
ai.on('close', function releaseResources() {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

// Start capturing from the microphone and tell the user to talk.
ai.start();
console.log('Started! Please speak');
|
||||||
99
nodejs-examples/test-online-paraformer.js
Normal file
99
nodejs-examples/test-online-paraformer.js
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the online recognizer used by this example: a paraformer model
// (encoder/decoder) loaded from
// ./sherpa-onnx-streaming-paraformer-bilingual-zh-en, decoded with greedy
// search on 80-dim features at sampleRate 16000.
function createRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const paraformerModel =
      Object.assign(new sherpa_onnx.OnlineParaformerModelConfig(), {
        encoder:
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx',
        decoder:
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OnlineModelConfig(), {
    paraformer: paraformerModel,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
    modelType: 'paraformer',
  });

  const config = Object.assign(new sherpa_onnx.OnlineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OnlineRecognizer(config);
}
|
||||||
|
// Recognizer and the stream that audio is fed into. Declared with `const` so
// they are real module-level bindings; the bare assignments used before
// created implicit globals (and throw in strict mode).
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file to decode, located inside the model directory.
const waveFilename =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';

// `reader` receives the file bytes and emits the 'format' event; `readable`
// wraps it so chunks can be pulled with read().
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
|
||||||
|
|
||||||
|
// Feeds `samples` into the shared stream, runs the recognizer for as long as
// it reports the stream as ready, and prints the current result text.
function decode(samples) {
  const sampleRate = recognizer.config.featConfig.sampleRate;
  stream.acceptWaveform(sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  console.log(recognizer.getResult(stream).text);
}
|
||||||
|
|
||||||
|
// Checks the parsed WAV header before any samples are decoded. Only audio
// that matches the recognizer's sample rate and is PCM (audioFormat 1), mono
// and 16-bit is accepted; anything else aborts with an Error.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  const expectedSampleRate = recognizer.config.featConfig.sampleRate;
  if (sampleRate != expectedSampleRate) {
    throw new Error(
        `Only support sampleRate ${expectedSampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // The message must interpolate `channels`. The previous `${channel}`
    // named an undeclared variable, so this branch raised a ReferenceError
    // instead of reporting the channel count.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Stream the wave file into the WAV reader. When the reader emits 'finish',
// decode half a second worth of zero samples as tail padding, then free the
// stream and the recognizer.
const wavInput = fs.createReadStream(waveFilename, {'highWaterMark': 4096});

wavInput.pipe(reader).on('finish', () => {
  const tailPadding =
      new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  decode(tailPadding);

  stream.free();
  recognizer.free();
});
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit samples, scale them by 1/32768 and decode the float chunk right away.
readable.on('readable', () => {
  for (let pcm = readable.read(); pcm != null; pcm = readable.read()) {
    const int16View = new Int16Array(
        pcm.buffer, pcm.byteOffset, pcm.length / Int16Array.BYTES_PER_ELEMENT);

    decode(Float32Array.from(int16View, (sample) => sample / 32768.0));
  }
});
|
||||||
88
nodejs-examples/test-online-transducer-microphone.js
Normal file
88
nodejs-examples/test-online-transducer-microphone.js
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
// console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the online recognizer used by this example: a transducer model
// (encoder/decoder/joiner) loaded from
// ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20, decoded with
// greedy search and with endpoint detection enabled.
function createRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const transducerModel =
      Object.assign(new sherpa_onnx.OnlineTransducerModelConfig(), {
        encoder:
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
        decoder:
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
        joiner:
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OnlineModelConfig(), {
    transducer: transducerModel,
    tokens:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    modelType: 'zipformer',
  });

  const config = Object.assign(new sherpa_onnx.OnlineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
    enableEndpoint: 1,
  });

  return new sherpa_onnx.OnlineRecognizer(config);
}
|
||||||
|
// Recognizer, its stream and the display helper. Declared with `const` so
// they are real module-level bindings; the bare assignments used before
// created implicit globals (and throw in strict mode).
const recognizer = createRecognizer();
const stream = recognizer.createStream();
const display = new sherpa_onnx.Display(50);

// Text most recently shown, and the index of the segment being recognized.
let lastText = '';
let segmentIndex = 0;

// Microphone input: mono float32 samples at the recognizer's sample rate.
const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});
|
||||||
|
|
||||||
|
// Handles one chunk of microphone audio: feed it to the recognizer, decode
// while frames are ready, print the text when it changes, and start a new
// segment whenever an endpoint is detected.
ai.on('data', data => {
  // View only this chunk's bytes. `data.buffer` is the backing ArrayBuffer,
  // which may be larger than the chunk or start at a non-zero offset, so the
  // offset and length are passed explicitly.
  // NOTE(review): assumes `data` is a Node.js Buffer of float32 samples.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      data.length / Float32Array.BYTES_PER_ELEMENT);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  // Only redraw when the recognized text actually changed.
  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // Advance to the next segment only if this one produced text.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});
|
||||||
|
|
||||||
|
// When the audio input closes, free the stream and then the recognizer.
ai.on('close', function releaseResources() {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

// Start capturing from the microphone and tell the user to talk.
ai.start();
console.log('Started! Please speak');
|
||||||
102
nodejs-examples/test-online-transducer.js
Normal file
102
nodejs-examples/test-online-transducer.js
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
// Builds the online recognizer used by this example: a transducer model
// (encoder/decoder/joiner) loaded from
// ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20, decoded with
// greedy search on 80-dim features at sampleRate 16000.
//
// The recognizer is returned directly. The previous version assigned it to an
// undeclared `recognizer` inside the function, which leaked an implicit
// global.
function createRecognizer() {
  const featConfig = new sherpa_onnx.FeatureConfig();
  featConfig.sampleRate = 16000;
  featConfig.featureDim = 80;

  const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
  transducer.encoder =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  transducer.decoder =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  transducer.joiner =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
  const tokens =
      './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';

  const modelConfig = new sherpa_onnx.OnlineModelConfig();
  modelConfig.transducer = transducer;
  modelConfig.tokens = tokens;
  modelConfig.modelType = 'zipformer';

  const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
  recognizerConfig.featConfig = featConfig;
  recognizerConfig.modelConfig = modelConfig;
  recognizerConfig.decodingMethod = 'greedy_search';

  return new sherpa_onnx.OnlineRecognizer(recognizerConfig);
}

// Recognizer and the stream that audio is fed into, declared with `const`
// instead of being assigned as implicit globals.
const recognizer = createRecognizer();
const stream = recognizer.createStream();

// Wave file to decode, located inside the model directory.
const waveFilename =
    './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

// `reader` receives the file bytes and emits the 'format' event; `readable`
// wraps it so chunks can be pulled with read().
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
|
||||||
|
|
||||||
|
// Feeds `samples` into the shared stream, runs the recognizer for as long as
// it reports the stream as ready, and prints the current result text.
function decode(samples) {
  const sampleRate = recognizer.config.featConfig.sampleRate;
  stream.acceptWaveform(sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  console.log(recognizer.getResult(stream).text);
}
|
||||||
|
|
||||||
|
// Checks the parsed WAV header before any samples are decoded. Only audio
// that matches the recognizer's sample rate and is PCM (audioFormat 1), mono
// and 16-bit is accepted; anything else aborts with an Error.
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  const expectedSampleRate = recognizer.config.featConfig.sampleRate;
  if (sampleRate != expectedSampleRate) {
    throw new Error(
        `Only support sampleRate ${expectedSampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // The message must interpolate `channels`. The previous `${channel}`
    // named an undeclared variable, so this branch raised a ReferenceError
    // instead of reporting the channel count.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
|
||||||
|
|
||||||
|
// Stream the wave file into the WAV reader. When the reader emits 'finish',
// decode half a second worth of zero samples as tail padding, then free the
// stream and the recognizer.
const wavInput = fs.createReadStream(waveFilename, {'highWaterMark': 4096});

wavInput.pipe(reader).on('finish', () => {
  const tailPadding =
      new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  decode(tailPadding);

  stream.free();
  recognizer.free();
});
|
||||||
|
|
||||||
|
// Drain every chunk that is currently available, reinterpret its bytes as
// 16-bit samples, scale them by 1/32768 and decode the float chunk right away.
readable.on('readable', () => {
  for (let pcm = readable.read(); pcm != null; pcm = readable.read()) {
    const int16View = new Int16Array(
        pcm.buffer, pcm.byteOffset, pcm.length / Int16Array.BYTES_PER_ELEMENT);

    decode(Float32Array.from(int16View, (sample) => sample / 32768.0));
  }
});
|
||||||
101
nodejs-examples/test-vad-microphone-offline-paraformer.js
Normal file
101
nodejs-examples/test-vad-microphone-offline-paraformer.js
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
// Load the same 'sherpa-onnx' package as every other example in this
// directory; the previous 'sherpa-onnx3' name matched none of them.
const sherpa_onnx = require('sherpa-onnx');
const portAudio = require('naudiodon2');

// Print the audio devices reported by PortAudio.
console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
// Builds the offline recognizer used by this example: a paraformer model
// (int8) loaded from ./sherpa-onnx-paraformer-zh-2023-03-28, decoded with
// greedy search on 80-dim features at sampleRate 16000.
function createOfflineRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const paraformerModel = new sherpa_onnx.OfflineParaformerModelConfig();
  paraformerModel.model =
      './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx';

  const model = Object.assign(new sherpa_onnx.OfflineModelConfig(), {
    paraformer: paraformerModel,
    tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
    modelType: 'paraformer',
  });

  const config = Object.assign(new sherpa_onnx.OfflineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Creates the voice activity detector: a silero VAD model read from
// ./silero_vad.onnx, configured for sampleRate 16000, with a buffer size of
// 60 seconds passed to the detector.
function createVad() {
  const silero = Object.assign(new sherpa_onnx.SileroVadModelConfig(), {
    model: './silero_vad.onnx',
    minSpeechDuration: 0.3,   // seconds
    minSilenceDuration: 0.3,  // seconds
    windowSize: 512,
  });

  const vadConfig = Object.assign(new sherpa_onnx.VadModelConfig(), {
    sileroVad: silero,
    sampleRate: 16000,
  });

  const bufferSizeInSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(vadConfig, bufferSizeInSeconds);
}
|
||||||
|
|
||||||
|
// Recognizer that decodes each detected speech segment, and the VAD that
// finds those segments.
const recognizer = createOfflineRecognizer();
const vad = createVad();

// Circular buffer sized for 30 seconds of samples at the VAD sample rate;
// microphone samples are queued here before being handed to the VAD.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

// Microphone input: mono float32 samples at the VAD sample rate. Declared
// with `const` like the other examples, since it is never reassigned.
const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    closeOnError: true  // Close the stream if an audio error is detected, if
                        // set false then just log the error
  }
});

// Running index of the recognized segments that are printed. The unused
// `printed` flag that used to be declared here has been dropped.
let index = 0;
|
||||||
|
// Handles one chunk of microphone audio: queue it, feed the VAD one window
// at a time, then decode and print every speech segment the VAD has ready.
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // View only this chunk's bytes. `data.buffer` is the backing ArrayBuffer,
  // which may be larger than the chunk or start at a non-zero offset, so the
  // offset and length are passed explicitly.
  // NOTE(review): assumes `data` is a Node.js Buffer of float32 samples.
  buffer.push(new Float32Array(
      data.buffer, data.byteOffset,
      data.length / Float32Array.BYTES_PER_ELEMENT));

  // Hand the VAD exactly `windowSize` samples per call.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    // Each segment gets its own stream, freed as soon as the result is read.
    const stream = recognizer.createStream();
    stream.acceptWaveform(
        recognizer.config.featConfig.sampleRate, segment.samples);
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    stream.free();

    if (r.text.length > 0) {
      console.log(`${index}: ${r.text}`);
      index += 1;
    }
  }
});
|
||||||
|
|
||||||
|
// When the audio input closes, free the recognizer, the VAD and the buffer.
ai.on('close', function releaseResources() {
  console.log('Free resources');
  recognizer.free();
  vad.free();
  buffer.free();
});

// Start capturing from the microphone and tell the user to talk.
ai.start();
console.log('Started! Please speak');
|
||||||
106
nodejs-examples/test-vad-microphone-offline-transducer.js
Normal file
106
nodejs-examples/test-vad-microphone-offline-transducer.js
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
// Builds the offline recognizer used by this example: a transducer model
// (encoder/decoder/joiner) loaded from ./sherpa-onnx-zipformer-en-2023-06-26,
// decoded with greedy search on 80-dim features at sampleRate 16000.
function createOfflineRecognizer() {
  const features = Object.assign(new sherpa_onnx.FeatureConfig(), {
    sampleRate: 16000,
    featureDim: 80,
  });

  const transducerModel =
      Object.assign(new sherpa_onnx.OfflineTransducerModelConfig(), {
        encoder:
            './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx',
        decoder:
            './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
        joiner:
            './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx',
      });

  const model = Object.assign(new sherpa_onnx.OfflineModelConfig(), {
    transducer: transducerModel,
    tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
    modelType: 'transducer',
  });

  const config = Object.assign(new sherpa_onnx.OfflineRecognizerConfig(), {
    featConfig: features,
    modelConfig: model,
    decodingMethod: 'greedy_search',
  });

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
/**
 * Create a voice activity detector that uses the silero VAD model found at
 * ./silero_vad.onnx.
 *
 * @returns {sherpa_onnx.VoiceActivityDetector} The caller must call free()
 *     on it.
 */
function createVad() {
  const silero = new sherpa_onnx.SileroVadModelConfig();
  silero.model = './silero_vad.onnx';
  silero.minSpeechDuration = 0.3;   // seconds
  silero.minSilenceDuration = 0.3;  // seconds
  silero.windowSize = 512;

  const config = new sherpa_onnx.VadModelConfig();
  config.sileroVad = silero;
  config.sampleRate = 16000;

  // Size (in seconds) of the buffer kept by the detector itself.
  const bufferSizeInSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(config, bufferSizeInSeconds);
}
|
||||||
|
|
||||||
|
const recognizer = createOfflineRecognizer();
const vad = createVad();

// Staging area for raw microphone samples; its capacity is
// bufferSizeInSeconds * sampleRate entries.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// Running number of the utterances that produced non-empty text.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // `data` is a Node.js Buffer. Its underlying ArrayBuffer is not guaranteed
  // to correspond exactly to the Buffer, so the view has to be limited to the
  // Buffer's own byte range instead of covering the whole ArrayBuffer.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT));
  buffer.push(samples);

  // Hand the buffered audio to the VAD, windowSize samples at a time.
  while (buffer.size() > windowSize) {
    const chunk = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(chunk);
  }

  // Decode every speech segment the VAD has queued so far.
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const stream = recognizer.createStream();
    stream.acceptWaveform(
        recognizer.config.featConfig.sampleRate, segment.samples);
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    stream.free();

    if (r.text.length > 0) {
      console.log(`${index}: ${r.text}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  recognizer.free();
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
||||||
102
nodejs-examples/test-vad-microphone-offline-whisper.js
Normal file
102
nodejs-examples/test-vad-microphone-offline-whisper.js
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
/**
 * Create a non-streaming (offline) recognizer backed by the
 * sherpa-onnx-whisper-tiny.en model (int8 encoder/decoder).
 *
 * The model files are expected in ./sherpa-onnx-whisper-tiny.en.
 *
 * @returns {sherpa_onnx.OfflineRecognizer} The caller must call free() on it.
 */
function createOfflineRecognizer() {
  const features = new sherpa_onnx.FeatureConfig();
  features.sampleRate = 16000;
  features.featureDim = 80;

  // Encoder/decoder of the offline whisper model.
  const whisperModel = new sherpa_onnx.OfflineWhisperModelConfig();
  whisperModel.encoder =
      './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  whisperModel.decoder =
      './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';

  const model = new sherpa_onnx.OfflineModelConfig();
  model.whisper = whisperModel;
  model.tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  model.modelType = 'whisper';

  const config = new sherpa_onnx.OfflineRecognizerConfig();
  config.featConfig = features;
  config.modelConfig = model;
  config.decodingMethod = 'greedy_search';

  return new sherpa_onnx.OfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
/**
 * Create a voice activity detector that uses the silero VAD model found at
 * ./silero_vad.onnx.
 *
 * @returns {sherpa_onnx.VoiceActivityDetector} The caller must call free()
 *     on it.
 */
function createVad() {
  const silero = new sherpa_onnx.SileroVadModelConfig();
  silero.model = './silero_vad.onnx';
  silero.minSpeechDuration = 0.3;   // seconds
  silero.minSilenceDuration = 0.3;  // seconds
  silero.windowSize = 512;

  const config = new sherpa_onnx.VadModelConfig();
  config.sileroVad = silero;
  config.sampleRate = 16000;

  // Size (in seconds) of the buffer kept by the detector itself.
  const bufferSizeInSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(config, bufferSizeInSeconds);
}
|
||||||
|
|
||||||
|
const recognizer = createOfflineRecognizer();
const vad = createVad();

// Staging area for raw microphone samples; its capacity is
// bufferSizeInSeconds * sampleRate entries.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// Running number of the utterances that produced non-empty text.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // `data` is a Node.js Buffer. Its underlying ArrayBuffer is not guaranteed
  // to correspond exactly to the Buffer, so the view has to be limited to the
  // Buffer's own byte range instead of covering the whole ArrayBuffer.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT));
  buffer.push(samples);

  // Hand the buffered audio to the VAD, windowSize samples at a time.
  while (buffer.size() > windowSize) {
    const chunk = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(chunk);
  }

  // Decode every speech segment the VAD has queued so far.
  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    const stream = recognizer.createStream();
    stream.acceptWaveform(
        recognizer.config.featConfig.sampleRate, segment.samples);
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    stream.free();

    if (r.text.length > 0) {
      console.log(`${index}: ${r.text}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  recognizer.free();
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
||||||
74
nodejs-examples/test-vad-microphone.js
Normal file
74
nodejs-examples/test-vad-microphone.js
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
/**
 * Create a voice activity detector that uses the silero VAD model found at
 * ./silero_vad.onnx.
 *
 * @returns {sherpa_onnx.VoiceActivityDetector} The caller must call free()
 *     on it.
 */
function createVad() {
  const silero = new sherpa_onnx.SileroVadModelConfig();
  silero.model = './silero_vad.onnx';
  silero.minSpeechDuration = 0.3;   // seconds
  silero.minSilenceDuration = 0.3;  // seconds
  silero.windowSize = 512;

  const config = new sherpa_onnx.VadModelConfig();
  config.sileroVad = silero;
  config.sampleRate = 16000;

  // Size (in seconds) of the buffer kept by the detector itself.
  const bufferSizeInSeconds = 60;
  return new sherpa_onnx.VoiceActivityDetector(config, bufferSizeInSeconds);
}
|
||||||
|
// Declared with const: a bare assignment would create an implicit global and
// throws a ReferenceError in strict mode / ES modules.
const vad = createVad();

// Staging area for raw microphone samples; its capacity is
// bufferSizeInSeconds * sampleRate entries.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

// True once "Detected speech" has been logged for the current speech run, so
// that the message is printed only once per run.
let printed = false;
// Running number of the speech segments reported so far.
let index = 0;

ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;

  // `data` is a Node.js Buffer. Its underlying ArrayBuffer is not guaranteed
  // to correspond exactly to the Buffer, so the view has to be limited to the
  // Buffer's own byte range instead of covering the whole ArrayBuffer.
  const samples = new Float32Array(
      data.buffer, data.byteOffset,
      Math.floor(data.byteLength / Float32Array.BYTES_PER_ELEMENT));
  buffer.push(samples);

  // Hand the buffered audio to the VAD, windowSize samples at a time, and
  // report state changes after every window.
  while (buffer.size() > windowSize) {
    const chunk = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(chunk);

    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Report every finished speech segment queued by the VAD.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
  vad.free();
  buffer.free();
});

ai.start();
console.log('Started! Please speak');
|
||||||
3
scripts/nodejs/.clang-format
Normal file
3
scripts/nodejs/.clang-format
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
Language: JavaScript
|
||||||
|
JavaScriptQuotes: Double
|
||||||
|
|
||||||
2
scripts/nodejs/.gitignore
vendored
Normal file
2
scripts/nodejs/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
node_modules
|
||||||
|
jslint.mjs
|
||||||
9
scripts/nodejs/README.md
Normal file
9
scripts/nodejs/README.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Introduction
|
||||||
|
|
||||||
|
Text-to-speech and speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/).
|
||||||
|
|
||||||
|
It processes everything locally without accessing the Internet.
|
||||||
|
|
||||||
|
Please refer to
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
|
||||||
|
for examples.
|
||||||
717
scripts/nodejs/index.js
Normal file
717
scripts/nodejs/index.js
Normal file
@@ -0,0 +1,717 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
// Please use
|
||||||
|
//
|
||||||
|
// npm install ffi-napi ref-napi ref-array-napi ref-struct-napi debug
|
||||||
|
//
|
||||||
|
// before you use this file
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Please use node 13. Node 16, 18, 20, and 21 are known not to work.
|
||||||
|
// See also
|
||||||
|
// https://github.com/node-ffi-napi/node-ffi-napi/issues/244
|
||||||
|
// and
|
||||||
|
// https://github.com/node-ffi-napi/node-ffi-napi/issues/97
|
||||||
|
"use strict"
|
||||||
|
|
||||||
|
const debug = require("debug")("sherpa-onnx");
|
||||||
|
const os = require("os");
|
||||||
|
const path = require("path");
|
||||||
|
const ffi = require("ffi-napi");
|
||||||
|
const ref = require("ref-napi");
|
||||||
|
const fs = require("fs");
|
||||||
|
var ArrayType = require("ref-array-napi");
|
||||||
|
|
||||||
|
const FloatArray = ArrayType(ref.types.float);
|
||||||
|
const StructType = require("ref-struct-napi");
|
||||||
|
const cstring = ref.types.CString;
|
||||||
|
const cstringPtr = ref.refType(cstring);
|
||||||
|
const int32_t = ref.types.int32;
|
||||||
|
const float = ref.types.float;
|
||||||
|
const floatPtr = ref.refType(float);
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Streaming (online) ASR: struct layouts and pointer types passed through ffi.
//
// NOTE(review): the field order presumably has to mirror the corresponding
// structs of the sherpa-onnx C API; do not reorder fields without checking.
// ---------------------------------------------------------------------------

const SherpaOnnxOnlineTransducerModelConfig = StructType({
  encoder : cstring,
  decoder : cstring,
  joiner : cstring,
});

const SherpaOnnxOnlineParaformerModelConfig = StructType({
  encoder : cstring,
  decoder : cstring,
});

const SherpaOnnxOnlineModelConfig = StructType({
  transducer : SherpaOnnxOnlineTransducerModelConfig,
  paraformer : SherpaOnnxOnlineParaformerModelConfig,
  tokens : cstring,
  numThreads : int32_t,
  provider : cstring,
  debug : int32_t,
  modelType : cstring,
});

const SherpaOnnxFeatureConfig = StructType({
  sampleRate : int32_t,
  featureDim : int32_t,
});

const SherpaOnnxOnlineRecognizerConfig = StructType({
  featConfig : SherpaOnnxFeatureConfig,
  modelConfig : SherpaOnnxOnlineModelConfig,
  decodingMethod : cstring,
  maxActivePaths : int32_t,
  enableEndpoint : int32_t,
  rule1MinTrailingSilence : float,
  rule2MinTrailingSilence : float,
  rule3MinUtteranceLength : float,
  hotwordsFile : cstring,
  hotwordsScore : float,
});

const SherpaOnnxOnlineRecognizerResult = StructType({
  text : cstring,
  tokens : cstring,
  tokensArr : cstringPtr,
  timestamps : floatPtr,
  count : int32_t,
  json : cstring,
});

// Opaque handles: the recognizer and the stream are only ever used as
// pointers on the JavaScript side.
const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void);
const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void);
const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr);

const SherpaOnnxOnlineRecognizerResultPtr =
    ref.refType(SherpaOnnxOnlineRecognizerResult);
const SherpaOnnxOnlineRecognizerConfigPtr =
    ref.refType(SherpaOnnxOnlineRecognizerConfig);
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Non-streaming (offline) ASR: struct layouts and pointer types passed
// through ffi.
//
// NOTE(review): the field order presumably has to mirror the corresponding
// structs of the sherpa-onnx C API; do not reorder fields without checking.
// ---------------------------------------------------------------------------

const SherpaOnnxOfflineTransducerModelConfig = StructType({
  encoder : cstring,
  decoder : cstring,
  joiner : cstring,
});

const SherpaOnnxOfflineParaformerModelConfig = StructType({
  model : cstring,
});

const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({
  model : cstring,
});

const SherpaOnnxOfflineWhisperModelConfig = StructType({
  encoder : cstring,
  decoder : cstring,
});

const SherpaOnnxOfflineTdnnModelConfig = StructType({
  model : cstring,
});

const SherpaOnnxOfflineLMConfig = StructType({
  model : cstring,
  scale : float,
});

const SherpaOnnxOfflineModelConfig = StructType({
  transducer : SherpaOnnxOfflineTransducerModelConfig,
  paraformer : SherpaOnnxOfflineParaformerModelConfig,
  nemoCtc : SherpaOnnxOfflineNemoEncDecCtcModelConfig,
  whisper : SherpaOnnxOfflineWhisperModelConfig,
  tdnn : SherpaOnnxOfflineTdnnModelConfig,
  tokens : cstring,
  numThreads : int32_t,
  debug : int32_t,
  provider : cstring,
  modelType : cstring,
});

const SherpaOnnxOfflineRecognizerConfig = StructType({
  featConfig : SherpaOnnxFeatureConfig,
  modelConfig : SherpaOnnxOfflineModelConfig,
  lmConfig : SherpaOnnxOfflineLMConfig,
  decodingMethod : cstring,
  maxActivePaths : int32_t,
  hotwordsFile : cstring,
  hotwordsScore : float,
});

const SherpaOnnxOfflineRecognizerResult = StructType({
  text : cstring,
  timestamps : floatPtr,
  count : int32_t,
});

// Opaque handles: the recognizer and the stream are only ever used as
// pointers on the JavaScript side.
const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void);
const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void);
const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr);

const SherpaOnnxOfflineRecognizerResultPtr =
    ref.refType(SherpaOnnxOfflineRecognizerResult);
const SherpaOnnxOfflineRecognizerConfigPtr =
    ref.refType(SherpaOnnxOfflineRecognizerConfig);
|
||||||
|
|
||||||
|
// vad
|
||||||
|
// Voice activity detection: struct layouts and pointer types passed through
// ffi.
// NOTE(review): the field order presumably has to mirror the corresponding
// structs of the sherpa-onnx C API; do not reorder fields without checking.
const SherpaOnnxSileroVadModelConfig = StructType({
  model : cstring,
  threshold : float,
  minSilenceDuration : float,
  minSpeechDuration : float,
  windowSize : int32_t,
});

const SherpaOnnxVadModelConfig = StructType({
  sileroVad : SherpaOnnxSileroVadModelConfig,
  sampleRate : int32_t,
  numThreads : int32_t,
  provider : cstring,
  debug : int32_t,
});

const SherpaOnnxSpeechSegment = StructType({
  start : int32_t,
  samples : FloatArray,
  n : int32_t,
});

const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig);
const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment);

// Opaque handles: only ever used as pointers on the JavaScript side.
const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void);
const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void);
|
||||||
|
|
||||||
|
// tts
|
||||||
|
// Text-to-speech: struct layouts and pointer types passed through ffi.
// NOTE(review): the field order presumably has to mirror the corresponding
// structs of the sherpa-onnx C API; do not reorder fields without checking.
const SherpaOnnxOfflineTtsVitsModelConfig = StructType({
  model : cstring,
  lexicon : cstring,
  tokens : cstring,
  noiseScale : float,
  noiseScaleW : float,
  lengthScale : float,
});

const SherpaOnnxOfflineTtsModelConfig = StructType({
  vits : SherpaOnnxOfflineTtsVitsModelConfig,
  numThreads : int32_t,
  debug : int32_t,
  provider : cstring,
});

const SherpaOnnxOfflineTtsConfig = StructType({
  model : SherpaOnnxOfflineTtsModelConfig,
  ruleFsts : cstring,
});

const SherpaOnnxGeneratedAudio = StructType({
  samples : FloatArray,
  n : int32_t,
  sampleRate : int32_t,
});

const SherpaOnnxOfflineTtsVitsModelConfigPtr =
    ref.refType(SherpaOnnxOfflineTtsVitsModelConfig);
const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig);
const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio);

// Opaque handles: only ever used as pointers on the JavaScript side.
const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void);
const SherpaOnnxDisplayPtr = ref.refType(ref.types.void);
|
||||||
|
|
||||||
|
// Location of the native sherpa-onnx C API library for every supported
// platform/architecture pair:
//   platform -> arch -> [ sub-directory of ./lib, library file name ]
// Keys are the values returned by os.platform() and process.arch.
const sherpaOnnxNativeLibs = {
  "win32" : {
    "x64" : [ "win-x64", "sherpa-onnx-c-api.dll" ],
    "ia32" : [ "win-x86", "sherpa-onnx-c-api.dll" ],
  },
  "darwin" : {
    "x64" : [ "osx-x64", "libsherpa-onnx-c-api.dylib" ],
    "arm64" : [ "osx-arm64", "libsherpa-onnx-c-api.dylib" ],
  },
  "linux" : {
    "x64" : [ "linux-x64", "libsherpa-onnx-c-api.so" ],
  },
};

// Error text used when the platform is supported but the architecture is not.
const sherpaOnnxUnsupportedArchHints = {
  "win32" : "Support only Windows x86 and x64 for now.",
  "darwin" : "Support only macOS x64 and arm64 for now.",
  "linux" : "Support only Linux x64 for now.",
};

// Absolute path of the shared library that is loaded through ffi below.
let soname;
{
  const platform = os.platform();
  const libsForPlatform = sherpaOnnxNativeLibs[platform];
  if (!libsForPlatform) {
    throw new Error(`Unsupported platform ${platform}`);
  }

  const lib = libsForPlatform[process.arch];
  if (!lib) {
    throw new Error(
        `${sherpaOnnxUnsupportedArchHints[platform]} Given ${process.arch}`);
  }

  const libDirectory = path.join(__dirname, "lib", lib[0]);
  if (platform === "win32") {
    // Append the DLL directory to the search path (presumably so that DLLs
    // next to sherpa-onnx-c-api.dll can be resolved). Guard against an unset
    // Path, which would otherwise produce the literal text "undefined;...".
    const dllDirectory = path.resolve(libDirectory);
    const currentPath = process.env.Path;
    process.env.Path = currentPath
                           ? currentPath + path.delimiter + dllDirectory
                           : dllDirectory;
  }

  soname = path.join(libDirectory, lib[1]);
}

if (!fs.existsSync(soname)) {
  throw new Error(
      `Cannot find file ${soname}. Please make sure you have run ./build.sh`);
}

debug("soname ", soname);
|
||||||
|
|
||||||
|
// Bindings to the exported functions of the native library.
// Every entry has the form
//   name : [ returnType, [ argumentType, ... ] ]
const libsherpa_onnx = ffi.Library(soname, {
  // ---- online asr ----
  CreateOnlineRecognizer :
      [ SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ] ],
  DestroyOnlineRecognizer : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ],
  CreateOnlineStream :
      [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ],
  CreateOnlineStreamWithHotwords :
      [ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ],
  DestroyOnlineStream : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
  AcceptWaveform :
      [ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ],
  IsOnlineStreamReady :
      [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  DecodeOnlineStream :
      [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  DecodeMultipleOnlineStreams : [
    "void",
    [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ]
  ],
  GetOnlineStreamResult : [
    SherpaOnnxOnlineRecognizerResultPtr,
    [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ]
  ],
  DestroyOnlineRecognizerResult :
      [ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ],
  Reset :
      [ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
  InputFinished : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
  IsEndpoint :
      [ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],

  // ---- offline asr ----
  CreateOfflineRecognizer : [
    SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ]
  ],
  DestroyOfflineRecognizer : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ],
  CreateOfflineStream :
      [ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ],
  DestroyOfflineStream : [ "void", [ SherpaOnnxOfflineStreamPtr ] ],
  AcceptWaveformOffline :
      [ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ],
  DecodeOfflineStream :
      [ "void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ] ],
  DecodeMultipleOfflineStreams : [
    "void",
    [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ]
  ],
  GetOfflineStreamResult :
      [ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ],
  DestroyOfflineRecognizerResult :
      [ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ],

  // ---- vad ----
  SherpaOnnxCreateCircularBuffer : [ SherpaOnnxCircularBufferPtr, [ int32_t ] ],
  SherpaOnnxDestroyCircularBuffer : [ "void", [ SherpaOnnxCircularBufferPtr ] ],
  SherpaOnnxCircularBufferPush :
      [ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ],
  SherpaOnnxCircularBufferGet :
      [ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ],
  SherpaOnnxCircularBufferFree : [ "void", [ FloatArray ] ],
  SherpaOnnxCircularBufferPop :
      [ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ],
  SherpaOnnxCircularBufferSize : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
  SherpaOnnxCircularBufferHead : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
  SherpaOnnxCircularBufferReset : [ "void", [ SherpaOnnxCircularBufferPtr ] ],
  SherpaOnnxCreateVoiceActivityDetector : [
    SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ]
  ],
  SherpaOnnxDestroyVoiceActivityDetector :
      [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxVoiceActivityDetectorAcceptWaveform :
      [ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ],
  SherpaOnnxVoiceActivityDetectorEmpty :
      [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxVoiceActivityDetectorDetected :
      [ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxVoiceActivityDetectorPop :
      [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxVoiceActivityDetectorClear :
      [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxVoiceActivityDetectorFront :
      [ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
  SherpaOnnxDestroySpeechSegment : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ],
  SherpaOnnxVoiceActivityDetectorReset :
      [ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],

  // ---- tts ----
  SherpaOnnxCreateOfflineTts :
      [ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ],
  SherpaOnnxDestroyOfflineTts : [ "void", [ SherpaOnnxOfflineTtsPtr ] ],
  SherpaOnnxOfflineTtsGenerate : [
    SherpaOnnxGeneratedAudioPtr,
    [ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ]
  ],
  SherpaOnnxDestroyOfflineTtsGeneratedAudio :
      [ "void", [ SherpaOnnxGeneratedAudioPtr ] ],
  SherpaOnnxWriteWave : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ],

  // ---- display ----
  CreateDisplay : [ SherpaOnnxDisplayPtr, [ int32_t ] ],
  DestroyDisplay : [ "void", [ SherpaOnnxDisplayPtr ] ],
  SherpaOnnxPrint : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ],
});
|
||||||
|
|
||||||
|
/**
 * Wrapper around the native display helper (CreateDisplay / SherpaOnnxPrint).
 * Call free() when the object is no longer needed.
 */
class Display {
  /**
   * @param maxWordPerLine {Number} Forwarded unchanged to CreateDisplay().
   */
  constructor(maxWordPerLine) {
    this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine);
  }

  /** Destroy the native object. Calling it more than once is harmless. */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.DestroyDisplay(this.handle);
    this.handle = null;
  }

  /**
   * @param idx {Number} Forwarded unchanged to SherpaOnnxPrint().
   * @param s {String} The text to print.
   */
  print(idx, s) {
    libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s);
  }
}
|
||||||
|
|
||||||
|
/** Recognition result of a streaming (online) recognizer. */
class OnlineResult {
  /**
   * @param text {String} The recognized text. It is round-tripped through a
   *     UTF-8 encoded Buffer before being stored in `this.text`.
   */
  constructor(text) {
    const utf8Bytes = Buffer.from(text, "utf-8");
    this.text = utf8Bytes.toString();
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrapper around a native online stream. Instances are created by
 * OnlineRecognizer.createStream(); call free() when done with one.
 */
class OnlineStream {
  /**
   * @param handle Pointer to the native stream.
   */
  constructor(handle) {
    this.handle = handle;
  }

  /** Destroy the native stream. Calling it more than once is harmless. */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.DestroyOnlineStream(this.handle);
    this.handle = null;
  }

  /**
   * Feed audio samples to the stream.
   *
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const numSamples = samples.length;
    libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples, numSamples);
  }
}
|
||||||
|
|
||||||
|
/**
 * Streaming (online) speech recognizer.
 *
 * Typical usage: create a stream with createStream(), feed it with
 * stream.acceptWaveform(), call decode() while isReady() reports that the
 * stream is ready, and read the text with getResult(). Call free() when the
 * recognizer is no longer needed.
 */
class OnlineRecognizer {
  /**
   * @param config {SherpaOnnxOnlineRecognizerConfig} It is kept in
   *     `this.config` and passed by reference to the native library.
   */
  constructor(config) {
    this.config = config;
    this.recognizer_handle =
        libsherpa_onnx.CreateOnlineRecognizer(config.ref());
  }

  /** Destroy the native recognizer. Calling it more than once is harmless. */
  free() {
    if (this.recognizer_handle) {
      libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle);
      this.recognizer_handle = null;
    }
  }

  /**
   * @returns {OnlineStream} A new stream; the caller must free() it.
   */
  createStream() {
    const handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle);
    return new OnlineStream(handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {Number} The value returned by the native IsOnlineStreamReady().
   */
  isReady(stream) {
    return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle,
                                              stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {Number} The value returned by the native IsEndpoint().
   */
  isEndpoint(stream) {
    return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream} The stream to reset.
   */
  reset(stream) {
    libsherpa_onnx.Reset(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream} The stream to decode.
   */
  decode(stream) {
    libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OnlineStream}
   * @returns {OnlineResult} A copy of the current recognition result.
   */
  getResult(stream) {
    const handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle,
                                                        stream.handle);
    const r = handle.deref();

    // The native result must be released even if building the JavaScript
    // result throws (e.g. when the text cannot be converted); otherwise the
    // native memory would leak.
    try {
      return new OnlineResult(r.text);
    } finally {
      libsherpa_onnx.DestroyOnlineRecognizerResult(handle);
    }
  }
}
|
||||||
|
|
||||||
|
/** Recognition result of a non-streaming (offline) recognizer. */
class OfflineResult {
  /**
   * @param text {String} The recognized text. It is round-tripped through a
   *     UTF-8 encoded Buffer before being stored in `this.text`.
   */
  constructor(text) {
    const utf8Bytes = Buffer.from(text, "utf-8");
    this.text = utf8Bytes.toString();
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrapper around a native offline stream. Instances are created by
 * OfflineRecognizer.createStream(); call free() when done with one.
 */
class OfflineStream {
  /**
   * @param handle Pointer to the native stream.
   */
  constructor(handle) {
    this.handle = handle;
  }

  /** Destroy the native stream. Calling it more than once is harmless. */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.DestroyOfflineStream(this.handle);
    this.handle = null;
  }

  /**
   * Feed audio samples to the stream.
   *
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const numSamples = samples.length;
    libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples,
                                         numSamples);
  }
}
|
||||||
|
|
||||||
|
/**
 * Non-streaming (offline) speech recognizer.
 *
 * Typical usage: create a stream with createStream(), feed it with
 * stream.acceptWaveform(), call decode(), and read the text with getResult().
 * Call free() when the recognizer is no longer needed.
 */
class OfflineRecognizer {
  /**
   * @param config {SherpaOnnxOfflineRecognizerConfig} It is kept in
   *     `this.config` and passed by reference to the native library.
   */
  constructor(config) {
    this.config = config;
    this.recognizer_handle =
        libsherpa_onnx.CreateOfflineRecognizer(config.ref());
  }

  /** Destroy the native recognizer. Calling it more than once is harmless. */
  free() {
    if (this.recognizer_handle) {
      libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle);
      this.recognizer_handle = null;
    }
  }

  /**
   * @returns {OfflineStream} A new stream; the caller must free() it.
   */
  createStream() {
    const handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle);
    return new OfflineStream(handle);
  }

  /**
   * @param stream {OfflineStream} The stream to decode.
   */
  decode(stream) {
    libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle);
  }

  /**
   * @param stream {OfflineStream}
   * @returns {OfflineResult} A copy of the recognition result of the stream.
   */
  getResult(stream) {
    const handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle);
    const r = handle.deref();

    // The native result must be released even if building the JavaScript
    // result throws (e.g. when the text cannot be converted); otherwise the
    // native memory would leak.
    try {
      return new OfflineResult(r.text);
    } finally {
      libsherpa_onnx.DestroyOfflineRecognizerResult(handle);
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * A speech segment as returned by VoiceActivityDetector.front().
 */
class SpeechSegment {
  /**
   * @param start The `start` field of the native segment.
   * @param samples {Float32Array} A copy of the segment's samples.
   */
  constructor(start, samples) {
    Object.assign(this, {start, samples});
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrapper around a native circular buffer. This buffer holds only float
 * entries.
 */
class CircularBuffer {
  /**
   * @param capacity {int} The capacity of the circular buffer.
   */
  constructor(capacity) {
    const nativeBuffer = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity);
    this.handle = nativeBuffer;
  }

  /**
   * Destroy the native buffer. The handle is cleared afterwards, so calling
   * this method again does nothing.
   */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle);
    this.handle = null;
  }

  /**
   * @param samples {Float32Array}
   */
  push(samples) {
    const numSamples = samples.length;
    libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples,
                                                numSamples);
  }

  /**
   * @param startIndex Passed through to the native getter.
   * @param n Number of float entries to read.
   * @return {Float32Array} A JS-owned copy of the requested entries.
   */
  get(startIndex, n) {
    const dataPtr =
        libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n);

    // https://tootallnate.github.io/ref/#exports-reinterpret
    const numBytes = n * ref.sizeof.float;
    const nativeView =
        new Float32Array(dataPtr.buffer.reinterpret(numBytes).buffer);

    // Copy first: the native memory is freed on the next line.
    const samples = nativeView.slice(0);
    libsherpa_onnx.SherpaOnnxCircularBufferFree(dataPtr);

    return samples;
  }

  /**
   * @param n Passed through to the native pop.
   */
  pop(n) {
    libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n);
  }

  /**
   * @return The value reported by the native size getter.
   */
  size() {
    const numEntries = libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle);
    return numEntries;
  }

  /**
   * @return The value reported by the native head getter.
   */
  head() {
    const headIndex = libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle);
    return headIndex;
  }

  /** Reset the native buffer. */
  reset() {
    libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle);
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrapper around a native voice activity detector.
 */
class VoiceActivityDetector {
  /**
   * @param config The VAD config struct; `config.ref()` is passed to the
   *               native constructor.
   * @param bufferSizeInSeconds Passed through to the native constructor.
   */
  constructor(config, bufferSizeInSeconds) {
    this.config = config;
    this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector(
        config.ref(), bufferSizeInSeconds);
  }

  /**
   * Destroy the native detector. The handle is cleared afterwards so that a
   * second call is a no-op instead of destroying the same native object
   * twice.
   */
  free() {
    if (this.handle) {
      libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle);
      this.handle = null;
    }
  }

  /**
   * @param samples {Float32Array}
   */
  acceptWaveform(samples) {
    libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform(
        this.handle, samples, samples.length);
  }

  /**
   * @return The value reported by the native "empty" query.
   */
  isEmpty() {
    return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle);
  }

  /**
   * @return The value reported by the native "detected" query.
   */
  isDetected() {
    return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle);
  }

  /** Forward to the native pop. */
  pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); }

  /** Forward to the native clear. */
  clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); }

  /** Forward to the native reset. */
  reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); }

  /**
   * @return {SpeechSegment} A JS-owned copy of the segment returned by the
   *                         native front() call.
   */
  front() {
    const segmentPtr =
        libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle);
    const segment = segmentPtr.deref();

    const buffer =
        segment.samples.buffer.reinterpret(segment.n * ref.sizeof.float)
            .buffer;

    // Copy first: the native segment is destroyed before returning.
    const samples = new Float32Array(buffer).slice(0);
    const ans = new SpeechSegment(segment.start, samples);

    libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segmentPtr);
    return ans;
  }
}
|
||||||
|
|
||||||
|
/**
 * Audio returned by OfflineTts.generate().
 */
class GeneratedAudio {
  /**
   * @param sampleRate The sample rate reported by the native result.
   * @param samples {Float32Array} A copy of the generated samples.
   */
  constructor(sampleRate, samples) {
    Object.assign(this, {sampleRate, samples});
  }

  /**
   * Write the samples to `filename` via the native wave writer.
   *
   * @param filename Passed through to the native writer.
   */
  save(filename) {
    const audio = this.samples;
    libsherpa_onnx.SherpaOnnxWriteWave(audio, audio.length, this.sampleRate,
                                       filename);
  }
}
|
||||||
|
|
||||||
|
/**
 * Wrapper around a native offline text-to-speech engine.
 */
class OfflineTts {
  /**
   * @param config The TTS config struct; `config.ref()` is passed to the
   *               native constructor.
   */
  constructor(config) {
    this.config = config;

    const configPtr = config.ref();
    this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(configPtr);
  }

  /**
   * Destroy the native engine. The handle is cleared afterwards, so calling
   * this method again does nothing.
   */
  free() {
    if (!this.handle) {
      return;
    }

    libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle);
    this.handle = null;
  }

  /**
   * @param text Passed through to the native generator.
   * @param sid Passed through to the native generator (presumably a speaker
   *            id -- confirm against the C API).
   * @param speed Passed through to the native generator.
   * @return {GeneratedAudio} A JS-owned copy of the generated audio.
   */
  generate(text, sid, speed) {
    const audioPtr = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(
        this.handle, text, sid, speed);
    const nativeAudio = audioPtr.deref();

    const numBytes = nativeAudio.n * ref.sizeof.float;
    const nativeView =
        new Float32Array(nativeAudio.samples.buffer.reinterpret(numBytes).buffer);

    // Copy everything we need before the native result is destroyed.
    const samples = nativeView.slice(0);
    const generatedAudio = new GeneratedAudio(nativeAudio.sampleRate, samples);

    libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audioPtr);

    return generatedAudio;
  }
}
|
||||||
|
|
||||||
|
// Short aliases for the config types: each name below is the corresponding
// SherpaOnnx* identifier without the "SherpaOnnx" prefix.

// online asr
const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig;
const OnlineModelConfig = SherpaOnnxOnlineModelConfig;
const FeatureConfig = SherpaOnnxFeatureConfig;
const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig;
const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig;

// offline asr
const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig;
const OfflineModelConfig = SherpaOnnxOfflineModelConfig;
const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig;
const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig;
const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig;
const OfflineNemoEncDecCtcModelConfig =
    SherpaOnnxOfflineNemoEncDecCtcModelConfig;
const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig;

// vad
const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig;
const VadModelConfig = SherpaOnnxVadModelConfig;

// tts
const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig;
const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig;
const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig;
|
||||||
|
|
||||||
|
// Public API of this module, grouped by feature area.
module.exports = {
  // online asr
  OnlineTransducerModelConfig,
  OnlineModelConfig,
  FeatureConfig,
  OnlineRecognizerConfig,
  OnlineRecognizer,
  OnlineStream,
  OnlineParaformerModelConfig,

  // offline asr
  OfflineRecognizer,
  OfflineStream,
  OfflineTransducerModelConfig,
  OfflineModelConfig,
  OfflineRecognizerConfig,
  OfflineParaformerModelConfig,
  OfflineWhisperModelConfig,
  OfflineNemoEncDecCtcModelConfig,
  OfflineTdnnModelConfig,
  // vad
  SileroVadModelConfig,
  VadModelConfig,
  CircularBuffer,
  VoiceActivityDetector,
  // tts
  OfflineTtsVitsModelConfig,
  OfflineTtsModelConfig,
  OfflineTtsConfig,
  OfflineTts,

  // misc
  Display,
};
|
||||||
37
scripts/nodejs/package.json
Normal file
37
scripts/nodejs/package.json
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"name": "sherpa-onnx2",
|
||||||
|
"version": "1.8.10",
|
||||||
|
"description": "Real-time speech recognition with Next-gen Kaldi",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"speech-to-text",
|
||||||
|
"text-to-speech",
|
||||||
|
"real-time speech recognition",
|
||||||
|
"without internet connection",
|
||||||
|
"embedded systems",
|
||||||
|
"open source",
|
||||||
|
"zipformer",
|
||||||
|
"asr",
|
||||||
|
"speech"
|
||||||
|
],
|
||||||
|
"author": "The next-gen Kaldi team",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://github.com/k2-fsa/sherpa-onnx/issues"
|
||||||
|
},
|
||||||
|
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
|
||||||
|
"dependencies": {
|
||||||
|
"ffi-napi": "^4.0.3",
|
||||||
|
"npm": "^6.14.18",
|
||||||
|
"ref-array-napi": "^1.2.2",
|
||||||
|
"ref-napi": "^3.0.3",
|
||||||
|
"ref-struct-napi": "^1.1.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
50
scripts/nodejs/package.json.in
Normal file
50
scripts/nodejs/package.json.in
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
{
|
||||||
|
"name": "sherpa-onnx",
|
||||||
|
"version": "SHERPA_ONNX_VERSION",
|
||||||
|
"description": "Real-time speech recognition with Next-gen Kaldi",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
|
||||||
|
},
|
||||||
|
"keywords": [
|
||||||
|
"speech to text",
|
||||||
|
"text to speech",
|
||||||
|
"transcription",
|
||||||
|
"real-time speech recognition",
|
||||||
|
"without internet connection",
|
||||||
|
"embedded systems",
|
||||||
|
"open source",
|
||||||
|
"zipformer",
|
||||||
|
"asr",
|
||||||
|
"tts",
|
||||||
|
"stt",
|
||||||
|
"c++",
|
||||||
|
"onnxruntime",
|
||||||
|
"onnx",
|
||||||
|
"ai",
|
||||||
|
"next-gen kaldi",
|
||||||
|
"offline",
|
||||||
|
"privacy",
|
||||||
|
"open source",
|
||||||
|
"streaming speech recognition",
|
||||||
|
"speech",
|
||||||
|
"recognition"
|
||||||
|
],
|
||||||
|
"author": "The next-gen Kaldi team",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://github.com/k2-fsa/sherpa-onnx/issues"
|
||||||
|
},
|
||||||
|
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
|
||||||
|
"dependencies": {
|
||||||
|
"ffi-napi": "^4.0.3",
|
||||||
|
"npm": "^6.14.18",
|
||||||
|
"ref-array-napi": "^1.2.2",
|
||||||
|
"ref-napi": "^3.0.3",
|
||||||
|
"ref-struct-napi": "^1.1.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
123
scripts/nodejs/run.sh
Executable file
123
scripts/nodejs/run.sh
Executable file
@@ -0,0 +1,123 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Prepare the npm package in the current directory:
#   1. read SHERPA_ONNX_VERSION from the top-level CMakeLists.txt,
#   2. substitute it into package.json.in and rename that to package.json,
#   3. remove files that should not be part of the published package.
#
# All file operations below use paths relative to the current directory, so
# run this script from the directory that contains package.json.in.
set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Quote the path so that a checkout location containing spaces still works.
SHERPA_ONNX_DIR=$(realpath "$SCRIPT_DIR/../..")
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" "$SHERPA_ONNX_DIR/CMakeLists.txt" | cut -d " " -f 2 | cut -d '"' -f 2)

echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

# The pipeline above exits with the status of the last `cut`, so `set -e`
# does not catch a failed grep. Stop here instead of writing an empty version
# into package.json.
if [ -z "$SHERPA_ONNX_VERSION" ]; then
  echo "Failed to read SHERPA_ONNX_VERSION from $SHERPA_ONNX_DIR/CMakeLists.txt" >&2
  exit 1
fi

sed -i.bak "s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g" ./package.json.in

cp package.json.in package.json
rm package.json.in
rm package.json.in.bak
rm .clang-format
|
||||||
|
|
||||||
|
# Download the Windows x64 wheel for ${SHERPA_ONNX_VERSION} and copy its DLLs
# and .lib files into lib/windows-x64. The PortAudio DLL is removed afterwards.
# Uses ./t as a scratch directory and deletes it when done.
function windows_x64() {
  echo "Process Windows (x64)"
  mkdir -p lib/windows-x64
  dst=$(realpath lib/windows-x64)
  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl"
  local bin_dir="sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path; quote it so that spaces in the checkout
  # location do not split it into several arguments.
  cp -v "$bin_dir"/*.dll "$dst"
  cp -v "$bin_dir"/*.lib "$dst"
  rm -fv "$dst/sherpa-onnx-portaudio.dll"

  cd ..
  rm -rf t
}
|
||||||
|
|
||||||
|
# Download the Windows x86 wheel for ${SHERPA_ONNX_VERSION} and copy its DLLs
# and .lib files into lib/windows-x86. The PortAudio DLL is removed afterwards.
# Uses ./t as a scratch directory and deletes it when done.
function windows_x86() {
  echo "Process Windows (x86)"
  mkdir -p lib/windows-x86
  dst=$(realpath lib/windows-x86)
  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl"
  local bin_dir="sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path; quote it so that spaces in the checkout
  # location do not split it into several arguments.
  cp -v "$bin_dir"/*.dll "$dst"
  cp -v "$bin_dir"/*.lib "$dst"
  rm -fv "$dst/sherpa-onnx-portaudio.dll"

  cd ..
  rm -rf t
}
|
||||||
|
|
||||||
|
# Download the Linux x64 wheel for ${SHERPA_ONNX_VERSION} and copy its shared
# libraries into lib/linux-x64, then remove the ones that are not shipped.
# Uses ./t as a scratch directory and deletes it when done.
function linux_x64() {
  echo "Process Linux (x64)"
  mkdir -p lib/linux-x64
  dst=$(realpath lib/linux-x64)
  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path; quote it so that spaces in the checkout
  # location do not split it into several arguments.
  cp -v sherpa_onnx/lib/*.so* "$dst"
  rm -v "$dst/libcargs.so"
  rm -v "$dst/libsherpa-onnx-portaudio.so"
  rm -v "$dst/libsherpa-onnx-fst.so"
  rm -v "$dst/libonnxruntime.so"

  cd ..
  rm -rf t
}
|
||||||
|
|
||||||
|
# Download the macOS x86_64 wheel for ${SHERPA_ONNX_VERSION} and copy its
# dylibs into lib/osx-x64, then remove the ones that are not shipped.
# Uses ./t as a scratch directory and deletes it when done.
function osx_x64() {
  echo "Process osx-x64"
  mkdir -p lib/osx-x64
  dst=$(realpath lib/osx-x64)
  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_10_14_x86_64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path; quote it so that spaces in the checkout
  # location do not split it into several arguments.
  cp -v sherpa_onnx/lib/*.dylib "$dst/"
  rm -v "$dst/libonnxruntime.dylib"
  rm -v "$dst/libcargs.dylib"
  rm -v "$dst/libsherpa-onnx-fst.dylib"
  rm -v "$dst/libsherpa-onnx-portaudio.dylib"

  cd ..
  rm -rf t
}
|
||||||
|
|
||||||
|
# Download the macOS arm64 wheel for ${SHERPA_ONNX_VERSION} and copy its
# dylibs into lib/osx-arm64, then remove the ones that are not shipped.
# Uses ./t as a scratch directory and deletes it when done.
function osx_arm64() {
  echo "Process osx-arm64"
  mkdir -p lib/osx-arm64
  dst=$(realpath lib/osx-arm64)
  local wheel="sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl"

  mkdir t
  cd t
  wget -q "https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/${wheel}"
  unzip "./${wheel}"

  # $dst is an absolute path; quote it so that spaces in the checkout
  # location do not split it into several arguments.
  cp -v sherpa_onnx/lib/*.dylib "$dst/"
  rm -v "$dst/libonnxruntime.dylib"
  rm -v "$dst/libcargs.dylib"
  rm -v "$dst/libsherpa-onnx-fst.dylib"
  rm -v "$dst/libsherpa-onnx-portaudio.dylib"

  cd ..
  rm -rf t
}
|
||||||
|
|
||||||
|
# Populate lib/<platform> for every supported platform and list the result.
for platform in windows-x64 windows-x86 linux-x64 osx-x64 osx-arm64; do
  # Each platform's function is named like its directory, with '-' as '_'.
  "${platform//-/_}"
  ls -lh lib/$platform
done
|
||||||
@@ -438,6 +438,10 @@ int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) {
|
|||||||
return buffer->impl->Size();
|
return buffer->impl->Size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the value of Head() of the wrapped circular buffer implementation.
int32_t SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer) {
  const int32_t head = buffer->impl->Head();
  return head;
}
|
||||||
|
|
||||||
void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) {
|
void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) {
|
||||||
buffer->impl->Reset();
|
buffer->impl->Reset();
|
||||||
}
|
}
|
||||||
@@ -553,6 +557,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
|||||||
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
tts_config.model.debug = config->model.debug;
|
tts_config.model.debug = config->model.debug;
|
||||||
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
|
||||||
|
|
||||||
if (tts_config.model.debug) {
|
if (tts_config.model.debug) {
|
||||||
fprintf(stderr, "%s\n", tts_config.ToString().c_str());
|
fprintf(stderr, "%s\n", tts_config.ToString().c_str());
|
||||||
|
|||||||
@@ -130,10 +130,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
|
|||||||
const char *text;
|
const char *text;
|
||||||
|
|
||||||
// Pointer to continuous memory which holds string based tokens
|
// Pointer to continuous memory which holds string based tokens
|
||||||
// which are seperated by \0
|
// which are separated by \0
|
||||||
const char *tokens;
|
const char *tokens;
|
||||||
|
|
||||||
// a pointer array contains the address of the first item in tokens
|
// a pointer array containing the address of the first item in tokens
|
||||||
const char *const *tokens_arr;
|
const char *const *tokens_arr;
|
||||||
|
|
||||||
// Pointer to continuous memory which holds timestamps
|
// Pointer to continuous memory which holds timestamps
|
||||||
@@ -532,6 +532,11 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPop(
|
|||||||
SHERPA_ONNX_API int32_t
|
SHERPA_ONNX_API int32_t
|
||||||
SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer);
|
SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer);
|
||||||
|
|
||||||
|
// Return the head of the buffer. It's always non-decreasing until you
|
||||||
|
// invoke SherpaOnnxCircularBufferReset() which resets head to 0.
|
||||||
|
SHERPA_ONNX_API int32_t
|
||||||
|
SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer);
|
||||||
|
|
||||||
// Clear all elements in the buffer
|
// Clear all elements in the buffer
|
||||||
SHERPA_ONNX_API void SherpaOnnxCircularBufferReset(
|
SHERPA_ONNX_API void SherpaOnnxCircularBufferReset(
|
||||||
SherpaOnnxCircularBuffer *buffer);
|
SherpaOnnxCircularBuffer *buffer);
|
||||||
@@ -617,6 +622,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
|
|||||||
|
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
|
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
|
||||||
SherpaOnnxOfflineTtsModelConfig model;
|
SherpaOnnxOfflineTtsModelConfig model;
|
||||||
|
const char *rule_fsts;
|
||||||
} SherpaOnnxOfflineTtsConfig;
|
} SherpaOnnxOfflineTtsConfig;
|
||||||
|
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
|
SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
|
||||||
|
|||||||
@@ -457,7 +457,7 @@ class OnlineRecognizerParaformerImpl : public OnlineRecognizerImpl {
|
|||||||
// (61 - 7) / 6 + 1 = 10
|
// (61 - 7) / 6 + 1 = 10
|
||||||
|
|
||||||
int32_t left_chunk_size_ = 5;
|
int32_t left_chunk_size_ = 5;
|
||||||
int32_t right_chunk_size_ = 5;
|
int32_t right_chunk_size_ = 2;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|||||||
Reference in New Issue
Block a user