Add homophone replacer (hr) support to the WASM ASR JavaScript bindings.

* CI: replace ubuntu-20.04 / macos-12 runners with ubuntu-22.04 /
  ubuntu-latest / macos-13, and update the Windows tool-cache paths for
  Python 3.12 and 3.13.
* nodejs-examples: add test-offline-sense-voice-with-hr.js, document it in
  the README, and run it from .github/scripts/test-nodejs-npm.sh.
* wasm/asr/sherpa-onnx-asr.js: add initSherpaOnnxHomophoneReplacerConfig(),
  which packs dictDir / lexicon / ruleFsts into one heap buffer referenced by
  three 4-byte pointers, and append that struct to both the online and the
  offline recognizer config. freeConfig() releases it.
* wasm/*.cc: extend the struct-size static_asserts and the debug printers
  with the new hr member.

NOTE(review): whitespace inside the hunks was reconstructed, and three added
lines were adjusted during review (cleanup of dict.tar.bz2, an explicit
semicolon after `let offset = 0`, README wording), so the blob ids on the
`index` lines may no longer match the resulting contents.

diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh
index c2d28381..30620e39 100755
--- a/.github/scripts/test-nodejs-npm.sh
+++ b/.github/scripts/test-nodejs-npm.sh
@@ -144,7 +144,18 @@ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
 rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
 
 node ./test-offline-sense-voice.js
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+tar xf dict.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
+
+node ./test-offline-sense-voice-with-hr.js
+
 rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
+rm -rf dict dict.tar.bz2 replace.fst test-hr.wav lexicon.txt
 
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
 ls -lh
diff --git a/.github/workflows/build-wheels-linux-cuda.yaml b/.github/workflows/build-wheels-linux-cuda.yaml
index 1b2c68a7..bd322185 100644
--- a/.github/workflows/build-wheels-linux-cuda.yaml
+++ b/.github/workflows/build-wheels-linux-cuda.yaml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-22.04]
         python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml
index e3f5fd97..93d97990 100644
--- a/.github/workflows/build-wheels-linux.yaml
+++ b/.github/workflows/build-wheels-linux.yaml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest]
+        os: [ubuntu-22.04]
         python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
         manylinux: [manylinux2014] #, manylinux_2_28]
 
diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml
index 695629b6..b7e23ba3 100644
--- a/.github/workflows/test-build-wheel.yaml
+++ b/.github/workflows/test-build-wheel.yaml
@@ -35,11 +35,11 @@ jobs:
       matrix:
         # See https://github.com/actions/runner-images
         include:
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.7"
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.8"
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.9"
           - os: ubuntu-22.04
             python-version: "3.10"
@@ -48,7 +48,7 @@ jobs:
           - os: ubuntu-22.04
             python-version: "3.12"
 
-          - os: macos-12
+          - os: macos-13
             python-version: "3.8"
 
           - os: macos-13
@@ -137,8 +137,8 @@ jobs:
           export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
           export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
           export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
-          export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
-          export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
+          export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
+          export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
 
           which sherpa-onnx
           sherpa-onnx --help
diff --git a/.github/workflows/test-nodejs-addon-npm.yaml b/.github/workflows/test-nodejs-addon-npm.yaml
index 0e2b9f55..34507164 100644
--- a/.github/workflows/test-nodejs-addon-npm.yaml
+++ b/.github/workflows/test-nodejs-addon-npm.yaml
@@ -40,7 +40,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-latest, macos-14, ubuntu-20.04, ubuntu-22.04, windows-latest]
+        os: [macos-latest, macos-14, ubuntu-latest, ubuntu-22.04, windows-latest]
         node-version: ["16", "17", "18", "19", "21", "22"]
 
     steps:
diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml
index 6923add4..9780e175 100644
--- a/.github/workflows/test-pip-install.yaml
+++ b/.github/workflows/test-pip-install.yaml
@@ -30,11 +30,11 @@ jobs:
       matrix:
         # See https://github.com/actions/runner-images
         include:
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.7"
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.8"
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             python-version: "3.9"
           - os: ubuntu-22.04
             python-version: "3.10"
@@ -45,7 +45,7 @@ jobs:
           - os: ubuntu-22.04
             python-version: "3.13"
 
-          - os: macos-12
+          - os: macos-13
             python-version: "3.8"
 
           - os: macos-13
@@ -110,8 +110,8 @@ jobs:
           export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
           export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
           export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
-          export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
-          export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
+          export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
+          export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH
 
           sherpa-onnx --help
           sherpa-onnx-keyword-spotter --help
diff --git a/.github/workflows/test-python-offline-websocket-server.yaml b/.github/workflows/test-python-offline-websocket-server.yaml
index 4fa98464..d3e931ff 100644
--- a/.github/workflows/test-python-offline-websocket-server.yaml
+++ b/.github/workflows/test-python-offline-websocket-server.yaml
@@ -33,7 +33,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
+        os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
         python-version: ["3.10"]
         model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"]
 
diff --git a/.github/workflows/test-python-online-websocket-server.yaml b/.github/workflows/test-python-online-websocket-server.yaml
index d22e9300..f5afa584 100644
--- a/.github/workflows/test-python-online-websocket-server.yaml
+++ b/.github/workflows/test-python-online-websocket-server.yaml
@@ -33,7 +33,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
+        os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
         python-version: ["3.10"]
         model_type: ["transducer", "paraformer", "zipformer2-ctc"]
 
diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md
index 64f7b184..31af5cc2 100644
--- a/nodejs-examples/README.md
+++ b/nodejs-examples/README.md
@@ -182,10 +182,32 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
 node ./test-offline-paraformer.js
 ```
 
+## ./test-offline-sense-voice-with-hr.js
+
+[./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js) demonstrates
+how to decode a file with a non-streaming SenseVoice model with a homophone replacer.
+
+You can use the following command to run it:
+
+```bash
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+tar xf dict.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
+
+node ./test-offline-sense-voice-with-hr.js
+```
+
 ## ./test-offline-sense-voice.js
 
 [./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
-how to decode a file with a non-streaming Paraformer model.
+how to decode a file with a non-streaming SenseVoice model.
 
 You can use the following command to run it:
 
diff --git a/nodejs-examples/test-offline-sense-voice-with-hr.js b/nodejs-examples/test-offline-sense-voice-with-hr.js
new file mode 100644
index 00000000..576f1335
--- /dev/null
+++ b/nodejs-examples/test-offline-sense-voice-with-hr.js
@@ -0,0 +1,40 @@
+// Copyright (c) 2024-2025 Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createOfflineRecognizer() {
+  let modelConfig = {
+    senseVoice: {
+      model:
+          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
+      language: '',
+      useInverseTextNormalization: 1,
+    },
+    tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
+  };
+
+  let config = {
+    modelConfig: modelConfig,
+    hr: {
+      dictDir: './dict',
+      lexicon: './lexicon.txt',
+      ruleFsts: './replace.fst',
+    },
+  };
+
+  return sherpa_onnx.createOfflineRecognizer(config);
+}
+
+const recognizer = createOfflineRecognizer();
+const stream = recognizer.createStream();
+
+const waveFilename = './test-hr.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
+
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
+
+stream.free();
+recognizer.free();
diff --git a/wasm/asr/sherpa-onnx-asr.js b/wasm/asr/sherpa-onnx-asr.js
index ebcda516..3c6324ef 100644
--- a/wasm/asr/sherpa-onnx-asr.js
+++ b/wasm/asr/sherpa-onnx-asr.js
@@ -63,6 +63,10 @@ function freeConfig(config, Module) {
     freeConfig(config.ctcFstDecoder, Module)
   }
 
+  if ('hr' in config) {
+    freeConfig(config.hr, Module)
+  }
+
   Module._free(config.ptr);
 }
 
@@ -281,6 +285,34 @@ function initSherpaOnnxFeatureConfig(config, Module) {
   return {ptr: ptr, len: len};
 }
 
+function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
+  const len = 3 * 4;
+  const ptr = Module._malloc(len);
+
+  const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
+  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
+  const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
+
+  const bufferLen = dictDirLen + lexiconLen + ruleFstsLen;
+
+  const buffer = Module._malloc(bufferLen);
+  let offset = 0;
+  Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
+  offset += dictDirLen;
+
+  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
+  offset += lexiconLen;
+
+  Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
+  offset += ruleFstsLen;
+
+  Module.setValue(ptr, buffer, 'i8*');
+  Module.setValue(ptr + 4, buffer + dictDirLen, 'i8*');
+  Module.setValue(ptr + 8, buffer + dictDirLen + lexiconLen, 'i8*');
+
+  return {ptr: ptr, len: len, buffer: buffer};
+}
+
 function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
   const len = 2 * 4;
   const ptr = Module._malloc(len);
@@ -317,12 +349,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
     config.hotwordsBufSize = 0;
   }
 
+  if (!('hr' in config)) {
+    config.hr = {
+      dictDir: '',
+      lexicon: '',
+      ruleFsts: '',
+    };
+  }
+
   const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
   const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
   const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
       config.ctcFstDecoderConfig, Module)
+  const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
 
-  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
+  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
   const ptr = Module._malloc(len);
 
   let offset = 0;
@@ -411,9 +452,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
   Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
   offset += 4;
 
+  Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
+  offset += hr.len;
+
   return {
     buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
-        ctcFstDecoder: ctcFstDecoder
+        ctcFstDecoder: ctcFstDecoder, hr: hr,
   }
 }
 
@@ -989,11 +1033,20 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
     };
   }
 
+  if (!('hr' in config)) {
+    config.hr = {
+      dictDir: '',
+      lexicon: '',
+      ruleFsts: '',
+    };
+  }
+
   const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
   const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
   const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
+  const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
 
-  const len = feat.len + model.len + lm.len + 7 * 4;
+  const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
   const ptr = Module._malloc(len);
 
   let offset = 0;
@@ -1056,8 +1109,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
   Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
   offset += 4;
 
+  Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
+  offset += hr.len;
+
   return {
-    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
+    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm,
+    hr: hr,
   }
 }
 
diff --git a/wasm/asr/sherpa-onnx-wasm-main-asr.cc b/wasm/asr/sherpa-onnx-wasm-main-asr.cc
index ffd90c20..f7af30e9 100644
--- a/wasm/asr/sherpa-onnx-wasm-main-asr.cc
+++ b/wasm/asr/sherpa-onnx-wasm-main-asr.cc
@@ -26,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
 static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
                   sizeof(SherpaOnnxFeatureConfig) +
                       sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
-                      sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4,
+                      sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4 +
+                      sizeof(SherpaOnnxHomophoneReplacerConfig),
               "");
 
 void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
@@ -82,6 +83,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
   fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
   fprintf(stdout, "max_active: %d\n",
           config->ctc_fst_decoder_config.max_active);
+
+  fprintf(stdout, "----------hr config----------\n");
+  fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
+  fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
+  fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
 }
 
 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
diff --git a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
index 3dc5611d..0a5bb6f6 100644
--- a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
+++ b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
@@ -38,7 +38,8 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
                   sizeof(SherpaOnnxFeatureConfig) +
                       sizeof(SherpaOnnxOfflineLMConfig) +
-                      sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4,
+                      sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4 +
+                      sizeof(SherpaOnnxHomophoneReplacerConfig),
               "");
 
 void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -137,6 +138,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
   fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
   fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
   fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
+  fprintf(stdout, "----------hr config----------\n");
+  fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
+  fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
+  fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
 }
 
 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {