Inverse text normalization API of streaming ASR for various programming languages (#1022)
This commit is contained in:
2
.github/scripts/test-dart.sh
vendored
2
.github/scripts/test-dart.sh
vendored
@@ -66,7 +66,9 @@ echo '----------streaming zipformer ctc----------'
|
|||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
|
|
||||||
echo '----------streaming zipformer transducer----------'
|
echo '----------streaming zipformer transducer----------'
|
||||||
|
./run-zipformer-transducer-itn.sh
|
||||||
./run-zipformer-transducer.sh
|
./run-zipformer-transducer.sh
|
||||||
|
rm -f itn*
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
|
|
||||||
echo '----------streaming NeMo transducer----------'
|
echo '----------streaming NeMo transducer----------'
|
||||||
|
|||||||
13
.github/scripts/test-dot-net.sh
vendored
13
.github/scripts/test-dot-net.sh
vendored
@@ -2,7 +2,13 @@
|
|||||||
|
|
||||||
cd dotnet-examples/
|
cd dotnet-examples/
|
||||||
|
|
||||||
cd ./offline-decode-files
|
cd ./online-decode-files
|
||||||
|
./run-transducer-itn.sh
|
||||||
|
./run-zipformer2-ctc.sh
|
||||||
|
./run-transducer.sh
|
||||||
|
./run-paraformer.sh
|
||||||
|
|
||||||
|
cd ../offline-decode-files
|
||||||
./run-paraformer-itn.sh
|
./run-paraformer-itn.sh
|
||||||
./run-telespeech-ctc.sh
|
./run-telespeech-ctc.sh
|
||||||
./run-nemo-ctc.sh
|
./run-nemo-ctc.sh
|
||||||
@@ -27,11 +33,6 @@ cd ../streaming-hlg-decoding/
|
|||||||
cd ../spoken-language-identification
|
cd ../spoken-language-identification
|
||||||
./run.sh
|
./run.sh
|
||||||
|
|
||||||
cd ../online-decode-files
|
|
||||||
./run-zipformer2-ctc.sh
|
|
||||||
./run-transducer.sh
|
|
||||||
./run-paraformer.sh
|
|
||||||
|
|
||||||
cd ../offline-tts
|
cd ../offline-tts
|
||||||
./run-aishell3.sh
|
./run-aishell3.sh
|
||||||
./run-piper.sh
|
./run-piper.sh
|
||||||
|
|||||||
9
.github/scripts/test-nodejs-addon-npm.sh
vendored
9
.github/scripts/test-nodejs-addon-npm.sh
vendored
@@ -70,6 +70,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/s
|
|||||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
|
||||||
|
rm -f itn*
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
|
||||||
|
node test_asr_streaming_transducer_itn.js
|
||||||
|
|
||||||
node test_asr_streaming_transducer.js
|
node test_asr_streaming_transducer.js
|
||||||
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||||
@@ -120,6 +127,8 @@ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
|||||||
|
|
||||||
node ./test_asr_non_streaming_paraformer.js
|
node ./test_asr_non_streaming_paraformer.js
|
||||||
|
|
||||||
|
rm -f itn*
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
|
||||||
|
|||||||
10
.github/scripts/test-nodejs-npm.sh
vendored
10
.github/scripts/test-nodejs-npm.sh
vendored
@@ -15,6 +15,8 @@ curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/s
|
|||||||
ls -lh
|
ls -lh
|
||||||
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
|
|
||||||
|
rm -f itn*
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
node ./test-offline-paraformer-itn.js
|
node ./test-offline-paraformer-itn.js
|
||||||
@@ -57,7 +59,15 @@ rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
|||||||
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
|
||||||
|
rm -f itn*
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
|
||||||
|
node ./test-online-transducer-itn.js
|
||||||
|
|
||||||
node ./test-online-transducer.js
|
node ./test-online-transducer.js
|
||||||
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||||
|
|
||||||
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
|
|||||||
2
.github/workflows/build-wheels-aarch64.yaml
vendored
2
.github/workflows/build-wheels-aarch64.yaml
vendored
@@ -2,6 +2,8 @@ name: build-wheels-aarch64
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- wheel
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
2
.github/workflows/build-wheels-armv7l.yaml
vendored
2
.github/workflows/build-wheels-armv7l.yaml
vendored
@@ -2,6 +2,8 @@ name: build-wheels-armv7l
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- wheel
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
2
.github/workflows/build-wheels-linux.yaml
vendored
2
.github/workflows/build-wheels-linux.yaml
vendored
@@ -2,6 +2,8 @@ name: build-wheels-linux
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- wheel
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ name: build-wheels-macos-arm64
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- wheel
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
@@ -84,7 +86,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
opts='--break-system-packages'
|
opts='--break-system-packages'
|
||||||
v=${{ matrix.python-version }}
|
v=${{ matrix.python-version }}
|
||||||
if [[ $v == cp38 || $v == cp39 ]]; then
|
if [[ $v == cp37 || $v == cp38 || $v == cp39 ]]; then
|
||||||
opts=''
|
opts=''
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
opts='--break-system-packages'
|
opts='--break-system-packages'
|
||||||
v=${{ matrix.python-version }}
|
v=${{ matrix.python-version }}
|
||||||
if [[ $v == cp38 || $v == cp39 ]]; then
|
if [[ $v == cp37 || $v == cp38 || $v == cp39 ]]; then
|
||||||
opts=''
|
opts=''
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/build-wheels-win32.yaml
vendored
2
.github/workflows/build-wheels-win32.yaml
vendored
@@ -2,6 +2,8 @@ name: build-wheels-win32
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
branches:
|
||||||
|
- wheel
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
3
.github/workflows/run-java-test.yaml
vendored
3
.github/workflows/run-java-test.yaml
vendored
@@ -173,6 +173,9 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd ./java-api-examples
|
cd ./java-api-examples
|
||||||
|
./run-inverse-text-normalization-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-streaming-*
|
||||||
|
|
||||||
./run-streaming-decode-file-ctc.sh
|
./run-streaming-decode-file-ctc.sh
|
||||||
# Delete model files to save space
|
# Delete model files to save space
|
||||||
rm -rf sherpa-onnx-streaming-*
|
rm -rf sherpa-onnx-streaming-*
|
||||||
|
|||||||
3
.github/workflows/test-go.yaml
vendored
3
.github/workflows/test-go.yaml
vendored
@@ -187,6 +187,9 @@ jobs:
|
|||||||
./run-transducer.sh
|
./run-transducer.sh
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
||||||
|
|
||||||
|
./run-transducer-itn.sh
|
||||||
|
rm -rf sherpa-onnx-streaming-*
|
||||||
|
|
||||||
echo "Test paraformer"
|
echo "Test paraformer"
|
||||||
./run-paraformer.sh
|
./run-paraformer.sh
|
||||||
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ project(sherpa-onnx)
|
|||||||
# Remember to update
|
# Remember to update
|
||||||
# ./nodejs-addon-examples
|
# ./nodejs-addon-examples
|
||||||
# ./dart-api-examples/
|
# ./dart-api-examples/
|
||||||
set(SHERPA_ONNX_VERSION "1.9.30")
|
# ./sherpa-onnx/flutter/CHANGELOG.md
|
||||||
|
set(SHERPA_ONNX_VERSION "1.10.0")
|
||||||
|
|
||||||
# Disable warning about
|
# Disable warning about
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -196,6 +196,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val type = 0
|
val type = 0
|
||||||
|
var ruleFsts : String?
|
||||||
|
ruleFsts = null
|
||||||
|
|
||||||
Log.i(TAG, "Select model type $type")
|
Log.i(TAG, "Select model type $type")
|
||||||
val config = OnlineRecognizerConfig(
|
val config = OnlineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
@@ -205,6 +208,10 @@ class MainActivity : AppCompatActivity() {
|
|||||||
enableEndpoint = true,
|
enableEndpoint = true,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (ruleFsts != null) {
|
||||||
|
config.ruleFsts = ruleFsts
|
||||||
|
}
|
||||||
|
|
||||||
recognizer = OnlineRecognizer(
|
recognizer = OnlineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
|
|||||||
@@ -194,6 +194,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val firstType = 9
|
val firstType = 9
|
||||||
|
val firstRuleFsts: String?
|
||||||
|
firstRuleFsts = null
|
||||||
Log.i(TAG, "Select model type $firstType for the first pass")
|
Log.i(TAG, "Select model type $firstType for the first pass")
|
||||||
val config = OnlineRecognizerConfig(
|
val config = OnlineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
@@ -201,6 +203,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
endpointConfig = getEndpointConfig(),
|
endpointConfig = getEndpointConfig(),
|
||||||
enableEndpoint = true,
|
enableEndpoint = true,
|
||||||
)
|
)
|
||||||
|
if (firstRuleFsts != null) {
|
||||||
|
config.ruleFsts = firstRuleFsts;
|
||||||
|
}
|
||||||
|
|
||||||
onlineRecognizer = OnlineRecognizer(
|
onlineRecognizer = OnlineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
@@ -213,6 +218,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val secondType = 0
|
val secondType = 0
|
||||||
|
var secondRuleFsts: String?
|
||||||
|
secondRuleFsts = null
|
||||||
Log.i(TAG, "Select model type $secondType for the second pass")
|
Log.i(TAG, "Select model type $secondType for the second pass")
|
||||||
|
|
||||||
val config = OfflineRecognizerConfig(
|
val config = OfflineRecognizerConfig(
|
||||||
@@ -220,6 +227,10 @@ class MainActivity : AppCompatActivity() {
|
|||||||
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (secondRuleFsts != null) {
|
||||||
|
config.ruleFsts = secondRuleFsts
|
||||||
|
}
|
||||||
|
|
||||||
offlineRecognizer = OfflineRecognizer(
|
offlineRecognizer = OfflineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
|
|||||||
@@ -200,12 +200,17 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val asrModelType = 0
|
val asrModelType = 0
|
||||||
|
val asrRuleFsts: String?
|
||||||
|
asrRuleFsts = null
|
||||||
Log.i(TAG, "Select model type ${asrModelType} for ASR")
|
Log.i(TAG, "Select model type ${asrModelType} for ASR")
|
||||||
|
|
||||||
val config = OfflineRecognizerConfig(
|
val config = OfflineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getOfflineModelConfig(type = asrModelType)!!,
|
modelConfig = getOfflineModelConfig(type = asrModelType)!!,
|
||||||
)
|
)
|
||||||
|
if (asrRuleFsts != null) {
|
||||||
|
config.ruleFsts = asrRuleFsts;
|
||||||
|
}
|
||||||
|
|
||||||
offlineRecognizer = OfflineRecognizer(
|
offlineRecognizer = OfflineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
|
|||||||
@@ -63,23 +63,15 @@ function(download_kaldi_decoder)
|
|||||||
kaldi-decoder-core
|
kaldi-decoder-core
|
||||||
kaldifst_core
|
kaldifst_core
|
||||||
fst
|
fst
|
||||||
|
fstfar
|
||||||
DESTINATION ..)
|
DESTINATION ..)
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
|
||||||
install(TARGETS
|
|
||||||
fstfar
|
|
||||||
DESTINATION ..)
|
|
||||||
endif()
|
|
||||||
else()
|
else()
|
||||||
install(TARGETS
|
install(TARGETS
|
||||||
kaldi-decoder-core
|
kaldi-decoder-core
|
||||||
kaldifst_core
|
kaldifst_core
|
||||||
fst
|
fst
|
||||||
|
fstfar
|
||||||
DESTINATION lib)
|
DESTINATION lib)
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
|
||||||
install(TARGETS
|
|
||||||
fstfar
|
|
||||||
DESTINATION lib)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WIN32 AND BUILD_SHARED_LIBS)
|
if(WIN32 AND BUILD_SHARED_LIBS)
|
||||||
@@ -87,12 +79,8 @@ function(download_kaldi_decoder)
|
|||||||
kaldi-decoder-core
|
kaldi-decoder-core
|
||||||
kaldifst_core
|
kaldifst_core
|
||||||
fst
|
fst
|
||||||
|
fstfar
|
||||||
DESTINATION bin)
|
DESTINATION bin)
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
|
||||||
install(TARGETS
|
|
||||||
fstfar
|
|
||||||
DESTINATION bin)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.9.30
|
sherpa_onnx: ^1.10.0
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ void main(List<String> arguments) async {
|
|||||||
..addOption('decoder', help: 'Path to decoder model')
|
..addOption('decoder', help: 'Path to decoder model')
|
||||||
..addOption('joiner', help: 'Path to joiner model')
|
..addOption('joiner', help: 'Path to joiner model')
|
||||||
..addOption('tokens', help: 'Path to tokens.txt')
|
..addOption('tokens', help: 'Path to tokens.txt')
|
||||||
|
..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
|
||||||
..addOption('input-wav', help: 'Path to input.wav to transcribe');
|
..addOption('input-wav', help: 'Path to input.wav to transcribe');
|
||||||
|
|
||||||
final res = parser.parse(arguments);
|
final res = parser.parse(arguments);
|
||||||
@@ -31,6 +32,7 @@ void main(List<String> arguments) async {
|
|||||||
final decoder = res['decoder'] as String;
|
final decoder = res['decoder'] as String;
|
||||||
final joiner = res['joiner'] as String;
|
final joiner = res['joiner'] as String;
|
||||||
final tokens = res['tokens'] as String;
|
final tokens = res['tokens'] as String;
|
||||||
|
final ruleFsts = res['rule-fsts'] as String;
|
||||||
final inputWav = res['input-wav'] as String;
|
final inputWav = res['input-wav'] as String;
|
||||||
|
|
||||||
final transducer = sherpa_onnx.OnlineTransducerModelConfig(
|
final transducer = sherpa_onnx.OnlineTransducerModelConfig(
|
||||||
@@ -45,7 +47,10 @@ void main(List<String> arguments) async {
|
|||||||
debug: true,
|
debug: true,
|
||||||
numThreads: 1,
|
numThreads: 1,
|
||||||
);
|
);
|
||||||
final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
|
final config = sherpa_onnx.OnlineRecognizerConfig(
|
||||||
|
model: modelConfig,
|
||||||
|
ruleFsts: ruleFsts,
|
||||||
|
);
|
||||||
final recognizer = sherpa_onnx.OnlineRecognizer(config);
|
final recognizer = sherpa_onnx.OnlineRecognizer(config);
|
||||||
|
|
||||||
final waveData = sherpa_onnx.readWave(inputWav);
|
final waveData = sherpa_onnx.readWave(inputWav);
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.9.30
|
sherpa_onnx: ^1.10.0
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
28
dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh
Executable file
28
dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh
Executable file
@@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Runs the Dart streaming zipformer transducer example with inverse text
# normalization (ITN). Anything missing from the current directory (the
# model, the test wave, the rule FST) is downloaded first.

set -ex

dart pub get

model_dir=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
release_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models

# tokens.txt is used as the marker that the model archive has been unpacked.
if [ ! -f "./${model_dir}/tokens.txt" ]; then
  curl -SL -O "${release_url}/${model_dir}.tar.bz2"
  tar xvf "${model_dir}.tar.bz2"
  rm "${model_dir}.tar.bz2"
fi

# Test wave first, then the rule FST; each is fetched only when absent.
for asset in itn-zh-number.wav itn_zh_number.fst; do
  if [ ! -f "./${asset}" ]; then
    curl -SL -O "${release_url}/${asset}"
  fi
done

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder "./${model_dir}/encoder-epoch-99-avg-1.int8.onnx" \
  --decoder "./${model_dir}/decoder-epoch-99-avg-1.onnx" \
  --joiner "./${model_dir}/joiner-epoch-99-avg-1.int8.onnx" \
  --tokens "./${model_dir}/tokens.txt" \
  --rule-fsts ./itn_zh_number.fst \
  --input-wav ./itn-zh-number.wav
|
||||||
@@ -8,7 +8,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.9.30
|
sherpa_onnx: ^1.10.0
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.9.30
|
sherpa_onnx: ^1.10.0
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -85,6 +85,10 @@ larger than this value. Used only when --enable-endpoint is true.")]
|
|||||||
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
|
||||||
public float HotwordsScore { get; set; }
|
public float HotwordsScore { get; set; }
|
||||||
|
|
||||||
|
[Option("rule-fsts", Required = false, Default = "",
|
||||||
|
HelpText = "If not empty, path to rule fst for inverse text normalization")]
|
||||||
|
public string RuleFsts { get; set; }
|
||||||
|
|
||||||
|
|
||||||
[Option("files", Required = true, HelpText = "Audio files for decoding")]
|
[Option("files", Required = true, HelpText = "Audio files for decoding")]
|
||||||
public IEnumerable<string> Files { get; set; }
|
public IEnumerable<string> Files { get; set; }
|
||||||
@@ -189,6 +193,7 @@ to download pre-trained streaming models.
|
|||||||
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
|
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
|
||||||
config.HotwordsFile = options.HotwordsFile;
|
config.HotwordsFile = options.HotwordsFile;
|
||||||
config.HotwordsScore = options.HotwordsScore;
|
config.HotwordsScore = options.HotwordsScore;
|
||||||
|
config.RuleFsts = options.RuleFsts;
|
||||||
|
|
||||||
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
||||||
|
|
||||||
|
|||||||
28
dotnet-examples/online-decode-files/run-transducer-itn.sh
Executable file
28
dotnet-examples/online-decode-files/run-transducer-itn.sh
Executable file
@@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Decodes ./itn-zh-number.wav with a streaming zipformer transducer and
# applies inverse text normalization (ITN) through a rule FST.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

set -ex

# Download and unpack the model only when its directory is absent.
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# --rule-fsts must be passed explicitly: the option defaults to an empty
# string, so without it the downloaded FST is never used and the run does
# not exercise inverse text normalization at all.
dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --decoding-method greedy_search \
  --rule-fsts ./itn_zh_number.fst \
  --files ./itn-zh-number.wav
|
||||||
@@ -30,6 +30,8 @@ func main() {
|
|||||||
flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use")
|
flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use")
|
||||||
flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
|
flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
|
||||||
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
|
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
|
||||||
|
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
||||||
|
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
||||||
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
|||||||
30
go-api-examples/streaming-decode-files/run-transducer-itn.sh
Executable file
30
go-api-examples/streaming-decode-files/run-transducer-itn.sh
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the Go streaming-decode-files example and runs it with inverse text
# normalization (ITN). Anything missing from the current directory (the
# model, the test wave, the rule FST) is downloaded first.

set -ex

model_dir=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
release_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models

# tokens.txt is used as the marker that the model archive has been unpacked.
if [ ! -f "./${model_dir}/tokens.txt" ]; then
  curl -SL -O "${release_url}/${model_dir}.tar.bz2"
  tar xvf "${model_dir}.tar.bz2"
  rm "${model_dir}.tar.bz2"
fi

# Test wave first, then the rule FST; each is fetched only when absent.
for asset in itn-zh-number.wav itn_zh_number.fst; do
  if [ ! -f "./${asset}" ]; then
    curl -SL -O "${release_url}/${asset}"
  fi
done

go mod tidy
go build

./streaming-decode-files \
  --encoder "./${model_dir}/encoder-epoch-99-avg-1.int8.onnx" \
  --decoder "./${model_dir}/decoder-epoch-99-avg-1.onnx" \
  --joiner "./${model_dir}/joiner-epoch-99-avg-1.int8.onnx" \
  --tokens "./${model_dir}/tokens.txt" \
  --model-type zipformer \
  --rule-fsts ./itn_zh_number.fst \
  --debug 0 \
  ./itn-zh-number.wav
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
// Copyright 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
// This file shows how to use a streaming transducer
|
||||||
|
// to decode files with inverse text normalization.
|
||||||
|
import com.k2fsa.sherpa.onnx.*;
|
||||||
|
|
||||||
|
public class InverseTextNormalizationStreamingTransducer {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
// please refer to
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
|
||||||
|
// to download model files
|
||||||
|
String encoder =
|
||||||
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
|
||||||
|
String decoder =
|
||||||
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
|
||||||
|
String joiner =
|
||||||
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
|
||||||
|
String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
|
||||||
|
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
String waveFilename = "./itn-zh-number.wav";
|
||||||
|
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
String ruleFsts = "./itn_zh_number.fst";
|
||||||
|
|
||||||
|
WaveReader reader = new WaveReader(waveFilename);
|
||||||
|
|
||||||
|
OnlineTransducerModelConfig transducer =
|
||||||
|
OnlineTransducerModelConfig.builder()
|
||||||
|
.setEncoder(encoder)
|
||||||
|
.setDecoder(decoder)
|
||||||
|
.setJoiner(joiner)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
OnlineModelConfig modelConfig =
|
||||||
|
OnlineModelConfig.builder()
|
||||||
|
.setTransducer(transducer)
|
||||||
|
.setTokens(tokens)
|
||||||
|
.setNumThreads(1)
|
||||||
|
.setDebug(true)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
OnlineRecognizerConfig config =
|
||||||
|
OnlineRecognizerConfig.builder()
|
||||||
|
.setOnlineModelConfig(modelConfig)
|
||||||
|
.setDecodingMethod("greedy_search")
|
||||||
|
.setRuleFsts(ruleFsts)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
OnlineRecognizer recognizer = new OnlineRecognizer(config);
|
||||||
|
OnlineStream stream = recognizer.createStream();
|
||||||
|
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
|
||||||
|
|
||||||
|
float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
|
||||||
|
stream.acceptWaveform(tailPaddings, reader.getSampleRate());
|
||||||
|
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
String text = recognizer.getResult(stream).getText();
|
||||||
|
|
||||||
|
System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
|
||||||
|
|
||||||
|
stream.release();
|
||||||
|
recognizer.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
45
java-api-examples/run-inverse-text-normalization-transducer.sh
Executable file
45
java-api-examples/run-inverse-text-normalization-transducer.sh
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Runs the Java streaming-transducer inverse text normalization example.
# Builds the JNI library and sherpa-onnx.jar when they are missing, downloads
# the model, the test wave, and the rule FST when they are missing, then
# launches InverseTextNormalizationStreamingTransducer.java.

set -ex

# Build the JNI library only if neither the macOS nor the Linux artifact exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

# $PWD is quoted so a checkout path containing spaces is not split into
# several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  InverseTextNormalizationStreamingTransducer.java
|
||||||
@@ -203,7 +203,7 @@ function testOfflineAsr() {
|
|||||||
java -Djava.library.path=../build/lib -jar $out_filename
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
function testInverseTextNormalizationAsr() {
|
function testInverseTextNormalizationOfflineAsr() {
|
||||||
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
|
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||||
@@ -218,9 +218,9 @@ function testInverseTextNormalizationAsr() {
|
|||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
fi
|
fi
|
||||||
|
|
||||||
out_filename=test_offline_asr.jar
|
out_filename=test_itn_offline_asr.jar
|
||||||
kotlinc-jvm -include-runtime -d $out_filename \
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
test_itn_asr.kt \
|
test_itn_offline_asr.kt \
|
||||||
FeatureConfig.kt \
|
FeatureConfig.kt \
|
||||||
OfflineRecognizer.kt \
|
OfflineRecognizer.kt \
|
||||||
OfflineStream.kt \
|
OfflineStream.kt \
|
||||||
@@ -231,6 +231,34 @@ function testInverseTextNormalizationAsr() {
|
|||||||
java -Djava.library.path=../build/lib -jar $out_filename
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testInverseTextNormalizationOnlineAsr() {
|
||||||
|
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./itn-zh-number.wav ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
|
|
||||||
|
out_filename=test_itn_online_asr.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_itn_online_asr.kt \
|
||||||
|
FeatureConfig.kt \
|
||||||
|
OnlineRecognizer.kt \
|
||||||
|
OnlineStream.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
|
}
|
||||||
|
|
||||||
function testPunctuation() {
|
function testPunctuation() {
|
||||||
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
|
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||||
@@ -257,4 +285,5 @@ testAudioTagging
|
|||||||
testSpokenLanguageIdentification
|
testSpokenLanguageIdentification
|
||||||
testOfflineAsr
|
testOfflineAsr
|
||||||
testPunctuation
|
testPunctuation
|
||||||
testInverseTextNormalizationAsr
|
testInverseTextNormalizationOfflineAsr
|
||||||
|
testInverseTextNormalizationOnlineAsr
|
||||||
|
|||||||
41
kotlin-api-examples/test_itn_online_asr.kt
Normal file
41
kotlin-api-examples/test_itn_online_asr.kt
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
fun main() {
|
||||||
|
test()
|
||||||
|
}
|
||||||
|
|
||||||
|
fun test() {
|
||||||
|
val recognizer = createOnlineRecognizer()
|
||||||
|
val waveFilename = "./itn-zh-number.wav";
|
||||||
|
|
||||||
|
val objArray = WaveReader.readWaveFromFile(
|
||||||
|
filename = waveFilename,
|
||||||
|
)
|
||||||
|
val samples: FloatArray = objArray[0] as FloatArray
|
||||||
|
val sampleRate: Int = objArray[1] as Int
|
||||||
|
|
||||||
|
val stream = recognizer.createStream()
|
||||||
|
stream.acceptWaveform(samples, sampleRate=sampleRate)
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream)
|
||||||
|
}
|
||||||
|
|
||||||
|
val result = recognizer.getResult(stream).text
|
||||||
|
println(result)
|
||||||
|
|
||||||
|
stream.release()
|
||||||
|
recognizer.release()
|
||||||
|
}
|
||||||
|
|
||||||
|
fun createOnlineRecognizer(): OnlineRecognizer {
|
||||||
|
val config = OnlineRecognizerConfig(
|
||||||
|
featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
|
||||||
|
modelConfig = getModelConfig(8)!!,
|
||||||
|
)
|
||||||
|
|
||||||
|
config.ruleFsts = "./itn_zh_number.fst"
|
||||||
|
println(config)
|
||||||
|
|
||||||
|
return OnlineRecognizer(config = config)
|
||||||
|
}
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"sherpa-onnx-node": "^1.9.30"
|
"sherpa-onnx-node": "^1.10.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
59
nodejs-addon-examples/test_asr_streaming_transducer_itn.js
Normal file
59
nodejs-addon-examples/test_asr_streaming_transducer_itn.js
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
|
||||||
|
// Please download test files from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
const config = {
|
||||||
|
'featConfig': {
|
||||||
|
'sampleRate': 16000,
|
||||||
|
'featureDim': 80,
|
||||||
|
},
|
||||||
|
'modelConfig': {
|
||||||
|
'transducer': {
|
||||||
|
'encoder':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
|
||||||
|
'decoder':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
|
||||||
|
'joiner':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
|
||||||
|
},
|
||||||
|
'tokens':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
|
'numThreads': 2,
|
||||||
|
'provider': 'cpu',
|
||||||
|
'debug': 1,
|
||||||
|
},
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
ruleFsts: './itn_zh_number.fst',
|
||||||
|
};
|
||||||
|
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
const waveFilename = './itn-zh-number.wav';
|
||||||
|
|
||||||
|
const recognizer = new sherpa_onnx.OnlineRecognizer(config);
|
||||||
|
console.log('Started')
|
||||||
|
let start = Date.now();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
const wave = sherpa_onnx.readWave(waveFilename);
|
||||||
|
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
|
||||||
|
|
||||||
|
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
|
||||||
|
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
|
||||||
|
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream);
|
||||||
|
}
|
||||||
|
// Declare the result explicitly; a bare assignment would create an implicit global.
const result = recognizer.getResult(stream);
|
||||||
|
let stop = Date.now();
|
||||||
|
console.log('Done')
|
||||||
|
|
||||||
|
const elapsed_seconds = (stop - start) / 1000;
|
||||||
|
const duration = wave.samples.length / wave.sampleRate;
|
||||||
|
const real_time_factor = elapsed_seconds / duration;
|
||||||
|
console.log('Wave duration', duration.toFixed(3), 'seconds')
|
||||||
|
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
|
||||||
|
console.log(
|
||||||
|
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
||||||
|
real_time_factor.toFixed(3))
|
||||||
|
console.log(waveFilename)
|
||||||
|
console.log('result\n', result)
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
// console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
|
||||||
|
function createOnlineRecognizer() {
|
||||||
|
const config = {
|
||||||
|
'featConfig': {
|
||||||
|
'sampleRate': 16000,
|
||||||
|
'featureDim': 80,
|
||||||
|
},
|
||||||
|
'modelConfig': {
|
||||||
|
'transducer': {
|
||||||
|
'encoder':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
|
||||||
|
'decoder':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
|
||||||
|
'joiner':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
|
||||||
|
},
|
||||||
|
'tokens':
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
|
'numThreads': 2,
|
||||||
|
'provider': 'cpu',
|
||||||
|
'debug': 1,
|
||||||
|
},
|
||||||
|
'decodingMethod': 'greedy_search',
|
||||||
|
'maxActivePaths': 4,
|
||||||
|
'enableEndpoint': true,
|
||||||
|
'rule1MinTrailingSilence': 2.4,
|
||||||
|
'rule2MinTrailingSilence': 1.2,
|
||||||
|
'rule3MinUtteranceLength': 20,
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
ruleFsts: './itn_zh_number.fst',
|
||||||
|
};
|
||||||
|
|
||||||
|
return new sherpa_onnx.OnlineRecognizer(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
|
let lastText = '';
|
||||||
|
let segmentIndex = 0;
|
||||||
|
|
||||||
|
const ai = new portAudio.AudioIO({
|
||||||
|
inOptions: {
|
||||||
|
channelCount: 1,
|
||||||
|
closeOnError: true, // Close the stream if an audio error is detected, if
|
||||||
|
// set false then just log the error
|
||||||
|
deviceId: -1, // Use -1 or omit the deviceId to select the default device
|
||||||
|
sampleFormat: portAudio.SampleFormatFloat32,
|
||||||
|
sampleRate: recognizer.config.featConfig.sampleRate
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const display = new sherpa_onnx.Display(50);
|
||||||
|
|
||||||
|
ai.on('data', data => {
|
||||||
|
const samples = new Float32Array(data.buffer);
|
||||||
|
|
||||||
|
stream.acceptWaveform(
|
||||||
|
{sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
|
||||||
|
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
const isEndpoint = recognizer.isEndpoint(stream);
|
||||||
|
const text = recognizer.getResult(stream).text.toLowerCase();
|
||||||
|
|
||||||
|
if (text.length > 0 && lastText != text) {
|
||||||
|
lastText = text;
|
||||||
|
display.print(segmentIndex, lastText);
|
||||||
|
}
|
||||||
|
if (isEndpoint) {
|
||||||
|
if (text.length > 0) {
|
||||||
|
lastText = text;
|
||||||
|
segmentIndex += 1;
|
||||||
|
}
|
||||||
|
recognizer.reset(stream)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
ai.start();
|
||||||
|
console.log('Started! Please speak')
|
||||||
131
nodejs-examples/test-online-transducer-itn.js
Normal file
131
nodejs-examples/test-online-transducer-itn.js
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const fs = require('fs');
|
||||||
|
const {Readable} = require('stream');
|
||||||
|
const wav = require('wav');
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
function createOnlineRecognizer() {
|
||||||
|
let onlineTransducerModelConfig = {
|
||||||
|
encoder:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
|
||||||
|
decoder:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
|
||||||
|
joiner:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
|
||||||
|
};
|
||||||
|
|
||||||
|
let onlineParaformerModelConfig = {
|
||||||
|
encoder: '',
|
||||||
|
decoder: '',
|
||||||
|
};
|
||||||
|
|
||||||
|
let onlineZipformer2CtcModelConfig = {
|
||||||
|
model: '',
|
||||||
|
};
|
||||||
|
|
||||||
|
let onlineModelConfig = {
|
||||||
|
transducer: onlineTransducerModelConfig,
|
||||||
|
paraformer: onlineParaformerModelConfig,
|
||||||
|
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||||
|
tokens:
|
||||||
|
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
|
||||||
|
numThreads: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
debug: 1,
|
||||||
|
modelType: 'zipformer',
|
||||||
|
};
|
||||||
|
|
||||||
|
let featureConfig = {
|
||||||
|
sampleRate: 16000,
|
||||||
|
featureDim: 80,
|
||||||
|
};
|
||||||
|
|
||||||
|
let recognizerConfig = {
|
||||||
|
featConfig: featureConfig,
|
||||||
|
modelConfig: onlineModelConfig,
|
||||||
|
decodingMethod: 'greedy_search',
|
||||||
|
maxActivePaths: 4,
|
||||||
|
enableEndpoint: 1,
|
||||||
|
rule1MinTrailingSilence: 2.4,
|
||||||
|
rule2MinTrailingSilence: 1.2,
|
||||||
|
rule3MinUtteranceLength: 20,
|
||||||
|
hotwordsFile: '',
|
||||||
|
hotwordsScore: 1.5,
|
||||||
|
ctcFstDecoderConfig: {
|
||||||
|
graph: '',
|
||||||
|
maxActive: 3000,
|
||||||
|
},
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
ruleFsts: './itn_zh_number.fst',
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
const recognizer = createOnlineRecognizer();
|
||||||
|
const stream = recognizer.createStream();
|
||||||
|
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
const waveFilename = './itn-zh-number.wav';
|
||||||
|
|
||||||
|
const reader = new wav.Reader();
|
||||||
|
const readable = new Readable().wrap(reader);
|
||||||
|
|
||||||
|
function decode(samples) {
|
||||||
|
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
|
||||||
|
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream);
|
||||||
|
}
|
||||||
|
const text = recognizer.getResult(stream).text;
|
||||||
|
console.log(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
|
||||||
|
if (sampleRate != recognizer.config.featConfig.sampleRate) {
|
||||||
|
throw new Error(`Only support sampleRate ${
|
||||||
|
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (audioFormat != 1) {
|
||||||
|
throw new Error(`Only support PCM format. Given ${audioFormat}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (channels != 1) {
|
||||||
|
// Report the destructured `channels` value; the previous `channel` was undefined here.
throw new Error(`Only a single channel. Given ${channels}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bitDepth != 16) {
|
||||||
|
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
|
||||||
|
.pipe(reader)
|
||||||
|
.on('finish', function(err) {
|
||||||
|
// tail padding
|
||||||
|
const floatSamples =
|
||||||
|
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
|
||||||
|
decode(floatSamples);
|
||||||
|
stream.free();
|
||||||
|
recognizer.free();
|
||||||
|
});
|
||||||
|
|
||||||
|
readable.on('readable', function() {
|
||||||
|
let chunk;
|
||||||
|
while ((chunk = readable.read()) != null) {
|
||||||
|
const int16Samples = new Int16Array(
|
||||||
|
chunk.buffer, chunk.byteOffset,
|
||||||
|
chunk.length / Int16Array.BYTES_PER_ELEMENT);
|
||||||
|
|
||||||
|
const floatSamples = new Float32Array(int16Samples.length);
|
||||||
|
|
||||||
|
for (let i = 0; i < floatSamples.length; i++) {
|
||||||
|
floatSamples[i] = int16Samples[i] / 32768.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
decode(floatSamples);
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -71,6 +71,17 @@ git checkout .
|
|||||||
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
|
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||||
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
|
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
|
||||||
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt
|
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt
|
||||||
|
|
||||||
|
{% if first.rule_fsts %}
|
||||||
|
rule_fsts={{ first.rule_fsts }}
|
||||||
|
sed -i.bak s%"firstRuleFsts = null"%"firstRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if second.rule_fsts %}
|
||||||
|
rule_fsts={{ second.rule_fsts }}
|
||||||
|
sed -i.bak s%"secondRuleFsts = null"%"secondRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
|||||||
@@ -54,6 +54,12 @@ popd
|
|||||||
git checkout .
|
git checkout .
|
||||||
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
|
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||||
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
|
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
|
||||||
|
|
||||||
|
{% if model.rule_fsts %}
|
||||||
|
rule_fsts={{ model.rule_fsts }}
|
||||||
|
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
popd
|
popd
|
||||||
|
|
||||||
@@ -84,6 +90,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
|
rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
|
||||||
|
rm -rf ./android/SherpaOnnx/app/src/main/assets/*.fst
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
git checkout .
|
git checkout .
|
||||||
|
|||||||
@@ -56,6 +56,12 @@ popd
|
|||||||
git checkout .
|
git checkout .
|
||||||
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
|
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||||
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt
|
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt
|
||||||
|
|
||||||
|
{% if model.rule_fsts %}
|
||||||
|
rule_fsts={{ model.rule_fsts }}
|
||||||
|
sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class Model:
|
|||||||
|
|
||||||
# cmd is used to remove extra file from the model directory
|
# cmd is used to remove extra file from the model directory
|
||||||
cmd: str = ""
|
cmd: str = ""
|
||||||
|
rule_fsts: str = ""
|
||||||
|
|
||||||
|
|
||||||
def get_2nd_models():
|
def get_2nd_models():
|
||||||
@@ -70,7 +71,11 @@ def get_2nd_models():
|
|||||||
idx=0,
|
idx=0,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="paraformer",
|
short_name="paraformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
|
|
||||||
rm -fv README.md
|
rm -fv README.md
|
||||||
@@ -87,7 +92,11 @@ def get_2nd_models():
|
|||||||
idx=4,
|
idx=4,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="zipformer",
|
short_name="zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
|
|
||||||
rm -rfv test_wavs
|
rm -rfv test_wavs
|
||||||
@@ -117,7 +126,11 @@ def get_1st_models():
|
|||||||
idx=8,
|
idx=8,
|
||||||
lang="bilingual_zh_en",
|
lang="bilingual_zh_en",
|
||||||
short_name="zipformer",
|
short_name="zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
||||||
rm -fv encoder-epoch-99-avg-1.onnx
|
rm -fv encoder-epoch-99-avg-1.onnx
|
||||||
@@ -160,7 +173,11 @@ def get_1st_models():
|
|||||||
idx=3,
|
idx=3,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="zipformer2",
|
short_name="zipformer2",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
||||||
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
|
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
|
||||||
@@ -202,7 +219,11 @@ def get_1st_models():
|
|||||||
idx=9,
|
idx=9,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="small_zipformer",
|
short_name="small_zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv encoder-epoch-99-avg-1.onnx
|
rm -fv encoder-epoch-99-avg-1.onnx
|
||||||
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ class Model:
|
|||||||
# cmd is used to remove extra file from the model directory
|
# cmd is used to remove extra file from the model directory
|
||||||
cmd: str = ""
|
cmd: str = ""
|
||||||
|
|
||||||
|
rule_fsts: str = ""
|
||||||
|
|
||||||
|
|
||||||
def get_models():
|
def get_models():
|
||||||
models = [
|
models = [
|
||||||
@@ -50,7 +52,11 @@ def get_models():
|
|||||||
idx=8,
|
idx=8,
|
||||||
lang="bilingual_zh_en",
|
lang="bilingual_zh_en",
|
||||||
short_name="zipformer",
|
short_name="zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
||||||
rm -fv encoder-epoch-99-avg-1.onnx
|
rm -fv encoder-epoch-99-avg-1.onnx
|
||||||
@@ -93,7 +99,11 @@ def get_models():
|
|||||||
idx=3,
|
idx=3,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="zipformer2",
|
short_name="zipformer2",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
||||||
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
|
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
|
||||||
@@ -135,7 +145,11 @@ def get_models():
|
|||||||
idx=9,
|
idx=9,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="small_zipformer",
|
short_name="small_zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
rm -fv encoder-epoch-99-avg-1.onnx
|
rm -fv encoder-epoch-99-avg-1.onnx
|
||||||
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
rm -fv decoder-epoch-99-avg-1.int8.onnx
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ class Model:
|
|||||||
# cmd is used to remove extra file from the model directory
|
# cmd is used to remove extra file from the model directory
|
||||||
cmd: str = ""
|
cmd: str = ""
|
||||||
|
|
||||||
|
rule_fsts: str = ""
|
||||||
|
|
||||||
|
|
||||||
# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
|
# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
|
||||||
def get_models():
|
def get_models():
|
||||||
@@ -71,7 +73,11 @@ def get_models():
|
|||||||
idx=0,
|
idx=0,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="paraformer",
|
short_name="paraformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
|
|
||||||
rm -v README.md
|
rm -v README.md
|
||||||
@@ -88,7 +94,11 @@ def get_models():
|
|||||||
idx=4,
|
idx=4,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="zipformer",
|
short_name="zipformer",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
|
|
||||||
rm -rfv test_wavs
|
rm -rfv test_wavs
|
||||||
@@ -171,7 +181,11 @@ def get_models():
|
|||||||
idx=11,
|
idx=11,
|
||||||
lang="zh",
|
lang="zh",
|
||||||
short_name="telespeech",
|
short_name="telespeech",
|
||||||
|
rule_fsts="itn_zh_number.fst",
|
||||||
cmd="""
|
cmd="""
|
||||||
|
if [ ! -f itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
pushd $model_name
|
pushd $model_name
|
||||||
|
|
||||||
rm -rfv test_wavs
|
rm -rfv test_wavs
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ namespace SherpaOnnx
|
|||||||
HotwordsFile = "";
|
HotwordsFile = "";
|
||||||
HotwordsScore = 1.5F;
|
HotwordsScore = 1.5F;
|
||||||
CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
|
CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
|
||||||
|
RuleFsts = "";
|
||||||
|
RuleFars = "";
|
||||||
}
|
}
|
||||||
public FeatureConfig FeatConfig;
|
public FeatureConfig FeatConfig;
|
||||||
public OnlineModelConfig ModelConfig;
|
public OnlineModelConfig ModelConfig;
|
||||||
@@ -64,5 +66,11 @@ namespace SherpaOnnx
|
|||||||
public float HotwordsScore;
|
public float HotwordsScore;
|
||||||
|
|
||||||
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
|
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
|
||||||
|
|
||||||
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
|
public string RuleFsts;
|
||||||
|
|
||||||
|
[MarshalAs(UnmanagedType.LPStr)]
|
||||||
|
public string RuleFars;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/streaming-decode-files/run-transducer-itn.sh
|
||||||
@@ -79,8 +79,8 @@ function osx() {
|
|||||||
|
|
||||||
mkdir t
|
mkdir t
|
||||||
cd t
|
cd t
|
||||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
|
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl
|
||||||
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
|
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl
|
||||||
|
|
||||||
cp -v sherpa_onnx/lib/*.dylib $dst/
|
cp -v sherpa_onnx/lib/*.dylib $dst/
|
||||||
|
|
||||||
@@ -93,8 +93,8 @@ function osx() {
|
|||||||
|
|
||||||
mkdir t
|
mkdir t
|
||||||
cd t
|
cd t
|
||||||
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
|
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl
|
||||||
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
|
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl
|
||||||
|
|
||||||
cp -v sherpa_onnx/lib/*.dylib $dst/
|
cp -v sherpa_onnx/lib/*.dylib $dst/
|
||||||
|
|
||||||
@@ -126,7 +126,6 @@ function windows() {
|
|||||||
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
|
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
|
||||||
|
|
||||||
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
|
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
|
||||||
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
|
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
rm -rf t
|
rm -rf t
|
||||||
@@ -139,7 +138,6 @@ function windows() {
|
|||||||
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
|
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
|
||||||
|
|
||||||
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
|
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
|
||||||
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
|
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
rm -rf t
|
rm -rf t
|
||||||
|
|||||||
@@ -127,7 +127,11 @@ type OnlineRecognizerConfig struct {
|
|||||||
Rule1MinTrailingSilence float32
|
Rule1MinTrailingSilence float32
|
||||||
Rule2MinTrailingSilence float32
|
Rule2MinTrailingSilence float32
|
||||||
Rule3MinUtteranceLength float32
|
Rule3MinUtteranceLength float32
|
||||||
|
HotwordsFile string
|
||||||
|
HotwordsScore float32
|
||||||
CtcFstDecoderConfig OnlineCtcFstDecoderConfig
|
CtcFstDecoderConfig OnlineCtcFstDecoderConfig
|
||||||
|
RuleFsts string
|
||||||
|
RuleFars string
|
||||||
}
|
}
|
||||||
|
|
||||||
// It contains the recognition result for a online stream.
|
// It contains the recognition result for a online stream.
|
||||||
@@ -204,6 +208,17 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
|
|||||||
c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
|
c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
|
||||||
c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
|
c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
|
||||||
|
|
||||||
|
c.hotwords_file = C.CString(config.HotwordsFile)
|
||||||
|
defer C.free(unsafe.Pointer(c.hotwords_file))
|
||||||
|
|
||||||
|
c.hotwords_score = C.float(config.HotwordsScore)
|
||||||
|
|
||||||
|
c.rule_fsts = C.CString(config.RuleFsts)
|
||||||
|
defer C.free(unsafe.Pointer(c.rule_fsts))
|
||||||
|
|
||||||
|
c.rule_fars = C.CString(config.RuleFars)
|
||||||
|
defer C.free(unsafe.Pointer(c.rule_fars))
|
||||||
|
|
||||||
c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
|
c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
|
||||||
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
|
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
|
||||||
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
|
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
|
||||||
|
|||||||
@@ -189,6 +189,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
|||||||
rule3MinUtteranceLength);
|
rule3MinUtteranceLength);
|
||||||
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
|
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
|
||||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
|
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
|
||||||
|
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
|
||||||
|
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
|
||||||
|
|
||||||
c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o);
|
c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o);
|
||||||
|
|
||||||
@@ -246,6 +248,14 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
|||||||
delete[] c.hotwords_file;
|
delete[] c.hotwords_file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c.rule_fsts) {
|
||||||
|
delete[] c.rule_fsts;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c.rule_fars) {
|
||||||
|
delete[] c.rule_fars;
|
||||||
|
}
|
||||||
|
|
||||||
if (c.ctc_fst_decoder_config.graph) {
|
if (c.ctc_fst_decoder_config.graph) {
|
||||||
delete[] c.ctc_fst_decoder_config.graph;
|
delete[] c.ctc_fst_decoder_config.graph;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,6 +110,9 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
|||||||
recognizer_config.ctc_fst_decoder_config.max_active =
|
recognizer_config.ctc_fst_decoder_config.max_active =
|
||||||
SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000);
|
SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000);
|
||||||
|
|
||||||
|
recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
|
||||||
|
recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
|
||||||
|
|
||||||
if (config->model_config.debug) {
|
if (config->model_config.debug) {
|
||||||
SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -144,6 +144,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
|
|||||||
float hotwords_score;
|
float hotwords_score;
|
||||||
|
|
||||||
SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config;
|
SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config;
|
||||||
|
const char *rule_fsts;
|
||||||
|
const char *rule_fars;
|
||||||
} SherpaOnnxOnlineRecognizerConfig;
|
} SherpaOnnxOnlineRecognizerConfig;
|
||||||
|
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
|
||||||
|
|||||||
@@ -190,9 +190,10 @@ if(NOT BUILD_SHARED_LIBS AND APPLE)
|
|||||||
target_link_libraries(sherpa-onnx-core "-framework Foundation")
|
target_link_libraries(sherpa-onnx-core "-framework Foundation")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
target_link_libraries(sherpa-onnx-core fstfar fst)
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
if(SHERPA_ONNX_ENABLE_TTS)
|
||||||
target_link_libraries(sherpa-onnx-core piper_phonemize)
|
target_link_libraries(sherpa-onnx-core piper_phonemize)
|
||||||
target_link_libraries(sherpa-onnx-core fstfar fst)
|
|
||||||
target_link_libraries(sherpa-onnx-core cppjieba)
|
target_link_libraries(sherpa-onnx-core cppjieba)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -425,9 +425,6 @@ std::string OfflineRecognizerImpl::ApplyInverseTextNormalization(
|
|||||||
if (!itn_list_.empty()) {
|
if (!itn_list_.empty()) {
|
||||||
for (const auto &tn : itn_list_) {
|
for (const auto &tn : itn_list_) {
|
||||||
text = tn->Normalize(text);
|
text = tn->Normalize(text);
|
||||||
if (config_.model_config.debug) {
|
|
||||||
SHERPA_ONNX_LOGE("After inverse text normalization: %s", text.c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
|
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#if __ANDROID_API__ >= 9
|
#if __ANDROID_API__ >= 9
|
||||||
#include <strstream>
|
#include <strstream>
|
||||||
|
|
||||||
@@ -186,9 +188,6 @@ std::string OnlineRecognizerImpl::ApplyInverseTextNormalization(
|
|||||||
if (!itn_list_.empty()) {
|
if (!itn_list_.empty()) {
|
||||||
for (const auto &tn : itn_list_) {
|
for (const auto &tn : itn_list_) {
|
||||||
text = tn->Normalize(text);
|
text = tn->Normalize(text);
|
||||||
if (config_.model_config.debug) {
|
|
||||||
SHERPA_ONNX_LOGE("After inverse text normalization: %s", text.c_str());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
## 1.10.0
|
||||||
|
|
||||||
|
* Add inverse text normalization
|
||||||
|
|
||||||
## 1.9.30
|
## 1.9.30
|
||||||
|
|
||||||
* Add TTS
|
* Add TTS
|
||||||
|
|||||||
@@ -111,11 +111,13 @@ class OnlineRecognizerConfig {
|
|||||||
this.hotwordsFile = '',
|
this.hotwordsFile = '',
|
||||||
this.hotwordsScore = 1.5,
|
this.hotwordsScore = 1.5,
|
||||||
this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(),
|
this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(),
|
||||||
|
this.ruleFsts = '',
|
||||||
|
this.ruleFars = '',
|
||||||
});
|
});
|
||||||
|
|
||||||
@override
|
@override
|
||||||
String toString() {
|
String toString() {
|
||||||
return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig)';
|
return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig, ruleFsts: $ruleFsts, ruleFars: $ruleFars)';
|
||||||
}
|
}
|
||||||
|
|
||||||
final FeatureConfig feat;
|
final FeatureConfig feat;
|
||||||
@@ -137,6 +139,8 @@ class OnlineRecognizerConfig {
|
|||||||
final double hotwordsScore;
|
final double hotwordsScore;
|
||||||
|
|
||||||
final OnlineCtcFstDecoderConfig ctcFstDecoderConfig;
|
final OnlineCtcFstDecoderConfig ctcFstDecoderConfig;
|
||||||
|
final String ruleFsts;
|
||||||
|
final String ruleFars;
|
||||||
}
|
}
|
||||||
|
|
||||||
class OnlineRecognizerResult {
|
class OnlineRecognizerResult {
|
||||||
@@ -201,9 +205,13 @@ class OnlineRecognizer {
|
|||||||
c.ref.ctcFstDecoderConfig.graph =
|
c.ref.ctcFstDecoderConfig.graph =
|
||||||
config.ctcFstDecoderConfig.graph.toNativeUtf8();
|
config.ctcFstDecoderConfig.graph.toNativeUtf8();
|
||||||
c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive;
|
c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive;
|
||||||
|
c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
|
||||||
|
c.ref.ruleFars = config.ruleFars.toNativeUtf8();
|
||||||
|
|
||||||
final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr;
|
final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr;
|
||||||
|
|
||||||
|
calloc.free(c.ref.ruleFars);
|
||||||
|
calloc.free(c.ref.ruleFsts);
|
||||||
calloc.free(c.ref.ctcFstDecoderConfig.graph);
|
calloc.free(c.ref.ctcFstDecoderConfig.graph);
|
||||||
calloc.free(c.ref.hotwordsFile);
|
calloc.free(c.ref.hotwordsFile);
|
||||||
calloc.free(c.ref.decodingMethod);
|
calloc.free(c.ref.decodingMethod);
|
||||||
|
|||||||
@@ -205,6 +205,9 @@ final class SherpaOnnxOnlineRecognizerConfig extends Struct {
|
|||||||
external double hotwordsScore;
|
external double hotwordsScore;
|
||||||
|
|
||||||
external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig;
|
external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig;
|
||||||
|
|
||||||
|
external Pointer<Utf8> ruleFsts;
|
||||||
|
external Pointer<Utf8> ruleFars;
|
||||||
}
|
}
|
||||||
|
|
||||||
final class SherpaOnnxSileroVadModelConfig extends Struct {
|
final class SherpaOnnxSileroVadModelConfig extends Struct {
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ public class OnlineRecognizerConfig {
|
|||||||
private final int maxActivePaths;
|
private final int maxActivePaths;
|
||||||
private final String hotwordsFile;
|
private final String hotwordsFile;
|
||||||
private final float hotwordsScore;
|
private final float hotwordsScore;
|
||||||
|
private final String ruleFsts;
|
||||||
|
private final String ruleFars;
|
||||||
|
|
||||||
private OnlineRecognizerConfig(Builder builder) {
|
private OnlineRecognizerConfig(Builder builder) {
|
||||||
this.featConfig = builder.featConfig;
|
this.featConfig = builder.featConfig;
|
||||||
@@ -27,6 +29,8 @@ public class OnlineRecognizerConfig {
|
|||||||
this.maxActivePaths = builder.maxActivePaths;
|
this.maxActivePaths = builder.maxActivePaths;
|
||||||
this.hotwordsFile = builder.hotwordsFile;
|
this.hotwordsFile = builder.hotwordsFile;
|
||||||
this.hotwordsScore = builder.hotwordsScore;
|
this.hotwordsScore = builder.hotwordsScore;
|
||||||
|
this.ruleFsts = builder.ruleFsts;
|
||||||
|
this.ruleFars = builder.ruleFars;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Builder builder() {
|
public static Builder builder() {
|
||||||
@@ -48,6 +52,8 @@ public class OnlineRecognizerConfig {
|
|||||||
private int maxActivePaths = 4;
|
private int maxActivePaths = 4;
|
||||||
private String hotwordsFile = "";
|
private String hotwordsFile = "";
|
||||||
private float hotwordsScore = 1.5f;
|
private float hotwordsScore = 1.5f;
|
||||||
|
private String ruleFsts = "";
|
||||||
|
private String ruleFars = "";
|
||||||
|
|
||||||
public OnlineRecognizerConfig build() {
|
public OnlineRecognizerConfig build() {
|
||||||
return new OnlineRecognizerConfig(this);
|
return new OnlineRecognizerConfig(this);
|
||||||
@@ -102,5 +108,15 @@ public class OnlineRecognizerConfig {
|
|||||||
this.hotwordsScore = hotwordsScore;
|
this.hotwordsScore = hotwordsScore;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder setRuleFsts(String ruleFsts) {
|
||||||
|
this.ruleFsts = ruleFsts;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder setRuleFars(String ruleFars) {
|
||||||
|
this.ruleFars = ruleFars;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,18 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
|
|||||||
fid = env->GetFieldID(cls, "hotwordsScore", "F");
|
fid = env->GetFieldID(cls, "hotwordsScore", "F");
|
||||||
ans.hotwords_score = env->GetFloatField(config, fid);
|
ans.hotwords_score = env->GetFloatField(config, fid);
|
||||||
|
|
||||||
|
fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");
|
||||||
|
s = (jstring)env->GetObjectField(config, fid);
|
||||||
|
p = env->GetStringUTFChars(s, nullptr);
|
||||||
|
ans.rule_fsts = p;
|
||||||
|
env->ReleaseStringUTFChars(s, p);
|
||||||
|
|
||||||
|
fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
|
||||||
|
s = (jstring)env->GetObjectField(config, fid);
|
||||||
|
p = env->GetStringUTFChars(s, nullptr);
|
||||||
|
ans.rule_fars = p;
|
||||||
|
env->ReleaseStringUTFChars(s, p);
|
||||||
|
|
||||||
//---------- feat config ----------
|
//---------- feat config ----------
|
||||||
fid = env->GetFieldID(cls, "featConfig",
|
fid = env->GetFieldID(cls, "featConfig",
|
||||||
"Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
|
"Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
|
||||||
|
|||||||
@@ -69,6 +69,8 @@ data class OnlineRecognizerConfig(
|
|||||||
var maxActivePaths: Int = 4,
|
var maxActivePaths: Int = 4,
|
||||||
var hotwordsFile: String = "",
|
var hotwordsFile: String = "",
|
||||||
var hotwordsScore: Float = 1.5f,
|
var hotwordsScore: Float = 1.5f,
|
||||||
|
var ruleFsts: String = "",
|
||||||
|
var ruleFars: String = "",
|
||||||
)
|
)
|
||||||
|
|
||||||
data class OnlineRecognizerResult(
|
data class OnlineRecognizerResult(
|
||||||
|
|||||||
@@ -135,7 +135,9 @@ func sherpaOnnxOnlineRecognizerConfig(
|
|||||||
maxActivePaths: Int = 4,
|
maxActivePaths: Int = 4,
|
||||||
hotwordsFile: String = "",
|
hotwordsFile: String = "",
|
||||||
hotwordsScore: Float = 1.5,
|
hotwordsScore: Float = 1.5,
|
||||||
ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig()
|
ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(),
|
||||||
|
ruleFsts: String = "",
|
||||||
|
ruleFars: String = ""
|
||||||
) -> SherpaOnnxOnlineRecognizerConfig {
|
) -> SherpaOnnxOnlineRecognizerConfig {
|
||||||
return SherpaOnnxOnlineRecognizerConfig(
|
return SherpaOnnxOnlineRecognizerConfig(
|
||||||
feat_config: featConfig,
|
feat_config: featConfig,
|
||||||
@@ -148,7 +150,9 @@ func sherpaOnnxOnlineRecognizerConfig(
|
|||||||
rule3_min_utterance_length: rule3MinUtteranceLength,
|
rule3_min_utterance_length: rule3MinUtteranceLength,
|
||||||
hotwords_file: toCPointer(hotwordsFile),
|
hotwords_file: toCPointer(hotwordsFile),
|
||||||
hotwords_score: hotwordsScore,
|
hotwords_score: hotwordsScore,
|
||||||
ctc_fst_decoder_config: ctcFstDecoderConfig
|
ctc_fst_decoder_config: ctcFstDecoderConfig,
|
||||||
|
rule_fsts: toCPointer(ruleFsts),
|
||||||
|
rule_fars: toCPointer(ruleFars)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
|
|||||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||||
|
|
||||||
|
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||||
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
||||||
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
|
||||||
|
|||||||
@@ -239,7 +239,7 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
|||||||
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
|
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
|
||||||
config.ctcFstDecoderConfig, Module)
|
config.ctcFstDecoderConfig, Module)
|
||||||
|
|
||||||
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len;
|
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 2 * 4;
|
||||||
const ptr = Module._malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
@@ -251,7 +251,10 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
|||||||
|
|
||||||
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
||||||
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||||
const bufferLen = decodingMethodLen + hotwordsFileLen;
|
const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
|
||||||
|
const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
|
||||||
|
const bufferLen =
|
||||||
|
decodingMethodLen + hotwordsFileLen + ruleFstsFileLen + ruleFarsFileLen;
|
||||||
const buffer = Module._malloc(bufferLen);
|
const buffer = Module._malloc(bufferLen);
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
@@ -259,6 +262,13 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
|||||||
offset += decodingMethodLen;
|
offset += decodingMethodLen;
|
||||||
|
|
||||||
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||||
|
offset += hotwordsFileLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
|
||||||
|
offset += ruleFstsFileLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen);
|
||||||
|
offset += ruleFarsFileLen;
|
||||||
|
|
||||||
offset = feat.len + model.len;
|
offset = feat.len + model.len;
|
||||||
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||||
@@ -286,6 +296,16 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
|||||||
offset += 4;
|
offset += 4;
|
||||||
|
|
||||||
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
|
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
|
||||||
|
offset += ctcFstDecoder.len;
|
||||||
|
|
||||||
|
Module.setValue(
|
||||||
|
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
|
Module.setValue(
|
||||||
|
ptr + offset,
|
||||||
|
buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*');
|
||||||
|
offset += 4;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
|
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
|
||||||
@@ -363,7 +383,9 @@ function createOnlineRecognizer(Module, myConfig) {
|
|||||||
ctcFstDecoderConfig: {
|
ctcFstDecoderConfig: {
|
||||||
graph: '',
|
graph: '',
|
||||||
maxActive: 3000,
|
maxActive: 3000,
|
||||||
}
|
},
|
||||||
|
ruleFsts: '',
|
||||||
|
ruleFars: '',
|
||||||
};
|
};
|
||||||
if (myConfig) {
|
if (myConfig) {
|
||||||
recognizerConfig = myConfig;
|
recognizerConfig = myConfig;
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
|
|||||||
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
|
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
|
||||||
sizeof(SherpaOnnxFeatureConfig) +
|
sizeof(SherpaOnnxFeatureConfig) +
|
||||||
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
|
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
|
||||||
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig),
|
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 2 * 4,
|
||||||
"");
|
"");
|
||||||
|
|
||||||
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
|
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
|
||||||
@@ -71,6 +71,8 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
|
|||||||
config->rule3_min_utterance_length);
|
config->rule3_min_utterance_length);
|
||||||
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
|
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
|
||||||
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
|
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
|
||||||
|
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
|
||||||
|
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
|
||||||
|
|
||||||
fprintf(stdout, "----------ctc fst decoder config----------\n");
|
fprintf(stdout, "----------ctc fst decoder config----------\n");
|
||||||
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
|
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ")
|
|||||||
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
|
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
|
||||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||||
|
|
||||||
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||||
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||||
@@ -51,4 +52,4 @@ install(
|
|||||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
|
||||||
DESTINATION
|
DESTINATION
|
||||||
bin/wasm
|
bin/wasm
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
|
|||||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||||
|
|
||||||
|
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||||
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
||||||
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
|
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
|
||||||
|
|||||||
Reference in New Issue
Block a user