Add C++ support for non-streaming NeMo fast conformer hybrid transducer ctc (the ctc branch) (#848)
This commit is contained in:
101
.github/scripts/test-offline-ctc.sh
vendored
101
.github/scripts/test-offline-ctc.sh
vendored
@@ -13,14 +13,111 @@ echo "PATH: $PATH"
|
|||||||
|
|
||||||
which $EXE
|
which $EXE
|
||||||
|
|
||||||
|
log "-----------------------------------------------------------------"
|
||||||
|
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
|
||||||
|
log "-----------------------------------------------------------------"
|
||||||
|
|
||||||
|
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
name=$(basename $url)
|
||||||
|
curl -SL -O $url
|
||||||
|
tar xvf $name
|
||||||
|
rm $name
|
||||||
|
repo=$(basename -s .tar.bz2 $name)
|
||||||
|
ls -lh $repo
|
||||||
|
|
||||||
|
log "test $repo"
|
||||||
|
test_wavs=(
|
||||||
|
de-german.wav
|
||||||
|
es-spanish.wav
|
||||||
|
hr-croatian.wav
|
||||||
|
po-polish.wav
|
||||||
|
uk-ukrainian.wav
|
||||||
|
en-english.wav
|
||||||
|
fr-french.wav
|
||||||
|
it-italian.wav
|
||||||
|
ru-russian.wav
|
||||||
|
)
|
||||||
|
for w in ${test_wavs[@]}; do
|
||||||
|
time $EXE \
|
||||||
|
--tokens=$repo/tokens.txt \
|
||||||
|
--nemo-ctc-model=$repo/model.onnx \
|
||||||
|
--debug=1 \
|
||||||
|
$repo/test_wavs/$w
|
||||||
|
done
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
|
||||||
|
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
|
||||||
|
name=$(basename $url)
|
||||||
|
curl -SL -O $url
|
||||||
|
tar xvf $name
|
||||||
|
rm $name
|
||||||
|
repo=$(basename -s .tar.bz2 $name)
|
||||||
|
ls -lh $repo
|
||||||
|
|
||||||
|
log "Test $repo"
|
||||||
|
|
||||||
|
time $EXE \
|
||||||
|
--tokens=$repo/tokens.txt \
|
||||||
|
--nemo-ctc-model=$repo/model.onnx \
|
||||||
|
--debug=1 \
|
||||||
|
$repo/test_wavs/en-english.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
|
||||||
|
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
|
||||||
|
name=$(basename $url)
|
||||||
|
curl -SL -O $url
|
||||||
|
tar xvf $name
|
||||||
|
rm $name
|
||||||
|
repo=$(basename -s .tar.bz2 $name)
|
||||||
|
ls -lh $repo
|
||||||
|
|
||||||
|
log "test $repo"
|
||||||
|
|
||||||
|
time $EXE \
|
||||||
|
--tokens=$repo/tokens.txt \
|
||||||
|
--nemo-ctc-model=$repo/model.onnx \
|
||||||
|
--debug=1 \
|
||||||
|
$repo/test_wavs/es-spanish.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
|
||||||
|
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
|
||||||
|
name=$(basename $url)
|
||||||
|
curl -SL -O $url
|
||||||
|
tar xvf $name
|
||||||
|
rm $name
|
||||||
|
repo=$(basename -s .tar.bz2 $name)
|
||||||
|
ls -lh $repo
|
||||||
|
|
||||||
|
log "Test $repo"
|
||||||
|
|
||||||
|
test_wavs=(
|
||||||
|
en-english.wav
|
||||||
|
de-german.wav
|
||||||
|
fr-french.wav
|
||||||
|
es-spanish.wav
|
||||||
|
)
|
||||||
|
|
||||||
|
for w in ${test_wavs[@]}; do
|
||||||
|
time $EXE \
|
||||||
|
--tokens=$repo/tokens.txt \
|
||||||
|
--nemo-ctc-model=$repo/model.onnx \
|
||||||
|
--debug=1 \
|
||||||
|
$repo/test_wavs/$w
|
||||||
|
done
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "Run Wenet models"
|
log "Run Wenet models"
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
wenet_models=(
|
wenet_models=(
|
||||||
sherpa-onnx-zh-wenet-aishell
|
sherpa-onnx-zh-wenet-aishell
|
||||||
sherpa-onnx-zh-wenet-aishell2
|
# sherpa-onnx-zh-wenet-aishell2
|
||||||
# sherpa-onnx-zh-wenet-wenetspeech
|
# sherpa-onnx-zh-wenet-wenetspeech
|
||||||
sherpa-onnx-zh-wenet-multi-cn
|
# sherpa-onnx-zh-wenet-multi-cn
|
||||||
sherpa-onnx-en-wenet-librispeech
|
sherpa-onnx-en-wenet-librispeech
|
||||||
# sherpa-onnx-en-wenet-gigaspeech
|
# sherpa-onnx-en-wenet-gigaspeech
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
|
|||||||
ls -lh *.wav
|
ls -lh *.wav
|
||||||
done
|
done
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
|
||||||
|
tar xvf spoken-language-identification-test-wavs.tar.bz2
|
||||||
|
rm spoken-language-identification-test-wavs.tar.bz2
|
||||||
|
data=spoken-language-identification-test-wavs
|
||||||
|
|
||||||
for name in ${names[@]}; do
|
for name in ${names[@]}; do
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "Run $name"
|
log "Run $name"
|
||||||
@@ -85,14 +90,14 @@ for name in ${names[@]}; do
|
|||||||
time $EXE \
|
time $EXE \
|
||||||
--whisper-encoder=$repo/${name}-encoder.onnx \
|
--whisper-encoder=$repo/${name}-encoder.onnx \
|
||||||
--whisper-decoder=$repo/${name}-decoder.onnx \
|
--whisper-decoder=$repo/${name}-decoder.onnx \
|
||||||
$wav
|
$data/$wav
|
||||||
|
|
||||||
log "test int8 onnx"
|
log "test int8 onnx"
|
||||||
|
|
||||||
time $EXE \
|
time $EXE \
|
||||||
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
|
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
|
||||||
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
|
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
|
||||||
$wav
|
$data/$wav
|
||||||
done
|
done
|
||||||
rm -rf $repo
|
rm -rf $repo
|
||||||
done
|
done
|
||||||
|
|||||||
36
.github/workflows/linux.yaml
vendored
36
.github/workflows/linux.yaml
vendored
@@ -128,13 +128,13 @@ jobs:
|
|||||||
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
|
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
|
||||||
path: install/*
|
path: install/*
|
||||||
|
|
||||||
- name: Test offline punctuation
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin:$PATH
|
export PATH=$PWD/build/bin:$PATH
|
||||||
export EXE=sherpa-onnx-offline-punctuation
|
export EXE=sherpa-onnx-offline-language-identification
|
||||||
|
|
||||||
.github/scripts/test-offline-punctuation.sh
|
.github/scripts/test-spoken-language-identification.sh
|
||||||
|
|
||||||
- name: Test C API
|
- name: Test C API
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -147,13 +147,13 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-c-api.sh
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
- name: Test Audio tagging
|
- name: Test offline CTC
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin:$PATH
|
export PATH=$PWD/build/bin:$PATH
|
||||||
export EXE=sherpa-onnx-offline-audio-tagging
|
export EXE=sherpa-onnx-offline
|
||||||
|
|
||||||
.github/scripts/test-audio-tagging.sh
|
.github/scripts/test-offline-ctc.sh
|
||||||
|
|
||||||
- name: Test online CTC
|
- name: Test online CTC
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -163,14 +163,21 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-online-ctc.sh
|
.github/scripts/test-online-ctc.sh
|
||||||
|
|
||||||
|
- name: Test offline punctuation
|
||||||
- name: Test spoken language identification (C++ API)
|
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin:$PATH
|
export PATH=$PWD/build/bin:$PATH
|
||||||
export EXE=sherpa-onnx-offline-language-identification
|
export EXE=sherpa-onnx-offline-punctuation
|
||||||
|
|
||||||
.github/scripts/test-spoken-language-identification.sh
|
.github/scripts/test-offline-punctuation.sh
|
||||||
|
|
||||||
|
- name: Test Audio tagging
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export EXE=sherpa-onnx-offline-audio-tagging
|
||||||
|
|
||||||
|
.github/scripts/test-audio-tagging.sh
|
||||||
|
|
||||||
- name: Test transducer kws
|
- name: Test transducer kws
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -180,7 +187,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-kws.sh
|
.github/scripts/test-kws.sh
|
||||||
|
|
||||||
|
|
||||||
- name: Test offline Whisper
|
- name: Test offline Whisper
|
||||||
if: matrix.build_type != 'Debug'
|
if: matrix.build_type != 'Debug'
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -192,14 +198,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-offline-whisper.sh
|
.github/scripts/test-offline-whisper.sh
|
||||||
|
|
||||||
- name: Test offline CTC
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export EXE=sherpa-onnx-offline
|
|
||||||
|
|
||||||
.github/scripts/test-offline-ctc.sh
|
|
||||||
|
|
||||||
- name: Test offline TTS
|
- name: Test offline TTS
|
||||||
if: matrix.with_tts == 'ON'
|
if: matrix.with_tts == 'ON'
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
16
.github/workflows/macos.yaml
vendored
16
.github/workflows/macos.yaml
vendored
@@ -107,6 +107,14 @@ jobs:
|
|||||||
otool -L build/bin/sherpa-onnx
|
otool -L build/bin/sherpa-onnx
|
||||||
otool -l build/bin/sherpa-onnx
|
otool -l build/bin/sherpa-onnx
|
||||||
|
|
||||||
|
- name: Test online CTC
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export EXE=sherpa-onnx
|
||||||
|
|
||||||
|
.github/scripts/test-online-ctc.sh
|
||||||
|
|
||||||
- name: Test offline punctuation
|
- name: Test offline punctuation
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -150,14 +158,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-kws.sh
|
.github/scripts/test-kws.sh
|
||||||
|
|
||||||
- name: Test online CTC
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export EXE=sherpa-onnx
|
|
||||||
|
|
||||||
.github/scripts/test-online-ctc.sh
|
|
||||||
|
|
||||||
- name: Test offline TTS
|
- name: Test offline TTS
|
||||||
if: matrix.with_tts == 'ON'
|
if: matrix.with_tts == 'ON'
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ namespace {
|
|||||||
|
|
||||||
enum class ModelType {
|
enum class ModelType {
|
||||||
kEncDecCTCModelBPE,
|
kEncDecCTCModelBPE,
|
||||||
|
kEncDecHybridRNNTCTCBPEModel,
|
||||||
kTdnn,
|
kTdnn,
|
||||||
kZipformerCtc,
|
kZipformerCtc,
|
||||||
kWenetCtc,
|
kWenetCtc,
|
||||||
@@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
|
|||||||
"No model_type in the metadata!\n"
|
"No model_type in the metadata!\n"
|
||||||
"If you are using models from NeMo, please refer to\n"
|
"If you are using models from NeMo, please refer to\n"
|
||||||
"https://huggingface.co/csukuangfj/"
|
"https://huggingface.co/csukuangfj/"
|
||||||
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
|
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n"
|
||||||
|
"or "
|
||||||
|
"https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
|
||||||
|
"fast-conformer-hybrid-transducer-ctc\n"
|
||||||
"If you are using models from WeNet, please refer to\n"
|
"If you are using models from WeNet, please refer to\n"
|
||||||
"https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
|
"https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
|
||||||
"run.sh\n"
|
"run.sh\n"
|
||||||
@@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
|
|||||||
|
|
||||||
if (model_type.get() == std::string("EncDecCTCModelBPE")) {
|
if (model_type.get() == std::string("EncDecCTCModelBPE")) {
|
||||||
return ModelType::kEncDecCTCModelBPE;
|
return ModelType::kEncDecCTCModelBPE;
|
||||||
|
} else if (model_type.get() == std::string("EncDecHybridRNNTCTCBPEModel")) {
|
||||||
|
return ModelType::kEncDecHybridRNNTCTCBPEModel;
|
||||||
} else if (model_type.get() == std::string("tdnn")) {
|
} else if (model_type.get() == std::string("tdnn")) {
|
||||||
return ModelType::kTdnn;
|
return ModelType::kTdnn;
|
||||||
} else if (model_type.get() == std::string("zipformer2_ctc")) {
|
} else if (model_type.get() == std::string("zipformer2_ctc")) {
|
||||||
@@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
|||||||
case ModelType::kEncDecCTCModelBPE:
|
case ModelType::kEncDecCTCModelBPE:
|
||||||
return std::make_unique<OfflineNemoEncDecCtcModel>(config);
|
return std::make_unique<OfflineNemoEncDecCtcModel>(config);
|
||||||
break;
|
break;
|
||||||
|
case ModelType::kEncDecHybridRNNTCTCBPEModel:
|
||||||
|
return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
|
||||||
|
break;
|
||||||
case ModelType::kTdnn:
|
case ModelType::kTdnn:
|
||||||
return std::make_unique<OfflineTdnnCtcModel>(config);
|
return std::make_unique<OfflineTdnnCtcModel>(config);
|
||||||
break;
|
break;
|
||||||
@@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
|||||||
case ModelType::kEncDecCTCModelBPE:
|
case ModelType::kEncDecCTCModelBPE:
|
||||||
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
|
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
|
||||||
break;
|
break;
|
||||||
|
case ModelType::kEncDecHybridRNNTCTCBPEModel:
|
||||||
|
// This overload is given `mgr`; forward it like the neighbouring cases do,
// otherwise the model is constructed through the config-only path.
return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(mgr, config);
|
||||||
|
break;
|
||||||
case ModelType::kTdnn:
|
case ModelType::kTdnn:
|
||||||
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
|
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
|
|||||||
std::unique_ptr<Impl> impl_;
|
std::unique_ptr<Impl> impl_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// EncDecHybridRNNTCTCBPEModel models are handled by the same class as
// EncDecCTCModelBPE models, so this is only an alias, not a new type.
using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel;
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|
||||||
#endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
|
#endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
|
||||||
|
|||||||
@@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
return std::make_unique<OfflineRecognizerParaformerImpl>(config);
|
return std::make_unique<OfflineRecognizerParaformerImpl>(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
|
if (model_type == "EncDecCTCModelBPE" ||
|
||||||
|
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
|
||||||
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
|
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
|
||||||
return std::make_unique<OfflineRecognizerCtcImpl>(config);
|
return std::make_unique<OfflineRecognizerCtcImpl>(config);
|
||||||
}
|
}
|
||||||
@@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
" - Non-streaming transducer models from icefall\n"
|
" - Non-streaming transducer models from icefall\n"
|
||||||
" - Non-streaming Paraformer models from FunASR\n"
|
" - Non-streaming Paraformer models from FunASR\n"
|
||||||
" - EncDecCTCModelBPE models from NeMo\n"
|
" - EncDecCTCModelBPE models from NeMo\n"
|
||||||
|
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
|
||||||
" - Whisper models\n"
|
" - Whisper models\n"
|
||||||
" - Tdnn models\n"
|
" - Tdnn models\n"
|
||||||
" - Zipformer CTC models\n"
|
" - Zipformer CTC models\n"
|
||||||
@@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
|
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
|
if (model_type == "EncDecCTCModelBPE" ||
|
||||||
|
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
|
||||||
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
|
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
|
||||||
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
|
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
|
||||||
}
|
}
|
||||||
@@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
" - Non-streaming transducer models from icefall\n"
|
" - Non-streaming transducer models from icefall\n"
|
||||||
" - Non-streaming Paraformer models from FunASR\n"
|
" - Non-streaming Paraformer models from FunASR\n"
|
||||||
" - EncDecCTCModelBPE models from NeMo\n"
|
" - EncDecCTCModelBPE models from NeMo\n"
|
||||||
|
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
|
||||||
" - Whisper models\n"
|
" - Whisper models\n"
|
||||||
" - Tdnn models\n"
|
" - Tdnn models\n"
|
||||||
" - Zipformer CTC models\n"
|
" - Zipformer CTC models\n"
|
||||||
|
|||||||
@@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) {
|
|||||||
// the following check.
|
// the following check.
|
||||||
//
|
//
|
||||||
// Note: Only id2sym_ matters as we use it to convert ID to symbols.
|
// Note: Only id2sym_ matters as we use it to convert ID to symbols.
|
||||||
|
#if 0
|
||||||
|
// We disable the check here because, for some multilingual BPE models
|
||||||
|
// from NeMo, the same symbol can appear multiple times with different IDs.
|
||||||
if (sym != " ") {
|
if (sym != " ") {
|
||||||
assert(sym2id_.count(sym) == 0);
|
assert(sym2id_.count(sym) == 0);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(id2sym_.count(id) == 0);
|
assert(id2sym_.count(id) == 0);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user