Add C++ and Python API for Dolphin CTC models (#2085)
This commit is contained in:
33
.github/scripts/test-offline-ctc.sh
vendored
33
.github/scripts/test-offline-ctc.sh
vendored
@@ -15,6 +15,39 @@ echo "PATH: $PATH"
|
|||||||
|
|
||||||
which $EXE
|
which $EXE
|
||||||
|
|
||||||
|
# Test both the int8-quantized and the float32 Dolphin CTC model of each size.
# Each model is downloaded, decoded once with $EXE, and removed again to keep
# disk usage low.
for type in base small; do
  # int8-quantized model
  dolphin_dir=sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type int8)"
  log "------------------------------------------------------------"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$dolphin_dir.tar.bz2
  tar xvf $dolphin_dir.tar.bz2
  rm $dolphin_dir.tar.bz2

  $EXE \
    --dolphin-model=./$dolphin_dir/model.int8.onnx \
    --tokens=./$dolphin_dir/tokens.txt \
    --debug=1 \
    ./$dolphin_dir/test_wavs/0.wav

  rm -rf $dolphin_dir

  # float32 model
  dolphin_dir=sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type)"
  log "------------------------------------------------------------"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$dolphin_dir.tar.bz2
  tar xvf $dolphin_dir.tar.bz2
  rm $dolphin_dir.tar.bz2

  $EXE \
    --dolphin-model=./$dolphin_dir/model.onnx \
    --tokens=./$dolphin_dir/tokens.txt \
    --debug=1 \
    ./$dolphin_dir/test_wavs/0.wav

  rm -rf $dolphin_dir
done
|
||||||
|
|
||||||
|
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "Run NeMo GigaAM Russian models"
|
log "Run NeMo GigaAM Russian models"
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
|
|||||||
9
.github/scripts/test-python.sh
vendored
9
.github/scripts/test-python.sh
vendored
@@ -8,6 +8,15 @@ log() {
|
|||||||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log "test offline dolphin ctc"

# Download the int8 base model, run the Python example against it, then
# remove the model directory again.
dolphin_model=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$dolphin_model.tar.bz2
tar xvf $dolphin_model.tar.bz2
rm $dolphin_model.tar.bz2

python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf $dolphin_model
|
||||||
|
|
||||||
log "test offline speech enhancement (GTCRN)"
|
log "test offline speech enhancement (GTCRN)"
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
|||||||
48
.github/workflows/export-dophin-ctc-to-onnx.yaml
vendored
Normal file
48
.github/workflows/export-dophin-ctc-to-onnx.yaml
vendored
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
name: export-dolphin-ctc-to-onnx
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: export-dolphin-ctc-to-onnx-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
export-dolphin-ctc-to-onnx:
|
||||||
|
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
|
||||||
|
name: ${{ matrix.model_type }}
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [macos-latest]
|
||||||
|
model_type: [small, base]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Download ${{ matrix.model_type }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
git lfs install
|
||||||
|
type=${{ matrix.model_type }}
|
||||||
|
|
||||||
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
|
||||||
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
|
||||||
|
|
||||||
|
rm -rf sherpa-onnx-dolphin-*/.git*
|
||||||
|
|
||||||
|
ls -lha sherpa-onnx-dolphin-*/
|
||||||
|
|
||||||
|
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
|
||||||
|
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
|
||||||
|
|
||||||
|
- name: Release
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
file: ./*.tar.bz2
|
||||||
|
overwrite: true
|
||||||
|
repo_name: k2-fsa/sherpa-onnx
|
||||||
|
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||||
|
tag: asr-models
|
||||||
20
.github/workflows/linux.yaml
vendored
20
.github/workflows/linux.yaml
vendored
@@ -205,6 +205,16 @@ jobs:
|
|||||||
overwrite: true
|
overwrite: true
|
||||||
file: sherpa-onnx-*.tar.bz2
|
file: sherpa-onnx-*.tar.bz2
|
||||||
|
|
||||||
|
- name: Test offline CTC
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
du -h -d1 .
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export EXE=sherpa-onnx-offline
|
||||||
|
|
||||||
|
.github/scripts/test-offline-ctc.sh
|
||||||
|
du -h -d1 .
|
||||||
|
|
||||||
- name: Test offline speech denoiser
|
- name: Test offline speech denoiser
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -249,16 +259,6 @@ jobs:
|
|||||||
.github/scripts/test-offline-moonshine.sh
|
.github/scripts/test-offline-moonshine.sh
|
||||||
du -h -d1 .
|
du -h -d1 .
|
||||||
|
|
||||||
- name: Test offline CTC
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
du -h -d1 .
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export EXE=sherpa-onnx-offline
|
|
||||||
|
|
||||||
.github/scripts/test-offline-ctc.sh
|
|
||||||
du -h -d1 .
|
|
||||||
|
|
||||||
- name: Test C++ API
|
- name: Test C++ API
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
16
.github/workflows/macos.yaml
vendored
16
.github/workflows/macos.yaml
vendored
@@ -162,6 +162,14 @@ jobs:
|
|||||||
overwrite: true
|
overwrite: true
|
||||||
file: sherpa-onnx-*osx-universal2*.tar.bz2
|
file: sherpa-onnx-*osx-universal2*.tar.bz2
|
||||||
|
|
||||||
|
- name: Test offline CTC
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export EXE=sherpa-onnx-offline
|
||||||
|
|
||||||
|
.github/scripts/test-offline-ctc.sh
|
||||||
|
|
||||||
- name: Test offline speech denoiser
|
- name: Test offline speech denoiser
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -226,14 +234,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-online-punctuation.sh
|
.github/scripts/test-online-punctuation.sh
|
||||||
|
|
||||||
- name: Test offline CTC
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export EXE=sherpa-onnx-offline
|
|
||||||
|
|
||||||
.github/scripts/test-offline-ctc.sh
|
|
||||||
|
|
||||||
- name: Test online CTC
|
- name: Test online CTC
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
# CMake 4.0 removed compatibility with CMake versions older than 3.5, so any
# project that declares an older minimum fails to configure unless
# CMAKE_POLICY_VERSION_MINIMUM is set.
# NOTE(review): presumably needed for third-party dependencies that still
# declare an old cmake_minimum_required - confirm.
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||||
|
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||||
|
endif()
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||||
|
|
||||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
|
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
|
||||||
|
|||||||
69
python-api-examples/offline-dolphin-ctc-decode-files.py
Executable file
69
python-api-examples/offline-dolphin-ctc-decode-files.py
Executable file
@@ -0,0 +1,69 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
This file shows how to use a non-streaming CTC model from Dolphin
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
Please download model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import time
|
||||||
|
|
||||||
|
import sherpa_onnx
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
|
||||||
|
def create_recognizer():
    """Create a Dolphin CTC recognizer and select the bundled test wave.

    The int8 base model is expected to be extracted in the current
    directory as
    ``./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02``.

    Returns:
      A tuple ``(recognizer, test_wav)`` where ``recognizer`` is the object
      returned by ``sherpa_onnx.OfflineRecognizer.from_dolphin_ctc`` and
      ``test_wav`` is the path of the wave file to decode.

    Raises:
      ValueError: If the model, the tokens file, or the test wave is missing.
    """
    model_dir = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02"
    model = f"{model_dir}/model.int8.onnx"
    tokens = f"{model_dir}/tokens.txt"
    test_wav = f"{model_dir}/test_wavs/0.wav"

    # Check every file we are going to use (including tokens.txt) so that
    # the user gets a download hint instead of a failure inside the library.
    missing = [f for f in (model, tokens, test_wav) if not Path(f).is_file()]
    if missing:
        raise ValueError(
            f"Missing file(s): {', '.join(missing)}\n"
            "Please download model files from\n"
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models\n"
        )

    recognizer = sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
        model=model,
        tokens=tokens,
        debug=True,
    )
    return recognizer, test_wav
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode the test wave once and print the result and real-time factor.

    Raises:
      ValueError: If the model files are missing (raised by
        ``create_recognizer``) or if the wave file contains no samples.
    """
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    if len(audio) == 0:
        # Without samples the RTF below would divide by zero.
        raise ValueError(f"{wave_filename} contains no samples")

    # perf_counter() is monotonic, so the measured interval is not affected
    # by system clock adjustments.
    start = time.perf_counter()
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    end = time.perf_counter()

    print(wave_filename)
    print(stream.result)

    elapsed_seconds = end - start
    audio_duration = len(audio) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -27,6 +27,8 @@ set(sources
|
|||||||
offline-ctc-fst-decoder.cc
|
offline-ctc-fst-decoder.cc
|
||||||
offline-ctc-greedy-search-decoder.cc
|
offline-ctc-greedy-search-decoder.cc
|
||||||
offline-ctc-model.cc
|
offline-ctc-model.cc
|
||||||
|
offline-dolphin-model-config.cc
|
||||||
|
offline-dolphin-model.cc
|
||||||
offline-fire-red-asr-greedy-search-decoder.cc
|
offline-fire-red-asr-greedy-search-decoder.cc
|
||||||
offline-fire-red-asr-model-config.cc
|
offline-fire-red-asr-model-config.cc
|
||||||
offline-fire-red-asr-model.cc
|
offline-fire-red-asr-model.cc
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "sherpa-onnx/csrc/file-utils.h"
|
#include "sherpa-onnx/csrc/file-utils.h"
|
||||||
#include "sherpa-onnx/csrc/macros.h"
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
|
||||||
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
|
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
|
||||||
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
|
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
|
||||||
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
|
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
|
||||||
@@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
|
|||||||
|
|
||||||
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
||||||
const OfflineModelConfig &config) {
|
const OfflineModelConfig &config) {
|
||||||
|
if (!config.dolphin.model.empty()) {
|
||||||
|
return std::make_unique<OfflineDolphinModel>(config);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
||||||
ModelType model_type = ModelType::kUnknown;
|
ModelType model_type = ModelType::kUnknown;
|
||||||
|
|
||||||
@@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
|||||||
template <typename Manager>
|
template <typename Manager>
|
||||||
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
||||||
Manager *mgr, const OfflineModelConfig &config) {
|
Manager *mgr, const OfflineModelConfig &config) {
|
||||||
|
if (!config.dolphin.model.empty()) {
|
||||||
|
return std::make_unique<OfflineDolphinModel>(mgr, config);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
||||||
ModelType model_type = ModelType::kUnknown;
|
ModelType model_type = ModelType::kUnknown;
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,10 @@ class OfflineCtcModel {
|
|||||||
// return true for models from https://github.com/salute-developers/GigaAM
|
// return true for models from https://github.com/salute-developers/GigaAM
|
||||||
// return false otherwise
|
// return false otherwise
|
||||||
virtual bool IsGigaAM() const { return false; }
|
virtual bool IsGigaAM() const { return false; }
|
||||||
|
|
||||||
|
// Hook for models that need to normalize features before the forward pass.
// The default implementation does nothing. OfflineDolphinModel overrides it
// because Dolphin models use global CMVN.
|
||||||
|
virtual void NormalizeFeatures(float *features, int32_t num_frames,
|
||||||
|
int32_t feat_dim) const {}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|||||||
35
sherpa-onnx/csrc/offline-dolphin-model-config.cc
Normal file
35
sherpa-onnx/csrc/offline-dolphin-model-config.cc
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/file-utils.h"
|
||||||
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// Register the option "dolphin-model" with `po`; its value is stored in
// the member `model`.
void OfflineDolphinModelConfig::Register(ParseOptions *po) {
|
||||||
|
po->Register("dolphin-model", &model,
|
||||||
|
"Path to model.onnx of Dolphin CTC branch.");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OfflineDolphinModelConfig::Validate() const {
|
||||||
|
if (!FileExists(model)) {
|
||||||
|
SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a human-readable description of this config, e.g.
//   OfflineDolphinModelConfig(model="/path/to/model.onnx")
std::string OfflineDolphinModelConfig::ToString() const {
  std::string description = "OfflineDolphinModelConfig(";
  description += "model=\"";
  description += model;
  description += "\")";

  return description;
}
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
27
sherpa-onnx/csrc/offline-dolphin-model-config.h
Normal file
27
sherpa-onnx/csrc/offline-dolphin-model-config.h
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// sherpa-onnx/csrc/offline-dolphin-model-config.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
|
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/parse-options.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// Configuration for a Dolphin CTC model.
struct OfflineDolphinModelConfig {
|
||||||
|
// Path to the ONNX model file. Set via the "dolphin-model" option
// registered in Register().
std::string model;
|
||||||
|
|
||||||
|
OfflineDolphinModelConfig() = default;
|
||||||
|
explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}
|
||||||
|
|
||||||
|
// Register the options of this config with `po`.
void Register(ParseOptions *po);
|
||||||
|
// Return true if the config is usable; false otherwise.
bool Validate() const;
|
||||||
|
|
||||||
|
// Return a human-readable description of this config.
std::string ToString() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
|
|
||||||
|
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
21
sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
Normal file
21
sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||||
|
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// Metadata of a Dolphin CTC model.
struct OfflineDolphinModelMetaData {
  // Filled in from the "vocab_size" entry of the model metadata.
  // Initialized to 0 so that it never holds an indeterminate value before
  // the metadata has been read.
  int32_t vocab_size = 0;

  int32_t subsampling_factor = 4;

  // Statistics used to normalize features: each feature value x at
  // dimension d becomes (x - mean[d]) * inv_stddev[d].
  std::vector<float> mean;
  std::vector<float> inv_stddev;
};
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
|
|
||||||
|
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||||
165
sherpa-onnx/csrc/offline-dolphin-model.cc
Normal file
165
sherpa-onnx/csrc/offline-dolphin-model.cc
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
// sherpa-onnx/csrc/offline-dolphin-model.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#if __ANDROID_API__ >= 9
|
||||||
|
#include "android/asset_manager.h"
|
||||||
|
#include "android/asset_manager_jni.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __OHOS__
|
||||||
|
#include "rawfile/raw_file_manager.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/file-utils.h"
|
||||||
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||||
|
#include "sherpa-onnx/csrc/session.h"
|
||||||
|
#include "sherpa-onnx/csrc/text-utils.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// Private implementation of OfflineDolphinModel.
//
// It owns the onnxruntime session created from the Dolphin model and the
// metadata (vocab_size, mean, invstd) read from that model.
class OfflineDolphinModel::Impl {
 public:
  // Read the model given by config.dolphin.model and create the session.
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Same as above, but the model is read through the resource manager `mgr`.
  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Run the session with `features` and `features_length` as its two inputs
  // and return all outputs of the model.
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {
        std::move(features),
        std::move(features_length),
    };

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return meta_data_.vocab_size; }

  int32_t SubsamplingFactor() const { return meta_data_.subsampling_factor; }

  // Normalize `features` in place using the mean and inverse standard
  // deviation stored in the model metadata.
  //
  // @param features   Pointer to num_frames * feat_dim floats; frame f
  //                   occupies [f * feat_dim, (f + 1) * feat_dim).
  // @param num_frames Number of frames in `features`.
  // @param feat_dim   Number of values per frame.
  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const {
    const auto &mean = meta_data_.mean;
    const auto &invstd = meta_data_.inv_stddev;

    // mean[d] and invstd[d] are read for every d in [0, feat_dim), so a
    // larger feat_dim would read past the end of the vectors.
    if (feat_dim > static_cast<int32_t>(mean.size()) ||
        feat_dim > static_cast<int32_t>(invstd.size())) {
      SHERPA_ONNX_LOGE(
          "feat_dim (%d) is larger than the number of normalization "
          "statistics in the model metadata (mean: %d, invstd: %d)",
          feat_dim, static_cast<int32_t>(mean.size()),
          static_cast<int32_t>(invstd.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    auto p = features;

    for (int32_t f = 0; f < num_frames; ++f) {
      for (int32_t d = 0; d < feat_dim; ++d) {
        p[d] = (p[d] - mean[d]) * invstd[d];
      }
      p += feat_dim;
    }
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  // Create the session from the in-memory model and read its metadata.
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.vocab_size, "vocab_size");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.mean, "mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev, "invstd");

    // Both vectors are indexed with the same feature dimension in
    // NormalizeFeatures(), so they must have the same length.
    if (meta_data_.mean.size() != meta_data_.inv_stddev.size()) {
      SHERPA_ONNX_LOGE(
          "The number of entries in 'mean' (%d) and 'invstd' (%d) of the "
          "model metadata differ",
          static_cast<int32_t>(meta_data_.mean.size()),
          static_cast<int32_t>(meta_data_.inv_stddev.size()));
      SHERPA_ONNX_EXIT(-1);
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineDolphinModelMetaData meta_data_;
};
|
||||||
|
|
||||||
|
// Construct the model from `config`; all work is done by Impl.
OfflineDolphinModel::OfflineDolphinModel(const OfflineModelConfig &config)
|
||||||
|
: impl_(std::make_unique<Impl>(config)) {}
|
||||||
|
|
||||||
|
// Construct the model from `config`, reading the model file through the
// resource manager `mgr`. Explicit instantiations for the supported manager
// types are at the end of this file.
template <typename Manager>
|
||||||
|
OfflineDolphinModel::OfflineDolphinModel(Manager *mgr,
|
||||||
|
const OfflineModelConfig &config)
|
||||||
|
: impl_(std::make_unique<Impl>(mgr, config)) {}
|
||||||
|
|
||||||
|
// Defined here, where Impl is a complete type, so that
// std::unique_ptr<Impl> can destroy it.
OfflineDolphinModel::~OfflineDolphinModel() = default;
|
||||||
|
|
||||||
|
// Forward both inputs to Impl::Forward() and return its outputs.
std::vector<Ort::Value> OfflineDolphinModel::Forward(
|
||||||
|
Ort::Value features, Ort::Value features_length) {
|
||||||
|
return impl_->Forward(std::move(features), std::move(features_length));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the vocabulary size read from the model metadata.
int32_t OfflineDolphinModel::VocabSize() const { return impl_->VocabSize(); }
|
||||||
|
|
||||||
|
// Return the subsampling factor held by Impl. Impl::Init() does not read it
// from the model, so this is the default of OfflineDolphinModelMetaData.
int32_t OfflineDolphinModel::SubsamplingFactor() const {
|
||||||
|
return impl_->SubsamplingFactor();
|
||||||
|
}
|
||||||
|
|
||||||
|
void OfflineDolphinModel::NormalizeFeatures(float *features, int32_t num_frames,
|
||||||
|
int32_t feat_dim) const {
|
||||||
|
return impl_->NormalizeFeatures(features, num_frames, feat_dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the allocator owned by Impl.
OrtAllocator *OfflineDolphinModel::Allocator() const {
|
||||||
|
return impl_->Allocator();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit instantiation of the templated constructor for Android's
// AAssetManager.
#if __ANDROID_API__ >= 9
|
||||||
|
template OfflineDolphinModel::OfflineDolphinModel(
|
||||||
|
AAssetManager *mgr, const OfflineModelConfig &config);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Explicit instantiation of the templated constructor for
// NativeResourceManager (only when __OHOS__ is set).
#if __OHOS__
|
||||||
|
template OfflineDolphinModel::OfflineDolphinModel(
|
||||||
|
NativeResourceManager *mgr, const OfflineModelConfig &config);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
67
sherpa-onnx/csrc/offline-dolphin-model.h
Normal file
67
sherpa-onnx/csrc/offline-dolphin-model.h
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
// sherpa-onnx/csrc/offline-dolphin-model.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||||
|
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "onnxruntime_cxx_api.h" // NOLINT
|
||||||
|
#include "sherpa-onnx/csrc/offline-ctc-model.h"
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model-meta-data.h"
|
||||||
|
#include "sherpa-onnx/csrc/offline-model-config.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// CTC model from Dolphin, exposed through the OfflineCtcModel interface.
// The implementation lives in the private class Impl (see the .cc file).
class OfflineDolphinModel : public OfflineCtcModel {
|
||||||
|
public:
|
||||||
|
// Create the model from the file given by config.dolphin.model.
explicit OfflineDolphinModel(const OfflineModelConfig &config);
|
||||||
|
|
||||||
|
// Same as above, but the model file is read through the resource
// manager `mgr`.
template <typename Manager>
|
||||||
|
OfflineDolphinModel(Manager *mgr, const OfflineModelConfig &config);
|
||||||
|
|
||||||
|
~OfflineDolphinModel() override;
|
||||||
|
|
||||||
|
/** Run the forward method of the model.
|
||||||
|
*
|
||||||
|
* @param features A tensor of shape (N, T, C).
|
||||||
|
* @param features_length A 1-D tensor of shape (N,) containing number of
|
||||||
|
* valid frames in `features` before padding.
|
||||||
|
* Its dtype is int64_t.
|
||||||
|
*
|
||||||
|
* @return Return a vector containing:
|
||||||
|
* - log_probs: A 3-D tensor of shape (N, T', vocab_size).
|
||||||
|
* - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
|
||||||
|
*/
|
||||||
|
std::vector<Ort::Value> Forward(Ort::Value features,
|
||||||
|
Ort::Value features_length) override;
|
||||||
|
|
||||||
|
/** Return the vocabulary size of the model
|
||||||
|
*/
|
||||||
|
int32_t VocabSize() const override;
|
||||||
|
|
||||||
|
/** SubsamplingFactor of the model
|
||||||
|
*
|
||||||
|
* The implementation does not read this value from the model, so the
|
||||||
|
* default of OfflineDolphinModelMetaData::subsampling_factor (4) is returned.
|
||||||
|
*/
|
||||||
|
int32_t SubsamplingFactor() const override;
|
||||||
|
|
||||||
|
/** Return an allocator for allocating memory
|
||||||
|
*/
|
||||||
|
OrtAllocator *Allocator() const override;
|
||||||
|
|
||||||
|
// Always true for this model.
bool SupportBatchProcessing() const override { return true; }
|
||||||
|
|
||||||
|
// Normalize `features` in place with the mean / inverse standard deviation
// stored in the model metadata.
void NormalizeFeatures(float *features, int32_t num_frames,
|
||||||
|
int32_t feat_dim) const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
class Impl;
|
||||||
|
std::unique_ptr<Impl> impl_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
|
|
||||||
|
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||||
@@ -21,6 +21,7 @@ void OfflineModelConfig::Register(ParseOptions *po) {
|
|||||||
wenet_ctc.Register(po);
|
wenet_ctc.Register(po);
|
||||||
sense_voice.Register(po);
|
sense_voice.Register(po);
|
||||||
moonshine.Register(po);
|
moonshine.Register(po);
|
||||||
|
dolphin.Register(po);
|
||||||
|
|
||||||
po->Register("telespeech-ctc", &telespeech_ctc,
|
po->Register("telespeech-ctc", &telespeech_ctc,
|
||||||
"Path to model.onnx for telespeech ctc");
|
"Path to model.onnx for telespeech ctc");
|
||||||
@@ -109,6 +110,10 @@ bool OfflineModelConfig::Validate() const {
|
|||||||
return moonshine.Validate();
|
return moonshine.Validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!dolphin.model.empty()) {
|
||||||
|
return dolphin.Validate();
|
||||||
|
}
|
||||||
|
|
||||||
if (!telespeech_ctc.empty() && !FileExists(telespeech_ctc)) {
|
if (!telespeech_ctc.empty() && !FileExists(telespeech_ctc)) {
|
||||||
SHERPA_ONNX_LOGE("telespeech_ctc: '%s' does not exist",
|
SHERPA_ONNX_LOGE("telespeech_ctc: '%s' does not exist",
|
||||||
telespeech_ctc.c_str());
|
telespeech_ctc.c_str());
|
||||||
@@ -136,6 +141,7 @@ std::string OfflineModelConfig::ToString() const {
|
|||||||
os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
|
os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
|
||||||
os << "sense_voice=" << sense_voice.ToString() << ", ";
|
os << "sense_voice=" << sense_voice.ToString() << ", ";
|
||||||
os << "moonshine=" << moonshine.ToString() << ", ";
|
os << "moonshine=" << moonshine.ToString() << ", ";
|
||||||
|
os << "dolphin=" << dolphin.ToString() << ", ";
|
||||||
os << "telespeech_ctc=\"" << telespeech_ctc << "\", ";
|
os << "telespeech_ctc=\"" << telespeech_ctc << "\", ";
|
||||||
os << "tokens=\"" << tokens << "\", ";
|
os << "tokens=\"" << tokens << "\", ";
|
||||||
os << "num_threads=" << num_threads << ", ";
|
os << "num_threads=" << num_threads << ", ";
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
|
||||||
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"
|
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"
|
||||||
#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"
|
#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"
|
||||||
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"
|
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"
|
||||||
@@ -30,6 +31,7 @@ struct OfflineModelConfig {
|
|||||||
OfflineWenetCtcModelConfig wenet_ctc;
|
OfflineWenetCtcModelConfig wenet_ctc;
|
||||||
OfflineSenseVoiceModelConfig sense_voice;
|
OfflineSenseVoiceModelConfig sense_voice;
|
||||||
OfflineMoonshineModelConfig moonshine;
|
OfflineMoonshineModelConfig moonshine;
|
||||||
|
OfflineDolphinModelConfig dolphin;
|
||||||
std::string telespeech_ctc;
|
std::string telespeech_ctc;
|
||||||
|
|
||||||
std::string tokens;
|
std::string tokens;
|
||||||
@@ -62,6 +64,7 @@ struct OfflineModelConfig {
|
|||||||
const OfflineWenetCtcModelConfig &wenet_ctc,
|
const OfflineWenetCtcModelConfig &wenet_ctc,
|
||||||
const OfflineSenseVoiceModelConfig &sense_voice,
|
const OfflineSenseVoiceModelConfig &sense_voice,
|
||||||
const OfflineMoonshineModelConfig &moonshine,
|
const OfflineMoonshineModelConfig &moonshine,
|
||||||
|
const OfflineDolphinModelConfig &dolphin,
|
||||||
const std::string &telespeech_ctc,
|
const std::string &telespeech_ctc,
|
||||||
const std::string &tokens, int32_t num_threads, bool debug,
|
const std::string &tokens, int32_t num_threads, bool debug,
|
||||||
const std::string &provider, const std::string &model_type,
|
const std::string &provider, const std::string &model_type,
|
||||||
@@ -77,6 +80,7 @@ struct OfflineModelConfig {
|
|||||||
wenet_ctc(wenet_ctc),
|
wenet_ctc(wenet_ctc),
|
||||||
sense_voice(sense_voice),
|
sense_voice(sense_voice),
|
||||||
moonshine(moonshine),
|
moonshine(moonshine),
|
||||||
|
dolphin(dolphin),
|
||||||
telespeech_ctc(telespeech_ctc),
|
telespeech_ctc(telespeech_ctc),
|
||||||
tokens(tokens),
|
tokens(tokens),
|
||||||
num_threads(num_threads),
|
num_threads(num_threads),
|
||||||
|
|||||||
@@ -118,6 +118,19 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!config_.model_config.dolphin.model.empty()) {
|
||||||
|
config_.feat_config.low_freq = 0;
|
||||||
|
config_.feat_config.high_freq = 8000;
|
||||||
|
config_.feat_config.remove_dc_offset = false;
|
||||||
|
config_.feat_config.dither = 0;
|
||||||
|
config_.feat_config.preemph_coeff = 0;
|
||||||
|
config_.feat_config.window_type = "hann";
|
||||||
|
config_.feat_config.feature_dim = 80;
|
||||||
|
config_.feat_config.is_librosa = true;
|
||||||
|
config_.feat_config.frame_length_ms = 31.25; // 16000/512 = 31.25
|
||||||
|
config_.feat_config.snip_edges = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (!config_.model_config.wenet_ctc.model.empty()) {
|
if (!config_.model_config.wenet_ctc.model.empty()) {
|
||||||
// WeNet CTC models assume input samples are in the range
|
// WeNet CTC models assume input samples are in the range
|
||||||
// [-32768, 32767], so we set normalize_samples to false
|
// [-32768, 32767], so we set normalize_samples to false
|
||||||
@@ -157,7 +170,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
|||||||
} else {
|
} else {
|
||||||
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
|
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
|
||||||
config_.decoding_method.c_str());
|
config_.decoding_method.c_str());
|
||||||
exit(-1);
|
SHERPA_ONNX_EXIT(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,7 +179,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void DecodeStreams(OfflineStream **ss, int32_t n) const override {
|
void DecodeStreams(OfflineStream **ss, int32_t n) const override {
|
||||||
if (!model_->SupportBatchProcessing()) {
|
if (!model_->SupportBatchProcessing() || (n == 1)) {
|
||||||
// If the model does not support batch process,
|
// If the model does not support batch process,
|
||||||
// we process each stream independently.
|
// we process each stream independently.
|
||||||
for (int32_t i = 0; i != n; ++i) {
|
for (int32_t i = 0; i != n; ++i) {
|
||||||
@@ -190,6 +203,9 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
|||||||
std::vector<float> f = ss[i]->GetFrames();
|
std::vector<float> f = ss[i]->GetFrames();
|
||||||
|
|
||||||
int32_t num_frames = f.size() / feat_dim;
|
int32_t num_frames = f.size() / feat_dim;
|
||||||
|
|
||||||
|
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
|
||||||
|
|
||||||
features_vec[i] = std::move(f);
|
features_vec[i] = std::move(f);
|
||||||
|
|
||||||
features_length_vec[i] = num_frames;
|
features_length_vec[i] = num_frames;
|
||||||
@@ -241,6 +257,8 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
|||||||
|
|
||||||
int32_t num_frames = f.size() / feat_dim;
|
int32_t num_frames = f.size() / feat_dim;
|
||||||
|
|
||||||
|
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
|
||||||
|
|
||||||
std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
|
std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
|
||||||
|
|
||||||
Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
|
Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
|
||||||
|
|||||||
@@ -49,7 +49,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
if (!config.model_config.nemo_ctc.model.empty() ||
|
if (!config.model_config.nemo_ctc.model.empty() ||
|
||||||
!config.model_config.zipformer_ctc.model.empty() ||
|
!config.model_config.zipformer_ctc.model.empty() ||
|
||||||
!config.model_config.tdnn.model.empty() ||
|
!config.model_config.tdnn.model.empty() ||
|
||||||
!config.model_config.wenet_ctc.model.empty()) {
|
!config.model_config.wenet_ctc.model.empty() ||
|
||||||
|
!config.model_config.dolphin.model.empty()) {
|
||||||
return std::make_unique<OfflineRecognizerCtcImpl>(config);
|
return std::make_unique<OfflineRecognizerCtcImpl>(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -234,7 +235,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
|||||||
if (!config.model_config.nemo_ctc.model.empty() ||
|
if (!config.model_config.nemo_ctc.model.empty() ||
|
||||||
!config.model_config.zipformer_ctc.model.empty() ||
|
!config.model_config.zipformer_ctc.model.empty() ||
|
||||||
!config.model_config.tdnn.model.empty() ||
|
!config.model_config.tdnn.model.empty() ||
|
||||||
!config.model_config.wenet_ctc.model.empty()) {
|
!config.model_config.wenet_ctc.model.empty() ||
|
||||||
|
!config.model_config.dolphin.model.empty()) {
|
||||||
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
|
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,9 +23,8 @@ struct OfflineSenseVoiceModelConfig {
|
|||||||
bool use_itn = false;
|
bool use_itn = false;
|
||||||
|
|
||||||
OfflineSenseVoiceModelConfig() = default;
|
OfflineSenseVoiceModelConfig() = default;
|
||||||
explicit OfflineSenseVoiceModelConfig(const std::string &model,
|
OfflineSenseVoiceModelConfig(const std::string &model,
|
||||||
const std::string &language,
|
const std::string &language, bool use_itn)
|
||||||
bool use_itn)
|
|
||||||
: model(model), language(language), use_itn(use_itn) {}
|
: model(model), language(language), use_itn(use_itn) {}
|
||||||
|
|
||||||
void Register(ParseOptions *po);
|
void Register(ParseOptions *po);
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
|
|||||||
std::string text;
|
std::string text;
|
||||||
for (auto i : src.tokens) {
|
for (auto i : src.tokens) {
|
||||||
auto sym = sym_table[i];
|
auto sym = sym_table[i];
|
||||||
|
if (sym == "<unk>") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
text.append(sym);
|
text.append(sym);
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
#ifndef SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
#ifndef SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
||||||
#define SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
#define SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "rknn_api.h" // NOLINT
|
#include "rknn_api.h" // NOLINT
|
||||||
#include "sherpa-onnx/csrc/online-model-config.h"
|
#include "sherpa-onnx/csrc/online-model-config.h"
|
||||||
#include "sherpa-onnx/csrc/vad-model.h"
|
#include "sherpa-onnx/csrc/vad-model.h"
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ set(srcs
|
|||||||
features.cc
|
features.cc
|
||||||
keyword-spotter.cc
|
keyword-spotter.cc
|
||||||
offline-ctc-fst-decoder-config.cc
|
offline-ctc-fst-decoder-config.cc
|
||||||
|
offline-dolphin-model-config.cc
|
||||||
offline-fire-red-asr-model-config.cc
|
offline-fire-red-asr-model-config.cc
|
||||||
offline-lm-config.cc
|
offline-lm-config.cc
|
||||||
offline-model-config.cc
|
offline-model-config.cc
|
||||||
|
|||||||
23
sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
Normal file
23
sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
// sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
// Registers the Python class "OfflineDolphinModelConfig" on module `m`.
//
// The binding exposes:
//   - a default constructor,
//   - a constructor taking a single string, addressable from Python as the
//     keyword argument `model`,
//   - a read/write `model` attribute,
//   - `__str__`, forwarded to OfflineDolphinModelConfig::ToString().
void PybindOfflineDolphinModelConfig(py::module *m) {
  using Config = OfflineDolphinModelConfig;

  py::class_<Config> binding(*m, "OfflineDolphinModelConfig");

  // Constructors.
  binding.def(py::init<>());
  binding.def(py::init<const std::string &>(), py::arg("model"));

  // Attributes and string conversion.
  binding.def_readwrite("model", &Config::model);
  binding.def("__str__", &Config::ToString);
}
|
||||||
|
|
||||||
|
} // namespace sherpa_onnx
|
||||||
16
sherpa-onnx/python/csrc/offline-dolphin-model-config.h
Normal file
16
sherpa-onnx/python/csrc/offline-dolphin-model-config.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
// sherpa-onnx/python/csrc/offline-dolphin-model-config.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
|
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
|
|
||||||
|
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||||
|
|
||||||
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
void PybindOfflineDolphinModelConfig(py::module *m);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||||
@@ -8,6 +8,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "sherpa-onnx/csrc/offline-model-config.h"
|
#include "sherpa-onnx/csrc/offline-model-config.h"
|
||||||
|
#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"
|
||||||
#include "sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h"
|
#include "sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h"
|
||||||
#include "sherpa-onnx/python/csrc/offline-moonshine-model-config.h"
|
#include "sherpa-onnx/python/csrc/offline-moonshine-model-config.h"
|
||||||
#include "sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h"
|
#include "sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h"
|
||||||
@@ -32,6 +33,7 @@ void PybindOfflineModelConfig(py::module *m) {
|
|||||||
PybindOfflineWenetCtcModelConfig(m);
|
PybindOfflineWenetCtcModelConfig(m);
|
||||||
PybindOfflineSenseVoiceModelConfig(m);
|
PybindOfflineSenseVoiceModelConfig(m);
|
||||||
PybindOfflineMoonshineModelConfig(m);
|
PybindOfflineMoonshineModelConfig(m);
|
||||||
|
PybindOfflineDolphinModelConfig(m);
|
||||||
|
|
||||||
using PyClass = OfflineModelConfig;
|
using PyClass = OfflineModelConfig;
|
||||||
py::class_<PyClass>(*m, "OfflineModelConfig")
|
py::class_<PyClass>(*m, "OfflineModelConfig")
|
||||||
@@ -44,7 +46,8 @@ void PybindOfflineModelConfig(py::module *m) {
|
|||||||
const OfflineZipformerCtcModelConfig &,
|
const OfflineZipformerCtcModelConfig &,
|
||||||
const OfflineWenetCtcModelConfig &,
|
const OfflineWenetCtcModelConfig &,
|
||||||
const OfflineSenseVoiceModelConfig &,
|
const OfflineSenseVoiceModelConfig &,
|
||||||
const OfflineMoonshineModelConfig &, const std::string &,
|
const OfflineMoonshineModelConfig &,
|
||||||
|
const OfflineDolphinModelConfig &, const std::string &,
|
||||||
const std::string &, int32_t, bool, const std::string &,
|
const std::string &, int32_t, bool, const std::string &,
|
||||||
const std::string &, const std::string &,
|
const std::string &, const std::string &,
|
||||||
const std::string &>(),
|
const std::string &>(),
|
||||||
@@ -58,6 +61,7 @@ void PybindOfflineModelConfig(py::module *m) {
|
|||||||
py::arg("wenet_ctc") = OfflineWenetCtcModelConfig(),
|
py::arg("wenet_ctc") = OfflineWenetCtcModelConfig(),
|
||||||
py::arg("sense_voice") = OfflineSenseVoiceModelConfig(),
|
py::arg("sense_voice") = OfflineSenseVoiceModelConfig(),
|
||||||
py::arg("moonshine") = OfflineMoonshineModelConfig(),
|
py::arg("moonshine") = OfflineMoonshineModelConfig(),
|
||||||
|
py::arg("dolphin") = OfflineDolphinModelConfig(),
|
||||||
py::arg("telespeech_ctc") = "", py::arg("tokens"),
|
py::arg("telespeech_ctc") = "", py::arg("tokens"),
|
||||||
py::arg("num_threads"), py::arg("debug") = false,
|
py::arg("num_threads"), py::arg("debug") = false,
|
||||||
py::arg("provider") = "cpu", py::arg("model_type") = "",
|
py::arg("provider") = "cpu", py::arg("model_type") = "",
|
||||||
@@ -72,6 +76,7 @@ void PybindOfflineModelConfig(py::module *m) {
|
|||||||
.def_readwrite("wenet_ctc", &PyClass::wenet_ctc)
|
.def_readwrite("wenet_ctc", &PyClass::wenet_ctc)
|
||||||
.def_readwrite("sense_voice", &PyClass::sense_voice)
|
.def_readwrite("sense_voice", &PyClass::sense_voice)
|
||||||
.def_readwrite("moonshine", &PyClass::moonshine)
|
.def_readwrite("moonshine", &PyClass::moonshine)
|
||||||
|
.def_readwrite("dolphin", &PyClass::dolphin)
|
||||||
.def_readwrite("telespeech_ctc", &PyClass::telespeech_ctc)
|
.def_readwrite("telespeech_ctc", &PyClass::telespeech_ctc)
|
||||||
.def_readwrite("tokens", &PyClass::tokens)
|
.def_readwrite("tokens", &PyClass::tokens)
|
||||||
.def_readwrite("num_threads", &PyClass::num_threads)
|
.def_readwrite("num_threads", &PyClass::num_threads)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from typing import List, Optional
|
|||||||
from _sherpa_onnx import (
|
from _sherpa_onnx import (
|
||||||
FeatureExtractorConfig,
|
FeatureExtractorConfig,
|
||||||
OfflineCtcFstDecoderConfig,
|
OfflineCtcFstDecoderConfig,
|
||||||
|
OfflineDolphinModelConfig,
|
||||||
OfflineFireRedAsrModelConfig,
|
OfflineFireRedAsrModelConfig,
|
||||||
OfflineLMConfig,
|
OfflineLMConfig,
|
||||||
OfflineModelConfig,
|
OfflineModelConfig,
|
||||||
@@ -408,6 +409,78 @@ class OfflineRecognizer(object):
|
|||||||
self.config = recognizer_config
|
self.config = recognizer_config
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@classmethod
def from_dolphin_ctc(
    cls,
    model: str,
    tokens: str,
    num_threads: int = 1,
    sample_rate: int = 16000,
    feature_dim: int = 80,
    decoding_method: str = "greedy_search",
    debug: bool = False,
    provider: str = "cpu",
    rule_fsts: str = "",
    rule_fars: str = "",
):
    """Build an offline recognizer backed by a Dolphin CTC model.

    Pre-trained models can be downloaded from
    `<https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html>`_

    Args:
      model:
        Path to ``model.onnx`` or ``model.int8.onnx``.
      tokens:
        Path to ``tokens.txt``. Every line of ``tokens.txt`` has two
        columns::

            symbol integer_id

      num_threads:
        Number of threads used for neural network computation.
      sample_rate:
        Sample rate of the data the model was trained on.
      feature_dim:
        Dimension of the features the model was trained on.
      decoding_method:
        Valid values are greedy_search.
      debug:
        True to show debug messages.
      provider:
        onnxruntime execution provider. Valid values are: cpu, cuda, coreml.
      rule_fsts:
        If not empty, the fsts to use for inverse text normalization.
        Multiple fsts are separated by a comma.
      rule_fars:
        If not empty, the fst archives to use for inverse text
        normalization. Multiple archives are separated by a comma.

    Returns:
      An instance of ``cls`` whose ``recognizer`` and ``config`` attributes
      are set. The instance is created with ``cls.__new__``, so ``__init__``
      is not run.
    """
    # Feature extraction settings handed to the native recognizer.
    # NOTE(review): the C++ CTC recognizer appears to reset several feature
    # options (including feature_dim) for Dolphin models - confirm before
    # relying on non-default values here.
    feature_options = FeatureExtractorConfig(
        sampling_rate=sample_rate,
        feature_dim=feature_dim,
    )

    # Only the Dolphin sub-config is filled in; every other model type is
    # left at its default.
    dolphin_options = OfflineDolphinModelConfig(model=model)
    model_options = OfflineModelConfig(
        dolphin=dolphin_options,
        tokens=tokens,
        num_threads=num_threads,
        debug=debug,
        provider=provider,
    )

    full_config = OfflineRecognizerConfig(
        feat_config=feature_options,
        model_config=model_options,
        decoding_method=decoding_method,
        rule_fsts=rule_fsts,
        rule_fars=rule_fars,
    )

    # Bypass __init__: this alternate constructor sets the attributes itself.
    instance = cls.__new__(cls)
    instance.recognizer = _Recognizer(full_config)
    instance.config = full_config
    return instance
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_nemo_ctc(
|
def from_nemo_ctc(
|
||||||
cls,
|
cls,
|
||||||
|
|||||||
Reference in New Issue
Block a user