diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml index 1d0accb3..5baa448e 100644 --- a/.github/workflows/c-api.yaml +++ b/.github/workflows/c-api.yaml @@ -79,6 +79,36 @@ jobs: otool -L ./install/lib/libsherpa-onnx-c-api.dylib fi + - name: Test Dolphin CTC + shell: bash + run: | + name=dolphin-ctc-c-api + gcc -o $name ./c-api-examples/$name.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh $name + + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then + ldd ./$name + echo "----" + readelf -d ./$name + fi + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./$name + + rm $name + rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 + - name: Test speech enhancement (GTCRN) shell: bash run: | diff --git a/.github/workflows/cxx-api.yaml b/.github/workflows/cxx-api.yaml index 6e1e20f2..d88e45fd 100644 --- a/.github/workflows/cxx-api.yaml +++ b/.github/workflows/cxx-api.yaml @@ -81,6 +81,38 @@ jobs: otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib fi + - name: Test Dolphin CTC + shell: bash + run: | + name=dolphin-ctc-cxx-api + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-cxx-api \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh $name + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then + ldd ./$name + echo 
"----" + readelf -d ./$name + fi + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 + + ./$name + + rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 + + rm $name + - name: Test VAD shell: bash run: | diff --git a/c-api-examples/dolphin-ctc-c-api.c b/c-api-examples/dolphin-ctc-c-api.c new file mode 100644 index 00000000..b69ad9c5 --- /dev/null +++ b/c-api-examples/dolphin-ctc-c-api.c @@ -0,0 +1,74 @@ +// c-api-examples/dolphin-ctc-c-api.c +// +// Copyright (c) 2025 Xiaomi Corporation + +// +// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C API. +// clang-format off +// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// +// clang-format on + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sherpa-onnx/c-api/c-api.h" + +int32_t main() { + // clang-format off + const char *wav_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; + const char *model_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; + const char *tokens_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; + // clang-format on + + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); + if (wave == NULL) { + fprintf(stderr, "Failed to read %s\n", wav_filename); + return -1; + } + + SherpaOnnxOfflineModelConfig offline_model_config; + memset(&offline_model_config, 0, sizeof(offline_model_config)); + offline_model_config.debug = 1; + offline_model_config.num_threads = 1; + offline_model_config.provider = "cpu"; 
+ offline_model_config.tokens = tokens_filename; + offline_model_config.dolphin.model = model_filename; + + // Recognizer config + SherpaOnnxOfflineRecognizerConfig recognizer_config; + memset(&recognizer_config, 0, sizeof(recognizer_config)); + recognizer_config.decoding_method = "greedy_search"; + recognizer_config.model_config = offline_model_config; + + const SherpaOnnxOfflineRecognizer *recognizer = + SherpaOnnxCreateOfflineRecognizer(&recognizer_config); + + if (recognizer == NULL) { + fprintf(stderr, "Please check your config!\n"); + SherpaOnnxFreeWave(wave); + return -1; + } + + const SherpaOnnxOfflineStream *stream = + SherpaOnnxCreateOfflineStream(recognizer); + + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, + wave->num_samples); + SherpaOnnxDecodeOfflineStream(recognizer, stream); + const SherpaOnnxOfflineRecognizerResult *result = + SherpaOnnxGetOfflineStreamResult(stream); + + fprintf(stderr, "Decoded text: %s\n", result->text); + + SherpaOnnxDestroyOfflineRecognizerResult(result); + SherpaOnnxDestroyOfflineStream(stream); + SherpaOnnxDestroyOfflineRecognizer(recognizer); + SherpaOnnxFreeWave(wave); + + return 0; +} diff --git a/cxx-api-examples/CMakeLists.txt b/cxx-api-examples/CMakeLists.txt index 45b01189..4a820649 100644 --- a/cxx-api-examples/CMakeLists.txt +++ b/cxx-api-examples/CMakeLists.txt @@ -24,6 +24,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) +add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc) +target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api) + add_executable(vad-cxx-api ./vad-cxx-api.cc) target_link_libraries(vad-cxx-api sherpa-onnx-cxx-api) diff --git a/cxx-api-examples/dolphin-ctc-cxx-api.cc b/cxx-api-examples/dolphin-ctc-cxx-api.cc new file mode 100644 index 00000000..23cbc432 --- /dev/null +++ b/cxx-api-examples/dolphin-ctc-cxx-api.cc @@ 
-0,0 +1,76 @@ +// cxx-api-examples/dolphin-ctc-cxx-api.cc +// Copyright (c) 2025 Xiaomi Corporation + +// +// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C++ +// API. +// +// clang-format off +// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 +// +// clang-format on + +#include <chrono>  // NOLINT +#include <iostream> +#include <string> + +#include "sherpa-onnx/c-api/cxx-api.h" + +int32_t main() { + using namespace sherpa_onnx::cxx; // NOLINT + OfflineRecognizerConfig config; + + // clang-format off + config.model_config.dolphin.model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; + config.model_config.tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; + + std::string wave_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; + // clang-format on + + config.model_config.num_threads = 1; + + std::cout << "Loading model\n"; + OfflineRecognizer recognizer = OfflineRecognizer::Create(config); + if (!recognizer.Get()) { + std::cerr << "Please check your config\n"; + return -1; + } + std::cout << "Loading model done\n"; + + Wave wave = ReadWave(wave_filename); + if (wave.samples.empty()) { + std::cerr << "Failed to read: '" << wave_filename << "'\n"; + return -1; + } + + std::cout << "Start recognition\n"; + const auto begin = std::chrono::steady_clock::now(); + + OfflineStream stream = recognizer.CreateStream(); + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), + wave.samples.size()); + + recognizer.Decode(&stream); + + OfflineRecognizerResult result = recognizer.GetResult(&stream); + + const auto end = std::chrono::steady_clock::now(); + const float elapsed_seconds = + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) + .count() / + 1000.; 
float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); + float rtf = elapsed_seconds / duration; + + std::cout << "text: " << result.text << "\n"; + printf("Number of threads: %d\n", config.model_config.num_threads); + printf("Duration: %.3fs\n", duration); + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, + duration, rtf); + + return 0; +} diff --git a/scripts/apk/generate-asr-2pass-apk-script.py b/scripts/apk/generate-asr-2pass-apk-script.py index 138da81e..4731dde0 100755 --- a/scripts/apk/generate-asr-2pass-apk-script.py +++ b/scripts/apk/generate-asr-2pass-apk-script.py @@ -333,7 +333,6 @@ def get_1st_models(): rm -f bpe.model rm -rf test_wavs - rm README.md ls -lh @@ -354,7 +353,6 @@ def get_1st_models(): rm -f bpe.model rm -rf test_wavs - rm README.md ls -lh diff --git a/scripts/apk/generate-asr-apk-script.py b/scripts/apk/generate-asr-apk-script.py index e8db919a..9f9c50f1 100755 --- a/scripts/apk/generate-asr-apk-script.py +++ b/scripts/apk/generate-asr-apk-script.py @@ -277,7 +277,6 @@ def get_models(): rm -f bpe.model rm -rf test_wavs - rm README.md ls -lh @@ -298,7 +297,6 @@ def get_models(): rm -f bpe.model rm -rf test_wavs - rm README.md ls -lh diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index e8e0a9d9..aed57ed9 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -448,7 +448,7 @@ def get_models(): idx=25, lang="multi_lang", lang2="multi_lang", - short_name="multi_lang", + short_name="dolphin_base_ctc", cmd=""" pushd $model_name diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index ec2a9374..580c9e80 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -467,6 +467,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( recognizer_config.model_config.fire_red_asr.decoder = 
SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, ""); + recognizer_config.model_config.dolphin.model = + SHERPA_ONNX_OR(config->model_config.dolphin.model, ""); + recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, ""); recognizer_config.lm_config.scale = diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 027364b9..83847ab8 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -416,6 +416,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSenseVoiceModelConfig { int32_t use_itn; } SherpaOnnxOfflineSenseVoiceModelConfig; +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineDolphinModelConfig { + const char *model; +} SherpaOnnxOfflineDolphinModelConfig; + SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { SherpaOnnxOfflineTransducerModelConfig transducer; SherpaOnnxOfflineParaformerModelConfig paraformer; @@ -438,6 +442,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { SherpaOnnxOfflineSenseVoiceModelConfig sense_voice; SherpaOnnxOfflineMoonshineModelConfig moonshine; SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr; + SherpaOnnxOfflineDolphinModelConfig dolphin; } SherpaOnnxOfflineModelConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { diff --git a/sherpa-onnx/c-api/cxx-api.cc b/sherpa-onnx/c-api/cxx-api.cc index 45d30fa8..0f818b9f 100644 --- a/sherpa-onnx/c-api/cxx-api.cc +++ b/sherpa-onnx/c-api/cxx-api.cc @@ -246,6 +246,8 @@ OfflineRecognizer OfflineRecognizer::Create( c.model_config.fire_red_asr.decoder = config.model_config.fire_red_asr.decoder.c_str(); + c.model_config.dolphin.model = config.model_config.dolphin.model.c_str(); + c.lm_config.model = config.lm_config.model.c_str(); c.lm_config.scale = config.lm_config.scale; diff --git a/sherpa-onnx/c-api/cxx-api.h b/sherpa-onnx/c-api/cxx-api.h index 94248996..f1b1f040 100644 --- a/sherpa-onnx/c-api/cxx-api.h +++ b/sherpa-onnx/c-api/cxx-api.h @@ -229,6 +229,10 @@ struct SHERPA_ONNX_API 
OfflineSenseVoiceModelConfig { bool use_itn = false; }; +struct SHERPA_ONNX_API OfflineDolphinModelConfig { + std::string model; +}; + struct SHERPA_ONNX_API OfflineMoonshineModelConfig { std::string preprocessor; std::string encoder; @@ -254,6 +258,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { OfflineSenseVoiceModelConfig sense_voice; OfflineMoonshineModelConfig moonshine; OfflineFireRedAsrModelConfig fire_red_asr; + OfflineDolphinModelConfig dolphin; }; struct SHERPA_ONNX_API OfflineLMConfig {