Add C/CXX/JavaScript API for NeMo Canary models (#2357)

This PR introduces support for NeMo Canary models across C, C++, and JavaScript APIs 
by adding new Canary configuration structures, updating bindings, extending examples,
and enhancing CI workflows.

- Add OfflineCanaryModelConfig to all language bindings (C, C++, JS, ETS).
- Implement SetConfig methods and NAPI wrappers for updating recognizer config at runtime.
- Update examples and CI scripts to demonstrate and test NeMo Canary model usage.
This commit is contained in:
Fangjun Kuang
2025-07-07 23:38:04 +08:00
committed by GitHub
parent 0e738c356c
commit df4615ca1d
28 changed files with 750 additions and 80 deletions

View File

@@ -487,6 +487,21 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
recognizer_config.model_config.zipformer_ctc.model =
SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, "");
recognizer_config.model_config.canary.encoder =
SHERPA_ONNX_OR(config->model_config.canary.encoder, "");
recognizer_config.model_config.canary.decoder =
SHERPA_ONNX_OR(config->model_config.canary.decoder, "");
recognizer_config.model_config.canary.src_lang =
SHERPA_ONNX_OR(config->model_config.canary.src_lang, "");
recognizer_config.model_config.canary.tgt_lang =
SHERPA_ONNX_OR(config->model_config.canary.tgt_lang, "");
recognizer_config.model_config.canary.use_pnc =
config->model_config.canary.use_pnc;
recognizer_config.lm_config.model =
SHERPA_ONNX_OR(config->lm_config.model, "");
recognizer_config.lm_config.scale =

View File

@@ -420,6 +420,14 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
int32_t tail_paddings;
} SherpaOnnxOfflineWhisperModelConfig;
// C-API configuration for an offline NeMo Canary model.
//
// NOTE(review): GetOfflineRecognizerConfig() passes each string field through
// SHERPA_ONNX_OR(field, "") before copying it into the internal config, so a
// NULL pointer is presumably treated as an empty string - confirm against the
// macro definition.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineCanaryModelConfig {
// Encoder model (presumably a path to the encoder file - confirm).
const char *encoder;
// Decoder model (presumably a path to the decoder file - confirm).
const char *decoder;
// Source language setting (presumably a language code for the input audio -
// confirm accepted values).
const char *src_lang;
// Target language setting (presumably a language code for the output text -
// confirm accepted values).
const char *tgt_lang;
// Integer flag copied as-is into the internal config's use_pnc field.
// NOTE(review): "pnc" presumably means punctuation and capitalization, with
// non-zero enabling it - confirm.
int32_t use_pnc;
} SherpaOnnxOfflineCanaryModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig {
const char *encoder;
const char *decoder;
@@ -479,6 +487,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
SherpaOnnxOfflineDolphinModelConfig dolphin;
SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
SherpaOnnxOfflineCanaryModelConfig canary;
} SherpaOnnxOfflineModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {

View File

@@ -193,7 +193,7 @@ void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n);
}
OfflineRecognizer OfflineRecognizer::Create(
static SherpaOnnxOfflineRecognizerConfig Convert(
const OfflineRecognizerConfig &config) {
struct SherpaOnnxOfflineRecognizerConfig c;
memset(&c, 0, sizeof(c));
@@ -256,6 +256,12 @@ OfflineRecognizer OfflineRecognizer::Create(
c.model_config.zipformer_ctc.model =
config.model_config.zipformer_ctc.model.c_str();
c.model_config.canary.encoder = config.model_config.canary.encoder.c_str();
c.model_config.canary.decoder = config.model_config.canary.decoder.c_str();
c.model_config.canary.src_lang = config.model_config.canary.src_lang.c_str();
c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str();
c.model_config.canary.use_pnc = config.model_config.canary.use_pnc;
c.lm_config.model = config.lm_config.model.c_str();
c.lm_config.scale = config.lm_config.scale;
@@ -273,10 +279,22 @@ OfflineRecognizer OfflineRecognizer::Create(
c.hr.lexicon = config.hr.lexicon.c_str();
c.hr.rule_fsts = config.hr.rule_fsts.c_str();
return c;
}
// Factory for OfflineRecognizer.
//
// Translates the C++ `config` into its C-API counterpart via Convert(), asks
// the C API to create a recognizer from it, and wraps the returned handle.
// The converted struct stores c_str() pointers into `config`, so it is only
// used while `config` is still alive (i.e. within this call).
OfflineRecognizer OfflineRecognizer::Create(
    const OfflineRecognizerConfig &config) {
  SherpaOnnxOfflineRecognizerConfig c_config = Convert(config);
  return OfflineRecognizer(SherpaOnnxCreateOfflineRecognizer(&c_config));
}
void OfflineRecognizer::SetConfig(const OfflineRecognizerConfig &config) const {
auto c = Convert(config);
SherpaOnnxOfflineRecognizerSetConfig(p_, &c);
}
// Wraps the C-API recognizer pointer `p` by forwarding it to the MoveOnly
// base class.
// NOTE(review): ownership and cleanup of `p` are presumably handled by
// MoveOnly - confirm against its definition.
OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p)
: MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {}

View File

@@ -223,6 +223,14 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig {
int32_t tail_paddings = -1;
};
// C++ configuration for an offline NeMo Canary model.
//
// Convert() copies these fields into the C-API struct's
// model_config.canary member: the strings via c_str(), use_pnc by value.
struct SHERPA_ONNX_API OfflineCanaryModelConfig {
// Encoder model (presumably a path to the encoder file - confirm).
std::string encoder;
// Decoder model (presumably a path to the decoder file - confirm).
std::string decoder;
// Source language setting (presumably a language code for the input audio -
// confirm accepted values).
std::string src_lang;
// Target language setting (presumably a language code for the output text -
// confirm accepted values).
std::string tgt_lang;
// Defaults to true. NOTE(review): "pnc" presumably means punctuation and
// capitalization in the recognized text - confirm.
bool use_pnc = true;
};
struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig {
std::string encoder;
std::string decoder;
@@ -273,6 +281,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
OfflineFireRedAsrModelConfig fire_red_asr;
OfflineDolphinModelConfig dolphin;
OfflineZipformerCtcModelConfig zipformer_ctc;
OfflineCanaryModelConfig canary;
};
struct SHERPA_ONNX_API OfflineLMConfig {
@@ -335,6 +344,8 @@ class SHERPA_ONNX_API OfflineRecognizer
OfflineRecognizerResult GetResult(const OfflineStream *s) const;
void SetConfig(const OfflineRecognizerConfig &config) const;
private:
explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p);
};

View File

@@ -45,7 +45,7 @@ Usage:
./bin/sherpa-onnx \
--debug=1 \
--zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
--tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \