Add C++ and Python API for Dolphin CTC models (#2085)
This commit is contained in:
@@ -27,6 +27,8 @@ set(sources
|
||||
offline-ctc-fst-decoder.cc
|
||||
offline-ctc-greedy-search-decoder.cc
|
||||
offline-ctc-model.cc
|
||||
offline-dolphin-model-config.cc
|
||||
offline-dolphin-model.cc
|
||||
offline-fire-red-asr-greedy-search-decoder.cc
|
||||
offline-fire-red-asr-model-config.cc
|
||||
offline-fire-red-asr-model.cc
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "sherpa-onnx/csrc/file-utils.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
|
||||
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
|
||||
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
|
||||
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
|
||||
@@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
|
||||
|
||||
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
||||
const OfflineModelConfig &config) {
|
||||
if (!config.dolphin.model.empty()) {
|
||||
return std::make_unique<OfflineDolphinModel>(config);
|
||||
}
|
||||
|
||||
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
||||
ModelType model_type = ModelType::kUnknown;
|
||||
|
||||
@@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
||||
template <typename Manager>
|
||||
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
|
||||
Manager *mgr, const OfflineModelConfig &config) {
|
||||
if (!config.dolphin.model.empty()) {
|
||||
return std::make_unique<OfflineDolphinModel>(mgr, config);
|
||||
}
|
||||
|
||||
// TODO(fangjun): Refactor it. We don't need to use model_type here
|
||||
ModelType model_type = ModelType::kUnknown;
|
||||
|
||||
|
||||
@@ -64,6 +64,10 @@ class OfflineCtcModel {
|
||||
// return true for models from https://github.com/salute-developers/GigaAM
|
||||
// return false otherwise
|
||||
virtual bool IsGigaAM() const { return false; }
|
||||
|
||||
// For Dolphin models, they use global CMVN
|
||||
virtual void NormalizeFeatures(float *features, int32_t num_frames,
|
||||
int32_t feat_dim) const {}
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
35
sherpa-onnx/csrc/offline-dolphin-model-config.cc
Normal file
35
sherpa-onnx/csrc/offline-dolphin-model-config.cc
Normal file
@@ -0,0 +1,35 @@
|
||||
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
|
||||
|
||||
#include "sherpa-onnx/csrc/file-utils.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the command-line option `--dolphin-model`, whose value is
// stored in the `model` member of this config.
//
// @param po  Option parser that receives the registration.
void OfflineDolphinModelConfig::Register(ParseOptions *po) {
  const char *const kModelUsage = "Path to model.onnx of Dolphin CTC branch.";

  po->Register("dolphin-model", &model, kModelUsage);
}
|
||||
|
||||
bool OfflineDolphinModelConfig::Validate() const {
|
||||
if (!FileExists(model)) {
|
||||
SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a human-readable description of this config in the form
//   OfflineDolphinModelConfig(model="<path>")
std::string OfflineDolphinModelConfig::ToString() const {
  std::string repr = "OfflineDolphinModelConfig(";
  repr += "model=\"";
  repr += model;
  repr += "\")";

  return repr;
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
27
sherpa-onnx/csrc/offline-dolphin-model-config.h
Normal file
27
sherpa-onnx/csrc/offline-dolphin-model-config.h
Normal file
@@ -0,0 +1,27 @@
|
||||
// sherpa-onnx/csrc/offline-dolphin-model-config.h
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/parse-options.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
struct OfflineDolphinModelConfig {
|
||||
std::string model;
|
||||
|
||||
OfflineDolphinModelConfig() = default;
|
||||
explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}
|
||||
|
||||
void Register(ParseOptions *po);
|
||||
bool Validate() const;
|
||||
|
||||
std::string ToString() const;
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
|
||||
21
sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
Normal file
21
sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
Normal file
@@ -0,0 +1,21 @@
|
||||
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
|
||||
//
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Values describing a Dolphin CTC model. `vocab_size`, `mean` and
// `inv_stddev` are filled in from the ONNX model meta data when the model
// is loaded.
struct OfflineDolphinModelMetaData {
  // Number of output tokens of the model. Initialized to 0 so that a
  // default-constructed object never exposes an indeterminate value.
  int32_t vocab_size = 0;

  // Subsampling factor reported for the model.
  int32_t subsampling_factor = 4;

  // Global CMVN statistics, one entry per feature dimension. A feature
  // value x in dimension d is normalized as (x - mean[d]) * inv_stddev[d].
  std::vector<float> mean;
  std::vector<float> inv_stddev;
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
|
||||
165
sherpa-onnx/csrc/offline-dolphin-model.cc
Normal file
165
sherpa-onnx/csrc/offline-dolphin-model.cc
Normal file
@@ -0,0 +1,165 @@
|
||||
// sherpa-onnx/csrc/offline-dolphin-model.cc
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#if __OHOS__
|
||||
#include "rawfile/raw_file_manager.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/file-utils.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||
#include "sherpa-onnx/csrc/session.h"
|
||||
#include "sherpa-onnx/csrc/text-utils.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Private implementation of OfflineDolphinModel. It owns the onnxruntime
// session together with the values read from the model meta data.
class OfflineDolphinModel::Impl {
 public:
  // Loads the model from the file named by config.dolphin.model.
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Same as above, but the model file is read through a platform resource
  // manager (see the explicit instantiations at the end of this file).
  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Runs the network.
  //
  // @param features         Input features; ownership is taken over.
  // @param features_length  Number of valid frames; ownership is taken over.
  // @return The output tensors of the session, in the order of the model's
  //         output names.
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {
        std::move(features),
        std::move(features_length),
    };

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return meta_data_.vocab_size; }

  int32_t SubsamplingFactor() const { return meta_data_.subsampling_factor; }

  // Applies global CMVN in place: every value x in feature dimension d is
  // replaced by (x - mean[d]) * invstd[d].
  //
  // @param features    Buffer holding num_frames * feat_dim floats, stored
  //                    frame by frame.
  // @param num_frames  Number of frames in `features`.
  // @param feat_dim    Number of feature values per frame.
  //
  // The process is terminated with an error message if the statistics stored
  // in the model have fewer than feat_dim entries, since indexing them would
  // otherwise read past the end of the vectors.
  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const {
    const auto &mean = meta_data_.mean;
    const auto &invstd = meta_data_.inv_stddev;

    // The statistics are only indexed when there is at least one value to
    // normalize, so only validate their sizes in that case.
    const bool will_index = (num_frames > 0) && (feat_dim > 0);
    if (will_index && (mean.size() < static_cast<size_t>(feat_dim) ||
                       invstd.size() < static_cast<size_t>(feat_dim))) {
      SHERPA_ONNX_LOGE(
          "Feature dim %d is larger than the size of the CMVN statistics in "
          "the model meta data (mean: %d, invstd: %d)",
          feat_dim, static_cast<int32_t>(mean.size()),
          static_cast<int32_t>(invstd.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    auto p = features;
    for (int32_t f = 0; f < num_frames; ++f) {
      for (int32_t d = 0; d < feat_dim; ++d) {
        p[d] = (p[d] - mean[d]) * invstd[d];
      }
      p += feat_dim;  // advance to the next frame
    }
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  // Creates the session from an in-memory model, caches the input/output
  // names and reads the meta data needed by this class.
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.vocab_size, "vocab_size");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.mean, "mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev, "invstd");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  // The *_ptr_ vectors hold the C strings handed to Ort::Session::Run.
  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineDolphinModelMetaData meta_data_;
};
|
||||
|
||||
// Creates the model described by `config`; all work is delegated to Impl.
OfflineDolphinModel::OfflineDolphinModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

// Variant that forwards a platform resource manager to Impl.
template <typename Manager>
OfflineDolphinModel::OfflineDolphinModel(Manager *mgr,
                                         const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

// Defined in this file, after the definition of Impl.
OfflineDolphinModel::~OfflineDolphinModel() = default;
|
||||
|
||||
std::vector<Ort::Value> OfflineDolphinModel::Forward(
|
||||
Ort::Value features, Ort::Value features_length) {
|
||||
return impl_->Forward(std::move(features), std::move(features_length));
|
||||
}
|
||||
|
||||
int32_t OfflineDolphinModel::VocabSize() const { return impl_->VocabSize(); }
|
||||
|
||||
int32_t OfflineDolphinModel::SubsamplingFactor() const {
|
||||
return impl_->SubsamplingFactor();
|
||||
}
|
||||
|
||||
void OfflineDolphinModel::NormalizeFeatures(float *features, int32_t num_frames,
|
||||
int32_t feat_dim) const {
|
||||
return impl_->NormalizeFeatures(features, num_frames, feat_dim);
|
||||
}
|
||||
|
||||
OrtAllocator *OfflineDolphinModel::Allocator() const {
|
||||
return impl_->Allocator();
|
||||
}
|
||||
|
||||
// Explicit instantiations of the templated constructor for the resource
// manager types of the supported platforms.

#if __ANDROID_API__ >= 9
// Android: AAssetManager
template OfflineDolphinModel::OfflineDolphinModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
// OHOS: NativeResourceManager
template OfflineDolphinModel::OfflineDolphinModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
67
sherpa-onnx/csrc/offline-dolphin-model.h
Normal file
67
sherpa-onnx/csrc/offline-dolphin-model.h
Normal file
@@ -0,0 +1,67 @@
|
||||
// sherpa-onnx/csrc/offline-dolphin-model.h
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "onnxruntime_cxx_api.h" // NOLINT
|
||||
#include "sherpa-onnx/csrc/offline-ctc-model.h"
|
||||
#include "sherpa-onnx/csrc/offline-dolphin-model-meta-data.h"
|
||||
#include "sherpa-onnx/csrc/offline-model-config.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
class OfflineDolphinModel : public OfflineCtcModel {
|
||||
public:
|
||||
explicit OfflineDolphinModel(const OfflineModelConfig &config);
|
||||
|
||||
template <typename Manager>
|
||||
OfflineDolphinModel(Manager *mgr, const OfflineModelConfig &config);
|
||||
|
||||
~OfflineDolphinModel() override;
|
||||
|
||||
/** Run the forward method of the model.
|
||||
*
|
||||
* @param features A tensor of shape (N, T, C).
|
||||
* @param features_length A 1-D tensor of shape (N,) containing number of
|
||||
* valid frames in `features` before padding.
|
||||
* Its dtype is int64_t.
|
||||
*
|
||||
* @return Return a vector containing:
|
||||
* - log_probs: A 3-D tensor of shape (N, T', vocab_size).
|
||||
* - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
|
||||
*/
|
||||
std::vector<Ort::Value> Forward(Ort::Value features,
|
||||
Ort::Value features_length) override;
|
||||
|
||||
/** Return the vocabulary size of the model
|
||||
*/
|
||||
int32_t VocabSize() const override;
|
||||
|
||||
/** SubsamplingFactor of the model
|
||||
*
|
||||
* For Citrinet, the subsampling factor is usually 4.
|
||||
* For Conformer CTC, the subsampling factor is usually 8.
|
||||
*/
|
||||
int32_t SubsamplingFactor() const override;
|
||||
|
||||
/** Return an allocator for allocating memory
|
||||
*/
|
||||
OrtAllocator *Allocator() const override;
|
||||
|
||||
bool SupportBatchProcessing() const override { return true; }
|
||||
|
||||
void NormalizeFeatures(float *features, int32_t num_frames,
|
||||
int32_t feat_dim) const override;
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
|
||||
@@ -21,6 +21,7 @@ void OfflineModelConfig::Register(ParseOptions *po) {
|
||||
wenet_ctc.Register(po);
|
||||
sense_voice.Register(po);
|
||||
moonshine.Register(po);
|
||||
dolphin.Register(po);
|
||||
|
||||
po->Register("telespeech-ctc", &telespeech_ctc,
|
||||
"Path to model.onnx for telespeech ctc");
|
||||
@@ -109,6 +110,10 @@ bool OfflineModelConfig::Validate() const {
|
||||
return moonshine.Validate();
|
||||
}
|
||||
|
||||
if (!dolphin.model.empty()) {
|
||||
return dolphin.Validate();
|
||||
}
|
||||
|
||||
if (!telespeech_ctc.empty() && !FileExists(telespeech_ctc)) {
|
||||
SHERPA_ONNX_LOGE("telespeech_ctc: '%s' does not exist",
|
||||
telespeech_ctc.c_str());
|
||||
@@ -136,6 +141,7 @@ std::string OfflineModelConfig::ToString() const {
|
||||
os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
|
||||
os << "sense_voice=" << sense_voice.ToString() << ", ";
|
||||
os << "moonshine=" << moonshine.ToString() << ", ";
|
||||
os << "dolphin=" << dolphin.ToString() << ", ";
|
||||
os << "telespeech_ctc=\"" << telespeech_ctc << "\", ";
|
||||
os << "tokens=\"" << tokens << "\", ";
|
||||
os << "num_threads=" << num_threads << ", ";
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
|
||||
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"
|
||||
#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"
|
||||
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"
|
||||
@@ -30,6 +31,7 @@ struct OfflineModelConfig {
|
||||
OfflineWenetCtcModelConfig wenet_ctc;
|
||||
OfflineSenseVoiceModelConfig sense_voice;
|
||||
OfflineMoonshineModelConfig moonshine;
|
||||
OfflineDolphinModelConfig dolphin;
|
||||
std::string telespeech_ctc;
|
||||
|
||||
std::string tokens;
|
||||
@@ -62,6 +64,7 @@ struct OfflineModelConfig {
|
||||
const OfflineWenetCtcModelConfig &wenet_ctc,
|
||||
const OfflineSenseVoiceModelConfig &sense_voice,
|
||||
const OfflineMoonshineModelConfig &moonshine,
|
||||
const OfflineDolphinModelConfig &dolphin,
|
||||
const std::string &telespeech_ctc,
|
||||
const std::string &tokens, int32_t num_threads, bool debug,
|
||||
const std::string &provider, const std::string &model_type,
|
||||
@@ -77,6 +80,7 @@ struct OfflineModelConfig {
|
||||
wenet_ctc(wenet_ctc),
|
||||
sense_voice(sense_voice),
|
||||
moonshine(moonshine),
|
||||
dolphin(dolphin),
|
||||
telespeech_ctc(telespeech_ctc),
|
||||
tokens(tokens),
|
||||
num_threads(num_threads),
|
||||
|
||||
@@ -118,6 +118,19 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
}
|
||||
}
|
||||
|
||||
if (!config_.model_config.dolphin.model.empty()) {
|
||||
config_.feat_config.low_freq = 0;
|
||||
config_.feat_config.high_freq = 8000;
|
||||
config_.feat_config.remove_dc_offset = false;
|
||||
config_.feat_config.dither = 0;
|
||||
config_.feat_config.preemph_coeff = 0;
|
||||
config_.feat_config.window_type = "hann";
|
||||
config_.feat_config.feature_dim = 80;
|
||||
config_.feat_config.is_librosa = true;
|
||||
config_.feat_config.frame_length_ms = 31.25; // 16000/512 = 31.25
|
||||
config_.feat_config.snip_edges = false;
|
||||
}
|
||||
|
||||
if (!config_.model_config.wenet_ctc.model.empty()) {
|
||||
// WeNet CTC models assume input samples are in the range
|
||||
// [-32768, 32767], so we set normalize_samples to false
|
||||
@@ -157,7 +170,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
} else {
|
||||
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
|
||||
config_.decoding_method.c_str());
|
||||
exit(-1);
|
||||
SHERPA_ONNX_EXIT(-1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,7 +179,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
}
|
||||
|
||||
void DecodeStreams(OfflineStream **ss, int32_t n) const override {
|
||||
if (!model_->SupportBatchProcessing()) {
|
||||
if (!model_->SupportBatchProcessing() || (n == 1)) {
|
||||
// If the model does not support batch process,
|
||||
// we process each stream independently.
|
||||
for (int32_t i = 0; i != n; ++i) {
|
||||
@@ -190,6 +203,9 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
std::vector<float> f = ss[i]->GetFrames();
|
||||
|
||||
int32_t num_frames = f.size() / feat_dim;
|
||||
|
||||
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
|
||||
|
||||
features_vec[i] = std::move(f);
|
||||
|
||||
features_length_vec[i] = num_frames;
|
||||
@@ -241,6 +257,8 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
|
||||
int32_t num_frames = f.size() / feat_dim;
|
||||
|
||||
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
|
||||
|
||||
std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
|
||||
|
||||
Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
|
||||
|
||||
@@ -49,7 +49,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
||||
if (!config.model_config.nemo_ctc.model.empty() ||
|
||||
!config.model_config.zipformer_ctc.model.empty() ||
|
||||
!config.model_config.tdnn.model.empty() ||
|
||||
!config.model_config.wenet_ctc.model.empty()) {
|
||||
!config.model_config.wenet_ctc.model.empty() ||
|
||||
!config.model_config.dolphin.model.empty()) {
|
||||
return std::make_unique<OfflineRecognizerCtcImpl>(config);
|
||||
}
|
||||
|
||||
@@ -234,7 +235,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
|
||||
if (!config.model_config.nemo_ctc.model.empty() ||
|
||||
!config.model_config.zipformer_ctc.model.empty() ||
|
||||
!config.model_config.tdnn.model.empty() ||
|
||||
!config.model_config.wenet_ctc.model.empty()) {
|
||||
!config.model_config.wenet_ctc.model.empty() ||
|
||||
!config.model_config.dolphin.model.empty()) {
|
||||
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,9 +23,8 @@ struct OfflineSenseVoiceModelConfig {
|
||||
bool use_itn = false;
|
||||
|
||||
OfflineSenseVoiceModelConfig() = default;
|
||||
explicit OfflineSenseVoiceModelConfig(const std::string &model,
|
||||
const std::string &language,
|
||||
bool use_itn)
|
||||
OfflineSenseVoiceModelConfig(const std::string &model,
|
||||
const std::string &language, bool use_itn)
|
||||
: model(model), language(language), use_itn(use_itn) {}
|
||||
|
||||
void Register(ParseOptions *po);
|
||||
|
||||
@@ -41,6 +41,9 @@ OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
|
||||
std::string text;
|
||||
for (auto i : src.tokens) {
|
||||
auto sym = sym_table[i];
|
||||
if (sym == "<unk>") {
|
||||
continue;
|
||||
}
|
||||
|
||||
text.append(sym);
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#ifndef SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
||||
#define SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "rknn_api.h" // NOLINT
|
||||
#include "sherpa-onnx/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/csrc/vad-model.h"
|
||||
|
||||
Reference in New Issue
Block a user