Support replacing homophonic phrases (#2153)

This commit is contained in:
Fangjun Kuang
2025-04-27 15:31:11 +08:00
committed by GitHub
parent e3280027f9
commit f64c58342b
42 changed files with 834 additions and 134 deletions

View File

@@ -408,6 +408,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
OfflineRecognizerImpl::OfflineRecognizerImpl(
const OfflineRecognizerConfig &config)
: config_(config) {
// TODO(fangjun): Refactor this function
if (!config.rule_fsts.empty()) {
std::vector<std::string> files;
SplitStringToVector(config.rule_fsts, ",", false, &files);
@@ -448,6 +450,13 @@ OfflineRecognizerImpl::OfflineRecognizerImpl(
SHERPA_ONNX_LOGE("FST archives loaded!");
}
}
if (!config.hr.dict_dir.empty() && !config.hr.lexicon.empty() &&
!config.hr.rule_fsts.empty()) {
auto hr_config = config.hr;
hr_config.debug = config.model_config.debug;
hr_ = std::make_unique<HomophoneReplacer>(hr_config);
}
}
template <typename Manager>
@@ -495,6 +504,13 @@ OfflineRecognizerImpl::OfflineRecognizerImpl(
} // for (; !reader->Done(); reader->Next())
} // for (const auto &f : files)
} // if (!config.rule_fars.empty())
if (!config.hr.dict_dir.empty() && !config.hr.lexicon.empty() &&
!config.hr.rule_fsts.empty()) {
auto hr_config = config.hr;
hr_config.debug = config.model_config.debug;
hr_ = std::make_unique<HomophoneReplacer>(mgr, hr_config);
}
}
std::string OfflineRecognizerImpl::ApplyInverseTextNormalization(
@@ -510,6 +526,15 @@ std::string OfflineRecognizerImpl::ApplyInverseTextNormalization(
return text;
}
// Passes `text` through the homophone replacer held in `hr_`.
//
// When `hr_` is null, `text` is returned unchanged. Otherwise the result
// of `hr_->Apply(text)` is returned.
std::string OfflineRecognizerImpl::ApplyHomophoneReplacer(
    std::string text) const {
  // No replacer available: nothing to do.
  if (!hr_) {
    return text;
  }

  text = hr_->Apply(text);
  return text;
}
void OfflineRecognizerImpl::SetConfig(const OfflineRecognizerConfig &config) {
config_ = config;
}