Add inverse text normalization for non-streaming ASR (#1017)
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-lm-config.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
|
||||
|
||||
#include "sherpa-onnx/csrc/text-utils.h"
|
||||
namespace sherpa_onnx {
|
||||
|
||||
void OfflineRecognizerConfig::Register(ParseOptions *po) {
|
||||
@@ -44,6 +44,16 @@ void OfflineRecognizerConfig::Register(ParseOptions *po) {
|
||||
po->Register("hotwords-score", &hotwords_score,
|
||||
"The bonus score for each token in context word/phrase. "
|
||||
"Used only when decoding_method is modified_beam_search");
|
||||
|
||||
po->Register(
|
||||
"rule-fsts", &rule_fsts,
|
||||
"If not empty, it specifies fsts for inverse text normalization. "
|
||||
"If there are multiple fsts, they are separated by a comma.");
|
||||
|
||||
po->Register(
|
||||
"rule-fars", &rule_fars,
|
||||
"If not empty, it specifies fst archives for inverse text normalization. "
|
||||
"If there are multiple archives, they are separated by a comma.");
|
||||
}
|
||||
|
||||
bool OfflineRecognizerConfig::Validate() const {
|
||||
@@ -61,7 +71,7 @@ bool OfflineRecognizerConfig::Validate() const {
|
||||
if (!hotwords_file.empty() && decoding_method != "modified_beam_search") {
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Please use --decoding-method=modified_beam_search if you"
|
||||
" provide --hotwords-file. Given --decoding-method=%s",
|
||||
" provide --hotwords-file. Given --decoding-method='%s'",
|
||||
decoding_method.c_str());
|
||||
return false;
|
||||
}
|
||||
@@ -72,6 +82,34 @@ bool OfflineRecognizerConfig::Validate() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hotwords_file.empty() && !FileExists(hotwords_file)) {
|
||||
SHERPA_ONNX_LOGE("--hotwords-file: '%s' does not exist",
|
||||
hotwords_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!rule_fsts.empty()) {
|
||||
std::vector<std::string> files;
|
||||
SplitStringToVector(rule_fsts, ",", false, &files);
|
||||
for (const auto &f : files) {
|
||||
if (!FileExists(f)) {
|
||||
SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!rule_fars.empty()) {
|
||||
std::vector<std::string> files;
|
||||
SplitStringToVector(rule_fars, ",", false, &files);
|
||||
for (const auto &f : files) {
|
||||
if (!FileExists(f)) {
|
||||
SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return model_config.Validate();
|
||||
}
|
||||
|
||||
@@ -87,7 +125,9 @@ std::string OfflineRecognizerConfig::ToString() const {
|
||||
os << "max_active_paths=" << max_active_paths << ", ";
|
||||
os << "hotwords_file=\"" << hotwords_file << "\", ";
|
||||
os << "hotwords_score=" << hotwords_score << ", ";
|
||||
os << "blank_penalty=" << blank_penalty << ")";
|
||||
os << "blank_penalty=" << blank_penalty << ", ";
|
||||
os << "rule_fsts=\"" << rule_fsts << "\", ";
|
||||
os << "rule_fars=\"" << rule_fars << "\")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user