Add C++ example for streaming ASR with SenseVoice. (#2199)

This commit is contained in:
Fangjun Kuang
2025-05-11 00:23:32 +08:00
committed by GitHub
parent fc2121c307
commit 028b8f2718
16 changed files with 514 additions and 60 deletions

View File

@@ -166,20 +166,32 @@ class HomophoneReplacer::Impl {
}
// convert words to pronunciations
std::vector<std::string> pronunciations;
std::vector<std::string> current_words;
std::vector<std::string> current_pronunciations;
for (const auto &w : words) {
if (w.size() < 3 ||
reinterpret_cast<const uint8_t *>(w.data())[0] < 128) {
if (!current_words.empty()) {
ans += ApplyImpl(current_words, current_pronunciations);
current_words.clear();
current_pronunciations.clear();
}
ans += w;
continue;
}
auto p = ConvertWordToPronunciation(w);
if (config_.debug) {
SHERPA_ONNX_LOGE("%s %s", w.c_str(), p.c_str());
}
pronunciations.push_back(std::move(p));
current_words.push_back(w);
current_pronunciations.push_back(std::move(p));
}
for (const auto &r : replacer_list_) {
ans = r->Normalize(words, pronunciations);
// TODO(fangjun): We support only 1 rule fst at present.
break;
if (!current_words.empty()) {
ans += ApplyImpl(current_words, current_pronunciations);
}
if (config_.debug) {
@@ -190,6 +202,16 @@ class HomophoneReplacer::Impl {
}
private:
std::string ApplyImpl(const std::vector<std::string> &words,
const std::vector<std::string> &pronunciations) const {
std::string ans;
for (const auto &r : replacer_list_) {
ans = r->Normalize(words, pronunciations);
// TODO(fangjun): We support only 1 rule fst at present.
break;
}
return ans;
}
std::string ConvertWordToPronunciation(const std::string &word) const {
if (word2pron_.count(word)) {
return word2pron_.at(word);
@@ -239,6 +261,9 @@ class HomophoneReplacer::Impl {
}
while (iss >> p) {
if (p.back() > '4') {
p.push_back('1');
}
pron.append(std::move(p));
}