Add C++ example for streaming ASR with SenseVoice. (#2199)
This commit is contained in:
@@ -678,4 +678,42 @@ void VoiceActivityDetector::Flush() const {
|
||||
SherpaOnnxVoiceActivityDetectorFlush(p_);
|
||||
}
|
||||
|
||||
// Factory for LinearResampler.
//
// Forwards all four arguments unchanged to SherpaOnnxCreateLinearResampler()
// and wraps the returned C-API handle in a LinearResampler.
LinearResampler LinearResampler::Create(int32_t samp_rate_in_hz,
                                        int32_t samp_rate_out_hz,
                                        float filter_cutoff_hz,
                                        int32_t num_zeros) {
  return LinearResampler(SherpaOnnxCreateLinearResampler(
      samp_rate_in_hz, samp_rate_out_hz, filter_cutoff_hz, num_zeros));
}
|
||||
|
||||
// Wraps an already-created C-API resampler handle by handing the pointer to
// the MoveOnly base class.
LinearResampler::LinearResampler(const SherpaOnnxLinearResampler *p)
    : MoveOnly<LinearResampler, SherpaOnnxLinearResampler>(p) {
}
|
||||
|
||||
// Releases the given C-API handle via SherpaOnnxDestroyLinearResampler().
void LinearResampler::Destroy(const SherpaOnnxLinearResampler *p) const { SherpaOnnxDestroyLinearResampler(p); }
|
||||
|
||||
// Calls SherpaOnnxLinearResamplerReset() on the wrapped handle.
void LinearResampler::Reset() const {
  SherpaOnnxLinearResamplerReset(p_);
}
|
||||
|
||||
std::vector<float> LinearResampler::Resample(const float *input,
|
||||
int32_t input_dim,
|
||||
bool flush) const {
|
||||
auto out = SherpaOnnxLinearResamplerResample(p_, input, input_dim, flush);
|
||||
|
||||
std::vector<float> ans{out->samples, out->samples + out->n};
|
||||
|
||||
SherpaOnnxLinearResamplerResampleFree(out);
|
||||
|
||||
return ans;
|
||||
}
|
||||
|
||||
int32_t LinearResampler::GetInputSamplingRate() const {
|
||||
return SherpaOnnxLinearResamplerResampleGetInputSampleRate(p_);
|
||||
}
|
||||
|
||||
int32_t LinearResampler::GetOutputSamplingRate() const {
|
||||
return SherpaOnnxLinearResamplerResampleGetOutputSampleRate(p_);
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx::cxx
|
||||
|
||||
@@ -111,6 +111,7 @@ SHERPA_ONNX_API bool WriteWave(const std::string &filename, const Wave &wave);
|
||||
template <typename Derived, typename T>
|
||||
class SHERPA_ONNX_API MoveOnly {
|
||||
public:
|
||||
MoveOnly() = default;
|
||||
explicit MoveOnly(const T *p) : p_(p) {}
|
||||
|
||||
~MoveOnly() { Destroy(); }
|
||||
@@ -591,6 +592,28 @@ class SHERPA_ONNX_API VoiceActivityDetector
|
||||
explicit VoiceActivityDetector(const SherpaOnnxVoiceActivityDetector *p);
|
||||
};
|
||||
|
||||
// C++ wrapper around the C-API type SherpaOnnxLinearResampler.
//
// Instances are obtained through Create(); the constructor taking a raw
// handle is private. The handle itself is stored by the MoveOnly base.
class SHERPA_ONNX_API LinearResampler
|
||||
: public MoveOnly<LinearResampler, SherpaOnnxLinearResampler> {
|
||||
public:
|
||||
// Default-constructed object; no C-API handle is passed to the base here.
LinearResampler() = default;
|
||||
// Creates a resampler by forwarding the four arguments to
// SherpaOnnxCreateLinearResampler().
// NOTE(review): presumably samp_rate_in_hz / samp_rate_out_hz are the
// source and target sample rates in Hz; the meaning of filter_cutoff_hz and
// num_zeros is defined by the C API -- confirm there.
static LinearResampler Create(int32_t samp_rate_in_hz,
|
||||
int32_t samp_rate_out_hz,
|
||||
float filter_cutoff_hz, int32_t num_zeros);
|
||||
|
||||
// Releases the handle `p` with SherpaOnnxDestroyLinearResampler().
void Destroy(const SherpaOnnxLinearResampler *p) const;
|
||||
|
||||
// Calls SherpaOnnxLinearResamplerReset() on the wrapped handle.
void Reset() const;
|
||||
|
||||
// Resamples `input_dim` floats starting at `input` and returns a copy of
// the produced samples. `flush` is forwarded unchanged to the C API.
// NOTE(review): presumably flush=true marks the final chunk -- confirm.
std::vector<float> Resample(const float *input, int32_t input_dim,
|
||||
bool flush) const;
|
||||
|
||||
// Input sample rate as reported by the C API for the wrapped handle.
int32_t GetInputSamplingRate() const;
|
||||
// Output sample rate as reported by the C API for the wrapped handle.
int32_t GetOutputSamplingRate() const;
|
||||
|
||||
private:
|
||||
// Wraps an existing C-API handle; used by Create().
explicit LinearResampler(const SherpaOnnxLinearResampler *p);
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx::cxx
|
||||
|
||||
#endif // SHERPA_ONNX_C_API_CXX_API_H_
|
||||
|
||||
@@ -166,20 +166,32 @@ class HomophoneReplacer::Impl {
|
||||
}
|
||||
|
||||
// convert words to pronunciations
|
||||
std::vector<std::string> pronunciations;
|
||||
std::vector<std::string> current_words;
|
||||
std::vector<std::string> current_pronunciations;
|
||||
|
||||
for (const auto &w : words) {
|
||||
if (w.size() < 3 ||
|
||||
reinterpret_cast<const uint8_t *>(w.data())[0] < 128) {
|
||||
if (!current_words.empty()) {
|
||||
ans += ApplyImpl(current_words, current_pronunciations);
|
||||
current_words.clear();
|
||||
current_pronunciations.clear();
|
||||
}
|
||||
ans += w;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto p = ConvertWordToPronunciation(w);
|
||||
if (config_.debug) {
|
||||
SHERPA_ONNX_LOGE("%s %s", w.c_str(), p.c_str());
|
||||
}
|
||||
pronunciations.push_back(std::move(p));
|
||||
|
||||
current_words.push_back(w);
|
||||
current_pronunciations.push_back(std::move(p));
|
||||
}
|
||||
|
||||
for (const auto &r : replacer_list_) {
|
||||
ans = r->Normalize(words, pronunciations);
|
||||
// TODO(fangjun): We support only 1 rule fst at present.
|
||||
break;
|
||||
if (!current_words.empty()) {
|
||||
ans += ApplyImpl(current_words, current_pronunciations);
|
||||
}
|
||||
|
||||
if (config_.debug) {
|
||||
@@ -190,6 +202,16 @@ class HomophoneReplacer::Impl {
|
||||
}
|
||||
|
||||
private:
|
||||
std::string ApplyImpl(const std::vector<std::string> &words,
|
||||
const std::vector<std::string> &pronunciations) const {
|
||||
std::string ans;
|
||||
for (const auto &r : replacer_list_) {
|
||||
ans = r->Normalize(words, pronunciations);
|
||||
// TODO(fangjun): We support only 1 rule fst at present.
|
||||
break;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
std::string ConvertWordToPronunciation(const std::string &word) const {
|
||||
if (word2pron_.count(word)) {
|
||||
return word2pron_.at(word);
|
||||
@@ -239,6 +261,9 @@ class HomophoneReplacer::Impl {
|
||||
}
|
||||
|
||||
while (iss >> p) {
|
||||
if (p.back() > '4') {
|
||||
p.push_back('1');
|
||||
}
|
||||
pron.append(std::move(p));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user