Add endpointing (#54)
This commit is contained in:
91
sherpa-onnx/csrc/endpoint.cc
Normal file
91
sherpa-onnx/csrc/endpoint.cc
Normal file
@@ -0,0 +1,91 @@
|
||||
// sherpa-onnx/csrc/endpoint.cc
|
||||
//
|
||||
// Copyright (c) 2022 (authors: Pingfeng Luo)
|
||||
// 2022-2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/csrc/endpoint.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/log.h"
|
||||
#include "sherpa-onnx/csrc/parse-options.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
static bool RuleActivated(const EndpointRule &rule,
|
||||
const std::string &rule_name, float trailing_silence,
|
||||
float utterance_length) {
|
||||
bool contain_nonsilence = utterance_length > trailing_silence;
|
||||
bool ans = (contain_nonsilence || !rule.must_contain_nonsilence) &&
|
||||
trailing_silence >= rule.min_trailing_silence &&
|
||||
utterance_length >= rule.min_utterance_length;
|
||||
if (ans) {
|
||||
SHERPA_ONNX_LOG(DEBUG) << "Endpointing rule " << rule_name << " activated: "
|
||||
<< (contain_nonsilence ? "true" : "false") << ','
|
||||
<< trailing_silence << ',' << utterance_length;
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
static void RegisterEndpointRule(ParseOptions *po, EndpointRule *rule,
|
||||
const std::string &rule_name) {
|
||||
po->Register(
|
||||
rule_name + "-must-contain-nonsilence", &rule->must_contain_nonsilence,
|
||||
"If True, for this endpointing " + rule_name +
|
||||
" to apply there must be nonsilence in the best-path traceback. "
|
||||
"For decoding, a non-blank token is considered as non-silence");
|
||||
po->Register(rule_name + "-min-trailing-silence", &rule->min_trailing_silence,
|
||||
"This endpointing " + rule_name +
|
||||
" requires duration of trailing silence in seconds) to "
|
||||
"be >= this value.");
|
||||
po->Register(rule_name + "-min-utterance-length", &rule->min_utterance_length,
|
||||
"This endpointing " + rule_name +
|
||||
" requires utterance-length (in seconds) to be >= this "
|
||||
"value.");
|
||||
}
|
||||
|
||||
std::string EndpointRule::ToString() const {
|
||||
std::ostringstream os;
|
||||
|
||||
os << "EndpointRule(";
|
||||
os << "must_contain_nonsilence="
|
||||
<< (must_contain_nonsilence ? "True" : "False") << ", ";
|
||||
os << "min_trailing_silence=" << min_trailing_silence << ", ";
|
||||
os << "min_utterance_length=" << min_utterance_length << ")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
void EndpointConfig::Register(ParseOptions *po) {
|
||||
RegisterEndpointRule(po, &rule1, "rule1");
|
||||
RegisterEndpointRule(po, &rule2, "rule2");
|
||||
RegisterEndpointRule(po, &rule3, "rule3");
|
||||
}
|
||||
|
||||
std::string EndpointConfig::ToString() const {
|
||||
std::ostringstream os;
|
||||
|
||||
os << "EndpointConfig(";
|
||||
os << "rule1=" << rule1.ToString() << ", ";
|
||||
os << "rule2=" << rule2.ToString() << ", ";
|
||||
os << "rule3=" << rule3.ToString() << ")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
bool Endpoint::IsEndpoint(int num_frames_decoded, int trailing_silence_frames,
|
||||
float frame_shift_in_seconds) const {
|
||||
float utterance_length = num_frames_decoded * frame_shift_in_seconds;
|
||||
float trailing_silence = trailing_silence_frames * frame_shift_in_seconds;
|
||||
if (RuleActivated(config_.rule1, "rule1", trailing_silence,
|
||||
utterance_length) ||
|
||||
RuleActivated(config_.rule2, "rule2", trailing_silence,
|
||||
utterance_length) ||
|
||||
RuleActivated(config_.rule3, "rule3", trailing_silence,
|
||||
utterance_length)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
Reference in New Issue
Block a user