Add emotion and event outputs of SenseVoice. (#1257)

* Add emotion and event outputs of SenseVoice.

* Fix the token-count check and update the Java API.

https://github.com/k2-fsa/sherpa-onnx/pull/1257
This commit is contained in:
Robin Zhong
2024-08-14 15:50:13 +08:00
committed by GitHub
parent f300ec0f98
commit 62c4d4ab62
10 changed files with 95 additions and 4 deletions

View File

@@ -52,6 +52,13 @@ static OfflineRecognitionResult ConvertSenseVoiceResult(
r.words = std::move(src.words);
// parse lang, emotion and event from tokens.
if (src.tokens.size() >= 3) {
r.lang = sym_table[src.tokens[0]];
r.emotion = sym_table[src.tokens[1]];
r.event = sym_table[src.tokens[2]];
}
return r;
}

View File

@@ -304,6 +304,19 @@ const OfflineRecognitionResult &OfflineStream::GetResult() const {
std::string OfflineRecognitionResult::AsJsonString() const {
std::ostringstream os;
os << "{";
os << "\"lang\""
<< ": ";
os << std::quoted(lang) << ", ";
os << "\"emotion\""
<< ": ";
os << std::quoted(emotion) << ", ";
os << "\"event\""
<< ": ";
os << std::quoted(event) << ", ";
os << "\"text\""
<< ": ";
os << std::quoted(text) << ", ";

View File

@@ -28,6 +28,12 @@ struct OfflineRecognitionResult {
std::string lang;
// emotion target of the audio.
std::string emotion;
// event target of the audio.
std::string event;
/// timestamps.size() == tokens.size()
/// timestamps[i] records the time in seconds when tokens[i] is decoded.
std::vector<float> timestamps;