Add emotion, event of SenseVoice. (#1257)

* Add emotion and event of SenseVoice.
* Fix the tokens size check and update the Java API.

https://github.com/k2-fsa/sherpa-onnx/pull/1257
2024-08-14 15:50:13 +08:00
parent f300ec0f98
commit 62c4d4ab62
10 changed files with 95 additions and 4 deletions
--- a/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h
+++ b/sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h
@@ -52,6 +52,13 @@ static OfflineRecognitionResult ConvertSenseVoiceResult(

  r.words = std::move(src.words);

+  // parse lang, emotion and event from tokens.
+  if (src.tokens.size() >= 3) {
+    r.lang = sym_table[src.tokens[0]];
+    r.emotion = sym_table[src.tokens[1]];
+    r.event = sym_table[src.tokens[2]];
+  }
+
  return r;
 }

--- a/sherpa-onnx/csrc/offline-stream.cc
+++ b/sherpa-onnx/csrc/offline-stream.cc
@@ -304,6 +304,19 @@ const OfflineRecognitionResult &OfflineStream::GetResult() const {
 std::string OfflineRecognitionResult::AsJsonString() const {
  std::ostringstream os;
  os << "{";
+
+  os << "\"lang\""
+     << ": ";
+  os << std::quoted(lang) << ", ";
+
+  os << "\"emotion\""
+     << ": ";
+  os << std::quoted(emotion) << ", ";
+
+  os << "\"event\""
+     << ": ";
+  os << std::quoted(event) << ", ";
+
  os << "\"text\""
     << ": ";
  os << std::quoted(text) << ", ";
--- a/sherpa-onnx/csrc/offline-stream.h
+++ b/sherpa-onnx/csrc/offline-stream.h
@@ -28,6 +28,12 @@ struct OfflineRecognitionResult {

  std::string lang;

+  // emotion target of the audio.
+  std::string emotion;
+
+  // event target of the audio.
+  std::string event;
+
    /// timestamps.size() == tokens.size()
  /// timestamps[i] records the time in seconds when tokens[i] is decoded.
  std::vector<float> timestamps;