Add C++ runtime and Python APIs for Moonshine models (#1473)

2024-10-26 14:34:07 +08:00
parent 0f2732e4e8
commit 669f5ef441
33 changed files with 1572 additions and 36 deletions
--- a/sherpa-onnx/csrc/offline-stream.h
+++ b/sherpa-onnx/csrc/offline-stream.h
@@ -34,7 +34,7 @@ struct OfflineRecognitionResult {
  // event target of the audio.
  std::string event;

-    /// timestamps.size() == tokens.size()
+  /// timestamps.size() == tokens.size()
  /// timestamps[i] records the time in seconds when tokens[i] is decoded.
  std::vector<float> timestamps;

@@ -49,6 +49,10 @@ struct WhisperTag {

 struct CEDTag {};

+// Tag type for Moonshine models. Moonshine uses a neural network model,
+// a preprocessor, to convert audio samples to features.
+struct MoonshineTag {};
+
 class OfflineStream {
 public:
  explicit OfflineStream(const FeatureExtractorConfig &config = {},
@@ -56,6 +60,7 @@ class OfflineStream {

  explicit OfflineStream(WhisperTag tag);
  explicit OfflineStream(CEDTag tag);
+  explicit OfflineStream(MoonshineTag tag);
  ~OfflineStream();

  /**
@@ -72,7 +77,10 @@ class OfflineStream {
  void AcceptWaveform(int32_t sampling_rate, const float *waveform,
                      int32_t n) const;

-  /// Return feature dim of this extractor
+  /// Return feature dim of this extractor.
+  ///
+  /// Note: for a Moonshine stream, it instead returns the number of audio
+  /// samples received so far.
  int32_t FeatureDim() const;

  // Get all the feature frames of this stream in a 1-D array, which is