enginex-mr_series-sherpa-onnx/sherpa-onnx/csrc/features.h

// sherpa-onnx/csrc/features.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FEATURES_H_
#define SHERPA_ONNX_CSRC_FEATURES_H_

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

// Settings consumed by FeatureExtractor.
struct FeatureExtractorConfig {
  // The sampling rate the feature extractor operates at. An input waveform
  // whose sampling rate differs from this value is resampled internally.
  int32_t sampling_rate{16000};

  // Feature dimension.
  int32_t feature_dim{80};

  // Controls how the sample range of the input is treated:
  //  - true:  the feature extractor expects inputs to be normalized to
  //           the range [-1, 1].
  //  - false: the inputs are multiplied by 32768.
  // This field is not exposed to users from the commandline; some models
  // set it internally (e.g., paraformer sets it to false).
  bool normalize_samples{true};

  // String form of this config. Defined out of line; the exact format is
  // not visible from this header.
  std::string ToString() const;

  // Takes the ParseOptions object to work with. Presumably registers the
  // user-visible fields above as command-line options -- confirm against
  // the out-of-line definition.
  void Register(ParseOptions *po);
};

// Accepts waveform samples and exposes the resulting feature frames.
//
// All state lives behind impl_ (the Impl class is only declared here and is
// defined out of line). Note that AcceptWaveform() and InputFinished() are
// declared const even though, per their documentation below, they change
// what NumFramesReady()/IsLastFrame() report.
class FeatureExtractor {
 public:
  /**
     @param config Configuration of the extractor. A default-constructed
                   FeatureExtractorConfig is used if omitted.
   */
  explicit FeatureExtractor(const FeatureExtractorConfig &config = {});
  ~FeatureExtractor();

  // The class owns its implementation through a std::unique_ptr, so it was
  // already implicitly non-copyable. State that explicitly so that an
  // accidental copy produces a clear diagnostic at the call site.
  FeatureExtractor(const FeatureExtractor &) = delete;
  FeatureExtractor &operator=(const FeatureExtractor &) = delete;

  /**
     @param sampling_rate The sampling rate of the input waveform. If it is
                          not equal to config.sampling_rate, we will do
                          resampling inside.
     @param waveform Pointer to a 1-D array of size n. It must be normalized to
                     the range [-1, 1].
     @param n Number of entries in waveform
   */
  void AcceptWaveform(int32_t sampling_rate, const float *waveform,
                      int32_t n) const;

  /**
   * InputFinished() tells the class you won't be providing any
   * more waveform.  This will help flush out the last frame or two
   * of features, in the case where snip-edges == false; it also
   * affects the return value of IsLastFrame().
   */
  void InputFinished() const;

  /// Return the number of feature frames that are ready.
  int32_t NumFramesReady() const;

  /** Note: IsLastFrame() will only ever return true if you have called
   * InputFinished() (and this frame is the last frame).
   *
   * @param frame  The frame index to query.
   */
  bool IsLastFrame(int32_t frame) const;

  /** Get n frames starting from the given frame index.
   *
   * @param frame_index  The starting frame index
   * @param n  Number of frames to get.
   * @return Return a 2-D tensor of shape (n, feature_dim),
   *         flattened into a 1-D vector in row-major order.
   */
  std::vector<float> GetFrames(int32_t frame_index, int32_t n) const;

  /// Return feature dim of this extractor
  int32_t FeatureDim() const;

 private:
  // Forward-declared implementation class; its definition is not part of
  // this header.
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_FEATURES_H_
Add Python API (#31) 2023-02-19 19:36:03 +08:00			`// sherpa-onnx/csrc/features.h`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`//`
			`// Copyright (c) 2023 Xiaomi Corporation`

			`#ifndef SHERPA_ONNX_CSRC_FEATURES_H_`
			`#define SHERPA_ONNX_CSRC_FEATURES_H_`

			`#include <memory>`
add online-recognizer (#29) 2023-02-19 12:45:38 +08:00			`#include <string>`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`#include <vector>`

add streaming websocket server and client (#62) 2023-02-24 21:39:51 +08:00			`#include "sherpa-onnx/csrc/parse-options.h"`

Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`namespace sherpa_onnx {`

Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`struct FeatureExtractorConfig {`
Add non-streaming ASR (#92) 2023-03-26 08:53:42 +08:00			`// Sampling rate used by the feature extractor. If it is different from`
			`// the sampling rate of the input waveform, we will do resampling inside.`
Fix modified beam search for iOS and android (#76) * Use Int type for sampling rate * Fix swift * Fix iOS 2023-03-03 15:18:31 +08:00			`int32_t sampling_rate = 16000;`
Add non-streaming ASR (#92) 2023-03-26 08:53:42 +08:00
			`// Feature dimension`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`int32_t feature_dim = 80;`
add online-recognizer (#29) 2023-02-19 12:45:38 +08:00
Support streaming paraformer (#263) 2023-08-14 10:32:14 +08:00			`// Set internally by some models, e.g., paraformer sets it to false.`
			`// This parameter is not exposed to users from the commandline`
			`// If true, the feature extractor expects inputs to be normalized to`
			`// the range [-1, 1].`
			`// If false, we will multiply the inputs by 32768`
			`bool normalize_samples = true;`

add online-recognizer (#29) 2023-02-19 12:45:38 +08:00			`std::string ToString() const;`
add streaming websocket server and client (#62) 2023-02-24 21:39:51 +08:00
			`void Register(ParseOptions *po);`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`};`

Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`class FeatureExtractor {`
			`public:`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`explicit FeatureExtractor(const FeatureExtractorConfig &config = {});`
Refactor feature extractor (#26) 2023-02-19 09:57:56 +08:00			`~FeatureExtractor();`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00
			`/**`
Support resampling (#77) 2023-03-03 16:42:33 +08:00			`@param sampling_rate The sampling_rate of the input waveform. If it does`
			`not equal to config.sampling_rate, we will do`
			`resampling inside.`
			`@param waveform Pointer to a 1-D array of size n. It must be normalized to`
			`the range [-1, 1].`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`@param n Number of entries in waveform`
			`*/`
Add non-streaming ASR (#92) 2023-03-26 08:53:42 +08:00			`void AcceptWaveform(int32_t sampling_rate, const float *waveform,`
			`int32_t n) const;`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00
Add online stream. (#28) 2023-02-19 11:42:15 +08:00			`/**`
			`* InputFinished() tells the class you won't be providing any`
			`* more waveform. This will help flush out the last frame or two`
			`* of features, in the case where snip-edges == false; it also`
			`* affects the return value of IsLastFrame().`
			`*/`
Add non-streaming ASR (#92) 2023-03-26 08:53:42 +08:00			`void InputFinished() const;`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00
			`int32_t NumFramesReady() const;`

Add online stream. (#28) 2023-02-19 11:42:15 +08:00			`/** Note: IsLastFrame() will only ever return true if you have called`
			`* InputFinished() (and this frame is the last frame).`
			`*/`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`bool IsLastFrame(int32_t frame) const;`

			`/** Get n frames starting from the given frame index.`
			`*`
			`* @param frame_index The starting frame index`
			`* @param n Number of frames to get.`
			`* @return Return a 2-D tensor of shape (n, feature_dim).`
			`* which is flattened into a 1-D vector (flattened in in row major)`
			`*/`
			`std::vector<float> GetFrames(int32_t frame_index, int32_t n) const;`

Refactor feature extractor (#26) 2023-02-19 09:57:56 +08:00			`/// Return feature dim of this extractor`
			`int32_t FeatureDim() const;`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00
			`private:`
Refactor feature extractor (#26) 2023-02-19 09:57:56 +08:00			`class Impl;`
			`std::unique_ptr<Impl> impl_;`
Add online LSTM transducer model (#25) 2023-02-18 21:35:15 +08:00			`};`

			`} // namespace sherpa_onnx`

			`#endif // SHERPA_ONNX_CSRC_FEATURES_H_`