Add C++ runtime and Python APIs for Moonshine models (#1473)

This commit is contained in:
Fangjun Kuang
2024-10-26 14:34:07 +08:00
committed by GitHub
parent 0f2732e4e8
commit 669f5ef441
33 changed files with 1572 additions and 36 deletions

View File

@@ -34,7 +34,7 @@ struct OfflineRecognitionResult {
// event target of the audio.
std::string event;
/// timestamps.size() == tokens.size()
/// timestamps.size() == tokens.size()
/// timestamps[i] records the time in seconds when tokens[i] is decoded.
std::vector<float> timestamps;
@@ -49,6 +49,10 @@ struct WhisperTag {
// Empty tag type used to select the OfflineStream(CEDTag) constructor
// overload.
struct CEDTag {};
// Empty tag type used to select the OfflineStream(MoonshineTag) constructor
// overload.
//
// Moonshine uses a neural network model, a preprocessor, to convert
// audio samples to features.
struct MoonshineTag {};
class OfflineStream {
public:
explicit OfflineStream(const FeatureExtractorConfig &config = {},
@@ -56,6 +60,7 @@ class OfflineStream {
explicit OfflineStream(WhisperTag tag);
explicit OfflineStream(CEDTag tag);
explicit OfflineStream(MoonshineTag tag);
~OfflineStream();
/**
@@ -72,7 +77,10 @@ class OfflineStream {
void AcceptWaveform(int32_t sampling_rate, const float *waveform,
int32_t n) const;
/// Return feature dim of this extractor
/// Return feature dim of this extractor.
///
/// Note: if it is Moonshine, then it returns the number of audio samples
/// currently received.
int32_t FeatureDim() const;
// Get all the feature frames of this stream in a 1-D array, which is