Add C API for audio tagging (#754)

This commit is contained in:
Fangjun Kuang
2024-04-11 14:18:43 +08:00
committed by GitHub
parent 34d70a259f
commit f204e62b44
9 changed files with 289 additions and 36 deletions

View File

@@ -427,7 +427,8 @@ SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
/// Destroy an offline stream.
///
/// @param stream A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
SHERPA_ONNX_API void DestroyOfflineStream(
const SherpaOnnxOfflineStream *stream);
/// Accept input audio samples and compute the features.
/// The user has to invoke DecodeOfflineStream() to run the neural network and
@@ -442,9 +443,9 @@ SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
/// @param n Number of elements in the samples array.
///
/// @caution: For each offline stream, please invoke this function only once!
SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
int32_t sample_rate,
const float *samples, int32_t n);
SHERPA_ONNX_API void AcceptWaveformOffline(
const SherpaOnnxOfflineStream *stream, int32_t sample_rate,
const float *samples, int32_t n);
/// Decode an offline stream.
///
/// We assume you have invoked AcceptWaveformOffline() for the given stream
@@ -453,7 +454,8 @@ SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param stream A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API void DecodeOfflineStream(
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream);
const SherpaOnnxOfflineRecognizer *recognizer,
const SherpaOnnxOfflineStream *stream);
/// Decode a list of offline streams in parallel.
///
@@ -1088,6 +1090,65 @@ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
const char *const *names);
// ============================================================
// For audio tagging
// ============================================================
// Settings for the zipformer audio tagging model. Embedded as the `zipformer`
// field of SherpaOnnxAudioTaggingModelConfig.
SHERPA_ONNX_API typedef struct
SherpaOnnxOfflineZipformerAudioTaggingModelConfig {
// NOTE(review): presumably the path to the zipformer model file -- confirm
// against the implementation.
const char *model;
} SherpaOnnxOfflineZipformerAudioTaggingModelConfig;
// Model-level settings for audio tagging. Embedded as the `model` field of
// SherpaOnnxAudioTaggingConfig.
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingModelConfig {
// Settings specific to the zipformer model.
SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer;
// NOTE(review): presumably the number of threads used to run the model --
// confirm against the implementation.
int32_t num_threads;
int32_t debug; // true to print debug information of the model
// NOTE(review): presumably the name of the execution provider (e.g. "cpu")
// -- confirm against the implementation.
const char *provider;
} SherpaOnnxAudioTaggingModelConfig;
// Top-level configuration passed to SherpaOnnxCreateAudioTagging().
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingConfig {
// Model-level settings.
SherpaOnnxAudioTaggingModelConfig model;
// NOTE(review): presumably the path to a file that maps event indexes to
// event names -- confirm against the implementation.
const char *labels;
// Value used by SherpaOnnxAudioTaggingCompute() when its own top_k
// argument is -1.
int32_t top_k;
} SherpaOnnxAudioTaggingConfig;
// One entry of the array returned by SherpaOnnxAudioTaggingCompute().
SHERPA_ONNX_API typedef struct SherpaOnnxAudioEvent {
// NOTE(review): presumably the human-readable label of the event, owned by
// the result array and released by SherpaOnnxAudioTaggingFreeResults() --
// confirm against the implementation.
const char *name;
// NOTE(review): presumably the index of the event in the labels file --
// confirm against the implementation.
int32_t index;
// Probability assigned to this event.
float prob;
} SherpaOnnxAudioEvent;
// Handle type for an audio tagger. The struct is only forward-declared here;
// pointers to it are obtained from SherpaOnnxCreateAudioTagging() and
// released with SherpaOnnxDestroyAudioTagging().
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging;
// Create an audio tagger from the given configuration.
//
// @param config  Configuration of the tagger to create.
// @return A pointer to the tagger. The user has to invoke
//         SherpaOnnxDestroyAudioTagging() to free the returned pointer
//         to avoid a memory leak.
//         NOTE(review): behavior on an invalid config (e.g. whether NULL is
//         returned) is not visible here -- confirm against the implementation.
SHERPA_ONNX_API const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
const SherpaOnnxAudioTaggingConfig *config);
// Free an audio tagger.
//
// @param tagger  A pointer returned by SherpaOnnxCreateAudioTagging().
SHERPA_ONNX_API void SherpaOnnxDestroyAudioTagging(
const SherpaOnnxAudioTagging *tagger);
// Create an offline stream for the given tagger.
//
// @param tagger  A pointer returned by SherpaOnnxCreateAudioTagging().
// @return A pointer to the stream. The user has to invoke
//         DestroyOfflineStream() to free the returned pointer to avoid a
//         memory leak.
//
// NOTE(review): presumably audio is fed to the returned stream with
// AcceptWaveformOffline() before it is passed to
// SherpaOnnxAudioTaggingCompute() -- confirm against the implementation.
SHERPA_ONNX_API const SherpaOnnxOfflineStream *
SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger);
// Compute the audio events for a stream.
//
// @param tagger  The tagger to use.
// @param s       The stream to run the tagger on.
//                NOTE(review): presumably a stream created by
//                SherpaOnnxAudioTaggingCreateOfflineStream() -- confirm.
// @param top_k   Number of events to return. If top_k is -1, then
//                config.top_k is used, where config is the config used to
//                create the input tagger.
// @return An array of pointers. The length of the array is top_k + 1.
//         ans[0]->prob has the largest probability among the array elements.
//         The last element of the array is a null pointer, so the array can
//         be walked without knowing top_k.
//
// The user has to use SherpaOnnxAudioTaggingFreeResults()
// to free the returned pointer to avoid a memory leak.
SHERPA_ONNX_API const SherpaOnnxAudioEvent *const *
SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger,
const SherpaOnnxOfflineStream *s, int32_t top_k);
// Free the results of an audio tagging computation.
//
// @param p  A pointer returned by SherpaOnnxAudioTaggingCompute().
SHERPA_ONNX_API void SherpaOnnxAudioTaggingFreeResults(
const SherpaOnnxAudioEvent *const *p);
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif