Add C API for audio tagging (#754)
This commit is contained in:
13
.github/scripts/test-c-api.sh
vendored
13
.github/scripts/test-c-api.sh
vendored
@@ -10,8 +10,21 @@ log() {
|
|||||||
|
|
||||||
echo "SLID_EXE is $SLID_EXE"
|
echo "SLID_EXE is $SLID_EXE"
|
||||||
echo "SID_EXE is $SID_EXE"
|
echo "SID_EXE is $SID_EXE"
|
||||||
|
echo "AT_EXE is $AT_EXE"
|
||||||
echo "PATH: $PATH"
|
echo "PATH: $PATH"
|
||||||
|
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
log "Test audio tagging "
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
|
||||||
|
$AT_EXE
|
||||||
|
|
||||||
|
rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09
|
||||||
|
|
||||||
|
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "Download whisper tiny for spoken language identification "
|
log "Download whisper tiny for spoken language identification "
|
||||||
|
|||||||
18
.github/workflows/linux.yaml
vendored
18
.github/workflows/linux.yaml
vendored
@@ -126,6 +126,16 @@ jobs:
|
|||||||
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
|
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
|
||||||
path: build/bin/*
|
path: build/bin/*
|
||||||
|
|
||||||
|
- name: Test C API
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api
|
||||||
|
export SID_EXE=speaker-identification-c-api
|
||||||
|
export AT_EXE=audio-tagging-c-api
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
- name: Test Audio tagging
|
- name: Test Audio tagging
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -142,14 +152,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-online-ctc.sh
|
.github/scripts/test-online-ctc.sh
|
||||||
|
|
||||||
- name: Test C API
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export SLID_EXE=spoken-language-identification-c-api
|
|
||||||
export SID_EXE=speaker-identification-c-api
|
|
||||||
|
|
||||||
.github/scripts/test-c-api.sh
|
|
||||||
|
|
||||||
- name: Test spoken language identification (C++ API)
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
19
.github/workflows/macos.yaml
vendored
19
.github/workflows/macos.yaml
vendored
@@ -105,6 +105,16 @@ jobs:
|
|||||||
otool -L build/bin/sherpa-onnx
|
otool -L build/bin/sherpa-onnx
|
||||||
otool -l build/bin/sherpa-onnx
|
otool -l build/bin/sherpa-onnx
|
||||||
|
|
||||||
|
- name: Test C API
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api
|
||||||
|
export SID_EXE=speaker-identification-c-api
|
||||||
|
export AT_EXE=audio-tagging-c-api
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
- name: Test Audio tagging
|
- name: Test Audio tagging
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -113,15 +123,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-audio-tagging.sh
|
.github/scripts/test-audio-tagging.sh
|
||||||
|
|
||||||
- name: Test C API
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin:$PATH
|
|
||||||
export SLID_EXE=spoken-language-identification-c-api
|
|
||||||
export SID_EXE=speaker-identification-c-api
|
|
||||||
|
|
||||||
.github/scripts/test-c-api.sh
|
|
||||||
|
|
||||||
- name: Test spoken language identification (C++ API)
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
20
.github/workflows/windows-x64.yaml
vendored
20
.github/workflows/windows-x64.yaml
vendored
@@ -72,6 +72,17 @@ jobs:
|
|||||||
|
|
||||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||||
|
|
||||||
|
- name: Test C API
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin/Release:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api.exe
|
||||||
|
export SID_EXE=speaker-identification-c-api.exe
|
||||||
|
export AT_EXE=audio-tagging-c-api.exe
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
|
|
||||||
- name: Test Audio tagging
|
- name: Test Audio tagging
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -80,15 +91,6 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/test-audio-tagging.sh
|
.github/scripts/test-audio-tagging.sh
|
||||||
|
|
||||||
- name: Test C API
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
export PATH=$PWD/build/bin/Release:$PATH
|
|
||||||
export SLID_EXE=spoken-language-identification-c-api.exe
|
|
||||||
export SID_EXE=speaker-identification-c-api.exe
|
|
||||||
|
|
||||||
.github/scripts/test-c-api.sh
|
|
||||||
|
|
||||||
- name: Test spoken language identification (C++ API)
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/windows-x86.yaml
vendored
2
.github/workflows/windows-x86.yaml
vendored
@@ -77,6 +77,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin/Release:$PATH
|
export PATH=$PWD/build/bin/Release:$PATH
|
||||||
export SLID_EXE=spoken-language-identification-c-api.exe
|
export SLID_EXE=spoken-language-identification-c-api.exe
|
||||||
|
export SID_EXE=speaker-identification-c-api.exe
|
||||||
|
export AT_EXE=audio-tagging-c-api.exe
|
||||||
|
|
||||||
.github/scripts/test-c-api.sh
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
|
|||||||
add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
|
add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
|
||||||
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
|
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
|
||||||
|
|
||||||
|
add_executable(audio-tagging-c-api audio-tagging-c-api.c)
|
||||||
|
target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api)
|
||||||
|
|
||||||
if(SHERPA_ONNX_HAS_ALSA)
|
if(SHERPA_ONNX_HAS_ALSA)
|
||||||
add_subdirectory(./asr-microphone-example)
|
add_subdirectory(./asr-microphone-example)
|
||||||
elseif((UNIX AND NOT APPLE) OR LINUX)
|
elseif((UNIX AND NOT APPLE) OR LINUX)
|
||||||
|
|||||||
79
c-api-examples/audio-tagging-c-api.c
Normal file
79
c-api-examples/audio-tagging-c-api.c
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
// c-api-examples/audio-tagging-c-api.c
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
// We assume you have pre-downloaded the model files for testing
|
||||||
|
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
||||||
|
//
|
||||||
|
// An example is given below:
|
||||||
|
//
|
||||||
|
// clang-format off
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
// tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
// rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
//
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/c-api/c-api.h"
|
||||||
|
|
||||||
|
int32_t main() {
|
||||||
|
SherpaOnnxAudioTaggingConfig config;
|
||||||
|
memset(&config, 0, sizeof(config));
|
||||||
|
|
||||||
|
config.model.zipformer.model =
|
||||||
|
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx";
|
||||||
|
config.model.num_threads = 1;
|
||||||
|
config.model.debug = 1;
|
||||||
|
config.model.provider = "cpu";
|
||||||
|
// clang-format off
|
||||||
|
config.labels = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv";
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
const SherpaOnnxAudioTagging *tagger = SherpaOnnxCreateAudioTagging(&config);
|
||||||
|
if (!tagger) {
|
||||||
|
fprintf(stderr, "Failed to create audio tagger. Please check your config");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// You can find more test waves from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
const char *wav_filename =
|
||||||
|
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav";
|
||||||
|
|
||||||
|
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
|
||||||
|
if (wave == NULL) {
|
||||||
|
fprintf(stderr, "Failed to read %s\n", wav_filename);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SherpaOnnxOfflineStream *stream =
|
||||||
|
SherpaOnnxAudioTaggingCreateOfflineStream(tagger);
|
||||||
|
|
||||||
|
AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
|
||||||
|
wave->num_samples);
|
||||||
|
|
||||||
|
int32_t top_k = 5;
|
||||||
|
const SherpaOnnxAudioEvent *const *results =
|
||||||
|
SherpaOnnxAudioTaggingCompute(tagger, stream, top_k);
|
||||||
|
|
||||||
|
fprintf(stderr, "--------------------------------------------------\n");
|
||||||
|
fprintf(stderr, "Index\t\tProbability\t\tEvent name\n");
|
||||||
|
fprintf(stderr, "--------------------------------------------------\n");
|
||||||
|
for (int32_t i = 0; i != top_k; ++i) {
|
||||||
|
fprintf(stderr, "%d\t\t%.3f\t\t\t%s\n", i, results[i]->prob,
|
||||||
|
results[i]->name);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "--------------------------------------------------\n");
|
||||||
|
|
||||||
|
SherpaOnnxAudioTaggingFreeResults(results);
|
||||||
|
DestroyOfflineStream(stream);
|
||||||
|
SherpaOnnxFreeWave(wave);
|
||||||
|
SherpaOnnxDestroyAudioTagging(tagger);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
@@ -10,6 +10,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/audio-tagging.h"
|
||||||
#include "sherpa-onnx/csrc/circular-buffer.h"
|
#include "sherpa-onnx/csrc/circular-buffer.h"
|
||||||
#include "sherpa-onnx/csrc/display.h"
|
#include "sherpa-onnx/csrc/display.h"
|
||||||
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
||||||
@@ -400,15 +401,18 @@ SherpaOnnxOfflineStream *CreateOfflineStream(
|
|||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DestroyOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; }
|
void DestroyOfflineStream(const SherpaOnnxOfflineStream *stream) {
|
||||||
|
delete stream;
|
||||||
|
}
|
||||||
|
|
||||||
void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate,
|
void AcceptWaveformOffline(const SherpaOnnxOfflineStream *stream,
|
||||||
const float *samples, int32_t n) {
|
int32_t sample_rate, const float *samples,
|
||||||
|
int32_t n) {
|
||||||
stream->impl->AcceptWaveform(sample_rate, samples, n);
|
stream->impl->AcceptWaveform(sample_rate, samples, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer,
|
void DecodeOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer,
|
||||||
SherpaOnnxOfflineStream *stream) {
|
const SherpaOnnxOfflineStream *stream) {
|
||||||
recognizer->impl->DecodeStream(stream->impl.get());
|
recognizer->impl->DecodeStream(stream->impl.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1209,3 +1213,89 @@ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
|
|||||||
|
|
||||||
delete[] names;
|
delete[] names;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct SherpaOnnxAudioTagging {
|
||||||
|
std::unique_ptr<sherpa_onnx::AudioTagging> impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
|
||||||
|
const SherpaOnnxAudioTaggingConfig *config) {
|
||||||
|
sherpa_onnx::AudioTaggingConfig ac;
|
||||||
|
ac.model.zipformer.model = SHERPA_ONNX_OR(config->model.zipformer.model, "");
|
||||||
|
ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
|
ac.model.debug = config->model.debug;
|
||||||
|
ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
ac.labels = SHERPA_ONNX_OR(config->labels, "");
|
||||||
|
ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);
|
||||||
|
|
||||||
|
if (ac.model.debug) {
|
||||||
|
SHERPA_ONNX_LOGE("%s\n", ac.ToString().c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ac.Validate()) {
|
||||||
|
SHERPA_ONNX_LOGE("Errors in config");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
SherpaOnnxAudioTagging *tagger = new SherpaOnnxAudioTagging;
|
||||||
|
tagger->impl = std::make_unique<sherpa_onnx::AudioTagging>(ac);
|
||||||
|
|
||||||
|
return tagger;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger) {
|
||||||
|
delete tagger;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SherpaOnnxOfflineStream *SherpaOnnxAudioTaggingCreateOfflineStream(
|
||||||
|
const SherpaOnnxAudioTagging *tagger) {
|
||||||
|
const SherpaOnnxOfflineStream *stream =
|
||||||
|
new SherpaOnnxOfflineStream(tagger->impl->CreateStream());
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SherpaOnnxAudioEvent *const *SherpaOnnxAudioTaggingCompute(
|
||||||
|
const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s,
|
||||||
|
int32_t top_k) {
|
||||||
|
std::vector<sherpa_onnx::AudioEvent> events =
|
||||||
|
tagger->impl->Compute(s->impl.get(), top_k);
|
||||||
|
|
||||||
|
int32_t n = static_cast<int32_t>(events.size());
|
||||||
|
SherpaOnnxAudioEvent **ans = new SherpaOnnxAudioEvent *[n + 1];
|
||||||
|
ans[n] = nullptr;
|
||||||
|
|
||||||
|
int32_t i = 0;
|
||||||
|
for (const auto &e : events) {
|
||||||
|
SherpaOnnxAudioEvent *p = new SherpaOnnxAudioEvent;
|
||||||
|
|
||||||
|
char *name = new char[e.name.size() + 1];
|
||||||
|
std::copy(e.name.begin(), e.name.end(), name);
|
||||||
|
name[e.name.size()] = 0;
|
||||||
|
|
||||||
|
p->name = name;
|
||||||
|
|
||||||
|
p->index = e.index;
|
||||||
|
p->prob = e.prob;
|
||||||
|
|
||||||
|
ans[i] = p;
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SherpaOnnxAudioTaggingFreeResults(
|
||||||
|
const SherpaOnnxAudioEvent *const *events) {
|
||||||
|
auto p = events;
|
||||||
|
|
||||||
|
while (p && *p) {
|
||||||
|
auto e = *p;
|
||||||
|
|
||||||
|
delete[] e->name;
|
||||||
|
delete e;
|
||||||
|
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] events;
|
||||||
|
}
|
||||||
|
|||||||
@@ -427,7 +427,8 @@ SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
|
|||||||
/// Destroy an offline stream.
|
/// Destroy an offline stream.
|
||||||
///
|
///
|
||||||
/// @param stream A pointer returned by CreateOfflineStream()
|
/// @param stream A pointer returned by CreateOfflineStream()
|
||||||
SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
|
SHERPA_ONNX_API void DestroyOfflineStream(
|
||||||
|
const SherpaOnnxOfflineStream *stream);
|
||||||
|
|
||||||
/// Accept input audio samples and compute the features.
|
/// Accept input audio samples and compute the features.
|
||||||
/// The user has to invoke DecodeOfflineStream() to run the neural network and
|
/// The user has to invoke DecodeOfflineStream() to run the neural network and
|
||||||
@@ -442,9 +443,9 @@ SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
|
|||||||
/// @param n Number of elements in the samples array.
|
/// @param n Number of elements in the samples array.
|
||||||
///
|
///
|
||||||
/// @caution: For each offline stream, please invoke this function only once!
|
/// @caution: For each offline stream, please invoke this function only once!
|
||||||
SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
|
SHERPA_ONNX_API void AcceptWaveformOffline(
|
||||||
int32_t sample_rate,
|
const SherpaOnnxOfflineStream *stream, int32_t sample_rate,
|
||||||
const float *samples, int32_t n);
|
const float *samples, int32_t n);
|
||||||
/// Decode an offline stream.
|
/// Decode an offline stream.
|
||||||
///
|
///
|
||||||
/// We assume you have invoked AcceptWaveformOffline() for the given stream
|
/// We assume you have invoked AcceptWaveformOffline() for the given stream
|
||||||
@@ -453,7 +454,8 @@ SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
|
|||||||
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
|
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
|
||||||
/// @param stream A pointer returned by CreateOfflineStream()
|
/// @param stream A pointer returned by CreateOfflineStream()
|
||||||
SHERPA_ONNX_API void DecodeOfflineStream(
|
SHERPA_ONNX_API void DecodeOfflineStream(
|
||||||
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream);
|
const SherpaOnnxOfflineRecognizer *recognizer,
|
||||||
|
const SherpaOnnxOfflineStream *stream);
|
||||||
|
|
||||||
/// Decode a list offline streams in parallel.
|
/// Decode a list offline streams in parallel.
|
||||||
///
|
///
|
||||||
@@ -1088,6 +1090,65 @@ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
|
|||||||
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
|
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
|
||||||
const char *const *names);
|
const char *const *names);
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// For audio tagging
|
||||||
|
// ============================================================
|
||||||
|
SHERPA_ONNX_API typedef struct
|
||||||
|
SherpaOnnxOfflineZipformerAudioTaggingModelConfig {
|
||||||
|
const char *model;
|
||||||
|
} SherpaOnnxOfflineZipformerAudioTaggingModelConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingModelConfig {
|
||||||
|
SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer;
|
||||||
|
int32_t num_threads;
|
||||||
|
int32_t debug; // true to print debug information of the model
|
||||||
|
const char *provider;
|
||||||
|
} SherpaOnnxAudioTaggingModelConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTaggingConfig {
|
||||||
|
SherpaOnnxAudioTaggingModelConfig model;
|
||||||
|
const char *labels;
|
||||||
|
int32_t top_k;
|
||||||
|
} SherpaOnnxAudioTaggingConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxAudioEvent {
|
||||||
|
const char *name;
|
||||||
|
int32_t index;
|
||||||
|
float prob;
|
||||||
|
} SherpaOnnxAudioEvent;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging;
|
||||||
|
|
||||||
|
// The user has to invoke
|
||||||
|
// SherpaOnnxDestroyAudioTagging()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
|
||||||
|
const SherpaOnnxAudioTaggingConfig *config);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroyAudioTagging(
|
||||||
|
const SherpaOnnxAudioTagging *tagger);
|
||||||
|
|
||||||
|
// The user has to invoke DestroyOfflineStream()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxOfflineStream *
|
||||||
|
SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger);
|
||||||
|
|
||||||
|
// Return an array of pointers. The length of the array is top_k + 1.
|
||||||
|
// If top_k is -1, then config.top_k is used, where config is the config
|
||||||
|
// used to create the input tagger.
|
||||||
|
//
|
||||||
|
// The ans[0]->prob has the largest probability among the array elements
|
||||||
|
// The last element of the array is a null pointer
|
||||||
|
//
|
||||||
|
// The user has to use SherpaOnnxAudioTaggingFreeResults()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxAudioEvent *const *
|
||||||
|
SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger,
|
||||||
|
const SherpaOnnxOfflineStream *s, int32_t top_k);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxAudioTaggingFreeResults(
|
||||||
|
const SherpaOnnxAudioEvent *const *p);
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user