Add C API for spoken language identification. (#695)
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -16,7 +17,9 @@
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/spoken-language-identification.h"
|
||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||
#include "sherpa-onnx/csrc/wave-reader.h"
|
||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
|
||||
struct SherpaOnnxOnlineRecognizer {
|
||||
@@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
|
||||
int32_t sample_rate, const char *filename) {
|
||||
return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
|
||||
}
|
||||
|
||||
// Reads the wave file `filename` through sherpa_onnx::ReadWave() and returns
// its contents as a heap-allocated SherpaOnnxWave.
//
// Returns nullptr when sherpa_onnx::ReadWave() reports a failure.
// Otherwise both the returned struct and its `samples` array are allocated
// here and must be released with SherpaOnnxFreeWave().
const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
  bool ok = false;
  int32_t rate = -1;
  const std::vector<float> data = sherpa_onnx::ReadWave(filename, &rate, &ok);
  if (!ok) {
    return nullptr;
  }

  // Copy into a plain array so that the C struct owns its own buffer.
  const auto count = data.size();
  float *buffer = new float[count];
  std::copy(data.cbegin(), data.cend(), buffer);

  SherpaOnnxWave *result = new SherpaOnnxWave;
  result->samples = buffer;
  result->sample_rate = rate;
  result->num_samples = static_cast<int32_t>(count);
  return result;
}
|
||||
|
||||
void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
|
||||
if (wave) {
|
||||
delete[] wave->samples;
|
||||
delete wave;
|
||||
}
|
||||
}
|
||||
|
||||
struct SherpaOnnxSpokenLanguageIdentification {
|
||||
std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
|
||||
};
|
||||
|
||||
// Creates a spoken language identification object from a C config struct.
//
// Each field of `config` is copied into the C++ config through
// SHERPA_ONNX_OR with a fallback ("" for the whisper encoder/decoder, -1 for
// tail_paddings, 1 for num_threads, "cpu" for provider). The macro is defined
// elsewhere; presumably it selects the fallback when the field is 0/NULL --
// confirm against its definition.
//
// Returns nullptr if `config` is null or if the resulting config fails
// Validate(). On success the caller owns the returned pointer and must
// release it with SherpaOnnxDestroySpokenLanguageIdentification().
const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
  if (!config) {
    SHERPA_ONNX_LOGE("config is a null pointer");
    return nullptr;
  }

  sherpa_onnx::SpokenLanguageIdentificationConfig slid_config;
  slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
  slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
  slid_config.whisper.tail_paddings =
      SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);
  slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  slid_config.debug = config->debug;
  slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");

  if (slid_config.debug) {
    SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
  }

  if (!slid_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  // Construct the implementation before allocating the wrapper, so that the
  // wrapper cannot leak if the implementation's constructor throws.
  auto impl =
      std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config);

  SherpaOnnxSpokenLanguageIdentification *slid =
      new SherpaOnnxSpokenLanguageIdentification;
  slid->impl = std::move(impl);

  return slid;
}
|
||||
|
||||
void SherpaOnnxDestroySpokenLanguageIdentification(
|
||||
const SherpaOnnxSpokenLanguageIdentification *slid) {
|
||||
delete slid;
|
||||
}
|
||||
|
||||
// Asks the identifier for a new stream and wraps it in a heap-allocated
// SherpaOnnxOfflineStream, which the caller owns.
// `slid` is dereferenced without a check and therefore must not be null.
SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  return new SherpaOnnxOfflineStream(slid->impl->CreateStream());
}
|
||||
|
||||
// Runs language identification on stream `s` and returns the outcome as a
// heap-allocated result whose `lang` field is a NUL-terminated copy of the
// string produced by the implementation's Compute().
//
// Neither `slid` nor `s` is checked for null. The caller releases the result
// with SherpaOnnxDestroySpokenLanguageIdentificationResult().
const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s) {
  const std::string detected = slid->impl->Compute(s->impl.get());

  // Duplicate the string into a plain char array owned by the result.
  const auto length = detected.size();
  char *buffer = new char[length + 1];
  detected.copy(buffer, length);
  buffer[length] = '\0';

  auto *result = new SherpaOnnxSpokenLanguageIdentificationResult;
  result->lang = buffer;
  return result;
}
|
||||
|
||||
void SherpaOnnxDestroySpokenLanguageIdentificationResult(
|
||||
const SherpaOnnxSpokenLanguageIdentificationResult *r) {
|
||||
if (r) {
|
||||
delete[] r->lang;
|
||||
delete r;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
|
||||
int32_t sample_rate,
|
||||
const char *filename);
|
||||
|
||||
// Contents of a wave file as returned by SherpaOnnxReadWave().
SHERPA_ONNX_API typedef struct SherpaOnnxWave {
  // Audio samples, normalized to the range [-1, 1].
  const float *samples;
  // Sample rate reported by the wave reader.
  int32_t sample_rate;
  // Number of entries in `samples`.
  int32_t num_samples;
} SherpaOnnxWave;
|
||||
|
||||
// Return a NULL pointer on error. It supports only standard WAVE file.
|
||||
// Each sample should be 16-bit. It supports only single channel..
|
||||
//
|
||||
// If the returned pointer is not NULL, the user has to invoke
|
||||
// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
|
||||
|
||||
// Free a wave returned by SherpaOnnxReadWave(), including its sample buffer.
// Passing a NULL pointer is a no-op.
SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
|
||||
|
||||
// Spoken language identification
|
||||
|
||||
// Whisper-specific settings for spoken language identification.
SHERPA_ONNX_API typedef struct
    SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  // Whisper encoder model (presumably a file path -- confirm).
  const char *encoder;
  // Whisper decoder model (presumably a file path -- confirm).
  const char *decoder;
  // Forwarded to the C++ whisper config; the implementation falls back to -1
  // when this is unset.
  int32_t tail_paddings;
} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
|
||||
SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
|
||||
int32_t num_threads;
|
||||
int32_t debug;
|
||||
const char *provider;
|
||||
} SherpaOnnxSpokenLanguageIdentificationConfig;
|
||||
|
||||
// Opaque handle type; the struct is only declared here, not defined.
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification
    SherpaOnnxSpokenLanguageIdentification;
|
||||
|
||||
// Create an instance of SpokenLanguageIdentification.
|
||||
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification()
|
||||
// to free the returned pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
|
||||
SherpaOnnxCreateSpokenLanguageIdentification(
|
||||
const SherpaOnnxSpokenLanguageIdentificationConfig *config);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
|
||||
const SherpaOnnxSpokenLanguageIdentification *slid);
|
||||
|
||||
// The user has to invoke DestroyOfflineStream()
|
||||
// to free the returned pointer to avoid memory leak
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineStream *
|
||||
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
|
||||
const SherpaOnnxSpokenLanguageIdentification *slid);
|
||||
|
||||
// Result of SherpaOnnxSpokenLanguageIdentificationCompute().
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
  // NUL-terminated language code, for example:
  //   en for English
  //   de for German
  //   zh for Chinese
  //   es for Spanish
  //   ...
  const char *lang;
} SherpaOnnxSpokenLanguageIdentificationResult;
|
||||
|
||||
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult()
|
||||
// to free the returned pointer to avoid memory leak
|
||||
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
|
||||
SherpaOnnxSpokenLanguageIdentificationCompute(
|
||||
const SherpaOnnxSpokenLanguageIdentification *slid,
|
||||
const SherpaOnnxOfflineStream *s);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
|
||||
const SherpaOnnxSpokenLanguageIdentificationResult *r);
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user