Support spoken language identification with whisper (#694)

This commit is contained in:
Fangjun Kuang
2024-03-24 22:57:00 +08:00
committed by GitHub
parent 3cdad9b5d1
commit 0d258dd150
36 changed files with 1173 additions and 200 deletions

View File

@@ -18,6 +18,7 @@
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"
namespace sherpa_onnx {
@@ -25,6 +26,9 @@ class OfflineWhisperModel {
public:
// Constructs the model from an offline model configuration.
explicit OfflineWhisperModel(const OfflineModelConfig &config);
// Constructs the model from a spoken language identification configuration
// rather than from an OfflineModelConfig.
explicit OfflineWhisperModel(
const SpokenLanguageIdentificationConfig &config);
// Android-only constructor: declared only when __ANDROID_API__ >= 9.
// It takes an AAssetManager in addition to the model configuration.
// NOTE(review): presumably the model files are read through `mgr` -- confirm
// against the implementation.
#if __ANDROID_API__ >= 9
OfflineWhisperModel(AAssetManager *mgr, const OfflineModelConfig &config);
#endif
@@ -72,7 +76,8 @@ class OfflineWhisperModel {
Ort::Value n_layer_self_v_cache, Ort::Value n_layer_cross_k,
Ort::Value n_layer_cross_v, Ort::Value offset) const;
int32_t DetectLanguage() const;
// Detects the spoken language and returns it as an int32_t.
// NOTE(review): presumably the return value is a language token ID and
// cross_k / cross_v are the cross-attention key/value tensors -- confirm
// against the implementation.
// Both arguments are taken by non-const reference; the NOLINT markers
// presumably silence the lint rule against non-const reference parameters.
int32_t DetectLanguage(Ort::Value &cross_k, // NOLINT
Ort::Value &cross_v); // NOLINT
/** Return the initial self kv cache in a pair
* - n_layer_self_k_cache A 4-D tensor of shape
@@ -98,6 +103,9 @@ class OfflineWhisperModel {
// Const accessor returning an int32_t.
// NOTE(review): presumably the ID of the "translate" task token -- confirm
// against the implementation.
int32_t Translate() const;
// Returns true if the model is multilingual. How this is determined is not
// visible in this header.
bool IsMultiLingual() const;
// Static helper; it needs no model instance and receives the feature buffer
// through a non-const pointer.
// NOTE(review): presumably `features` holds num_frames * feat_dim floats and
// is normalized in place -- confirm against the definition.
static void NormalizeFeatures(float *features, int32_t num_frames,
int32_t feat_dim);
private:
// Pimpl: the implementation lives in the forward-declared Impl class, which
// this object owns through a unique_ptr.
class Impl;
std::unique_ptr<Impl> impl_;