HarmonyOS support for VAD. (#1561)

This commit is contained in:
Fangjun Kuang
2024-11-24 16:29:24 +08:00
committed by GitHub
parent e424cc9e0d
commit 31d6206fde
15 changed files with 231 additions and 71 deletions

View File

@@ -11,6 +11,10 @@
#include <utility>
#include <vector>
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
@@ -917,8 +921,8 @@ struct SherpaOnnxVoiceActivityDetector {
std::unique_ptr<sherpa_onnx::VoiceActivityDetector> impl;
};
SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) {
sherpa_onnx::VadModelConfig GetVadModelConfig(
const SherpaOnnxVadModelConfig *config) {
sherpa_onnx::VadModelConfig vad_config;
vad_config.silero_vad.model = SHERPA_ONNX_OR(config->silero_vad.model, "");
@@ -947,9 +951,20 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
if (vad_config.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", vad_config.ToString().c_str());
#else
SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str());
#endif
}
return vad_config;
}
SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) {
auto vad_config = GetVadModelConfig(config);
if (!vad_config.Validate()) {
SHERPA_ONNX_LOGE("Errors in config");
return nullptr;
@@ -962,6 +977,25 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
return p;
}
#ifdef __OHOS__
// Create a voice activity detector for HarmonyOS.
//
// @param config  VAD configuration; it is converted with GetVadModelConfig().
// @param buffer_size_in_seconds  Forwarded unchanged to
//                                sherpa_onnx::VoiceActivityDetector.
// @param mgr  Resource manager handed to sherpa_onnx::VoiceActivityDetector.
//             If it is nullptr, this function simply delegates to
//             SherpaOnnxCreateVoiceActivityDetector().
// @return A newly allocated detector. Free it with
//         SherpaOnnxDestroyVoiceActivityDetector().
//
// Note: when mgr is not nullptr, vad_config.Validate() is NOT called on this
// path, so an unusable config is only detected while the detector is being
// constructed.
SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS(
    const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    return SherpaOnnxCreateVoiceActivityDetector(config,
                                                 buffer_size_in_seconds);
  }

  auto vad_config = GetVadModelConfig(config);

  // Keep the wrapper owned by a smart pointer until the implementation has
  // been constructed, so that it is not leaked if the construction throws.
  auto p = std::make_unique<SherpaOnnxVoiceActivityDetector>();
  p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
      mgr, vad_config, buffer_size_in_seconds);

  // Ownership is transferred to the caller of this C API.
  return p.release();
}
#endif
void SherpaOnnxDestroyVoiceActivityDetector(
SherpaOnnxVoiceActivityDetector *p) {
delete p;

View File

@@ -841,6 +841,21 @@ SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector *
SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config,
float buffer_size_in_seconds);
#ifdef __OHOS__
// Forward declaration of the HarmonyOS resource manager type.
typedef struct NativeResourceManager NativeResourceManager;
// Return an instance of VoiceActivityDetector. It is for HarmonyOS.
//
// The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free
// the returned pointer to avoid memory leak.
//
// @param config  The VAD configuration.
// @param buffer_size_in_seconds  Passed unchanged to the underlying
//                                VoiceActivityDetector.
// @param mgr  The resource manager passed to the underlying
//             VoiceActivityDetector. If it is NULL, this function behaves
//             like SherpaOnnxCreateVoiceActivityDetector().
//
// Note: When mgr is not NULL, the config is not validated before the
// detector is created.
SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector *
SherpaOnnxCreateVoiceActivityDetectorOHOS(
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
NativeResourceManager *mgr);
#endif
SHERPA_ONNX_API void SherpaOnnxDestroyVoiceActivityDetector(
SherpaOnnxVoiceActivityDetector *p);

View File

@@ -207,6 +207,12 @@ target_link_libraries(sherpa-onnx-core
kaldi-decoder-core
ssentencepiece_core
)
# When OHOS is defined and equals the string "OHOS" (presumably set by the
# HarmonyOS toolchain file -- TODO confirm), additionally link
# sherpa-onnx-core against hilog_ndk.z and rawfile.z. The C++ sources include
# "hilog/log.h" and "rawfile/raw_file_manager.h" when __OHOS__ is set.
if(DEFINED OHOS AND x${OHOS} STREQUAL xOHOS)
target_link_libraries(sherpa-onnx-core
hilog_ndk.z
rawfile.z
)
endif()
if(SHERPA_ONNX_ENABLE_GPU)
target_link_libraries(sherpa-onnx-core

View File

@@ -8,6 +8,16 @@
#include <stdlib.h>
#include <utility>
#if __OHOS__
#include "hilog/log.h"
#undef LOG_DOMAIN
#undef LOG_TAG
// https://gitee.com/openharmony/docs/blob/145a084f0b742e4325915e32f8184817927d1251/en/contribute/OpenHarmony-Log-guide.md#hilog-api-usage-specifications
#define LOG_DOMAIN 0x6666
#define LOG_TAG "sherpa_onnx"
#endif
#if __ANDROID_API__ >= 8
#include "android/log.h"
@@ -19,6 +29,8 @@
fprintf(stderr, "\n"); \
__android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__); \
} while (0)
#elif defined(__OHOS__)
#define SHERPA_ONNX_LOGE(...) OH_LOG_INFO(LOG_APP, ##__VA_ARGS__)
#elif SHERPA_ONNX_ENABLE_WASM
#define SHERPA_ONNX_LOGE(...) \
do { \

View File

@@ -7,9 +7,13 @@
#include <algorithm>
#include <fstream>
#include <functional>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
#include "sherpa-onnx/csrc/macros.h"
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
@@ -326,6 +330,38 @@ std::vector<char> ReadFile(AAssetManager *mgr, const std::string &filename) {
}
#endif
#if __OHOS__
// Read the whole content of a HarmonyOS rawfile into memory.
//
// @param mgr  Resource manager used to open the rawfile.
// @param filename  Name passed to OH_ResourceManager_OpenRawFile().
// @return The content of the file. An empty vector is returned (and a
//         message is logged) if the file cannot be opened or if the number
//         of bytes read differs from the size reported for the file.
std::vector<char> ReadFile(NativeResourceManager *mgr,
                           const std::string &filename) {
  // The rawfile handle is closed automatically on every return path.
  std::unique_ptr<RawFile, decltype(&OH_ResourceManager_CloseRawFile)> fp(
      OH_ResourceManager_OpenRawFile(mgr, filename.c_str()),
      OH_ResourceManager_CloseRawFile);

  if (!fp) {
    std::ostringstream os;
    os << "Read file '" << filename << "' failed.";
    // This function is compiled only for HarmonyOS. HiLog prints arguments
    // of a plain "%s" as <private>, so mark the argument as public to keep
    // the message readable.
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
    return {};
  }

  auto len = static_cast<int32_t>(OH_ResourceManager_GetRawFileSize(fp.get()));

  std::vector<char> buffer(len);

  int32_t n = OH_ResourceManager_ReadRawFile(fp.get(), buffer.data(), len);

  if (n != len) {
    std::ostringstream os;
    os << "Read file '" << filename << "' failed. Number of bytes read: " << n
       << ". Expected bytes to read: " << len;
    // See the note above about "%{public}s".
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
    return {};
  }

  return buffer;
}
#endif
Ort::Value Repeat(OrtAllocator *allocator, Ort::Value *cur_encoder_out,
const std::vector<int32_t> &hyps_num_split) {
std::vector<int64_t> cur_encoder_out_shape =

View File

@@ -22,6 +22,10 @@
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
namespace sherpa_onnx {
@@ -103,6 +107,11 @@ std::vector<char> ReadFile(const std::string &filename);
std::vector<char> ReadFile(AAssetManager *mgr, const std::string &filename);
#endif
#if __OHOS__
// Read the whole content of a HarmonyOS rawfile.
//
// @param mgr  Resource manager used to open the rawfile.
// @param filename  Name of the rawfile to read.
// @return The content of the file. It is empty if the file cannot be opened
//         or cannot be read completely.
std::vector<char> ReadFile(NativeResourceManager *mgr,
const std::string &filename);
#endif
// TODO(fangjun): Document it
Ort::Value Repeat(OrtAllocator *allocator, Ort::Value *cur_encoder_out,
const std::vector<int32_t> &hyps_num_split);

View File

@@ -37,8 +37,9 @@ class SileroVadModel::Impl {
min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config)
#if __ANDROID_API__ >= 9 || defined(__OHOS__)
template <typename Manager>
Impl(Manager *mgr, const VadModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
@@ -437,6 +438,12 @@ SileroVadModel::SileroVadModel(AAssetManager *mgr, const VadModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
#if __OHOS__
// HarmonyOS constructor: forwards the resource manager and the config to
// Impl, whose constructor is a template over the manager type.
SileroVadModel::SileroVadModel(NativeResourceManager *mgr,
const VadModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
SileroVadModel::~SileroVadModel() = default;
void SileroVadModel::Reset() { return impl_->Reset(); }

View File

@@ -11,6 +11,10 @@
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/vad-model.h"
namespace sherpa_onnx {
@@ -23,6 +27,10 @@ class SileroVadModel : public VadModel {
SileroVadModel(AAssetManager *mgr, const VadModelConfig &config);
#endif
#if __OHOS__
SileroVadModel(NativeResourceManager *mgr, const VadModelConfig &config);
#endif
~SileroVadModel() override;
// reset the internal model states

View File

@@ -21,4 +21,12 @@ std::unique_ptr<VadModel> VadModel::Create(AAssetManager *mgr,
}
#endif
#if __OHOS__
// HarmonyOS factory: builds a SileroVadModel from the given resource manager
// and config and returns it through the VadModel interface.
std::unique_ptr<VadModel> VadModel::Create(NativeResourceManager *mgr,
                                           const VadModelConfig &config) {
  // TODO(fangjun): Support other VAD models.
  std::unique_ptr<VadModel> model =
      std::make_unique<SileroVadModel>(mgr, config);
  return model;
}
#endif
} // namespace sherpa_onnx

View File

@@ -11,6 +11,10 @@
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
@@ -26,6 +30,11 @@ class VadModel {
const VadModelConfig &config);
#endif
#if __OHOS__
static std::unique_ptr<VadModel> Create(NativeResourceManager *mgr,
const VadModelConfig &config);
#endif
// reset the internal model states
virtual void Reset() = 0;

View File

@@ -22,8 +22,9 @@ class VoiceActivityDetector::Impl {
Init();
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config,
#if __ANDROID_API__ >= 9 || defined(__OHOS__)
template <typename Manager>
Impl(Manager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds = 60)
: model_(VadModel::Create(mgr, config)),
config_(config),
@@ -184,6 +185,13 @@ VoiceActivityDetector::VoiceActivityDetector(
: impl_(std::make_unique<Impl>(mgr, config, buffer_size_in_seconds)) {}
#endif
#if __OHOS__
// HarmonyOS constructor: forwards the resource manager, the config and the
// buffer size to Impl. The default value of buffer_size_in_seconds is given
// in the declaration, hence the commented-out default below.
VoiceActivityDetector::VoiceActivityDetector(
NativeResourceManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds /*= 60*/)
: impl_(std::make_unique<Impl>(mgr, config, buffer_size_in_seconds)) {}
#endif
VoiceActivityDetector::~VoiceActivityDetector() = default;
void VoiceActivityDetector::AcceptWaveform(const float *samples, int32_t n) {

View File

@@ -12,6 +12,10 @@
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
@@ -31,6 +35,12 @@ class VoiceActivityDetector {
float buffer_size_in_seconds = 60);
#endif
#if __OHOS__
VoiceActivityDetector(NativeResourceManager *mgr,
const VadModelConfig &config,
float buffer_size_in_seconds = 60);
#endif
~VoiceActivityDetector();
void AcceptWaveform(const float *samples, int32_t n);