Support including TTS conditionally. (#699)
This commit is contained in:
@@ -15,13 +15,16 @@
|
||||
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/spoken-language-identification.h"
|
||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||
#include "sherpa-onnx/csrc/wave-reader.h"
|
||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#endif
|
||||
|
||||
// Wrapper struct that holds a sherpa_onnx::OnlineRecognizer through a
// std::unique_ptr, so the recognizer is released when this struct is destroyed.
struct SherpaOnnxOnlineRecognizer {
|
||||
// Owning pointer to the wrapped recognizer.
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
|
||||
};
|
||||
@@ -742,6 +745,7 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
|
||||
p->impl->Reset();
|
||||
}
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
// Wrapper struct that holds a sherpa_onnx::OfflineTts through a
// std::unique_ptr. It sits inside the preceding
// `#if SHERPA_ONNX_ENABLE_TTS == 1` guard, so it is only compiled when TTS
// support is enabled.
struct SherpaOnnxOfflineTts {
|
||||
// Owning pointer to the wrapped TTS object.
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
|
||||
};
|
||||
@@ -857,6 +861,7 @@ void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
|
||||
delete p;
|
||||
}
|
||||
}
|
||||
#endif // SHERPA_ONNX_ENABLE_TTS == 1
|
||||
|
||||
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
|
||||
int32_t sample_rate, const char *filename) {
|
||||
|
||||
@@ -43,7 +43,6 @@ set(sources
|
||||
offline-transducer-model-config.cc
|
||||
offline-transducer-model.cc
|
||||
offline-transducer-modified-beam-search-decoder.cc
|
||||
offline-tts-character-frontend.cc
|
||||
offline-wenet-ctc-model-config.cc
|
||||
offline-wenet-ctc-model.cc
|
||||
offline-whisper-greedy-search-decoder.cc
|
||||
@@ -79,7 +78,6 @@ set(sources
|
||||
packed-sequence.cc
|
||||
pad-sequence.cc
|
||||
parse-options.cc
|
||||
piper-phonemize-lexicon.cc
|
||||
provider.cc
|
||||
resample.cc
|
||||
session.cc
|
||||
@@ -99,6 +97,7 @@ set(sources
|
||||
vad-model.cc
|
||||
voice-activity-detector.cc
|
||||
wave-reader.cc
|
||||
wave-writer.cc
|
||||
)
|
||||
|
||||
# speaker embedding extractor
|
||||
@@ -110,15 +109,18 @@ list(APPEND sources
|
||||
speaker-embedding-manager.cc
|
||||
)
|
||||
|
||||
list(APPEND sources
|
||||
lexicon.cc
|
||||
offline-tts-impl.cc
|
||||
offline-tts-model-config.cc
|
||||
offline-tts-vits-model-config.cc
|
||||
offline-tts-vits-model.cc
|
||||
offline-tts.cc
|
||||
wave-writer.cc
|
||||
)
|
||||
# Append the sources listed below to `sources` only when
# SHERPA_ONNX_ENABLE_TTS is set; otherwise they are left out of the build.
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND sources
|
||||
lexicon.cc
|
||||
offline-tts-character-frontend.cc
|
||||
offline-tts-impl.cc
|
||||
offline-tts-model-config.cc
|
||||
offline-tts-vits-model-config.cc
|
||||
offline-tts-vits-model.cc
|
||||
offline-tts.cc
|
||||
piper-phonemize-lexicon.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_CHECK)
|
||||
list(APPEND sources log.cc)
|
||||
@@ -130,14 +132,21 @@ if(APPLE)
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
if(ANDROID_NDK)
|
||||
target_link_libraries(sherpa-onnx-core android log)
|
||||
endif()
|
||||
|
||||
target_link_libraries(sherpa-onnx-core kaldi-native-fbank-core)
|
||||
target_link_libraries(sherpa-onnx-core
|
||||
kaldi-native-fbank-core
|
||||
kaldi-decoder-core
|
||||
)
|
||||
|
||||
target_link_libraries(sherpa-onnx-core kaldi-decoder-core)
|
||||
if(SHERPA_ONNX_ENABLE_GPU)
|
||||
target_link_libraries(sherpa-onnx-core
|
||||
onnxruntime_providers_cuda
|
||||
onnxruntime_providers_shared
|
||||
)
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(sherpa-onnx-core onnxruntime)
|
||||
@@ -151,15 +160,10 @@ if(NOT BUILD_SHARED_LIBS AND APPLE)
|
||||
target_link_libraries(sherpa-onnx-core "-framework Foundation")
|
||||
endif()
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_GPU)
|
||||
target_link_libraries(sherpa-onnx-core
|
||||
onnxruntime_providers_cuda
|
||||
onnxruntime_providers_shared
|
||||
)
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
target_link_libraries(sherpa-onnx-core piper_phonemize)
|
||||
endif()
|
||||
|
||||
target_link_libraries(sherpa-onnx-core piper_phonemize)
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_CHECK)
|
||||
target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1)
|
||||
|
||||
@@ -185,17 +189,24 @@ if(SHERPA_ONNX_ENABLE_BINARY)
|
||||
add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc)
|
||||
add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc)
|
||||
add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)
|
||||
add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)
|
||||
add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc)
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)
|
||||
endif()
|
||||
|
||||
set(main_exes
|
||||
sherpa-onnx
|
||||
sherpa-onnx-keyword-spotter
|
||||
sherpa-onnx-offline
|
||||
sherpa-onnx-offline-parallel
|
||||
sherpa-onnx-offline-tts
|
||||
sherpa-onnx-offline-language-identification
|
||||
)
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND main_exes
|
||||
sherpa-onnx-offline-tts
|
||||
)
|
||||
endif()
|
||||
|
||||
foreach(exe IN LISTS main_exes)
|
||||
target_link_libraries(${exe} sherpa-onnx-core)
|
||||
@@ -235,17 +246,27 @@ endif()
|
||||
if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
|
||||
add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc)
|
||||
add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)
|
||||
add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc)
|
||||
add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
|
||||
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)
|
||||
endif()
|
||||
|
||||
set(exes
|
||||
sherpa-onnx-alsa
|
||||
sherpa-onnx-keyword-spotter-alsa
|
||||
sherpa-onnx-alsa-offline
|
||||
sherpa-onnx-offline-tts-play-alsa
|
||||
sherpa-onnx-alsa-offline-speaker-identification
|
||||
)
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND exes
|
||||
sherpa-onnx-offline-tts-play-alsa
|
||||
)
|
||||
endif()
|
||||
|
||||
foreach(exe IN LISTS exes)
|
||||
target_link_libraries(${exe} sherpa-onnx-core)
|
||||
endforeach()
|
||||
@@ -279,10 +300,12 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
endif()
|
||||
|
||||
if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
add_executable(sherpa-onnx-offline-tts-play
|
||||
sherpa-onnx-offline-tts-play.cc
|
||||
microphone.cc
|
||||
)
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
add_executable(sherpa-onnx-offline-tts-play
|
||||
sherpa-onnx-offline-tts-play.cc
|
||||
microphone.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
add_executable(sherpa-onnx-keyword-spotter-microphone
|
||||
sherpa-onnx-keyword-spotter-microphone.cc
|
||||
@@ -325,10 +348,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
sherpa-onnx-keyword-spotter-microphone
|
||||
sherpa-onnx-microphone-offline
|
||||
sherpa-onnx-microphone-offline-speaker-identification
|
||||
sherpa-onnx-offline-tts-play
|
||||
sherpa-onnx-vad-microphone
|
||||
sherpa-onnx-vad-microphone-offline-asr
|
||||
)
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND exes
|
||||
sherpa-onnx-offline-tts-play
|
||||
)
|
||||
endif()
|
||||
|
||||
foreach(exe IN LISTS exes)
|
||||
target_link_libraries(${exe} ${PA_LIB} sherpa-onnx-core)
|
||||
endforeach()
|
||||
@@ -369,10 +397,8 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
target_link_libraries(sherpa-onnx-online-websocket-client sherpa-onnx-core)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(sherpa-onnx-online-websocket-server -pthread)
|
||||
target_compile_options(sherpa-onnx-online-websocket-server PRIVATE -Wno-deprecated-declarations)
|
||||
|
||||
target_link_libraries(sherpa-onnx-online-websocket-client -pthread)
|
||||
target_compile_options(sherpa-onnx-online-websocket-client PRIVATE -Wno-deprecated-declarations)
|
||||
endif()
|
||||
|
||||
@@ -384,7 +410,6 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
target_link_libraries(sherpa-onnx-offline-websocket-server sherpa-onnx-core)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(sherpa-onnx-offline-websocket-server -pthread)
|
||||
target_compile_options(sherpa-onnx-offline-websocket-server PRIVATE -Wno-deprecated-declarations)
|
||||
endif()
|
||||
|
||||
@@ -422,13 +447,17 @@ if(SHERPA_ONNX_ENABLE_TESTS)
|
||||
context-graph-test.cc
|
||||
packed-sequence-test.cc
|
||||
pad-sequence-test.cc
|
||||
piper-phonemize-test.cc
|
||||
slice-test.cc
|
||||
stack-test.cc
|
||||
transpose-test.cc
|
||||
unbind-test.cc
|
||||
utfcpp-test.cc
|
||||
)
|
||||
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND sherpa_onnx_test_srcs
|
||||
piper-phonemize-test.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
list(APPEND sherpa_onnx_test_srcs
|
||||
speaker-embedding-manager-test.cc
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
@@ -33,6 +32,10 @@
|
||||
#include "sherpa-onnx/csrc/wave-reader.h"
|
||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#endif
|
||||
|
||||
#define SHERPA_ONNX_EXTERN_C extern "C"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
@@ -629,8 +632,8 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
|
||||
env->ReleaseStringUTFChars(s, p);
|
||||
|
||||
fid = env->GetFieldID(whisper_config_cls, "tailPaddings", "I");
|
||||
ans.model_config.whisper.tail_paddings = env->GetIntField(whisper_config,
|
||||
fid);
|
||||
ans.model_config.whisper.tail_paddings =
|
||||
env->GetIntField(whisper_config, fid);
|
||||
|
||||
return ans;
|
||||
}
|
||||
@@ -782,6 +785,7 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
|
||||
return ans;
|
||||
}
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
class SherpaOnnxOfflineTts {
|
||||
public:
|
||||
#if __ANDROID_API__ >= 9
|
||||
@@ -878,6 +882,7 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
|
||||
|
||||
return ans;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -1209,6 +1214,15 @@ Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames(
|
||||
return obj_arr;
|
||||
}
|
||||
|
||||
// see
|
||||
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
|
||||
// Creates a new java.lang.Integer holding `value`.
//
// @param env    JNI environment used for the class/method lookup and the
//               object construction.
// @param value  The integer to box.
// @return The object returned by NewObject for Integer's `(I)V` constructor.
//
// NOTE(review): the results of FindClass and GetMethodID are used without
// being checked here -- confirm a failed lookup cannot happen in practice.
static jobject NewInteger(JNIEnv *env, int32_t value) {
|
||||
jclass cls = env->FindClass("java/lang/Integer");
|
||||
// "<init>" with signature "(I)V" selects the constructor taking one int.
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
|
||||
return env->NewObject(cls, constructor, value);
|
||||
}
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
SHERPA_ONNX_EXTERN_C
|
||||
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(
|
||||
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
|
||||
@@ -1265,14 +1279,6 @@ JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers(
|
||||
->NumSpeakers();
|
||||
}
|
||||
|
||||
// see
|
||||
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
|
||||
// Creates a new java.lang.Integer holding `value`.
//
// @param env    JNI environment used for the class/method lookup and the
//               object construction.
// @param value  The integer to box.
// @return The object returned by NewObject for Integer's `(I)V` constructor.
//
// NOTE(review): the results of FindClass and GetMethodID are used without
// being checked here -- confirm a failed lookup cannot happen in practice.
static jobject NewInteger(JNIEnv *env, int32_t value) {
|
||||
jclass cls = env->FindClass("java/lang/Integer");
|
||||
// "<init>" with signature "(I)V" selects the constructor taking one int.
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
|
||||
return env->NewObject(cls, constructor, value);
|
||||
}
|
||||
|
||||
SHERPA_ONNX_EXTERN_C
|
||||
JNIEXPORT jobjectArray JNICALL
|
||||
Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/,
|
||||
@@ -1336,6 +1342,7 @@ Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(
|
||||
|
||||
return obj_arr;
|
||||
}
|
||||
#endif
|
||||
|
||||
SHERPA_ONNX_EXTERN_C
|
||||
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl(
|
||||
|
||||
@@ -15,9 +15,6 @@ set(srcs
|
||||
offline-stream.cc
|
||||
offline-tdnn-model-config.cc
|
||||
offline-transducer-model-config.cc
|
||||
offline-tts-model-config.cc
|
||||
offline-tts-vits-model-config.cc
|
||||
offline-tts.cc
|
||||
offline-wenet-ctc-model-config.cc
|
||||
offline-whisper-model-config.cc
|
||||
offline-zipformer-ctc-model-config.cc
|
||||
@@ -44,6 +41,14 @@ else()
|
||||
list(APPEND srcs faked-alsa.cc)
|
||||
endif()
|
||||
|
||||
# Append the sources listed below to `srcs` only when SHERPA_ONNX_ENABLE_TTS
# is set; `srcs` is what pybind11_add_module(_sherpa_onnx ...) consumes below.
if(SHERPA_ONNX_ENABLE_TTS)
|
||||
list(APPEND srcs
|
||||
offline-tts-model-config.cc
|
||||
offline-tts-vits-model-config.cc
|
||||
offline-tts.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
pybind11_add_module(_sherpa_onnx ${srcs})
|
||||
|
||||
if(APPLE)
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "sherpa-onnx/python/csrc/offline-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/python/csrc/offline-stream.h"
|
||||
#include "sherpa-onnx/python/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/python/csrc/online-lm-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-recognizer.h"
|
||||
@@ -27,6 +26,10 @@
|
||||
#include "sherpa-onnx/python/csrc/vad-model.h"
|
||||
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
#include "sherpa-onnx/python/csrc/offline-tts.h"
|
||||
#endif
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
PYBIND11_MODULE(_sherpa_onnx, m) {
|
||||
@@ -53,7 +56,10 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
|
||||
PybindCircularBuffer(&m);
|
||||
PybindVoiceActivityDetector(&m);
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
PybindOfflineTts(&m);
|
||||
#endif
|
||||
|
||||
PybindSpeakerEmbeddingExtractor(&m);
|
||||
PybindSpeakerEmbeddingManager(&m);
|
||||
PybindSpokenLanguageIdentification(&m);
|
||||
|
||||
Reference in New Issue
Block a user