diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml
index 44379769..4a4108c9 100644
--- a/.github/workflows/c-api.yaml
+++ b/.github/workflows/c-api.yaml
@@ -100,6 +100,28 @@ jobs:
           rm ./kws-c-api
           rm -rf sherpa-onnx-kws-*
 
+      - name: Test Kokoro TTS (zh+en)
+        shell: bash
+        run: |
+          gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+          tar xf kokoro-multi-lang-v1_0.tar.bz2
+          rm kokoro-multi-lang-v1_0.tar.bz2
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./kokoro-tts-zh-en-c-api
+
+          rm ./kokoro-tts-zh-en-c-api
+          # The model archive unpacks into kokoro-multi-lang-v1_0/
+          rm -rf kokoro-multi-lang-v1_0
+
       - name: Test Kokoro TTS (en)
         shell: bash
         run: |
diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt
index 3db3f253..44c5814a 100644
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -19,6 +19,9 @@ if(SHERPA_ONNX_ENABLE_TTS)
 
   add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
   target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
+
+  add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
+  target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)
 endif()
 
 if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
diff --git a/c-api-examples/kokoro-tts-zh-en-c-api.c b/c-api-examples/kokoro-tts-zh-en-c-api.c
new file mode 100644
index 00000000..4d998fb7
--- /dev/null
+++ b/c-api-examples/kokoro-tts-zh-en-c-api.c
@@ -0,0 +1,96 @@
+// c-api-examples/kokoro-tts-zh-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English + Chinese TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+./kokoro-tts-zh-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// Prints the synthesis progress to stderr. `progress` is multiplied by 100
+// and shown as a percentage; the audio samples are not used here.
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+                                float progress) {
+  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+  // return 1 to continue generating
+  // return 0 to stop generating
+  return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+  SherpaOnnxOfflineTtsConfig config;
+  memset(&config, 0, sizeof(config));
+  config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
+  config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
+  config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+  config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+  config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
+  config.model.kokoro.lexicon =
+      "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
+      "lexicon-zh.txt";
+
+  config.model.num_threads = 2;
+
+  // If you don't want to see debug messages, please set it to 0
+  config.model.debug = 1;
+
+  const char *filename = "./generated-kokoro-zh-en.wav";
+  const char *text =
+      "中英文语音合成测试。This is generated by next generation Kaldi using "
+      "Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+  if (!tts) {
+    // Bail out instead of passing a NULL engine to the calls below
+    fprintf(stderr, "Failed to create the TTS engine\n");
+    return -1;
+  }
+
+  int32_t sid = 0;    // there are 53 speakers
+  float speed = 1.0;  // larger -> faster in speech speed
+
+#if 0
+  // If you don't want to use a callback, then please enable this branch
+  const SherpaOnnxGeneratedAudio *audio =
+      SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+  const SherpaOnnxGeneratedAudio *audio =
+      SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+                                                       ProgressCallback);
+#endif
+
+  if (!audio) {
+    fprintf(stderr, "Failed to generate audio\n");
+    SherpaOnnxDestroyOfflineTts(tts);
+    return -1;
+  }
+
+  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+  SherpaOnnxDestroyOfflineTts(tts);
+
+  fprintf(stderr, "Input text is: %s\n", text);
+  fprintf(stderr, "Speaker ID is: %d\n", sid);
+  fprintf(stderr, "Saved to: %s\n", filename);
+
+  return 0;
+}
diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc
index 78701cf7..b6a2c9a8 100644
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -1120,6 +1120,10 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
       SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
   tts_config.model.kokoro.length_scale =
       SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);
+  tts_config.model.kokoro.dict_dir =
+      SHERPA_ONNX_OR(config->model.kokoro.dict_dir, "");
+  tts_config.model.kokoro.lexicon =
+      SHERPA_ONNX_OR(config->model.kokoro.lexicon, "");
 
   tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
   tts_config.model.debug = config->model.debug;
diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h
index 5fe124d4..cabfc15c 100644
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -926,6 +926,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
   const char *data_dir;
 
   float length_scale;  // < 1, faster in speech speed; > 1, slower in speed
+  const char *dict_dir;  // path to the dict directory
+  const char *lexicon;   // comma-separated list of lexicon files
 } SherpaOnnxOfflineTtsKokoroModelConfig;
 
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
diff --git a/sherpa-onnx/csrc/offline-tts-kokoro-impl.h b/sherpa-onnx/csrc/offline-tts-kokoro-impl.h
index 510f031c..416cadce 100644
--- a/sherpa-onnx/csrc/offline-tts-kokoro-impl.h
+++ b/sherpa-onnx/csrc/offline-tts-kokoro-impl.h
@@ -4,6 +4,8 @@
 #ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
 #define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
 
+#include <iomanip>
+#include <ios>
 #include <memory>
 #include <string>
 #include <strstream>
@@ -188,6 +190,21 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
       SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
 #else
       SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
+#endif
+      // Also log every byte of the text as two hex digits
+      std::ostringstream os;
+      os << "In bytes (hex):\n";
+      const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
+      for (size_t i = 0; i != text.size(); ++i) {
+        os << std::setw(2) << std::setfill('0') << std::hex
+           << static_cast<uint32_t>(p[i]) << " ";
+      }
+      os << "\n";
+
+#if __OHOS__
+      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
+#else
+      SHERPA_ONNX_LOGE("%s", os.str().c_str());
 #endif
     }
 