Use piper-phonemize to convert text to token IDs (#453)

2023-11-30 23:57:43 +08:00
parent db41778e99
commit 62dc3c3e46
55 changed files with 1048 additions and 192 deletions
--- a/sherpa-onnx/csrc/offline-tts-frontend.h
+++ b/sherpa-onnx/csrc/offline-tts-frontend.h
@@ -0,0 +1,35 @@
+// sherpa-onnx/csrc/offline-tts-frontend.h
+//
+// Copyright (c)  2023  Xiaomi Corporation
+
+#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
+#define SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace sherpa_onnx {
+
+class OfflineTtsFrontend {
+ public:
+  virtual ~OfflineTtsFrontend() = default;
+
+  /** Convert a string to token IDs.
+   *
+   * @param text The input text.
+   *             Example 1: "This is the first sample sentence; this is the
+   *             second one." Example 2: "这是第一句。这是第二句。"
+   * @param voice Optional. It is for espeak-ng.
+   *
+   * @return Return a vector-of-vector of token IDs. Each subvector contains
+   *         a sentence that can be processed independently.
+   *         If a frontend does not support splitting the text into sentences,
+   *         the resulting vector contains only one subvector.
+   */
+  virtual std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
+      const std::string &text, const std::string &voice = "") const = 0;
+};
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_