update README
This commit is contained in:
24596
mlu_370-piper/piper/src/cpp/json.hpp
Normal file
24596
mlu_370-piper/piper/src/cpp/json.hpp
Normal file
File diff suppressed because it is too large
Load Diff
561
mlu_370-piper/piper/src/cpp/main.cpp
Normal file
561
mlu_370-piper/piper/src/cpp/main.cpp
Normal file
@@ -0,0 +1,561 @@
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <mach-o/dyld.h>
|
||||
#endif
|
||||
|
||||
#include <spdlog/sinks/stdout_color_sinks.h>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "json.hpp"
|
||||
#include "piper.hpp"
|
||||
|
||||
using namespace std;
|
||||
using json = nlohmann::json;
|
||||
|
||||
enum OutputType { OUTPUT_FILE, OUTPUT_DIRECTORY, OUTPUT_STDOUT, OUTPUT_RAW };
|
||||
|
||||
struct RunConfig {
|
||||
// Path to .onnx voice file
|
||||
filesystem::path modelPath;
|
||||
|
||||
// Path to JSON voice config file
|
||||
filesystem::path modelConfigPath;
|
||||
|
||||
// Type of output to produce.
|
||||
// Default is to write a WAV file in the current directory.
|
||||
OutputType outputType = OUTPUT_DIRECTORY;
|
||||
|
||||
// Path for output
|
||||
optional<filesystem::path> outputPath = filesystem::path(".");
|
||||
|
||||
// Numerical id of the default speaker (multi-speaker voices)
|
||||
optional<piper::SpeakerId> speakerId;
|
||||
|
||||
// Amount of noise to add during audio generation
|
||||
optional<float> noiseScale;
|
||||
|
||||
// Speed of speaking (1 = normal, < 1 is faster, > 1 is slower)
|
||||
optional<float> lengthScale;
|
||||
|
||||
// Variation in phoneme lengths
|
||||
optional<float> noiseW;
|
||||
|
||||
// Seconds of silence to add after each sentence
|
||||
optional<float> sentenceSilenceSeconds;
|
||||
|
||||
// Path to espeak-ng data directory (default is next to piper executable)
|
||||
optional<filesystem::path> eSpeakDataPath;
|
||||
|
||||
// Path to libtashkeel ort model
|
||||
// https://github.com/mush42/libtashkeel/
|
||||
optional<filesystem::path> tashkeelModelPath;
|
||||
|
||||
// stdin input is lines of JSON instead of text with format:
|
||||
// {
|
||||
// "text": str, (required)
|
||||
// "speaker_id": int, (optional)
|
||||
// "speaker": str, (optional)
|
||||
// "output_file": str, (optional)
|
||||
// }
|
||||
bool jsonInput = false;
|
||||
|
||||
// Seconds of extra silence to insert after a single phoneme
|
||||
optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
|
||||
|
||||
// true to use CUDA execution provider
|
||||
bool useCuda = false;
|
||||
};
|
||||
|
||||
void parseArgs(int argc, char *argv[], RunConfig &runConfig);
|
||||
void rawOutputProc(vector<int16_t> &sharedAudioBuffer, mutex &mutAudio,
|
||||
condition_variable &cvAudio, bool &audioReady,
|
||||
bool &audioFinished);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
spdlog::set_default_logger(spdlog::stderr_color_st("piper"));
|
||||
|
||||
RunConfig runConfig;
|
||||
parseArgs(argc, argv, runConfig);
|
||||
|
||||
#ifdef _WIN32
|
||||
// Required on Windows to show IPA symbols
|
||||
SetConsoleOutputCP(CP_UTF8);
|
||||
#endif
|
||||
|
||||
piper::PiperConfig piperConfig;
|
||||
piper::Voice voice;
|
||||
|
||||
spdlog::debug("Loading voice from {} (config={})",
|
||||
runConfig.modelPath.string(),
|
||||
runConfig.modelConfigPath.string());
|
||||
|
||||
auto startTime = chrono::steady_clock::now();
|
||||
loadVoice(piperConfig, runConfig.modelPath.string(),
|
||||
runConfig.modelConfigPath.string(), voice, runConfig.speakerId,
|
||||
runConfig.useCuda);
|
||||
auto endTime = chrono::steady_clock::now();
|
||||
spdlog::info("Loaded voice in {} second(s)",
|
||||
chrono::duration<double>(endTime - startTime).count());
|
||||
|
||||
// Get the path to the piper executable so we can locate espeak-ng-data, etc.
|
||||
// next to it.
|
||||
#ifdef _MSC_VER
|
||||
auto exePath = []() {
|
||||
wchar_t moduleFileName[MAX_PATH] = {0};
|
||||
GetModuleFileNameW(nullptr, moduleFileName, std::size(moduleFileName));
|
||||
return filesystem::path(moduleFileName);
|
||||
}();
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
auto exePath = []() {
|
||||
char moduleFileName[PATH_MAX] = {0};
|
||||
uint32_t moduleFileNameSize = std::size(moduleFileName);
|
||||
_NSGetExecutablePath(moduleFileName, &moduleFileNameSize);
|
||||
return filesystem::path(moduleFileName);
|
||||
}();
|
||||
#else
|
||||
auto exePath = filesystem::canonical("/proc/self/exe");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (voice.phonemizeConfig.phonemeType == piper::eSpeakPhonemes) {
|
||||
spdlog::debug("Voice uses eSpeak phonemes ({})",
|
||||
voice.phonemizeConfig.eSpeak.voice);
|
||||
|
||||
if (runConfig.eSpeakDataPath) {
|
||||
// User provided path
|
||||
piperConfig.eSpeakDataPath = runConfig.eSpeakDataPath.value().string();
|
||||
} else {
|
||||
// Assume next to piper executable
|
||||
piperConfig.eSpeakDataPath =
|
||||
std::filesystem::absolute(
|
||||
exePath.parent_path().append("espeak-ng-data"))
|
||||
.string();
|
||||
|
||||
spdlog::debug("espeak-ng-data directory is expected at {}",
|
||||
piperConfig.eSpeakDataPath);
|
||||
}
|
||||
} else {
|
||||
// Not using eSpeak
|
||||
piperConfig.useESpeak = false;
|
||||
}
|
||||
|
||||
// Enable libtashkeel for Arabic
|
||||
if (voice.phonemizeConfig.eSpeak.voice == "ar") {
|
||||
piperConfig.useTashkeel = true;
|
||||
if (runConfig.tashkeelModelPath) {
|
||||
// User provided path
|
||||
piperConfig.tashkeelModelPath =
|
||||
runConfig.tashkeelModelPath.value().string();
|
||||
} else {
|
||||
// Assume next to piper executable
|
||||
piperConfig.tashkeelModelPath =
|
||||
std::filesystem::absolute(
|
||||
exePath.parent_path().append("libtashkeel_model.ort"))
|
||||
.string();
|
||||
|
||||
spdlog::debug("libtashkeel model is expected at {}",
|
||||
piperConfig.tashkeelModelPath.value());
|
||||
}
|
||||
}
|
||||
|
||||
piper::initialize(piperConfig);
|
||||
|
||||
// Scales
|
||||
if (runConfig.noiseScale) {
|
||||
voice.synthesisConfig.noiseScale = runConfig.noiseScale.value();
|
||||
}
|
||||
|
||||
if (runConfig.lengthScale) {
|
||||
voice.synthesisConfig.lengthScale = runConfig.lengthScale.value();
|
||||
}
|
||||
|
||||
if (runConfig.noiseW) {
|
||||
voice.synthesisConfig.noiseW = runConfig.noiseW.value();
|
||||
}
|
||||
|
||||
if (runConfig.sentenceSilenceSeconds) {
|
||||
voice.synthesisConfig.sentenceSilenceSeconds =
|
||||
runConfig.sentenceSilenceSeconds.value();
|
||||
}
|
||||
|
||||
if (runConfig.phonemeSilenceSeconds) {
|
||||
if (!voice.synthesisConfig.phonemeSilenceSeconds) {
|
||||
// Overwrite
|
||||
voice.synthesisConfig.phonemeSilenceSeconds =
|
||||
runConfig.phonemeSilenceSeconds;
|
||||
} else {
|
||||
// Merge
|
||||
for (const auto &[phoneme, silenceSeconds] :
|
||||
*runConfig.phonemeSilenceSeconds) {
|
||||
voice.synthesisConfig.phonemeSilenceSeconds->try_emplace(
|
||||
phoneme, silenceSeconds);
|
||||
}
|
||||
}
|
||||
|
||||
} // if phonemeSilenceSeconds
|
||||
|
||||
if (runConfig.outputType == OUTPUT_DIRECTORY) {
|
||||
runConfig.outputPath = filesystem::absolute(runConfig.outputPath.value());
|
||||
spdlog::info("Output directory: {}", runConfig.outputPath.value().string());
|
||||
}
|
||||
|
||||
string line;
|
||||
piper::SynthesisResult result;
|
||||
while (getline(cin, line)) {
|
||||
auto outputType = runConfig.outputType;
|
||||
auto speakerId = voice.synthesisConfig.speakerId;
|
||||
std::optional<filesystem::path> maybeOutputPath = runConfig.outputPath;
|
||||
|
||||
if (runConfig.jsonInput) {
|
||||
// Each line is a JSON object
|
||||
json lineRoot = json::parse(line);
|
||||
|
||||
// Text is required
|
||||
line = lineRoot["text"].get<std::string>();
|
||||
|
||||
if (lineRoot.contains("output_file")) {
|
||||
// Override output WAV file path
|
||||
outputType = OUTPUT_FILE;
|
||||
maybeOutputPath =
|
||||
filesystem::path(lineRoot["output_file"].get<std::string>());
|
||||
}
|
||||
|
||||
if (lineRoot.contains("speaker_id")) {
|
||||
// Override speaker id
|
||||
voice.synthesisConfig.speakerId =
|
||||
lineRoot["speaker_id"].get<piper::SpeakerId>();
|
||||
} else if (lineRoot.contains("speaker")) {
|
||||
// Resolve to id using speaker id map
|
||||
auto speakerName = lineRoot["speaker"].get<std::string>();
|
||||
if ((voice.modelConfig.speakerIdMap) &&
|
||||
(voice.modelConfig.speakerIdMap->count(speakerName) > 0)) {
|
||||
voice.synthesisConfig.speakerId =
|
||||
(*voice.modelConfig.speakerIdMap)[speakerName];
|
||||
} else {
|
||||
spdlog::warn("No speaker named: {}", speakerName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Timestamp is used for path to output WAV file
|
||||
const auto now = chrono::system_clock::now();
|
||||
const auto timestamp =
|
||||
chrono::duration_cast<chrono::nanoseconds>(now.time_since_epoch())
|
||||
.count();
|
||||
|
||||
if (outputType == OUTPUT_DIRECTORY) {
|
||||
// Generate path using timestamp
|
||||
stringstream outputName;
|
||||
outputName << timestamp << ".wav";
|
||||
filesystem::path outputPath = runConfig.outputPath.value();
|
||||
outputPath.append(outputName.str());
|
||||
|
||||
// Output audio to automatically-named WAV file in a directory
|
||||
ofstream audioFile(outputPath.string(), ios::binary);
|
||||
piper::textToWavFile(piperConfig, voice, line, audioFile, result);
|
||||
cout << outputPath.string() << endl;
|
||||
} else if (outputType == OUTPUT_FILE) {
|
||||
if (!maybeOutputPath || maybeOutputPath->empty()) {
|
||||
throw runtime_error("No output path provided");
|
||||
}
|
||||
|
||||
filesystem::path outputPath = maybeOutputPath.value();
|
||||
|
||||
if (!runConfig.jsonInput) {
|
||||
// Read all of standard input before synthesizing.
|
||||
// Otherwise, we would overwrite the output file for each line.
|
||||
stringstream text;
|
||||
text << line;
|
||||
while (getline(cin, line)) {
|
||||
text << " " << line;
|
||||
}
|
||||
|
||||
line = text.str();
|
||||
}
|
||||
|
||||
// Output audio to WAV file
|
||||
ofstream audioFile(outputPath.string(), ios::binary);
|
||||
piper::textToWavFile(piperConfig, voice, line, audioFile, result);
|
||||
cout << outputPath.string() << endl;
|
||||
} else if (outputType == OUTPUT_STDOUT) {
|
||||
// Output WAV to stdout
|
||||
piper::textToWavFile(piperConfig, voice, line, cout, result);
|
||||
} else if (outputType == OUTPUT_RAW) {
|
||||
// Raw output to stdout
|
||||
mutex mutAudio;
|
||||
condition_variable cvAudio;
|
||||
bool audioReady = false;
|
||||
bool audioFinished = false;
|
||||
vector<int16_t> audioBuffer;
|
||||
vector<int16_t> sharedAudioBuffer;
|
||||
|
||||
#ifdef _WIN32
|
||||
// Needed on Windows to avoid terminal conversions
|
||||
setmode(fileno(stdout), O_BINARY);
|
||||
setmode(fileno(stdin), O_BINARY);
|
||||
#endif
|
||||
|
||||
thread rawOutputThread(rawOutputProc, ref(sharedAudioBuffer),
|
||||
ref(mutAudio), ref(cvAudio), ref(audioReady),
|
||||
ref(audioFinished));
|
||||
auto audioCallback = [&audioBuffer, &sharedAudioBuffer, &mutAudio,
|
||||
&cvAudio, &audioReady]() {
|
||||
// Signal thread that audio is ready
|
||||
{
|
||||
unique_lock lockAudio(mutAudio);
|
||||
copy(audioBuffer.begin(), audioBuffer.end(),
|
||||
back_inserter(sharedAudioBuffer));
|
||||
audioReady = true;
|
||||
cvAudio.notify_one();
|
||||
}
|
||||
};
|
||||
piper::textToAudio(piperConfig, voice, line, audioBuffer, result,
|
||||
audioCallback);
|
||||
|
||||
// Signal thread that there is no more audio
|
||||
{
|
||||
unique_lock lockAudio(mutAudio);
|
||||
audioReady = true;
|
||||
audioFinished = true;
|
||||
cvAudio.notify_one();
|
||||
}
|
||||
|
||||
// Wait for audio output to finish
|
||||
spdlog::info("Waiting for audio to finish playing...");
|
||||
rawOutputThread.join();
|
||||
}
|
||||
|
||||
spdlog::info("Real-time factor: {} (infer={} sec, audio={} sec)",
|
||||
result.realTimeFactor, result.inferSeconds,
|
||||
result.audioSeconds);
|
||||
|
||||
// Restore config (--json-input)
|
||||
voice.synthesisConfig.speakerId = speakerId;
|
||||
|
||||
} // for each line
|
||||
|
||||
piper::terminate(piperConfig);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void rawOutputProc(vector<int16_t> &sharedAudioBuffer, mutex &mutAudio,
|
||||
condition_variable &cvAudio, bool &audioReady,
|
||||
bool &audioFinished) {
|
||||
vector<int16_t> internalAudioBuffer;
|
||||
while (true) {
|
||||
{
|
||||
unique_lock lockAudio{mutAudio};
|
||||
cvAudio.wait(lockAudio, [&audioReady] { return audioReady; });
|
||||
|
||||
if (sharedAudioBuffer.empty() && audioFinished) {
|
||||
break;
|
||||
}
|
||||
|
||||
copy(sharedAudioBuffer.begin(), sharedAudioBuffer.end(),
|
||||
back_inserter(internalAudioBuffer));
|
||||
|
||||
sharedAudioBuffer.clear();
|
||||
|
||||
if (!audioFinished) {
|
||||
audioReady = false;
|
||||
}
|
||||
}
|
||||
|
||||
cout.write((const char *)internalAudioBuffer.data(),
|
||||
sizeof(int16_t) * internalAudioBuffer.size());
|
||||
cout.flush();
|
||||
internalAudioBuffer.clear();
|
||||
}
|
||||
|
||||
} // rawOutputProc
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void printUsage(char *argv[]) {
|
||||
cerr << endl;
|
||||
cerr << "usage: " << argv[0] << " [options]" << endl;
|
||||
cerr << endl;
|
||||
cerr << "options:" << endl;
|
||||
cerr << " -h --help show this message and exit" << endl;
|
||||
cerr << " -m FILE --model FILE path to onnx model file" << endl;
|
||||
cerr << " -c FILE --config FILE path to model config file "
|
||||
"(default: model path + .json)"
|
||||
<< endl;
|
||||
cerr << " -f FILE --output_file FILE path to output WAV file ('-' for "
|
||||
"stdout)"
|
||||
<< endl;
|
||||
cerr << " -d DIR --output_dir DIR path to output directory (default: "
|
||||
"cwd)"
|
||||
<< endl;
|
||||
cerr << " --output_raw output raw audio to stdout as it "
|
||||
"becomes available"
|
||||
<< endl;
|
||||
cerr << " -s NUM --speaker NUM id of speaker (default: 0)" << endl;
|
||||
cerr << " --noise_scale NUM generator noise (default: 0.667)"
|
||||
<< endl;
|
||||
cerr << " --length_scale NUM phoneme length (default: 1.0)"
|
||||
<< endl;
|
||||
cerr << " --noise_w NUM phoneme width noise (default: 0.8)"
|
||||
<< endl;
|
||||
cerr << " --sentence_silence NUM seconds of silence after each "
|
||||
"sentence (default: 0.2)"
|
||||
<< endl;
|
||||
cerr << " --espeak_data DIR path to espeak-ng data directory"
|
||||
<< endl;
|
||||
cerr << " --tashkeel_model FILE path to libtashkeel onnx model "
|
||||
"(arabic)"
|
||||
<< endl;
|
||||
cerr << " --json-input stdin input is lines of JSON "
|
||||
"instead of plain text"
|
||||
<< endl;
|
||||
cerr << " --use-cuda use CUDA execution provider"
|
||||
<< endl;
|
||||
cerr << " --debug print DEBUG messages to the console"
|
||||
<< endl;
|
||||
cerr << " -q --quiet disable logging" << endl;
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
void ensureArg(int argc, char *argv[], int argi) {
|
||||
if ((argi + 1) >= argc) {
|
||||
printUsage(argv);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse command-line arguments
|
||||
void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
|
||||
optional<filesystem::path> modelConfigPath;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-m" || arg == "--model") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.modelPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "-c" || arg == "--config") {
|
||||
ensureArg(argc, argv, i);
|
||||
modelConfigPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "-f" || arg == "--output_file" ||
|
||||
arg == "--output-file") {
|
||||
ensureArg(argc, argv, i);
|
||||
std::string filePath = argv[++i];
|
||||
if (filePath == "-") {
|
||||
runConfig.outputType = OUTPUT_STDOUT;
|
||||
runConfig.outputPath = nullopt;
|
||||
} else {
|
||||
runConfig.outputType = OUTPUT_FILE;
|
||||
runConfig.outputPath = filesystem::path(filePath);
|
||||
}
|
||||
} else if (arg == "-d" || arg == "--output_dir" || arg == "output-dir") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.outputType = OUTPUT_DIRECTORY;
|
||||
runConfig.outputPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "--output_raw" || arg == "--output-raw") {
|
||||
runConfig.outputType = OUTPUT_RAW;
|
||||
} else if (arg == "-s" || arg == "--speaker") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]);
|
||||
} else if (arg == "--noise_scale" || arg == "--noise-scale") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.noiseScale = stof(argv[++i]);
|
||||
} else if (arg == "--length_scale" || arg == "--length-scale") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.lengthScale = stof(argv[++i]);
|
||||
} else if (arg == "--noise_w" || arg == "--noise-w") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.noiseW = stof(argv[++i]);
|
||||
} else if (arg == "--sentence_silence" || arg == "--sentence-silence") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.sentenceSilenceSeconds = stof(argv[++i]);
|
||||
} else if (arg == "--phoneme_silence" || arg == "--phoneme-silence") {
|
||||
ensureArg(argc, argv, i);
|
||||
ensureArg(argc, argv, i + 1);
|
||||
auto phonemeStr = std::string(argv[++i]);
|
||||
if (!piper::isSingleCodepoint(phonemeStr)) {
|
||||
std::cerr << "Phoneme '" << phonemeStr
|
||||
<< "' is not a single codepoint (--phoneme_silence)"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!runConfig.phonemeSilenceSeconds) {
|
||||
runConfig.phonemeSilenceSeconds.emplace();
|
||||
}
|
||||
|
||||
auto phoneme = piper::getCodepoint(phonemeStr);
|
||||
(*runConfig.phonemeSilenceSeconds)[phoneme] = stof(argv[++i]);
|
||||
} else if (arg == "--espeak_data" || arg == "--espeak-data") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.eSpeakDataPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "--tashkeel_model" || arg == "--tashkeel-model") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.tashkeelModelPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "--json_input" || arg == "--json-input") {
|
||||
runConfig.jsonInput = true;
|
||||
} else if (arg == "--use_cuda" || arg == "--use-cuda") {
|
||||
runConfig.useCuda = true;
|
||||
} else if (arg == "--version") {
|
||||
std::cout << piper::getVersion() << std::endl;
|
||||
exit(0);
|
||||
} else if (arg == "--debug") {
|
||||
// Set DEBUG logging
|
||||
spdlog::set_level(spdlog::level::debug);
|
||||
} else if (arg == "-q" || arg == "--quiet") {
|
||||
// diable logging
|
||||
spdlog::set_level(spdlog::level::off);
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
printUsage(argv);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify model file exists
|
||||
ifstream modelFile(runConfig.modelPath.c_str(), ios::binary);
|
||||
if (!modelFile.good()) {
|
||||
throw runtime_error("Model file doesn't exist");
|
||||
}
|
||||
|
||||
if (!modelConfigPath) {
|
||||
runConfig.modelConfigPath =
|
||||
filesystem::path(runConfig.modelPath.string() + ".json");
|
||||
} else {
|
||||
runConfig.modelConfigPath = modelConfigPath.value();
|
||||
}
|
||||
|
||||
// Verify model config exists
|
||||
ifstream modelConfigFile(runConfig.modelConfigPath.c_str());
|
||||
if (!modelConfigFile.good()) {
|
||||
throw runtime_error("Model config doesn't exist");
|
||||
}
|
||||
}
|
||||
636
mlu_370-piper/piper/src/cpp/piper.cpp
Normal file
636
mlu_370-piper/piper/src/cpp/piper.cpp
Normal file
@@ -0,0 +1,636 @@
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <espeak-ng/speak_lib.h>
|
||||
#include <onnxruntime_cxx_api.h>
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "json.hpp"
|
||||
#include "piper.hpp"
|
||||
#include "utf8.h"
|
||||
#include "wavfile.hpp"
|
||||
|
||||
namespace piper {
|
||||
|
||||
#ifdef _PIPER_VERSION
|
||||
// https://stackoverflow.com/questions/47346133/how-to-use-a-define-inside-a-format-string
|
||||
#define _STR(x) #x
|
||||
#define STR(x) _STR(x)
|
||||
const std::string VERSION = STR(_PIPER_VERSION);
|
||||
#else
|
||||
const std::string VERSION = "";
|
||||
#endif
|
||||
|
||||
// Maximum value for 16-bit signed WAV sample
|
||||
const float MAX_WAV_VALUE = 32767.0f;
|
||||
|
||||
const std::string instanceName{"piper"};
|
||||
|
||||
std::string getVersion() { return VERSION; }
|
||||
|
||||
// True if the string is a single UTF-8 codepoint
|
||||
bool isSingleCodepoint(std::string s) {
|
||||
return utf8::distance(s.begin(), s.end()) == 1;
|
||||
}
|
||||
|
||||
// Get the first UTF-8 codepoint of a string
|
||||
Phoneme getCodepoint(std::string s) {
|
||||
utf8::iterator character_iter(s.begin(), s.begin(), s.end());
|
||||
return *character_iter;
|
||||
}
|
||||
|
||||
// Load JSON config information for phonemization
|
||||
void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) {
|
||||
// {
|
||||
// "espeak": {
|
||||
// "voice": "<language code>"
|
||||
// },
|
||||
// "phoneme_type": "<espeak or text>",
|
||||
// "phoneme_map": {
|
||||
// "<from phoneme>": ["<to phoneme 1>", "<to phoneme 2>", ...]
|
||||
// },
|
||||
// "phoneme_id_map": {
|
||||
// "<phoneme>": [<id1>, <id2>, ...]
|
||||
// }
|
||||
// }
|
||||
|
||||
if (configRoot.contains("espeak")) {
|
||||
auto espeakValue = configRoot["espeak"];
|
||||
if (espeakValue.contains("voice")) {
|
||||
phonemizeConfig.eSpeak.voice = espeakValue["voice"].get<std::string>();
|
||||
}
|
||||
}
|
||||
|
||||
if (configRoot.contains("phoneme_type")) {
|
||||
auto phonemeTypeStr = configRoot["phoneme_type"].get<std::string>();
|
||||
if (phonemeTypeStr == "text") {
|
||||
phonemizeConfig.phonemeType = TextPhonemes;
|
||||
}
|
||||
}
|
||||
|
||||
// phoneme to [id] map
|
||||
// Maps phonemes to one or more phoneme ids (required).
|
||||
if (configRoot.contains("phoneme_id_map")) {
|
||||
auto phonemeIdMapValue = configRoot["phoneme_id_map"];
|
||||
for (auto &fromPhonemeItem : phonemeIdMapValue.items()) {
|
||||
std::string fromPhoneme = fromPhonemeItem.key();
|
||||
if (!isSingleCodepoint(fromPhoneme)) {
|
||||
std::stringstream idsStr;
|
||||
for (auto &toIdValue : fromPhonemeItem.value()) {
|
||||
PhonemeId toId = toIdValue.get<PhonemeId>();
|
||||
idsStr << toId << ",";
|
||||
}
|
||||
|
||||
spdlog::error("\"{}\" is not a single codepoint (ids={})", fromPhoneme,
|
||||
idsStr.str());
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme id map)");
|
||||
}
|
||||
|
||||
auto fromCodepoint = getCodepoint(fromPhoneme);
|
||||
for (auto &toIdValue : fromPhonemeItem.value()) {
|
||||
PhonemeId toId = toIdValue.get<PhonemeId>();
|
||||
phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// phoneme to [phoneme] map
|
||||
// Maps phonemes to one or more other phonemes (not normally used).
|
||||
if (configRoot.contains("phoneme_map")) {
|
||||
if (!phonemizeConfig.phonemeMap) {
|
||||
phonemizeConfig.phonemeMap.emplace();
|
||||
}
|
||||
|
||||
auto phonemeMapValue = configRoot["phoneme_map"];
|
||||
for (auto &fromPhonemeItem : phonemeMapValue.items()) {
|
||||
std::string fromPhoneme = fromPhonemeItem.key();
|
||||
if (!isSingleCodepoint(fromPhoneme)) {
|
||||
spdlog::error("\"{}\" is not a single codepoint", fromPhoneme);
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme map)");
|
||||
}
|
||||
|
||||
auto fromCodepoint = getCodepoint(fromPhoneme);
|
||||
for (auto &toPhonemeValue : fromPhonemeItem.value()) {
|
||||
std::string toPhoneme = toPhonemeValue.get<std::string>();
|
||||
if (!isSingleCodepoint(toPhoneme)) {
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme map)");
|
||||
}
|
||||
|
||||
auto toCodepoint = getCodepoint(toPhoneme);
|
||||
(*phonemizeConfig.phonemeMap)[fromCodepoint].push_back(toCodepoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* parsePhonemizeConfig */
|
||||
|
||||
// Load JSON config for audio synthesis
|
||||
void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) {
|
||||
// {
|
||||
// "audio": {
|
||||
// "sample_rate": 22050
|
||||
// },
|
||||
// "inference": {
|
||||
// "noise_scale": 0.667,
|
||||
// "length_scale": 1,
|
||||
// "noise_w": 0.8,
|
||||
// "phoneme_silence": {
|
||||
// "<phoneme>": <seconds of silence>,
|
||||
// ...
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (configRoot.contains("audio")) {
|
||||
auto audioValue = configRoot["audio"];
|
||||
if (audioValue.contains("sample_rate")) {
|
||||
// Default sample rate is 22050 Hz
|
||||
synthesisConfig.sampleRate = audioValue.value("sample_rate", 22050);
|
||||
}
|
||||
}
|
||||
|
||||
if (configRoot.contains("inference")) {
|
||||
// Overrides default inference settings
|
||||
auto inferenceValue = configRoot["inference"];
|
||||
if (inferenceValue.contains("noise_scale")) {
|
||||
synthesisConfig.noiseScale = inferenceValue.value("noise_scale", 0.667f);
|
||||
}
|
||||
|
||||
if (inferenceValue.contains("length_scale")) {
|
||||
synthesisConfig.lengthScale = inferenceValue.value("length_scale", 1.0f);
|
||||
}
|
||||
|
||||
if (inferenceValue.contains("noise_w")) {
|
||||
synthesisConfig.noiseW = inferenceValue.value("noise_w", 0.8f);
|
||||
}
|
||||
|
||||
if (inferenceValue.contains("phoneme_silence")) {
|
||||
// phoneme -> seconds of silence to add after
|
||||
synthesisConfig.phonemeSilenceSeconds.emplace();
|
||||
auto phonemeSilenceValue = inferenceValue["phoneme_silence"];
|
||||
for (auto &phonemeItem : phonemeSilenceValue.items()) {
|
||||
std::string phonemeStr = phonemeItem.key();
|
||||
if (!isSingleCodepoint(phonemeStr)) {
|
||||
spdlog::error("\"{}\" is not a single codepoint", phonemeStr);
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme silence)");
|
||||
}
|
||||
|
||||
auto phoneme = getCodepoint(phonemeStr);
|
||||
(*synthesisConfig.phonemeSilenceSeconds)[phoneme] =
|
||||
phonemeItem.value().get<float>();
|
||||
}
|
||||
|
||||
} // if phoneme_silence
|
||||
|
||||
} // if inference
|
||||
|
||||
} /* parseSynthesisConfig */
|
||||
|
||||
void parseModelConfig(json &configRoot, ModelConfig &modelConfig) {
|
||||
|
||||
modelConfig.numSpeakers = configRoot["num_speakers"].get<SpeakerId>();
|
||||
|
||||
if (configRoot.contains("speaker_id_map")) {
|
||||
if (!modelConfig.speakerIdMap) {
|
||||
modelConfig.speakerIdMap.emplace();
|
||||
}
|
||||
|
||||
auto speakerIdMapValue = configRoot["speaker_id_map"];
|
||||
for (auto &speakerItem : speakerIdMapValue.items()) {
|
||||
std::string speakerName = speakerItem.key();
|
||||
(*modelConfig.speakerIdMap)[speakerName] =
|
||||
speakerItem.value().get<SpeakerId>();
|
||||
}
|
||||
}
|
||||
|
||||
} /* parseModelConfig */
|
||||
|
||||
void initialize(PiperConfig &config) {
|
||||
if (config.useESpeak) {
|
||||
// Set up espeak-ng for calling espeak_TextToPhonemesWithTerminator
|
||||
// See: https://github.com/rhasspy/espeak-ng
|
||||
spdlog::debug("Initializing eSpeak");
|
||||
int result = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
|
||||
/*buflength*/ 0,
|
||||
/*path*/ config.eSpeakDataPath.c_str(),
|
||||
/*options*/ 0);
|
||||
if (result < 0) {
|
||||
throw std::runtime_error("Failed to initialize eSpeak-ng");
|
||||
}
|
||||
|
||||
spdlog::debug("Initialized eSpeak");
|
||||
}
|
||||
|
||||
// Load onnx model for libtashkeel
|
||||
// https://github.com/mush42/libtashkeel/
|
||||
if (config.useTashkeel) {
|
||||
spdlog::debug("Using libtashkeel for diacritization");
|
||||
if (!config.tashkeelModelPath) {
|
||||
throw std::runtime_error("No path to libtashkeel model");
|
||||
}
|
||||
|
||||
spdlog::debug("Loading libtashkeel model from {}",
|
||||
config.tashkeelModelPath.value());
|
||||
config.tashkeelState = std::make_unique<tashkeel::State>();
|
||||
tashkeel::tashkeel_load(config.tashkeelModelPath.value(),
|
||||
*config.tashkeelState);
|
||||
spdlog::debug("Initialized libtashkeel");
|
||||
}
|
||||
|
||||
spdlog::info("Initialized piper");
|
||||
}
|
||||
|
||||
void terminate(PiperConfig &config) {
|
||||
if (config.useESpeak) {
|
||||
// Clean up espeak-ng
|
||||
spdlog::debug("Terminating eSpeak");
|
||||
espeak_Terminate();
|
||||
spdlog::debug("Terminated eSpeak");
|
||||
}
|
||||
|
||||
spdlog::info("Terminated piper");
|
||||
}
|
||||
|
||||
void loadModel(std::string modelPath, ModelSession &session, bool useCuda) {
|
||||
spdlog::debug("Loading onnx model from {}", modelPath);
|
||||
session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
|
||||
instanceName.c_str());
|
||||
session.env.DisableTelemetryEvents();
|
||||
|
||||
if (useCuda) {
|
||||
// Use CUDA provider
|
||||
OrtCUDAProviderOptions cuda_options{};
|
||||
cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
|
||||
session.options.AppendExecutionProvider_CUDA(cuda_options);
|
||||
}
|
||||
|
||||
// Slows down performance by ~2x
|
||||
// session.options.SetIntraOpNumThreads(1);
|
||||
|
||||
// Roughly doubles load time for no visible inference benefit
|
||||
// session.options.SetGraphOptimizationLevel(
|
||||
// GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||
|
||||
session.options.SetGraphOptimizationLevel(
|
||||
GraphOptimizationLevel::ORT_DISABLE_ALL);
|
||||
|
||||
// Slows down performance very slightly
|
||||
// session.options.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
|
||||
|
||||
session.options.DisableCpuMemArena();
|
||||
session.options.DisableMemPattern();
|
||||
session.options.DisableProfiling();
|
||||
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
|
||||
#ifdef _WIN32
|
||||
auto modelPathW = std::wstring(modelPath.begin(), modelPath.end());
|
||||
auto modelPathStr = modelPathW.c_str();
|
||||
#else
|
||||
auto modelPathStr = modelPath.c_str();
|
||||
#endif
|
||||
|
||||
session.onnx = Ort::Session(session.env, modelPathStr, session.options);
|
||||
|
||||
auto endTime = std::chrono::steady_clock::now();
|
||||
spdlog::debug("Loaded onnx model in {} second(s)",
|
||||
std::chrono::duration<double>(endTime - startTime).count());
|
||||
}
|
||||
|
||||
// Load Onnx model and JSON config file
|
||||
void loadVoice(PiperConfig &config, std::string modelPath,
|
||||
std::string modelConfigPath, Voice &voice,
|
||||
std::optional<SpeakerId> &speakerId, bool useCuda) {
|
||||
spdlog::debug("Parsing voice config at {}", modelConfigPath);
|
||||
std::ifstream modelConfigFile(modelConfigPath);
|
||||
voice.configRoot = json::parse(modelConfigFile);
|
||||
|
||||
parsePhonemizeConfig(voice.configRoot, voice.phonemizeConfig);
|
||||
parseSynthesisConfig(voice.configRoot, voice.synthesisConfig);
|
||||
parseModelConfig(voice.configRoot, voice.modelConfig);
|
||||
|
||||
if (voice.modelConfig.numSpeakers > 1) {
|
||||
// Multi-speaker model
|
||||
if (speakerId) {
|
||||
voice.synthesisConfig.speakerId = speakerId;
|
||||
} else {
|
||||
// Default speaker
|
||||
voice.synthesisConfig.speakerId = 0;
|
||||
}
|
||||
}
|
||||
|
||||
spdlog::debug("Voice contains {} speaker(s)", voice.modelConfig.numSpeakers);
|
||||
|
||||
loadModel(modelPath, voice.session, useCuda);
|
||||
|
||||
} /* loadVoice */
|
||||
|
||||
// Phoneme ids to WAV audio
|
||||
void synthesize(std::vector<PhonemeId> &phonemeIds,
|
||||
SynthesisConfig &synthesisConfig, ModelSession &session,
|
||||
std::vector<int16_t> &audioBuffer, SynthesisResult &result) {
|
||||
spdlog::debug("Synthesizing audio for {} phoneme id(s)", phonemeIds.size());
|
||||
|
||||
auto memoryInfo = Ort::MemoryInfo::CreateCpu(
|
||||
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
|
||||
|
||||
// Allocate
|
||||
std::vector<int64_t> phonemeIdLengths{(int64_t)phonemeIds.size()};
|
||||
std::vector<float> scales{synthesisConfig.noiseScale,
|
||||
synthesisConfig.lengthScale,
|
||||
synthesisConfig.noiseW};
|
||||
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
std::vector<int64_t> phonemeIdsShape{1, (int64_t)phonemeIds.size()};
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
|
||||
memoryInfo, phonemeIds.data(), phonemeIds.size(), phonemeIdsShape.data(),
|
||||
phonemeIdsShape.size()));
|
||||
|
||||
std::vector<int64_t> phomemeIdLengthsShape{(int64_t)phonemeIdLengths.size()};
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
|
||||
memoryInfo, phonemeIdLengths.data(), phonemeIdLengths.size(),
|
||||
phomemeIdLengthsShape.data(), phomemeIdLengthsShape.size()));
|
||||
|
||||
std::vector<int64_t> scalesShape{(int64_t)scales.size()};
|
||||
inputTensors.push_back(
|
||||
Ort::Value::CreateTensor<float>(memoryInfo, scales.data(), scales.size(),
|
||||
scalesShape.data(), scalesShape.size()));
|
||||
|
||||
// Add speaker id.
|
||||
// NOTE: These must be kept outside the "if" below to avoid being deallocated.
|
||||
std::vector<int64_t> speakerId{
|
||||
(int64_t)synthesisConfig.speakerId.value_or(0)};
|
||||
std::vector<int64_t> speakerIdShape{(int64_t)speakerId.size()};
|
||||
|
||||
if (synthesisConfig.speakerId) {
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
|
||||
memoryInfo, speakerId.data(), speakerId.size(), speakerIdShape.data(),
|
||||
speakerIdShape.size()));
|
||||
}
|
||||
|
||||
// From export_onnx.py
|
||||
std::array<const char *, 4> inputNames = {"input", "input_lengths", "scales",
|
||||
"sid"};
|
||||
std::array<const char *, 1> outputNames = {"output"};
|
||||
|
||||
// Infer
|
||||
auto startTime = std::chrono::steady_clock::now();
|
||||
auto outputTensors = session.onnx.Run(
|
||||
Ort::RunOptions{nullptr}, inputNames.data(), inputTensors.data(),
|
||||
inputTensors.size(), outputNames.data(), outputNames.size());
|
||||
auto endTime = std::chrono::steady_clock::now();
|
||||
|
||||
if ((outputTensors.size() != 1) || (!outputTensors.front().IsTensor())) {
|
||||
throw std::runtime_error("Invalid output tensors");
|
||||
}
|
||||
auto inferDuration = std::chrono::duration<double>(endTime - startTime);
|
||||
result.inferSeconds = inferDuration.count();
|
||||
|
||||
const float *audio = outputTensors.front().GetTensorData<float>();
|
||||
auto audioShape =
|
||||
outputTensors.front().GetTensorTypeAndShapeInfo().GetShape();
|
||||
int64_t audioCount = audioShape[audioShape.size() - 1];
|
||||
|
||||
result.audioSeconds = (double)audioCount / (double)synthesisConfig.sampleRate;
|
||||
result.realTimeFactor = 0.0;
|
||||
if (result.audioSeconds > 0) {
|
||||
result.realTimeFactor = result.inferSeconds / result.audioSeconds;
|
||||
}
|
||||
spdlog::debug("Synthesized {} second(s) of audio in {} second(s)",
|
||||
result.audioSeconds, result.inferSeconds);
|
||||
|
||||
// Get max audio value for scaling
|
||||
float maxAudioValue = 0.01f;
|
||||
for (int64_t i = 0; i < audioCount; i++) {
|
||||
float audioValue = abs(audio[i]);
|
||||
if (audioValue > maxAudioValue) {
|
||||
maxAudioValue = audioValue;
|
||||
}
|
||||
}
|
||||
|
||||
// We know the size up front
|
||||
audioBuffer.reserve(audioCount);
|
||||
|
||||
// Scale audio to fill range and convert to int16
|
||||
float audioScale = (MAX_WAV_VALUE / std::max(0.01f, maxAudioValue));
|
||||
for (int64_t i = 0; i < audioCount; i++) {
|
||||
int16_t intAudioValue = static_cast<int16_t>(
|
||||
std::clamp(audio[i] * audioScale,
|
||||
static_cast<float>(std::numeric_limits<int16_t>::min()),
|
||||
static_cast<float>(std::numeric_limits<int16_t>::max())));
|
||||
|
||||
audioBuffer.push_back(intAudioValue);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
for (std::size_t i = 0; i < outputTensors.size(); i++) {
|
||||
Ort::detail::OrtRelease(outputTensors[i].release());
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < inputTensors.size(); i++) {
|
||||
Ort::detail::OrtRelease(inputTensors[i].release());
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Phonemize text and synthesize audio
|
||||
void textToAudio(PiperConfig &config, Voice &voice, std::string text,
|
||||
std::vector<int16_t> &audioBuffer, SynthesisResult &result,
|
||||
const std::function<void()> &audioCallback) {
|
||||
|
||||
std::size_t sentenceSilenceSamples = 0;
|
||||
if (voice.synthesisConfig.sentenceSilenceSeconds > 0) {
|
||||
sentenceSilenceSamples = (std::size_t)(
|
||||
voice.synthesisConfig.sentenceSilenceSeconds *
|
||||
voice.synthesisConfig.sampleRate * voice.synthesisConfig.channels);
|
||||
}
|
||||
|
||||
if (config.useTashkeel) {
|
||||
if (!config.tashkeelState) {
|
||||
throw std::runtime_error("Tashkeel model is not loaded");
|
||||
}
|
||||
|
||||
spdlog::debug("Diacritizing text with libtashkeel: {}", text);
|
||||
text = tashkeel::tashkeel_run(text, *config.tashkeelState);
|
||||
}
|
||||
|
||||
// Phonemes for each sentence
|
||||
spdlog::debug("Phonemizing text: {}", text);
|
||||
std::vector<std::vector<Phoneme>> phonemes;
|
||||
|
||||
if (voice.phonemizeConfig.phonemeType == eSpeakPhonemes) {
|
||||
// Use espeak-ng for phonemization
|
||||
eSpeakPhonemeConfig eSpeakConfig;
|
||||
eSpeakConfig.voice = voice.phonemizeConfig.eSpeak.voice;
|
||||
phonemize_eSpeak(text, eSpeakConfig, phonemes);
|
||||
} else {
|
||||
// Use UTF-8 codepoints as "phonemes"
|
||||
CodepointsPhonemeConfig codepointsConfig;
|
||||
phonemize_codepoints(text, codepointsConfig, phonemes);
|
||||
}
|
||||
|
||||
// Synthesize each sentence independently.
|
||||
std::vector<PhonemeId> phonemeIds;
|
||||
std::map<Phoneme, std::size_t> missingPhonemes;
|
||||
for (auto phonemesIter = phonemes.begin(); phonemesIter != phonemes.end();
|
||||
++phonemesIter) {
|
||||
std::vector<Phoneme> &sentencePhonemes = *phonemesIter;
|
||||
|
||||
if (spdlog::should_log(spdlog::level::debug)) {
|
||||
// DEBUG log for phonemes
|
||||
std::string phonemesStr;
|
||||
for (auto phoneme : sentencePhonemes) {
|
||||
utf8::append(phoneme, std::back_inserter(phonemesStr));
|
||||
}
|
||||
|
||||
spdlog::debug("Converting {} phoneme(s) to ids: {}",
|
||||
sentencePhonemes.size(), phonemesStr);
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<std::vector<Phoneme>>> phrasePhonemes;
|
||||
std::vector<SynthesisResult> phraseResults;
|
||||
std::vector<size_t> phraseSilenceSamples;
|
||||
|
||||
// Use phoneme/id map from config
|
||||
PhonemeIdConfig idConfig;
|
||||
idConfig.phonemeIdMap =
|
||||
std::make_shared<PhonemeIdMap>(voice.phonemizeConfig.phonemeIdMap);
|
||||
|
||||
if (voice.synthesisConfig.phonemeSilenceSeconds) {
|
||||
// Split into phrases
|
||||
std::map<Phoneme, float> &phonemeSilenceSeconds =
|
||||
*voice.synthesisConfig.phonemeSilenceSeconds;
|
||||
|
||||
auto currentPhrasePhonemes = std::make_shared<std::vector<Phoneme>>();
|
||||
phrasePhonemes.push_back(currentPhrasePhonemes);
|
||||
|
||||
for (auto sentencePhonemesIter = sentencePhonemes.begin();
|
||||
sentencePhonemesIter != sentencePhonemes.end();
|
||||
sentencePhonemesIter++) {
|
||||
Phoneme ¤tPhoneme = *sentencePhonemesIter;
|
||||
currentPhrasePhonemes->push_back(currentPhoneme);
|
||||
|
||||
if (phonemeSilenceSeconds.count(currentPhoneme) > 0) {
|
||||
// Split at phrase boundary
|
||||
phraseSilenceSamples.push_back(
|
||||
(std::size_t)(phonemeSilenceSeconds[currentPhoneme] *
|
||||
voice.synthesisConfig.sampleRate *
|
||||
voice.synthesisConfig.channels));
|
||||
|
||||
currentPhrasePhonemes = std::make_shared<std::vector<Phoneme>>();
|
||||
phrasePhonemes.push_back(currentPhrasePhonemes);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Use all phonemes
|
||||
phrasePhonemes.push_back(
|
||||
std::make_shared<std::vector<Phoneme>>(sentencePhonemes));
|
||||
}
|
||||
|
||||
// Ensure results/samples are the same size
|
||||
while (phraseResults.size() < phrasePhonemes.size()) {
|
||||
phraseResults.emplace_back();
|
||||
}
|
||||
|
||||
while (phraseSilenceSamples.size() < phrasePhonemes.size()) {
|
||||
phraseSilenceSamples.push_back(0);
|
||||
}
|
||||
|
||||
// phonemes -> ids -> audio
|
||||
for (size_t phraseIdx = 0; phraseIdx < phrasePhonemes.size(); phraseIdx++) {
|
||||
if (phrasePhonemes[phraseIdx]->size() <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// phonemes -> ids
|
||||
phonemes_to_ids(*(phrasePhonemes[phraseIdx]), idConfig, phonemeIds,
|
||||
missingPhonemes);
|
||||
if (spdlog::should_log(spdlog::level::debug)) {
|
||||
// DEBUG log for phoneme ids
|
||||
std::stringstream phonemeIdsStr;
|
||||
for (auto phonemeId : phonemeIds) {
|
||||
phonemeIdsStr << phonemeId << ", ";
|
||||
}
|
||||
|
||||
spdlog::debug("Converted {} phoneme(s) to {} phoneme id(s): {}",
|
||||
phrasePhonemes[phraseIdx]->size(), phonemeIds.size(),
|
||||
phonemeIdsStr.str());
|
||||
}
|
||||
|
||||
// ids -> audio
|
||||
synthesize(phonemeIds, voice.synthesisConfig, voice.session, audioBuffer,
|
||||
phraseResults[phraseIdx]);
|
||||
|
||||
// Add end of phrase silence
|
||||
for (std::size_t i = 0; i < phraseSilenceSamples[phraseIdx]; i++) {
|
||||
audioBuffer.push_back(0);
|
||||
}
|
||||
|
||||
result.audioSeconds += phraseResults[phraseIdx].audioSeconds;
|
||||
result.inferSeconds += phraseResults[phraseIdx].inferSeconds;
|
||||
|
||||
phonemeIds.clear();
|
||||
}
|
||||
|
||||
// Add end of sentence silence
|
||||
if (sentenceSilenceSamples > 0) {
|
||||
for (std::size_t i = 0; i < sentenceSilenceSamples; i++) {
|
||||
audioBuffer.push_back(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (audioCallback) {
|
||||
// Call back must copy audio since it is cleared afterwards.
|
||||
audioCallback();
|
||||
audioBuffer.clear();
|
||||
}
|
||||
|
||||
phonemeIds.clear();
|
||||
}
|
||||
|
||||
if (missingPhonemes.size() > 0) {
|
||||
spdlog::warn("Missing {} phoneme(s) from phoneme/id map!",
|
||||
missingPhonemes.size());
|
||||
|
||||
for (auto phonemeCount : missingPhonemes) {
|
||||
std::string phonemeStr;
|
||||
utf8::append(phonemeCount.first, std::back_inserter(phonemeStr));
|
||||
spdlog::warn("Missing \"{}\" (\\u{:04X}): {} time(s)", phonemeStr,
|
||||
(uint32_t)phonemeCount.first, phonemeCount.second);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.audioSeconds > 0) {
|
||||
result.realTimeFactor = result.inferSeconds / result.audioSeconds;
|
||||
}
|
||||
|
||||
} /* textToAudio */
|
||||
|
||||
// Phonemize text and synthesize audio to WAV file
|
||||
void textToWavFile(PiperConfig &config, Voice &voice, std::string text,
|
||||
std::ostream &audioFile, SynthesisResult &result) {
|
||||
|
||||
std::vector<int16_t> audioBuffer;
|
||||
textToAudio(config, voice, text, audioBuffer, result, NULL);
|
||||
|
||||
// Write WAV
|
||||
auto synthesisConfig = voice.synthesisConfig;
|
||||
writeWavHeader(synthesisConfig.sampleRate, synthesisConfig.sampleWidth,
|
||||
synthesisConfig.channels, (int32_t)audioBuffer.size(),
|
||||
audioFile);
|
||||
|
||||
audioFile.write((const char *)audioBuffer.data(),
|
||||
sizeof(int16_t) * audioBuffer.size());
|
||||
|
||||
} /* textToWavFile */
|
||||
|
||||
} // namespace piper
|
||||
132
mlu_370-piper/piper/src/cpp/piper.hpp
Normal file
132
mlu_370-piper/piper/src/cpp/piper.hpp
Normal file
@@ -0,0 +1,132 @@
|
||||
#ifndef PIPER_H_
|
||||
#define PIPER_H_
|
||||
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <onnxruntime_cxx_api.h>
|
||||
#include <piper-phonemize/phoneme_ids.hpp>
|
||||
#include <piper-phonemize/phonemize.hpp>
|
||||
#include <piper-phonemize/tashkeel.hpp>
|
||||
|
||||
#include "json.hpp"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace piper {
|
||||
|
||||
typedef int64_t SpeakerId;
|
||||
|
||||
struct eSpeakConfig {
|
||||
std::string voice = "en-us";
|
||||
};
|
||||
|
||||
struct PiperConfig {
|
||||
std::string eSpeakDataPath;
|
||||
bool useESpeak = true;
|
||||
|
||||
bool useTashkeel = false;
|
||||
std::optional<std::string> tashkeelModelPath;
|
||||
std::unique_ptr<tashkeel::State> tashkeelState;
|
||||
};
|
||||
|
||||
enum PhonemeType { eSpeakPhonemes, TextPhonemes };
|
||||
|
||||
struct PhonemizeConfig {
|
||||
PhonemeType phonemeType = eSpeakPhonemes;
|
||||
std::optional<std::map<Phoneme, std::vector<Phoneme>>> phonemeMap;
|
||||
std::map<Phoneme, std::vector<PhonemeId>> phonemeIdMap;
|
||||
|
||||
PhonemeId idPad = 0; // padding (optionally interspersed)
|
||||
PhonemeId idBos = 1; // beginning of sentence
|
||||
PhonemeId idEos = 2; // end of sentence
|
||||
bool interspersePad = true;
|
||||
|
||||
eSpeakConfig eSpeak;
|
||||
};
|
||||
|
||||
struct SynthesisConfig {
|
||||
// VITS inference settings
|
||||
float noiseScale = 0.667f;
|
||||
float lengthScale = 1.0f;
|
||||
float noiseW = 0.8f;
|
||||
|
||||
// Audio settings
|
||||
int sampleRate = 22050;
|
||||
int sampleWidth = 2; // 16-bit
|
||||
int channels = 1; // mono
|
||||
|
||||
// Speaker id from 0 to numSpeakers - 1
|
||||
std::optional<SpeakerId> speakerId;
|
||||
|
||||
// Extra silence
|
||||
float sentenceSilenceSeconds = 0.2f;
|
||||
std::optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
|
||||
};
|
||||
|
||||
struct ModelConfig {
|
||||
int numSpeakers;
|
||||
|
||||
// speaker name -> id
|
||||
std::optional<std::map<std::string, SpeakerId>> speakerIdMap;
|
||||
};
|
||||
|
||||
struct ModelSession {
|
||||
Ort::Session onnx;
|
||||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
Ort::SessionOptions options;
|
||||
Ort::Env env;
|
||||
|
||||
ModelSession() : onnx(nullptr){};
|
||||
};
|
||||
|
||||
struct SynthesisResult {
|
||||
double inferSeconds;
|
||||
double audioSeconds;
|
||||
double realTimeFactor;
|
||||
};
|
||||
|
||||
struct Voice {
|
||||
json configRoot;
|
||||
PhonemizeConfig phonemizeConfig;
|
||||
SynthesisConfig synthesisConfig;
|
||||
ModelConfig modelConfig;
|
||||
ModelSession session;
|
||||
};
|
||||
|
||||
// True if the string is a single UTF-8 codepoint
|
||||
bool isSingleCodepoint(std::string s);
|
||||
|
||||
// Get the first UTF-8 codepoint of a string
|
||||
Phoneme getCodepoint(std::string s);
|
||||
|
||||
// Get version of Piper
|
||||
std::string getVersion();
|
||||
|
||||
// Must be called before using textTo* functions
|
||||
void initialize(PiperConfig &config);
|
||||
|
||||
// Clean up
|
||||
void terminate(PiperConfig &config);
|
||||
|
||||
// Load Onnx model and JSON config file
|
||||
void loadVoice(PiperConfig &config, std::string modelPath,
|
||||
std::string modelConfigPath, Voice &voice,
|
||||
std::optional<SpeakerId> &speakerId, bool useCuda);
|
||||
|
||||
// Phonemize text and synthesize audio
|
||||
void textToAudio(PiperConfig &config, Voice &voice, std::string text,
|
||||
std::vector<int16_t> &audioBuffer, SynthesisResult &result,
|
||||
const std::function<void()> &audioCallback);
|
||||
|
||||
// Phonemize text and synthesize audio to WAV file
|
||||
void textToWavFile(PiperConfig &config, Voice &voice, std::string text,
|
||||
std::ostream &audioFile, SynthesisResult &result);
|
||||
|
||||
} // namespace piper
|
||||
|
||||
#endif // PIPER_H_
|
||||
60
mlu_370-piper/piper/src/cpp/test.cpp
Normal file
60
mlu_370-piper/piper/src/cpp/test.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "json.hpp"
|
||||
#include "piper.hpp"
|
||||
|
||||
using namespace std;
|
||||
using json = nlohmann::json;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
piper::PiperConfig piperConfig;
|
||||
piper::Voice voice;
|
||||
|
||||
if (argc < 2) {
|
||||
std::cerr << "Need voice model path" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc < 3) {
|
||||
std::cerr << "Need espeak-ng-data path" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc < 4) {
|
||||
std::cerr << "Need output WAV path" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto modelPath = std::string(argv[1]);
|
||||
piperConfig.eSpeakDataPath = std::string(argv[2]);
|
||||
auto outputPath = std::string(argv[3]);
|
||||
|
||||
optional<piper::SpeakerId> speakerId;
|
||||
loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId,
|
||||
false);
|
||||
piper::initialize(piperConfig);
|
||||
|
||||
// Output audio to WAV file
|
||||
ofstream audioFile(outputPath, ios::binary);
|
||||
|
||||
piper::SynthesisResult result;
|
||||
piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile,
|
||||
result);
|
||||
piper::terminate(piperConfig);
|
||||
|
||||
// Verify that file has some data
|
||||
if (audioFile.tellp() < 10000) {
|
||||
std::cerr << "ERROR: Output file is smaller than expected!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
std::cout << "OK" << std::endl;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
34
mlu_370-piper/piper/src/cpp/utf8.h
Normal file
34
mlu_370-piper/piper/src/cpp/utf8.h
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright 2006 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "utf8/checked.h"
|
||||
#include "utf8/unchecked.h"
|
||||
|
||||
#endif // header guard
|
||||
335
mlu_370-piper/piper/src/cpp/utf8/checked.h
Normal file
335
mlu_370-piper/piper/src/cpp/utf8/checked.h
Normal file
@@ -0,0 +1,335 @@
|
||||
// Copyright 2006-2016 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
#include <stdexcept>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
// Base for the exceptions that may be thrown from the library
|
||||
class exception : public ::std::exception {
|
||||
};
|
||||
|
||||
// Exceptions that may be thrown from the library functions.
|
||||
class invalid_code_point : public exception {
|
||||
uint32_t cp;
|
||||
public:
|
||||
invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
|
||||
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; }
|
||||
uint32_t code_point() const {return cp;}
|
||||
};
|
||||
|
||||
class invalid_utf8 : public exception {
|
||||
uint8_t u8;
|
||||
public:
|
||||
invalid_utf8 (uint8_t u) : u8(u) {}
|
||||
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
|
||||
uint8_t utf8_octet() const {return u8;}
|
||||
};
|
||||
|
||||
class invalid_utf16 : public exception {
|
||||
uint16_t u16;
|
||||
public:
|
||||
invalid_utf16 (uint16_t u) : u16(u) {}
|
||||
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; }
|
||||
uint16_t utf16_word() const {return u16;}
|
||||
};
|
||||
|
||||
class not_enough_room : public exception {
|
||||
public:
|
||||
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; }
|
||||
};
|
||||
|
||||
/// The library API - functions intended to be called by the users
|
||||
|
||||
template <typename octet_iterator>
|
||||
octet_iterator append(uint32_t cp, octet_iterator result)
|
||||
{
|
||||
if (!utf8::internal::is_code_point_valid(cp))
|
||||
throw invalid_code_point(cp);
|
||||
|
||||
if (cp < 0x80) // one octet
|
||||
*(result++) = static_cast<uint8_t>(cp);
|
||||
else if (cp < 0x800) { // two octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
else if (cp < 0x10000) { // three octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
else { // four octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename output_iterator>
|
||||
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
|
||||
{
|
||||
while (start != end) {
|
||||
octet_iterator sequence_start = start;
|
||||
internal::utf_error err_code = utf8::internal::validate_next(start, end);
|
||||
switch (err_code) {
|
||||
case internal::UTF8_OK :
|
||||
for (octet_iterator it = sequence_start; it != start; ++it)
|
||||
*out++ = *it;
|
||||
break;
|
||||
case internal::NOT_ENOUGH_ROOM:
|
||||
out = utf8::append (replacement, out);
|
||||
start = end;
|
||||
break;
|
||||
case internal::INVALID_LEAD:
|
||||
out = utf8::append (replacement, out);
|
||||
++start;
|
||||
break;
|
||||
case internal::INCOMPLETE_SEQUENCE:
|
||||
case internal::OVERLONG_SEQUENCE:
|
||||
case internal::INVALID_CODE_POINT:
|
||||
out = utf8::append (replacement, out);
|
||||
++start;
|
||||
// just one replacement mark for the sequence
|
||||
while (start != end && utf8::internal::is_trail(*start))
|
||||
++start;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename output_iterator>
|
||||
inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
|
||||
{
|
||||
static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
|
||||
return utf8::replace_invalid(start, end, out, replacement_marker);
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t next(octet_iterator& it, octet_iterator end)
|
||||
{
|
||||
uint32_t cp = 0;
|
||||
internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
|
||||
switch (err_code) {
|
||||
case internal::UTF8_OK :
|
||||
break;
|
||||
case internal::NOT_ENOUGH_ROOM :
|
||||
throw not_enough_room();
|
||||
case internal::INVALID_LEAD :
|
||||
case internal::INCOMPLETE_SEQUENCE :
|
||||
case internal::OVERLONG_SEQUENCE :
|
||||
throw invalid_utf8(*it);
|
||||
case internal::INVALID_CODE_POINT :
|
||||
throw invalid_code_point(cp);
|
||||
}
|
||||
return cp;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it, octet_iterator end)
|
||||
{
|
||||
return utf8::next(it, end);
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t prior(octet_iterator& it, octet_iterator start)
|
||||
{
|
||||
// can't do much if it == start
|
||||
if (it == start)
|
||||
throw not_enough_room();
|
||||
|
||||
octet_iterator end = it;
|
||||
// Go back until we hit either a lead octet or start
|
||||
while (utf8::internal::is_trail(*(--it)))
|
||||
if (it == start)
|
||||
throw invalid_utf8(*it); // error - no lead byte in the sequence
|
||||
return utf8::peek_next(it, end);
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename distance_type>
|
||||
void advance (octet_iterator& it, distance_type n, octet_iterator end)
|
||||
{
|
||||
const distance_type zero(0);
|
||||
if (n < zero) {
|
||||
// backward
|
||||
for (distance_type i = n; i < zero; ++i)
|
||||
utf8::prior(it, end);
|
||||
} else {
|
||||
// forward
|
||||
for (distance_type i = zero; i < n; ++i)
|
||||
utf8::next(it, end);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
typename std::iterator_traits<octet_iterator>::difference_type
|
||||
distance (octet_iterator first, octet_iterator last)
|
||||
{
|
||||
typename std::iterator_traits<octet_iterator>::difference_type dist;
|
||||
for (dist = 0; first < last; ++dist)
|
||||
utf8::next(first, last);
|
||||
return dist;
|
||||
}
|
||||
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
|
||||
{
|
||||
while (start != end) {
|
||||
uint32_t cp = utf8::internal::mask16(*start++);
|
||||
// Take care of surrogate pairs first
|
||||
if (utf8::internal::is_lead_surrogate(cp)) {
|
||||
if (start != end) {
|
||||
uint32_t trail_surrogate = utf8::internal::mask16(*start++);
|
||||
if (utf8::internal::is_trail_surrogate(trail_surrogate))
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
|
||||
}
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(cp));
|
||||
|
||||
}
|
||||
// Lone trail surrogate
|
||||
else if (utf8::internal::is_trail_surrogate(cp))
|
||||
throw invalid_utf16(static_cast<uint16_t>(cp));
|
||||
|
||||
result = utf8::append(cp, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
|
||||
{
|
||||
while (start < end) {
|
||||
uint32_t cp = utf8::next(start, end);
|
||||
if (cp > 0xffff) { //make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
*result++ = static_cast<uint16_t>(cp);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename u32bit_iterator>
|
||||
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
|
||||
{
|
||||
while (start != end)
|
||||
result = utf8::append(*(start++), result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename u32bit_iterator>
|
||||
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
|
||||
{
|
||||
while (start < end)
|
||||
(*result++) = utf8::next(start, end);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
public:
|
||||
typedef uint32_t value_type;
|
||||
typedef uint32_t* pointer;
|
||||
typedef uint32_t& reference;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef std::bidirectional_iterator_tag iterator_category;
|
||||
iterator () {}
|
||||
explicit iterator (const octet_iterator& octet_it,
|
||||
const octet_iterator& rangestart,
|
||||
const octet_iterator& rangeend) :
|
||||
it(octet_it), range_start(rangestart), range_end(rangeend)
|
||||
{
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
octet_iterator temp = it;
|
||||
return utf8::next(temp, range_end);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
{
|
||||
if (range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
iterator& operator ++ ()
|
||||
{
|
||||
utf8::next(it, range_end);
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
utf8::next(it, range_end);
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
{
|
||||
utf8::prior(it, range_start);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
utf8::prior(it, range_start);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
#if UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later
|
||||
#include "cpp17.h"
|
||||
#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
|
||||
#include "cpp11.h"
|
||||
#endif // C++ 11 or later
|
||||
|
||||
#endif //header guard
|
||||
|
||||
338
mlu_370-piper/piper/src/cpp/utf8/core.h
Normal file
338
mlu_370-piper/piper/src/cpp/utf8/core.h
Normal file
@@ -0,0 +1,338 @@
|
||||
// Copyright 2006 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include <iterator>
|
||||
|
||||
// Determine the C++ standard version.
|
||||
// If the user defines UTF_CPP_CPLUSPLUS, use that.
|
||||
// Otherwise, trust the unreliable predefined macro __cplusplus
|
||||
|
||||
#if !defined UTF_CPP_CPLUSPLUS
|
||||
#define UTF_CPP_CPLUSPLUS __cplusplus
|
||||
#endif
|
||||
|
||||
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
|
||||
#define UTF_CPP_OVERRIDE override
|
||||
#define UTF_CPP_NOEXCEPT noexcept
|
||||
#else // C++ 98/03
|
||||
#define UTF_CPP_OVERRIDE
|
||||
#define UTF_CPP_NOEXCEPT throw()
|
||||
#endif // C++ 11 or later
|
||||
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
|
||||
// You may need to change them to match your system.
|
||||
// These typedefs have the same names as ones from cstdint, or boost/cstdint
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
// Helper code - not intended to be directly called by the library users. May be changed at any time
|
||||
namespace internal
|
||||
{
|
||||
// Unicode constants
|
||||
// Leading (high) surrogates: 0xd800 - 0xdbff
|
||||
// Trailing (low) surrogates: 0xdc00 - 0xdfff
|
||||
const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
|
||||
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
|
||||
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
|
||||
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
|
||||
const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
|
||||
const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
|
||||
|
||||
// Maximum valid value for a Unicode code point
|
||||
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
|
||||
|
||||
template<typename octet_type>
|
||||
inline uint8_t mask8(octet_type oc)
|
||||
{
|
||||
return static_cast<uint8_t>(0xff & oc);
|
||||
}
|
||||
template<typename u16_type>
|
||||
inline uint16_t mask16(u16_type oc)
|
||||
{
|
||||
return static_cast<uint16_t>(0xffff & oc);
|
||||
}
|
||||
template<typename octet_type>
|
||||
inline bool is_trail(octet_type oc)
|
||||
{
|
||||
return ((utf8::internal::mask8(oc) >> 6) == 0x2);
|
||||
}
|
||||
|
||||
template <typename u16>
|
||||
inline bool is_lead_surrogate(u16 cp)
|
||||
{
|
||||
return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
|
||||
}
|
||||
|
||||
template <typename u16>
|
||||
inline bool is_trail_surrogate(u16 cp)
|
||||
{
|
||||
return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
|
||||
}
|
||||
|
||||
template <typename u16>
|
||||
inline bool is_surrogate(u16 cp)
|
||||
{
|
||||
return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
|
||||
}
|
||||
|
||||
template <typename u32>
|
||||
inline bool is_code_point_valid(u32 cp)
|
||||
{
|
||||
return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline typename std::iterator_traits<octet_iterator>::difference_type
|
||||
sequence_length(octet_iterator lead_it)
|
||||
{
|
||||
uint8_t lead = utf8::internal::mask8(*lead_it);
|
||||
if (lead < 0x80)
|
||||
return 1;
|
||||
else if ((lead >> 5) == 0x6)
|
||||
return 2;
|
||||
else if ((lead >> 4) == 0xe)
|
||||
return 3;
|
||||
else if ((lead >> 3) == 0x1e)
|
||||
return 4;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename octet_difference_type>
|
||||
inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
|
||||
{
|
||||
if (cp < 0x80) {
|
||||
if (length != 1)
|
||||
return true;
|
||||
}
|
||||
else if (cp < 0x800) {
|
||||
if (length != 2)
|
||||
return true;
|
||||
}
|
||||
else if (cp < 0x10000) {
|
||||
if (length != 3)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
|
||||
|
||||
/// Helper for get_sequence_x
|
||||
template <typename octet_iterator>
|
||||
utf_error increase_safely(octet_iterator& it, octet_iterator end)
|
||||
{
|
||||
if (++it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
if (!utf8::internal::is_trail(*it))
|
||||
return INCOMPLETE_SEQUENCE;
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
|
||||
|
||||
/// get_sequence_x functions decode utf-8 sequences of the length x
|
||||
template <typename octet_iterator>
|
||||
utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
code_point = utf8::internal::mask8(*it);
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
code_point = utf8::internal::mask8(*it);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
code_point = utf8::internal::mask8(*it);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
code_point = utf8::internal::mask8(*it);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
||||
code_point += (*it) & 0x3f;
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
|
||||
|
||||
template <typename octet_iterator>
|
||||
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
// Save the original value of it so we can go back in case of failure
|
||||
// Of course, it does not make much sense with i.e. stream iterators
|
||||
octet_iterator original_it = it;
|
||||
|
||||
uint32_t cp = 0;
|
||||
// Determine the sequence length based on the lead octet
|
||||
typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
|
||||
const octet_difference_type length = utf8::internal::sequence_length(it);
|
||||
|
||||
// Get trail octets and calculate the code point
|
||||
utf_error err = UTF8_OK;
|
||||
switch (length) {
|
||||
case 0:
|
||||
return INVALID_LEAD;
|
||||
case 1:
|
||||
err = utf8::internal::get_sequence_1(it, end, cp);
|
||||
break;
|
||||
case 2:
|
||||
err = utf8::internal::get_sequence_2(it, end, cp);
|
||||
break;
|
||||
case 3:
|
||||
err = utf8::internal::get_sequence_3(it, end, cp);
|
||||
break;
|
||||
case 4:
|
||||
err = utf8::internal::get_sequence_4(it, end, cp);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err == UTF8_OK) {
|
||||
// Decoding succeeded. Now, security checks...
|
||||
if (utf8::internal::is_code_point_valid(cp)) {
|
||||
if (!utf8::internal::is_overlong_sequence(cp, length)){
|
||||
// Passed! Return here.
|
||||
code_point = cp;
|
||||
++it;
|
||||
return UTF8_OK;
|
||||
}
|
||||
else
|
||||
err = OVERLONG_SEQUENCE;
|
||||
}
|
||||
else
|
||||
err = INVALID_CODE_POINT;
|
||||
}
|
||||
|
||||
// Failure branch - restore the original value of the iterator
|
||||
it = original_it;
|
||||
return err;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
|
||||
uint32_t ignored;
|
||||
return utf8::internal::validate_next(it, end, ignored);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
/// The library API - functions intended to be called by the users
|
||||
|
||||
// Byte order mark
|
||||
const uint8_t bom[] = {0xef, 0xbb, 0xbf};
|
||||
|
||||
template <typename octet_iterator>
|
||||
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
|
||||
{
|
||||
octet_iterator result = start;
|
||||
while (result != end) {
|
||||
utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
|
||||
if (err_code != internal::UTF8_OK)
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline bool is_valid(octet_iterator start, octet_iterator end)
|
||||
{
|
||||
return (utf8::find_invalid(start, end) == end);
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
|
||||
{
|
||||
return (
|
||||
((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
|
||||
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
|
||||
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
|
||||
);
|
||||
}
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
||||
|
||||
103
mlu_370-piper/piper/src/cpp/utf8/cpp11.h
Normal file
103
mlu_370-piper/piper/src/cpp/utf8/cpp11.h
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2018 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
|
||||
#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
|
||||
|
||||
#include "checked.h"
|
||||
#include <string>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
||||
inline void append(char32_t cp, std::string& s)
|
||||
{
|
||||
append(uint32_t(cp), std::back_inserter(s));
|
||||
}
|
||||
|
||||
inline std::string utf16to8(const std::u16string& s)
|
||||
{
|
||||
std::string result;
|
||||
utf16to8(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::u16string utf8to16(const std::string& s)
|
||||
{
|
||||
std::u16string result;
|
||||
utf8to16(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::string utf32to8(const std::u32string& s)
|
||||
{
|
||||
std::string result;
|
||||
utf32to8(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::u32string utf8to32(const std::string& s)
|
||||
{
|
||||
std::u32string result;
|
||||
utf8to32(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::size_t find_invalid(const std::string& s)
|
||||
{
|
||||
std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
|
||||
return (invalid == s.end()) ? std::string::npos : (invalid - s.begin());
|
||||
}
|
||||
|
||||
inline bool is_valid(const std::string& s)
|
||||
{
|
||||
return is_valid(s.begin(), s.end());
|
||||
}
|
||||
|
||||
inline std::string replace_invalid(const std::string& s, char32_t replacement)
|
||||
{
|
||||
std::string result;
|
||||
replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::string replace_invalid(const std::string& s)
|
||||
{
|
||||
std::string result;
|
||||
replace_invalid(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline bool starts_with_bom(const std::string& s)
|
||||
{
|
||||
return starts_with_bom(s.begin(), s.end());
|
||||
}
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
||||
103
mlu_370-piper/piper/src/cpp/utf8/cpp17.h
Normal file
103
mlu_370-piper/piper/src/cpp/utf8/cpp17.h
Normal file
@@ -0,0 +1,103 @@
|
||||
// Copyright 2018 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
|
||||
#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
|
||||
|
||||
#include "checked.h"
|
||||
#include <string>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
||||
inline void append(char32_t cp, std::string& s)
|
||||
{
|
||||
append(uint32_t(cp), std::back_inserter(s));
|
||||
}
|
||||
|
||||
inline std::string utf16to8(std::u16string_view s)
|
||||
{
|
||||
std::string result;
|
||||
utf16to8(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::u16string utf8to16(std::string_view s)
|
||||
{
|
||||
std::u16string result;
|
||||
utf8to16(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::string utf32to8(std::u32string_view s)
|
||||
{
|
||||
std::string result;
|
||||
utf32to8(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::u32string utf8to32(std::string_view s)
|
||||
{
|
||||
std::u32string result;
|
||||
utf8to32(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::size_t find_invalid(std::string_view s)
|
||||
{
|
||||
std::string_view::const_iterator invalid = find_invalid(s.begin(), s.end());
|
||||
return (invalid == s.end()) ? std::string_view::npos : (invalid - s.begin());
|
||||
}
|
||||
|
||||
inline bool is_valid(std::string_view s)
|
||||
{
|
||||
return is_valid(s.begin(), s.end());
|
||||
}
|
||||
|
||||
inline std::string replace_invalid(std::string_view s, char32_t replacement)
|
||||
{
|
||||
std::string result;
|
||||
replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::string replace_invalid(std::string_view s)
|
||||
{
|
||||
std::string result;
|
||||
replace_invalid(s.begin(), s.end(), std::back_inserter(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline bool starts_with_bom(std::string_view s)
|
||||
{
|
||||
return starts_with_bom(s.begin(), s.end());
|
||||
}
|
||||
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
||||
274
mlu_370-piper/piper/src/cpp/utf8/unchecked.h
Normal file
274
mlu_370-piper/piper/src/cpp/utf8/unchecked.h
Normal file
@@ -0,0 +1,274 @@
|
||||
// Copyright 2006 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
namespace unchecked
|
||||
{
|
||||
template <typename octet_iterator>
|
||||
octet_iterator append(uint32_t cp, octet_iterator result)
|
||||
{
|
||||
if (cp < 0x80) // one octet
|
||||
*(result++) = static_cast<uint8_t>(cp);
|
||||
else if (cp < 0x800) { // two octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
else if (cp < 0x10000) { // three octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
else { // four octets
|
||||
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
|
||||
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
|
||||
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename output_iterator>
|
||||
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
|
||||
{
|
||||
while (start != end) {
|
||||
octet_iterator sequence_start = start;
|
||||
internal::utf_error err_code = utf8::internal::validate_next(start, end);
|
||||
switch (err_code) {
|
||||
case internal::UTF8_OK :
|
||||
for (octet_iterator it = sequence_start; it != start; ++it)
|
||||
*out++ = *it;
|
||||
break;
|
||||
case internal::NOT_ENOUGH_ROOM:
|
||||
out = utf8::unchecked::append (replacement, out);
|
||||
start = end;
|
||||
break;
|
||||
case internal::INVALID_LEAD:
|
||||
out = utf8::unchecked::append (replacement, out);
|
||||
++start;
|
||||
break;
|
||||
case internal::INCOMPLETE_SEQUENCE:
|
||||
case internal::OVERLONG_SEQUENCE:
|
||||
case internal::INVALID_CODE_POINT:
|
||||
out = utf8::unchecked::append (replacement, out);
|
||||
++start;
|
||||
// just one replacement mark for the sequence
|
||||
while (start != end && utf8::internal::is_trail(*start))
|
||||
++start;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename output_iterator>
|
||||
inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
|
||||
{
|
||||
static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
|
||||
return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t next(octet_iterator& it)
|
||||
{
|
||||
uint32_t cp = utf8::internal::mask8(*it);
|
||||
typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
|
||||
switch (length) {
|
||||
case 1:
|
||||
break;
|
||||
case 2:
|
||||
it++;
|
||||
cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
|
||||
break;
|
||||
case 3:
|
||||
++it;
|
||||
cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
break;
|
||||
case 4:
|
||||
++it;
|
||||
cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
|
||||
++it;
|
||||
cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
|
||||
++it;
|
||||
cp += (*it) & 0x3f;
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
return cp;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it)
|
||||
{
|
||||
return utf8::unchecked::next(it);
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t prior(octet_iterator& it)
|
||||
{
|
||||
while (utf8::internal::is_trail(*(--it))) ;
|
||||
octet_iterator temp = it;
|
||||
return utf8::unchecked::next(temp);
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename distance_type>
|
||||
void advance (octet_iterator& it, distance_type n)
|
||||
{
|
||||
const distance_type zero(0);
|
||||
if (n < zero) {
|
||||
// backward
|
||||
for (distance_type i = n; i < zero; ++i)
|
||||
utf8::unchecked::prior(it);
|
||||
} else {
|
||||
// forward
|
||||
for (distance_type i = zero; i < n; ++i)
|
||||
utf8::unchecked::next(it);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
typename std::iterator_traits<octet_iterator>::difference_type
|
||||
distance (octet_iterator first, octet_iterator last)
|
||||
{
|
||||
typename std::iterator_traits<octet_iterator>::difference_type dist;
|
||||
for (dist = 0; first < last; ++dist)
|
||||
utf8::unchecked::next(first);
|
||||
return dist;
|
||||
}
|
||||
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
|
||||
{
|
||||
while (start != end) {
|
||||
uint32_t cp = utf8::internal::mask16(*start++);
|
||||
// Take care of surrogate pairs first
|
||||
if (utf8::internal::is_lead_surrogate(cp)) {
|
||||
uint32_t trail_surrogate = utf8::internal::mask16(*start++);
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
}
|
||||
result = utf8::unchecked::append(cp, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
|
||||
{
|
||||
while (start < end) {
|
||||
uint32_t cp = utf8::unchecked::next(start);
|
||||
if (cp > 0xffff) { //make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
*result++ = static_cast<uint16_t>(cp);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename u32bit_iterator>
|
||||
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
|
||||
{
|
||||
while (start != end)
|
||||
result = utf8::unchecked::append(*(start++), result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename octet_iterator, typename u32bit_iterator>
|
||||
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
|
||||
{
|
||||
while (start < end)
|
||||
(*result++) = utf8::unchecked::next(start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator {
|
||||
octet_iterator it;
|
||||
public:
|
||||
typedef uint32_t value_type;
|
||||
typedef uint32_t* pointer;
|
||||
typedef uint32_t& reference;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef std::bidirectional_iterator_tag iterator_category;
|
||||
iterator () {}
|
||||
explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
uint32_t operator * () const
|
||||
{
|
||||
octet_iterator temp = it;
|
||||
return utf8::unchecked::next(temp);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
{
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
{
|
||||
return !(operator == (rhs));
|
||||
}
|
||||
iterator& operator ++ ()
|
||||
{
|
||||
::std::advance(it, utf8::internal::sequence_length(it));
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
::std::advance(it, utf8::internal::sequence_length(it));
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
{
|
||||
utf8::unchecked::prior(it);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
utf8::unchecked::prior(it);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
|
||||
} // namespace utf8::unchecked
|
||||
} // namespace utf8
|
||||
|
||||
|
||||
#endif // header guard
|
||||
|
||||
40
mlu_370-piper/piper/src/cpp/wavfile.hpp
Normal file
40
mlu_370-piper/piper/src/cpp/wavfile.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef WAVFILE_H_
|
||||
#define WAVFILE_H_
|
||||
|
||||
#include <iostream>
|
||||
|
||||
struct WavHeader {
|
||||
uint8_t RIFF[4] = {'R', 'I', 'F', 'F'};
|
||||
uint32_t chunkSize;
|
||||
uint8_t WAVE[4] = {'W', 'A', 'V', 'E'};
|
||||
|
||||
// fmt
|
||||
uint8_t fmt[4] = {'f', 'm', 't', ' '};
|
||||
uint32_t fmtSize = 16; // bytes
|
||||
uint16_t audioFormat = 1; // PCM
|
||||
uint16_t numChannels; // mono
|
||||
uint32_t sampleRate; // Hertz
|
||||
uint32_t bytesPerSec; // sampleRate * sampleWidth
|
||||
uint16_t blockAlign = 2; // 16-bit mono
|
||||
uint16_t bitsPerSample = 16;
|
||||
|
||||
// data
|
||||
uint8_t data[4] = {'d', 'a', 't', 'a'};
|
||||
uint32_t dataSize;
|
||||
};
|
||||
|
||||
// Write WAV file header only
|
||||
void writeWavHeader(int sampleRate, int sampleWidth, int channels,
|
||||
uint32_t numSamples, std::ostream &audioFile) {
|
||||
WavHeader header;
|
||||
header.dataSize = numSamples * sampleWidth * channels;
|
||||
header.chunkSize = header.dataSize + sizeof(WavHeader) - 8;
|
||||
header.sampleRate = sampleRate;
|
||||
header.numChannels = channels;
|
||||
header.bytesPerSec = sampleRate * sampleWidth * channels;
|
||||
header.blockAlign = sampleWidth * channels;
|
||||
audioFile.write(reinterpret_cast<const char *>(&header), sizeof(header));
|
||||
|
||||
} /* writeWavHeader */
|
||||
|
||||
#endif // WAVFILE_H_
|
||||
Reference in New Issue
Block a user