update README

2025-09-10 10:47:02 +08:00
parent 5088f0b50a
commit ff78032400
603 changed files with 21 additions and 23 deletions
--- a/mlu_370-piper/piper/src/cpp/json.hpp
+++ b/mlu_370-piper/piper/src/cpp/json.hpp
--- a/mlu_370-piper/piper/src/cpp/main.cpp
+++ b/mlu_370-piper/piper/src/cpp/main.cpp
@@ -0,0 +1,561 @@
+#include <chrono>
+#include <condition_variable>
+#include <filesystem>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include <vector>
+
+#ifdef _MSC_VER
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX
+#include <windows.h>
+#endif
+
+#ifdef _WIN32
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+#include <spdlog/sinks/stdout_color_sinks.h>
+#include <spdlog/spdlog.h>
+
+#include "json.hpp"
+#include "piper.hpp"
+
+using namespace std;
+using json = nlohmann::json;
+
+enum OutputType { OUTPUT_FILE, OUTPUT_DIRECTORY, OUTPUT_STDOUT, OUTPUT_RAW }; // Where main() sends audio: one WAV file, timestamp-named WAV files in a directory, WAV on stdout, or raw samples on stdout
+
+// Settings gathered from the command line by parseArgs() and consumed by
+// main(). Optional members that stay empty leave the corresponding value from
+// the voice config untouched.
+struct RunConfig {
+  // Location of the .onnx voice file
+  filesystem::path modelPath;
+
+  // Location of the JSON voice config file
+  filesystem::path modelConfigPath;
+
+  // Kind of output to produce.
+  // Unless overridden, WAV files are written into the current directory.
+  OutputType outputType{OUTPUT_DIRECTORY};
+
+  // Output file or directory (cleared when WAV goes to stdout)
+  optional<filesystem::path> outputPath{filesystem::path(".")};
+
+  // Numerical id of the default speaker (multi-speaker voices)
+  optional<piper::SpeakerId> speakerId;
+
+  // Amount of noise to add during audio generation
+  optional<float> noiseScale;
+
+  // Speaking speed (1 = normal, < 1 is faster, > 1 is slower)
+  optional<float> lengthScale;
+
+  // Variation in phoneme lengths
+  optional<float> noiseW;
+
+  // Seconds of silence appended after each sentence
+  optional<float> sentenceSilenceSeconds;
+
+  // espeak-ng data directory (default is next to the piper executable)
+  optional<filesystem::path> eSpeakDataPath;
+
+  // libtashkeel ort model file
+  // https://github.com/mush42/libtashkeel/
+  optional<filesystem::path> tashkeelModelPath;
+
+  // When true, each stdin line is a JSON object rather than plain text:
+  // {
+  //   "text": str,               (required)
+  //   "speaker_id": int,         (optional)
+  //   "speaker": str,            (optional)
+  //   "output_file": str,        (optional)
+  // }
+  bool jsonInput{false};
+
+  // Extra seconds of silence to insert after individual phonemes
+  optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
+
+  // true to use the CUDA execution provider
+  bool useCuda{false};
+};
+
+void parseArgs(int argc, char *argv[], RunConfig &runConfig); // Fills runConfig from the command line (defined below)
+void rawOutputProc(vector<int16_t> &sharedAudioBuffer, mutex &mutAudio,
+                   condition_variable &cvAudio, bool &audioReady,
+                   bool &audioFinished); // Thread body that writes raw samples to stdout (defined below)
+
+// ----------------------------------------------------------------------------
+
+// Program entry point.
+//
+// Loads the voice selected on the command line, applies command-line
+// overrides to its synthesis settings, then synthesizes text read from
+// standard input. Audio is delivered according to the output type: a
+// timestamp-named WAV file per line in a directory, a single WAV file, WAV on
+// stdout, or raw samples streamed to stdout.
+//
+// Returns EXIT_SUCCESS once standard input is exhausted. Errors (missing
+// output path, unopenable output file, failures inside piper) propagate as
+// exceptions.
+int main(int argc, char *argv[]) {
+  spdlog::set_default_logger(spdlog::stderr_color_st("piper"));
+
+  RunConfig runConfig;
+  parseArgs(argc, argv, runConfig);
+
+#ifdef _WIN32
+  // Required on Windows to show IPA symbols
+  SetConsoleOutputCP(CP_UTF8);
+#endif
+
+  piper::PiperConfig piperConfig;
+  piper::Voice voice;
+
+  spdlog::debug("Loading voice from {} (config={})",
+                runConfig.modelPath.string(),
+                runConfig.modelConfigPath.string());
+
+  auto startTime = chrono::steady_clock::now();
+  loadVoice(piperConfig, runConfig.modelPath.string(),
+            runConfig.modelConfigPath.string(), voice, runConfig.speakerId,
+            runConfig.useCuda);
+  auto endTime = chrono::steady_clock::now();
+  spdlog::info("Loaded voice in {} second(s)",
+               chrono::duration<double>(endTime - startTime).count());
+
+  // Get the path to the piper executable so we can locate espeak-ng-data, etc.
+  // next to it.
+#ifdef _MSC_VER
+  auto exePath = []() {
+    wchar_t moduleFileName[MAX_PATH] = {0};
+    GetModuleFileNameW(nullptr, moduleFileName, std::size(moduleFileName));
+    return filesystem::path(moduleFileName);
+  }();
+#else
+#ifdef __APPLE__
+  auto exePath = []() {
+    char moduleFileName[PATH_MAX] = {0};
+    uint32_t moduleFileNameSize = std::size(moduleFileName);
+    _NSGetExecutablePath(moduleFileName, &moduleFileNameSize);
+    return filesystem::path(moduleFileName);
+  }();
+#else
+  auto exePath = filesystem::canonical("/proc/self/exe");
+#endif
+#endif
+
+  if (voice.phonemizeConfig.phonemeType == piper::eSpeakPhonemes) {
+    spdlog::debug("Voice uses eSpeak phonemes ({})",
+                  voice.phonemizeConfig.eSpeak.voice);
+
+    if (runConfig.eSpeakDataPath) {
+      // User provided path
+      piperConfig.eSpeakDataPath = runConfig.eSpeakDataPath.value().string();
+    } else {
+      // Assume next to piper executable
+      piperConfig.eSpeakDataPath =
+          std::filesystem::absolute(
+              exePath.parent_path().append("espeak-ng-data"))
+              .string();
+
+      spdlog::debug("espeak-ng-data directory is expected at {}",
+                    piperConfig.eSpeakDataPath);
+    }
+  } else {
+    // Not using eSpeak
+    piperConfig.useESpeak = false;
+  }
+
+  // Enable libtashkeel for Arabic
+  if (voice.phonemizeConfig.eSpeak.voice == "ar") {
+    piperConfig.useTashkeel = true;
+    if (runConfig.tashkeelModelPath) {
+      // User provided path
+      piperConfig.tashkeelModelPath =
+          runConfig.tashkeelModelPath.value().string();
+    } else {
+      // Assume next to piper executable
+      piperConfig.tashkeelModelPath =
+          std::filesystem::absolute(
+              exePath.parent_path().append("libtashkeel_model.ort"))
+              .string();
+
+      spdlog::debug("libtashkeel model is expected at {}",
+                    piperConfig.tashkeelModelPath.value());
+    }
+  }
+
+  piper::initialize(piperConfig);
+
+  // Scales: command-line values take precedence over the voice config
+  if (runConfig.noiseScale) {
+    voice.synthesisConfig.noiseScale = runConfig.noiseScale.value();
+  }
+
+  if (runConfig.lengthScale) {
+    voice.synthesisConfig.lengthScale = runConfig.lengthScale.value();
+  }
+
+  if (runConfig.noiseW) {
+    voice.synthesisConfig.noiseW = runConfig.noiseW.value();
+  }
+
+  if (runConfig.sentenceSilenceSeconds) {
+    voice.synthesisConfig.sentenceSilenceSeconds =
+        runConfig.sentenceSilenceSeconds.value();
+  }
+
+  if (runConfig.phonemeSilenceSeconds) {
+    if (!voice.synthesisConfig.phonemeSilenceSeconds) {
+      // Overwrite
+      voice.synthesisConfig.phonemeSilenceSeconds =
+          runConfig.phonemeSilenceSeconds;
+    } else {
+      // Merge: try_emplace keeps entries that the voice config already has
+      for (const auto &[phoneme, silenceSeconds] :
+           *runConfig.phonemeSilenceSeconds) {
+        voice.synthesisConfig.phonemeSilenceSeconds->try_emplace(
+            phoneme, silenceSeconds);
+      }
+    }
+
+  } // if phonemeSilenceSeconds
+
+  if (runConfig.outputType == OUTPUT_DIRECTORY) {
+    runConfig.outputPath = filesystem::absolute(runConfig.outputPath.value());
+    spdlog::info("Output directory: {}", runConfig.outputPath.value().string());
+  }
+
+  string line;
+  piper::SynthesisResult result;
+  while (getline(cin, line)) {
+    auto outputType = runConfig.outputType;
+    // Remembered so a per-line speaker override can be undone afterwards
+    auto speakerId = voice.synthesisConfig.speakerId;
+    std::optional<filesystem::path> maybeOutputPath = runConfig.outputPath;
+
+    if (runConfig.jsonInput) {
+      // Each line is a JSON object
+      json lineRoot = json::parse(line);
+
+      // Text is required
+      line = lineRoot["text"].get<std::string>();
+
+      if (lineRoot.contains("output_file")) {
+        // Override output WAV file path
+        outputType = OUTPUT_FILE;
+        maybeOutputPath =
+            filesystem::path(lineRoot["output_file"].get<std::string>());
+      }
+
+      if (lineRoot.contains("speaker_id")) {
+        // Override speaker id
+        voice.synthesisConfig.speakerId =
+            lineRoot["speaker_id"].get<piper::SpeakerId>();
+      } else if (lineRoot.contains("speaker")) {
+        // Resolve to id using speaker id map
+        auto speakerName = lineRoot["speaker"].get<std::string>();
+        if ((voice.modelConfig.speakerIdMap) &&
+            (voice.modelConfig.speakerIdMap->count(speakerName) > 0)) {
+          voice.synthesisConfig.speakerId =
+              (*voice.modelConfig.speakerIdMap)[speakerName];
+        } else {
+          spdlog::warn("No speaker named: {}", speakerName);
+        }
+      }
+    }
+
+    // Timestamp is used for path to output WAV file
+    const auto now = chrono::system_clock::now();
+    const auto timestamp =
+        chrono::duration_cast<chrono::nanoseconds>(now.time_since_epoch())
+            .count();
+
+    if (outputType == OUTPUT_DIRECTORY) {
+      // Generate path using timestamp
+      stringstream outputName;
+      outputName << timestamp << ".wav";
+      filesystem::path outputPath = runConfig.outputPath.value();
+      outputPath.append(outputName.str());
+
+      // Output audio to automatically-named WAV file in a directory
+      ofstream audioFile(outputPath.string(), ios::binary);
+      if (!audioFile) {
+        // Fail loudly instead of synthesizing into a stream that discards data
+        throw runtime_error("Failed to open output file: " +
+                            outputPath.string());
+      }
+
+      piper::textToWavFile(piperConfig, voice, line, audioFile, result);
+      cout << outputPath.string() << endl;
+    } else if (outputType == OUTPUT_FILE) {
+      if (!maybeOutputPath || maybeOutputPath->empty()) {
+        throw runtime_error("No output path provided");
+      }
+
+      filesystem::path outputPath = maybeOutputPath.value();
+
+      if (!runConfig.jsonInput) {
+        // Read all of standard input before synthesizing.
+        // Otherwise, we would overwrite the output file for each line.
+        stringstream text;
+        text << line;
+        while (getline(cin, line)) {
+          text << " " << line;
+        }
+
+        line = text.str();
+      }
+
+      // Output audio to WAV file
+      ofstream audioFile(outputPath.string(), ios::binary);
+      if (!audioFile) {
+        // Fail loudly instead of synthesizing into a stream that discards data
+        throw runtime_error("Failed to open output file: " +
+                            outputPath.string());
+      }
+
+      piper::textToWavFile(piperConfig, voice, line, audioFile, result);
+      cout << outputPath.string() << endl;
+    } else if (outputType == OUTPUT_STDOUT) {
+      // Output WAV to stdout
+      piper::textToWavFile(piperConfig, voice, line, cout, result);
+    } else if (outputType == OUTPUT_RAW) {
+      // Raw output to stdout
+      mutex mutAudio;
+      condition_variable cvAudio;
+      bool audioReady = false;
+      bool audioFinished = false;
+      vector<int16_t> audioBuffer;
+      vector<int16_t> sharedAudioBuffer;
+
+#ifdef _WIN32
+      // Needed on Windows to avoid terminal conversions
+      setmode(fileno(stdout), O_BINARY);
+      setmode(fileno(stdin), O_BINARY);
+#endif
+
+      thread rawOutputThread(rawOutputProc, ref(sharedAudioBuffer),
+                             ref(mutAudio), ref(cvAudio), ref(audioReady),
+                             ref(audioFinished));
+      auto audioCallback = [&audioBuffer, &sharedAudioBuffer, &mutAudio,
+                            &cvAudio, &audioReady]() {
+        // Signal thread that audio is ready
+        {
+          unique_lock lockAudio(mutAudio);
+          copy(audioBuffer.begin(), audioBuffer.end(),
+               back_inserter(sharedAudioBuffer));
+          audioReady = true;
+          cvAudio.notify_one();
+        }
+      };
+
+      // Signal thread that there is no more audio
+      auto signalAudioFinished = [&mutAudio, &cvAudio, &audioReady,
+                                  &audioFinished]() {
+        unique_lock lockAudio(mutAudio);
+        audioReady = true;
+        audioFinished = true;
+        cvAudio.notify_one();
+      };
+
+      try {
+        piper::textToAudio(piperConfig, voice, line, audioBuffer, result,
+                           audioCallback);
+      } catch (...) {
+        // Destroying a joinable std::thread calls std::terminate, so the
+        // writer thread must be stopped and joined before the exception
+        // leaves this scope.
+        signalAudioFinished();
+        rawOutputThread.join();
+        throw;
+      }
+
+      signalAudioFinished();
+
+      // Wait for audio output to finish
+      spdlog::info("Waiting for audio to finish playing...");
+      rawOutputThread.join();
+    }
+
+    spdlog::info("Real-time factor: {} (infer={} sec, audio={} sec)",
+                 result.realTimeFactor, result.inferSeconds,
+                 result.audioSeconds);
+
+    // Restore config (--json-input)
+    voice.synthesisConfig.speakerId = speakerId;
+
+  } // for each line
+
+  piper::terminate(piperConfig);
+
+  return EXIT_SUCCESS;
+}
+
+// ----------------------------------------------------------------------------
+
+// Thread body for raw output: repeatedly waits until audioReady is set, moves
+// whatever is in sharedAudioBuffer into a private buffer while holding
+// mutAudio, then writes those samples to stdout as bytes with the lock
+// released. Exits once audioFinished is set and the shared buffer is empty.
+void rawOutputProc(vector<int16_t> &sharedAudioBuffer, mutex &mutAudio,
+                   condition_variable &cvAudio, bool &audioReady,
+                   bool &audioFinished) {
+  vector<int16_t> pendingSamples;
+  for (;;) {
+    {
+      unique_lock lockAudio{mutAudio};
+      cvAudio.wait(lockAudio, [&audioReady] { return audioReady; });
+
+      const bool nothingQueued = sharedAudioBuffer.empty();
+      if (nothingQueued && audioFinished) {
+        break;
+      }
+
+      // Take over the queued samples so the producer can keep appending
+      pendingSamples.insert(pendingSamples.end(), sharedAudioBuffer.begin(),
+                            sharedAudioBuffer.end());
+      sharedAudioBuffer.clear();
+
+      // Once finished, leave the flag set so the next wait returns at once
+      if (!audioFinished) {
+        audioReady = false;
+      }
+    }
+
+    const auto byteCount = sizeof(int16_t) * pendingSamples.size();
+    cout.write(reinterpret_cast<const char *>(pendingSamples.data()),
+               byteCount);
+    cout.flush();
+    pendingSamples.clear();
+  }
+
+} // rawOutputProc
+
+// ----------------------------------------------------------------------------
+
+// Print the command-line help text to stderr.
+//
+// argv[0] is used as the program name in the usage line. The listing covers
+// the options handled by parseArgs(), including --phoneme_silence and
+// --version.
+void printUsage(char *argv[]) {
+  cerr << endl;
+  cerr << "usage: " << argv[0] << " [options]" << endl;
+  cerr << endl;
+  cerr << "options:" << endl;
+  cerr << "   -h        --help              show this message and exit" << endl;
+  cerr << "   -m  FILE  --model       FILE  path to onnx model file" << endl;
+  cerr << "   -c  FILE  --config      FILE  path to model config file "
+          "(default: model path + .json)"
+       << endl;
+  cerr << "   -f  FILE  --output_file FILE  path to output WAV file ('-' for "
+          "stdout)"
+       << endl;
+  cerr << "   -d  DIR   --output_dir  DIR   path to output directory (default: "
+          "cwd)"
+       << endl;
+  cerr << "   --output_raw                  output raw audio to stdout as it "
+          "becomes available"
+       << endl;
+  cerr << "   -s  NUM   --speaker     NUM   id of speaker (default: 0)" << endl;
+  cerr << "   --noise_scale           NUM   generator noise (default: 0.667)"
+       << endl;
+  cerr << "   --length_scale          NUM   phoneme length (default: 1.0)"
+       << endl;
+  cerr << "   --noise_w               NUM   phoneme width noise (default: 0.8)"
+       << endl;
+  cerr << "   --sentence_silence      NUM   seconds of silence after each "
+          "sentence (default: 0.2)"
+       << endl;
+  cerr << "   --phoneme_silence   CHAR NUM  seconds of extra silence after a "
+          "phoneme (may be repeated)"
+       << endl;
+  cerr << "   --espeak_data           DIR   path to espeak-ng data directory"
+       << endl;
+  cerr << "   --tashkeel_model        FILE  path to libtashkeel onnx model "
+          "(arabic)"
+       << endl;
+  cerr << "   --json-input                  stdin input is lines of JSON "
+          "instead of plain text"
+       << endl;
+  cerr << "   --use-cuda                    use CUDA execution provider"
+       << endl;
+  cerr << "   --version                     print version and exit" << endl;
+  cerr << "   --debug                       print DEBUG messages to the console"
+       << endl;
+  cerr << "   -q       --quiet              disable logging" << endl;
+  cerr << endl;
+}
+
+// Require that the option at index argi is followed by a value.
+// When argv has no element after argi, prints the usage text and exits the
+// process with status 0.
+void ensureArg(int argc, char *argv[], int argi) {
+  const bool hasValue = (argi + 1) < argc;
+  if (hasValue) {
+    return;
+  }
+
+  printUsage(argv);
+  exit(0);
+}
+
+// Parse command-line arguments into runConfig.
+//
+// Most options accept both an underscore and a dash spelling (for example
+// --output_dir and --output-dir). Unrecognized arguments are ignored.
+// --help and --version print and exit; an option missing its value prints the
+// usage text and exits (see ensureArg).
+//
+// After parsing, the model file and the model config file are opened to check
+// that they can be read. When no config path was given it defaults to the
+// model path with ".json" appended.
+//
+// Throws runtime_error when the model or its config cannot be opened.
+void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
+  optional<filesystem::path> modelConfigPath;
+
+  for (int i = 1; i < argc; i++) {
+    std::string arg = argv[i];
+
+    if (arg == "-m" || arg == "--model") {
+      ensureArg(argc, argv, i);
+      runConfig.modelPath = filesystem::path(argv[++i]);
+    } else if (arg == "-c" || arg == "--config") {
+      ensureArg(argc, argv, i);
+      modelConfigPath = filesystem::path(argv[++i]);
+    } else if (arg == "-f" || arg == "--output_file" ||
+               arg == "--output-file") {
+      ensureArg(argc, argv, i);
+      std::string filePath = argv[++i];
+      if (filePath == "-") {
+        // "-" means WAV data goes to stdout, so there is no path
+        runConfig.outputType = OUTPUT_STDOUT;
+        runConfig.outputPath = nullopt;
+      } else {
+        runConfig.outputType = OUTPUT_FILE;
+        runConfig.outputPath = filesystem::path(filePath);
+      }
+    } else if (arg == "-d" || arg == "--output_dir" ||
+               arg == "--output-dir") {
+      ensureArg(argc, argv, i);
+      runConfig.outputType = OUTPUT_DIRECTORY;
+      runConfig.outputPath = filesystem::path(argv[++i]);
+    } else if (arg == "--output_raw" || arg == "--output-raw") {
+      runConfig.outputType = OUTPUT_RAW;
+    } else if (arg == "-s" || arg == "--speaker") {
+      ensureArg(argc, argv, i);
+      runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]);
+    } else if (arg == "--noise_scale" || arg == "--noise-scale") {
+      ensureArg(argc, argv, i);
+      runConfig.noiseScale = stof(argv[++i]);
+    } else if (arg == "--length_scale" || arg == "--length-scale") {
+      ensureArg(argc, argv, i);
+      runConfig.lengthScale = stof(argv[++i]);
+    } else if (arg == "--noise_w" || arg == "--noise-w") {
+      ensureArg(argc, argv, i);
+      runConfig.noiseW = stof(argv[++i]);
+    } else if (arg == "--sentence_silence" || arg == "--sentence-silence") {
+      ensureArg(argc, argv, i);
+      runConfig.sentenceSilenceSeconds = stof(argv[++i]);
+    } else if (arg == "--phoneme_silence" || arg == "--phoneme-silence") {
+      // Takes two values: the phoneme and its silence in seconds
+      ensureArg(argc, argv, i);
+      ensureArg(argc, argv, i + 1);
+      auto phonemeStr = std::string(argv[++i]);
+      if (!piper::isSingleCodepoint(phonemeStr)) {
+        std::cerr << "Phoneme '" << phonemeStr
+                  << "' is not a single codepoint (--phoneme_silence)"
+                  << std::endl;
+        exit(1);
+      }
+
+      if (!runConfig.phonemeSilenceSeconds) {
+        runConfig.phonemeSilenceSeconds.emplace();
+      }
+
+      auto phoneme = piper::getCodepoint(phonemeStr);
+      (*runConfig.phonemeSilenceSeconds)[phoneme] = stof(argv[++i]);
+    } else if (arg == "--espeak_data" || arg == "--espeak-data") {
+      ensureArg(argc, argv, i);
+      runConfig.eSpeakDataPath = filesystem::path(argv[++i]);
+    } else if (arg == "--tashkeel_model" || arg == "--tashkeel-model") {
+      ensureArg(argc, argv, i);
+      runConfig.tashkeelModelPath = filesystem::path(argv[++i]);
+    } else if (arg == "--json_input" || arg == "--json-input") {
+      runConfig.jsonInput = true;
+    } else if (arg == "--use_cuda" || arg == "--use-cuda") {
+      runConfig.useCuda = true;
+    } else if (arg == "--version") {
+      std::cout << piper::getVersion() << std::endl;
+      exit(0);
+    } else if (arg == "--debug") {
+      // Set DEBUG logging
+      spdlog::set_level(spdlog::level::debug);
+    } else if (arg == "-q" || arg == "--quiet") {
+      // Disable logging
+      spdlog::set_level(spdlog::level::off);
+    } else if (arg == "-h" || arg == "--help") {
+      printUsage(argv);
+      exit(0);
+    }
+  }
+
+  // Verify model file exists
+  ifstream modelFile(runConfig.modelPath.c_str(), ios::binary);
+  if (!modelFile.good()) {
+    throw runtime_error("Model file doesn't exist");
+  }
+
+  if (!modelConfigPath) {
+    runConfig.modelConfigPath =
+        filesystem::path(runConfig.modelPath.string() + ".json");
+  } else {
+    runConfig.modelConfigPath = modelConfigPath.value();
+  }
+
+  // Verify model config exists
+  ifstream modelConfigFile(runConfig.modelConfigPath.c_str());
+  if (!modelConfigFile.good()) {
+    throw runtime_error("Model config doesn't exist");
+  }
+}
--- a/mlu_370-piper/piper/src/cpp/piper.cpp
+++ b/mlu_370-piper/piper/src/cpp/piper.cpp
@@ -0,0 +1,636 @@
+#include <array>
+#include <chrono>
+#include <filesystem>
+#include <fstream>
+#include <limits>
+#include <sstream>
+#include <stdexcept>
+
+#include <espeak-ng/speak_lib.h>
+#include <onnxruntime_cxx_api.h>
+#include <spdlog/spdlog.h>
+
+#include "json.hpp"
+#include "piper.hpp"
+#include "utf8.h"
+#include "wavfile.hpp"
+
+namespace piper {
+
+#ifdef _PIPER_VERSION
+// https://stackoverflow.com/questions/47346133/how-to-use-a-define-inside-a-format-string
+#define _STR(x) #x
+#define STR(x) _STR(x)
+const std::string VERSION = STR(_PIPER_VERSION); // Text of the _PIPER_VERSION macro, stringized via the two-level STR/_STR expansion
+#else
+const std::string VERSION = ""; // Empty when _PIPER_VERSION is not defined
+#endif
+
+// Maximum value for 16-bit signed WAV sample
+const float MAX_WAV_VALUE = 32767.0f;
+
+const std::string instanceName{"piper"}; // Passed to the Ort::Env constructor in loadModel
+
+// Returns a copy of the VERSION string (empty when no version was compiled in)
+std::string getVersion() {
+  return VERSION;
+}
+
+// Reports whether s holds exactly one UTF-8 encoded codepoint
+bool isSingleCodepoint(std::string s) {
+  const auto codepointCount = utf8::distance(s.begin(), s.end());
+  return codepointCount == 1;
+}
+
+// Decode and return the codepoint at the start of a UTF-8 string
+Phoneme getCodepoint(std::string s) {
+  auto first = s.begin();
+  utf8::iterator decoder(first, first, s.end());
+  return *decoder;
+}
+
+// Load JSON config information for phonemization.
+//
+// Reads the optional "espeak", "phoneme_type", "phoneme_id_map" and
+// "phoneme_map" sections of configRoot into phonemizeConfig. Sections that
+// are absent leave the corresponding fields untouched.
+//
+// Throws std::runtime_error when a phoneme key or mapped phoneme is not a
+// single codepoint.
+void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) {
+  // {
+  //     "espeak": {
+  //         "voice": "<language code>"
+  //     },
+  //     "phoneme_type": "<espeak or text>",
+  //     "phoneme_map": {
+  //         "<from phoneme>": ["<to phoneme 1>", "<to phoneme 2>", ...]
+  //     },
+  //     "phoneme_id_map": {
+  //         "<phoneme>": [<id1>, <id2>, ...]
+  //     }
+  // }
+
+  // Sub-objects are bound by const reference: copying a json value
+  // deep-copies the whole subtree, which is wasted work for read-only access.
+
+  if (configRoot.contains("espeak")) {
+    const auto &espeakValue = configRoot["espeak"];
+    if (espeakValue.contains("voice")) {
+      phonemizeConfig.eSpeak.voice = espeakValue["voice"].get<std::string>();
+    }
+  }
+
+  if (configRoot.contains("phoneme_type")) {
+    auto phonemeTypeStr = configRoot["phoneme_type"].get<std::string>();
+    // Only "text" is acted on; any other value keeps the current type
+    if (phonemeTypeStr == "text") {
+      phonemizeConfig.phonemeType = TextPhonemes;
+    }
+  }
+
+  // phoneme to [id] map
+  // Maps phonemes to one or more phoneme ids (required).
+  if (configRoot.contains("phoneme_id_map")) {
+    const auto &phonemeIdMapValue = configRoot["phoneme_id_map"];
+    for (auto &fromPhonemeItem : phonemeIdMapValue.items()) {
+      std::string fromPhoneme = fromPhonemeItem.key();
+      if (!isSingleCodepoint(fromPhoneme)) {
+        // Include the offending ids in the log to help locate the entry
+        std::stringstream idsStr;
+        for (auto &toIdValue : fromPhonemeItem.value()) {
+          PhonemeId toId = toIdValue.get<PhonemeId>();
+          idsStr << toId << ",";
+        }
+
+        spdlog::error("\"{}\" is not a single codepoint (ids={})", fromPhoneme,
+                      idsStr.str());
+        throw std::runtime_error(
+            "Phonemes must be one codepoint (phoneme id map)");
+      }
+
+      auto fromCodepoint = getCodepoint(fromPhoneme);
+      for (auto &toIdValue : fromPhonemeItem.value()) {
+        PhonemeId toId = toIdValue.get<PhonemeId>();
+        phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId);
+      }
+    }
+  }
+
+  // phoneme to [phoneme] map
+  // Maps phonemes to one or more other phonemes (not normally used).
+  if (configRoot.contains("phoneme_map")) {
+    if (!phonemizeConfig.phonemeMap) {
+      phonemizeConfig.phonemeMap.emplace();
+    }
+
+    const auto &phonemeMapValue = configRoot["phoneme_map"];
+    for (auto &fromPhonemeItem : phonemeMapValue.items()) {
+      std::string fromPhoneme = fromPhonemeItem.key();
+      if (!isSingleCodepoint(fromPhoneme)) {
+        spdlog::error("\"{}\" is not a single codepoint", fromPhoneme);
+        throw std::runtime_error(
+            "Phonemes must be one codepoint (phoneme map)");
+      }
+
+      auto fromCodepoint = getCodepoint(fromPhoneme);
+      for (auto &toPhonemeValue : fromPhonemeItem.value()) {
+        std::string toPhoneme = toPhonemeValue.get<std::string>();
+        if (!isSingleCodepoint(toPhoneme)) {
+          throw std::runtime_error(
+              "Phonemes must be one codepoint (phoneme map)");
+        }
+
+        auto toCodepoint = getCodepoint(toPhoneme);
+        (*phonemizeConfig.phonemeMap)[fromCodepoint].push_back(toCodepoint);
+      }
+    }
+  }
+
+} /* parsePhonemizeConfig */
+
+// Load JSON config for audio synthesis.
+//
+// Reads the optional "audio" and "inference" sections of configRoot into
+// synthesisConfig. Keys that are absent leave the corresponding fields
+// untouched. A "phoneme_silence" object replaces any existing
+// phoneme-silence map.
+//
+// Throws std::runtime_error when a "phoneme_silence" key is not a single
+// codepoint.
+void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) {
+  // {
+  //     "audio": {
+  //         "sample_rate": 22050
+  //     },
+  //     "inference": {
+  //         "noise_scale": 0.667,
+  //         "length_scale": 1,
+  //         "noise_w": 0.8,
+  //         "phoneme_silence": {
+  //           "<phoneme>": <seconds of silence>,
+  //           ...
+  //         }
+  //     }
+  // }
+
+  // Sub-objects are bound by const reference: copying a json value
+  // deep-copies the whole subtree, which is wasted work for read-only access.
+
+  if (configRoot.contains("audio")) {
+    const auto &audioValue = configRoot["audio"];
+    if (audioValue.contains("sample_rate")) {
+      // Default sample rate is 22050 Hz
+      synthesisConfig.sampleRate = audioValue.value("sample_rate", 22050);
+    }
+  }
+
+  if (configRoot.contains("inference")) {
+    // Overrides default inference settings
+    const auto &inferenceValue = configRoot["inference"];
+    if (inferenceValue.contains("noise_scale")) {
+      synthesisConfig.noiseScale = inferenceValue.value("noise_scale", 0.667f);
+    }
+
+    if (inferenceValue.contains("length_scale")) {
+      synthesisConfig.lengthScale = inferenceValue.value("length_scale", 1.0f);
+    }
+
+    if (inferenceValue.contains("noise_w")) {
+      synthesisConfig.noiseW = inferenceValue.value("noise_w", 0.8f);
+    }
+
+    if (inferenceValue.contains("phoneme_silence")) {
+      // phoneme -> seconds of silence to add after
+      synthesisConfig.phonemeSilenceSeconds.emplace();
+      const auto &phonemeSilenceValue = inferenceValue["phoneme_silence"];
+      for (auto &phonemeItem : phonemeSilenceValue.items()) {
+        std::string phonemeStr = phonemeItem.key();
+        if (!isSingleCodepoint(phonemeStr)) {
+          spdlog::error("\"{}\" is not a single codepoint", phonemeStr);
+          throw std::runtime_error(
+              "Phonemes must be one codepoint (phoneme silence)");
+        }
+
+        auto phoneme = getCodepoint(phonemeStr);
+        (*synthesisConfig.phonemeSilenceSeconds)[phoneme] =
+            phonemeItem.value().get<float>();
+      }
+
+    } // if phoneme_silence
+
+  } // if inference
+
+} /* parseSynthesisConfig */
+
+// Load model-level settings from the voice config.
+//
+// Reads "num_speakers" (accessed unconditionally) and, when present, copies
+// every entry of "speaker_id_map" (speaker name -> id) into modelConfig.
+void parseModelConfig(json &configRoot, ModelConfig &modelConfig) {
+
+  modelConfig.numSpeakers = configRoot["num_speakers"].get<SpeakerId>();
+
+  if (configRoot.contains("speaker_id_map")) {
+    if (!modelConfig.speakerIdMap) {
+      modelConfig.speakerIdMap.emplace();
+    }
+
+    // Bound by const reference to avoid deep-copying the whole map
+    const auto &speakerIdMapValue = configRoot["speaker_id_map"];
+    for (auto &speakerItem : speakerIdMapValue.items()) {
+      std::string speakerName = speakerItem.key();
+      (*modelConfig.speakerIdMap)[speakerName] =
+          speakerItem.value().get<SpeakerId>();
+    }
+  }
+
+} /* parseModelConfig */
+
+// Prepare the optional back ends selected in config: espeak-ng when
+// config.useESpeak is set, and the libtashkeel model when config.useTashkeel
+// is set.
+//
+// Throws std::runtime_error if espeak-ng reports a negative result or if
+// libtashkeel is requested without a model path.
+void initialize(PiperConfig &config) {
+  if (config.useESpeak) {
+    // Set up espeak-ng for calling espeak_TextToPhonemesWithTerminator
+    // See: https://github.com/rhasspy/espeak-ng
+    spdlog::debug("Initializing eSpeak");
+    const char *dataPath = config.eSpeakDataPath.c_str();
+    const int status = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
+                                         /*buflength*/ 0,
+                                         /*path*/ dataPath,
+                                         /*options*/ 0);
+    if (status < 0) {
+      throw std::runtime_error("Failed to initialize eSpeak-ng");
+    }
+
+    spdlog::debug("Initialized eSpeak");
+  }
+
+  // libtashkeel needs its onnx model loaded up front
+  // https://github.com/mush42/libtashkeel/
+  if (config.useTashkeel) {
+    spdlog::debug("Using libtashkeel for diacritization");
+    if (!config.tashkeelModelPath.has_value()) {
+      throw std::runtime_error("No path to libtashkeel model");
+    }
+
+    auto &tashkeelPath = *config.tashkeelModelPath;
+    spdlog::debug("Loading libtashkeel model from {}", tashkeelPath);
+    config.tashkeelState = std::make_unique<tashkeel::State>();
+    tashkeel::tashkeel_load(tashkeelPath, *config.tashkeelState);
+    spdlog::debug("Initialized libtashkeel");
+  }
+
+  spdlog::info("Initialized piper");
+}
+
+// Shut down what initialize() set up: espeak-ng is terminated when
+// config.useESpeak is set.
+void terminate(PiperConfig &config) {
+  const bool eSpeakInUse = config.useESpeak;
+  if (eSpeakInUse) {
+    // Release espeak-ng
+    spdlog::debug("Terminating eSpeak");
+    espeak_Terminate();
+    spdlog::debug("Terminated eSpeak");
+  }
+
+  spdlog::info("Terminated piper");
+}
+
+// Create the onnxruntime environment and session for the model at modelPath.
+//
+// session.env, session.options and session.onnx are (re)initialized. When
+// useCuda is true the CUDA execution provider is appended to the session
+// options. The time taken to create the session is logged at debug level.
+void loadModel(std::string modelPath, ModelSession &session, bool useCuda) {
+  spdlog::debug("Loading onnx model from {}", modelPath);
+  session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
+                         instanceName.c_str());
+  session.env.DisableTelemetryEvents();
+
+  if (useCuda) {
+    // Use CUDA provider
+    OrtCUDAProviderOptions cuda_options{};
+    cuda_options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
+    session.options.AppendExecutionProvider_CUDA(cuda_options);
+  }
+
+  // Slows down performance by ~2x
+  // session.options.SetIntraOpNumThreads(1);
+
+  // Roughly doubles load time for no visible inference benefit
+  // session.options.SetGraphOptimizationLevel(
+  //     GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
+
+  session.options.SetGraphOptimizationLevel(
+      GraphOptimizationLevel::ORT_DISABLE_ALL);
+
+  // Slows down performance very slightly
+  // session.options.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
+
+  session.options.DisableCpuMemArena();
+  session.options.DisableMemPattern();
+  session.options.DisableProfiling();
+
+  auto startTime = std::chrono::steady_clock::now();
+
+#ifdef _WIN32
+  // Ort::Session takes a wide-character path on Windows. Convert through
+  // std::filesystem::path so the narrow string is decoded with the native
+  // encoding; widening byte-by-byte corrupts any non-ASCII character.
+  const std::wstring modelPathW = std::filesystem::path(modelPath).wstring();
+  auto modelPathStr = modelPathW.c_str();
+#else
+  auto modelPathStr = modelPath.c_str();
+#endif
+
+  session.onnx = Ort::Session(session.env, modelPathStr, session.options);
+
+  auto endTime = std::chrono::steady_clock::now();
+  spdlog::debug("Loaded onnx model in {} second(s)",
+                std::chrono::duration<double>(endTime - startTime).count());
+}
+
+// Populate voice from a JSON config file and an Onnx model file.
+//
+// The config at modelConfigPath is parsed into voice.configRoot and then into
+// the phonemize, synthesis and model settings. For voices with more than one
+// speaker the synthesis speaker is set to speakerId, or 0 when none is given.
+// Finally the model at modelPath is loaded into voice.session.
+void loadVoice(PiperConfig &config, std::string modelPath,
+               std::string modelConfigPath, Voice &voice,
+               std::optional<SpeakerId> &speakerId, bool useCuda) {
+  spdlog::debug("Parsing voice config at {}", modelConfigPath);
+  std::ifstream configStream(modelConfigPath);
+  voice.configRoot = json::parse(configStream);
+
+  parsePhonemizeConfig(voice.configRoot, voice.phonemizeConfig);
+  parseSynthesisConfig(voice.configRoot, voice.synthesisConfig);
+  parseModelConfig(voice.configRoot, voice.modelConfig);
+
+  const bool isMultiSpeaker = voice.modelConfig.numSpeakers > 1;
+  if (isMultiSpeaker) {
+    // Use the requested speaker, falling back to speaker 0
+    voice.synthesisConfig.speakerId = speakerId.value_or(0);
+  }
+
+  spdlog::debug("Voice contains {} speaker(s)", voice.modelConfig.numSpeakers);
+
+  loadModel(modelPath, voice.session, useCuda);
+
+} /* loadVoice */
+
+// Phoneme ids to WAV audio
+//
+// Runs the voice model on phonemeIds and appends the generated samples,
+// converted to 16-bit integers, to audioBuffer.
+//
+// phonemeIds      ids passed to the model as a [1, N] tensor
+// synthesisConfig supplies the three "scales" values, the optional speaker id
+//                 and the sample rate used to compute audioSeconds
+// session         holds the ONNX Runtime session that is run
+// audioBuffer     output; samples are appended, existing contents are kept
+// result          output; inferSeconds, audioSeconds and realTimeFactor are
+//                 overwritten
+//
+// Throws std::runtime_error if the model does not return exactly one tensor
+// or if that tensor reports an empty shape.
+void synthesize(std::vector<PhonemeId> &phonemeIds,
+                SynthesisConfig &synthesisConfig, ModelSession &session,
+                std::vector<int16_t> &audioBuffer, SynthesisResult &result) {
+  spdlog::debug("Synthesizing audio for {} phoneme id(s)", phonemeIds.size());
+
+  auto memoryInfo = Ort::MemoryInfo::CreateCpu(
+      OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
+
+  // Backing storage for the input tensors. The tensors below are created from
+  // pointers into these vectors, so the vectors must stay alive until Run()
+  // has returned. That is why speakerId lives here and not inside the "if".
+  std::vector<int64_t> phonemeIdLengths{
+      static_cast<int64_t>(phonemeIds.size())};
+  std::vector<float> scales{synthesisConfig.noiseScale,
+                            synthesisConfig.lengthScale,
+                            synthesisConfig.noiseW};
+  std::vector<int64_t> speakerId{
+      static_cast<int64_t>(synthesisConfig.speakerId.value_or(0))};
+
+  std::vector<int64_t> phonemeIdsShape{
+      1, static_cast<int64_t>(phonemeIds.size())};
+  std::vector<int64_t> phonemeIdLengthsShape{
+      static_cast<int64_t>(phonemeIdLengths.size())};
+  std::vector<int64_t> scalesShape{static_cast<int64_t>(scales.size())};
+  std::vector<int64_t> speakerIdShape{static_cast<int64_t>(speakerId.size())};
+
+  std::vector<Ort::Value> inputTensors;
+  inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+      memoryInfo, phonemeIds.data(), phonemeIds.size(), phonemeIdsShape.data(),
+      phonemeIdsShape.size()));
+  inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+      memoryInfo, phonemeIdLengths.data(), phonemeIdLengths.size(),
+      phonemeIdLengthsShape.data(), phonemeIdLengthsShape.size()));
+  inputTensors.push_back(
+      Ort::Value::CreateTensor<float>(memoryInfo, scales.data(), scales.size(),
+                                      scalesShape.data(), scalesShape.size()));
+
+  // The speaker id tensor is only passed when a speaker id is configured
+  if (synthesisConfig.speakerId) {
+    inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+        memoryInfo, speakerId.data(), speakerId.size(), speakerIdShape.data(),
+        speakerIdShape.size()));
+  }
+
+  // From export_onnx.py
+  // Only the first inputTensors.size() names are used, so "sid" is ignored
+  // when no speaker id tensor was added.
+  std::array<const char *, 4> inputNames = {"input", "input_lengths", "scales",
+                                            "sid"};
+  std::array<const char *, 1> outputNames = {"output"};
+
+  // Infer
+  auto startTime = std::chrono::steady_clock::now();
+  auto outputTensors = session.onnx.Run(
+      Ort::RunOptions{nullptr}, inputNames.data(), inputTensors.data(),
+      inputTensors.size(), outputNames.data(), outputNames.size());
+  auto endTime = std::chrono::steady_clock::now();
+
+  if ((outputTensors.size() != 1) || (!outputTensors.front().IsTensor())) {
+    throw std::runtime_error("Invalid output tensors");
+  }
+  auto inferDuration = std::chrono::duration<double>(endTime - startTime);
+  result.inferSeconds = inferDuration.count();
+
+  const float *audio = outputTensors.front().GetTensorData<float>();
+  auto audioShape =
+      outputTensors.front().GetTensorTypeAndShapeInfo().GetShape();
+  if (audioShape.empty()) {
+    // Indexing the last dimension below would be out of bounds
+    throw std::runtime_error("Output tensor has no dimensions");
+  }
+
+  // The last dimension is taken as the number of samples
+  int64_t audioCount = audioShape.back();
+
+  result.audioSeconds = (double)audioCount / (double)synthesisConfig.sampleRate;
+  result.realTimeFactor = 0.0;
+  if (result.audioSeconds > 0) {
+    result.realTimeFactor = result.inferSeconds / result.audioSeconds;
+  }
+  spdlog::debug("Synthesized {} second(s) of audio in {} second(s)",
+                result.audioSeconds, result.inferSeconds);
+
+  // Get max audio value for scaling.
+  // The magnitude is computed by hand: an unqualified abs() may resolve to
+  // the C int overload and silently truncate every sample to 0 or 1.
+  float maxAudioValue = 0.01f;
+  for (int64_t i = 0; i < audioCount; i++) {
+    const float magnitude = (audio[i] < 0.0f) ? -audio[i] : audio[i];
+    if (magnitude > maxAudioValue) {
+      maxAudioValue = magnitude;
+    }
+  }
+
+  // Only avoids reallocation when the buffer holds fewer than audioCount
+  // samples on entry.
+  audioBuffer.reserve(audioCount);
+
+  // Scale audio to fill range and convert to int16
+  float audioScale = (MAX_WAV_VALUE / std::max(0.01f, maxAudioValue));
+  for (int64_t i = 0; i < audioCount; i++) {
+    int16_t intAudioValue = static_cast<int16_t>(
+        std::clamp(audio[i] * audioScale,
+                   static_cast<float>(std::numeric_limits<int16_t>::min()),
+                   static_cast<float>(std::numeric_limits<int16_t>::max())));
+
+    audioBuffer.push_back(intAudioValue);
+  }
+
+  // No manual clean up: the Ort::Value objects in inputTensors and
+  // outputTensors release their ONNX Runtime handles when the vectors are
+  // destroyed, which also covers the exception paths above.
+}
+
+// ----------------------------------------------------------------------------
+
+// Phonemize text and synthesize audio
+//
+// The text is (optionally) diacritized with libtashkeel, phonemized into
+// sentences, and each sentence is synthesized independently into audioBuffer.
+//
+// config        useTashkeel / tashkeelState are read here
+// voice         phonemizer settings, synthesis settings and model session
+// text          text to speak
+// audioBuffer   samples are appended; when audioCallback is set the buffer is
+//               cleared after each sentence
+// result        audioSeconds and inferSeconds are ADDED to, so the caller
+//               must pass a zero-initialized result; realTimeFactor is
+//               recomputed from the totals
+// audioCallback if non-empty, called once per sentence after its audio (and
+//               trailing silence) has been appended
+//
+// Throws std::runtime_error if useTashkeel is set but no model is loaded.
+void textToAudio(PiperConfig &config, Voice &voice, std::string text,
+                 std::vector<int16_t> &audioBuffer, SynthesisResult &result,
+                 const std::function<void()> &audioCallback) {
+
+  // Converts a duration to a sample count. Non-positive (or NaN) durations
+  // give 0 instead of being cast to an unsigned type.
+  auto secondsToSamples = [&voice](float seconds) -> std::size_t {
+    if (!(seconds > 0)) {
+      return 0;
+    }
+
+    return (std::size_t)(seconds * voice.synthesisConfig.sampleRate *
+                         voice.synthesisConfig.channels);
+  };
+
+  const std::size_t sentenceSilenceSamples =
+      secondsToSamples(voice.synthesisConfig.sentenceSilenceSeconds);
+
+  if (config.useTashkeel) {
+    if (!config.tashkeelState) {
+      throw std::runtime_error("Tashkeel model is not loaded");
+    }
+
+    spdlog::debug("Diacritizing text with libtashkeel: {}", text);
+    text = tashkeel::tashkeel_run(text, *config.tashkeelState);
+  }
+
+  // Phonemes for each sentence
+  spdlog::debug("Phonemizing text: {}", text);
+  std::vector<std::vector<Phoneme>> phonemes;
+
+  if (voice.phonemizeConfig.phonemeType == eSpeakPhonemes) {
+    // Use espeak-ng for phonemization
+    eSpeakPhonemeConfig eSpeakConfig;
+    eSpeakConfig.voice = voice.phonemizeConfig.eSpeak.voice;
+    phonemize_eSpeak(text, eSpeakConfig, phonemes);
+  } else {
+    // Use UTF-8 codepoints as "phonemes"
+    CodepointsPhonemeConfig codepointsConfig;
+    phonemize_codepoints(text, codepointsConfig, phonemes);
+  }
+
+  // Use phoneme/id map from config.
+  // Copied once and shared by every sentence instead of once per sentence.
+  auto phonemeIdMap =
+      std::make_shared<PhonemeIdMap>(voice.phonemizeConfig.phonemeIdMap);
+
+  // Synthesize each sentence independently.
+  std::vector<PhonemeId> phonemeIds;
+  std::map<Phoneme, std::size_t> missingPhonemes;
+  for (std::vector<Phoneme> &sentencePhonemes : phonemes) {
+    if (spdlog::should_log(spdlog::level::debug)) {
+      // DEBUG log for phonemes
+      std::string phonemesStr;
+      for (auto phoneme : sentencePhonemes) {
+        utf8::append(phoneme, std::back_inserter(phonemesStr));
+      }
+
+      spdlog::debug("Converting {} phoneme(s) to ids: {}",
+                    sentencePhonemes.size(), phonemesStr);
+    }
+
+    PhonemeIdConfig idConfig;
+    idConfig.phonemeIdMap = phonemeIdMap;
+
+    // phrases[i] is followed by phraseSilenceSamples[i] samples of silence
+    std::vector<std::vector<Phoneme>> phrases;
+    std::vector<std::size_t> phraseSilenceSamples;
+
+    if (voice.synthesisConfig.phonemeSilenceSeconds) {
+      // Split into phrases
+      const std::map<Phoneme, float> &phonemeSilenceSeconds =
+          *voice.synthesisConfig.phonemeSilenceSeconds;
+
+      phrases.emplace_back();
+      for (const Phoneme currentPhoneme : sentencePhonemes) {
+        phrases.back().push_back(currentPhoneme);
+
+        auto silenceIter = phonemeSilenceSeconds.find(currentPhoneme);
+        if (silenceIter != phonemeSilenceSeconds.end()) {
+          // Split at phrase boundary; the boundary phoneme stays in the
+          // phrase it ends.
+          phraseSilenceSamples.push_back(
+              secondsToSamples(silenceIter->second));
+          phrases.emplace_back();
+        }
+      }
+    } else {
+      // Use all phonemes
+      phrases.push_back(sentencePhonemes);
+    }
+
+    // The final phrase has no boundary phoneme, so it gets no extra silence
+    phraseSilenceSamples.resize(phrases.size(), 0);
+
+    // phonemes -> ids -> audio
+    for (std::size_t phraseIdx = 0; phraseIdx < phrases.size(); phraseIdx++) {
+      std::vector<Phoneme> &phrasePhonemes = phrases[phraseIdx];
+      if (phrasePhonemes.empty()) {
+        continue;
+      }
+
+      // phonemes -> ids
+      phonemes_to_ids(phrasePhonemes, idConfig, phonemeIds, missingPhonemes);
+      if (spdlog::should_log(spdlog::level::debug)) {
+        // DEBUG log for phoneme ids
+        std::stringstream phonemeIdsStr;
+        for (auto phonemeId : phonemeIds) {
+          phonemeIdsStr << phonemeId << ", ";
+        }
+
+        spdlog::debug("Converted {} phoneme(s) to {} phoneme id(s): {}",
+                      phrasePhonemes.size(), phonemeIds.size(),
+                      phonemeIdsStr.str());
+      }
+
+      // ids -> audio
+      SynthesisResult phraseResult{};
+      synthesize(phonemeIds, voice.synthesisConfig, voice.session, audioBuffer,
+                 phraseResult);
+
+      // Add end of phrase silence
+      audioBuffer.insert(audioBuffer.end(), phraseSilenceSamples[phraseIdx],
+                         int16_t{0});
+
+      result.audioSeconds += phraseResult.audioSeconds;
+      result.inferSeconds += phraseResult.inferSeconds;
+
+      phonemeIds.clear();
+    }
+
+    // Add end of sentence silence
+    audioBuffer.insert(audioBuffer.end(), sentenceSilenceSamples, int16_t{0});
+
+    if (audioCallback) {
+      // Call back must copy audio since it is cleared afterwards.
+      audioCallback();
+      audioBuffer.clear();
+    }
+
+    phonemeIds.clear();
+  }
+
+  if (!missingPhonemes.empty()) {
+    spdlog::warn("Missing {} phoneme(s) from phoneme/id map!",
+                 missingPhonemes.size());
+
+    for (const auto &phonemeCount : missingPhonemes) {
+      std::string phonemeStr;
+      utf8::append(phonemeCount.first, std::back_inserter(phonemeStr));
+      spdlog::warn("Missing \"{}\" (\\u{:04X}): {} time(s)", phonemeStr,
+                   (uint32_t)phonemeCount.first, phonemeCount.second);
+    }
+  }
+
+  if (result.audioSeconds > 0) {
+    result.realTimeFactor = result.inferSeconds / result.audioSeconds;
+  }
+
+} /* textToAudio */
+
+// Phonemize text and synthesize audio to WAV file
+//
+// Synthesizes the whole text into memory with textToAudio() (no per-sentence
+// callback), then writes a WAV header followed by the 16-bit samples to
+// audioFile. result is filled in by textToAudio().
+void textToWavFile(PiperConfig &config, Voice &voice, std::string text,
+                   std::ostream &audioFile, SynthesisResult &result) {
+
+  std::vector<int16_t> audioBuffer;
+  textToAudio(config, voice, text, audioBuffer, result, nullptr);
+
+  // Write WAV
+  // Read the settings in place; copying the config would also copy its
+  // optional phoneme silence map.
+  const auto &synthesisConfig = voice.synthesisConfig;
+  writeWavHeader(synthesisConfig.sampleRate, synthesisConfig.sampleWidth,
+                 synthesisConfig.channels, (int32_t)audioBuffer.size(),
+                 audioFile);
+
+  audioFile.write(reinterpret_cast<const char *>(audioBuffer.data()),
+                  sizeof(int16_t) * audioBuffer.size());
+
+} /* textToWavFile */
+
+} // namespace piper
--- a/mlu_370-piper/piper/src/cpp/piper.hpp
+++ b/mlu_370-piper/piper/src/cpp/piper.hpp
@@ -0,0 +1,132 @@
+#ifndef PIPER_H_
+#define PIPER_H_
+
+#include <cstdint>
+#include <fstream>
+#include <functional>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include <onnxruntime_cxx_api.h>
+#include <piper-phonemize/phoneme_ids.hpp>
+#include <piper-phonemize/phonemize.hpp>
+#include <piper-phonemize/tashkeel.hpp>
+
+#include "json.hpp"
+
+using json = nlohmann::json;
+
+namespace piper {
+
+// Numerical speaker id used by multi-speaker voices
+using SpeakerId = int64_t;
+
+// espeak-ng settings of a voice
+struct eSpeakConfig {
+  // Name of the espeak-ng voice used for phonemization
+  std::string voice{"en-us"};
+};
+
+// Process-wide settings shared by all voices
+struct PiperConfig {
+  // Path to the espeak-ng data directory
+  std::string eSpeakDataPath;
+
+  // NOTE(review): presumably enables espeak-ng set-up in initialize();
+  // its use is not visible in this header - confirm in piper.cpp
+  bool useESpeak{true};
+
+  // When true, textToAudio() runs the text through libtashkeel first and
+  // requires tashkeelState to be set
+  bool useTashkeel{false};
+  std::optional<std::string> tashkeelModelPath;
+  std::unique_ptr<tashkeel::State> tashkeelState;
+};
+
+// How text is converted to phonemes
+enum PhonemeType {
+  eSpeakPhonemes, // phonemize with espeak-ng
+  TextPhonemes    // use the text's UTF-8 codepoints as "phonemes"
+};
+
+// Phonemization settings of a voice
+struct PhonemizeConfig {
+  // Which phonemizer textToAudio() uses
+  PhonemeType phonemeType{eSpeakPhonemes};
+
+  // NOTE(review): presumably an optional phoneme -> phonemes substitution
+  // table; not used in the code visible here - confirm
+  std::optional<std::map<Phoneme, std::vector<Phoneme>>> phonemeMap;
+
+  // phoneme -> ids passed to the model
+  std::map<Phoneme, std::vector<PhonemeId>> phonemeIdMap;
+
+  PhonemeId idPad{0}; // padding (optionally interspersed)
+  PhonemeId idBos{1}; // beginning of sentence
+  PhonemeId idEos{2}; // end of sentence
+  bool interspersePad{true};
+
+  // espeak-ng settings (used when phonemeType is eSpeakPhonemes)
+  eSpeakConfig eSpeak;
+};
+
+// Settings used when turning phoneme ids into audio
+struct SynthesisConfig {
+  // VITS inference settings; passed to the model together, in this order,
+  // as its "scales" input
+  float noiseScale{0.667f};
+  float lengthScale{1.0f};
+  float noiseW{0.8f};
+
+  // Audio settings
+  int sampleRate{22050};
+  int sampleWidth{2}; // 16-bit
+  int channels{1};    // mono
+
+  // Speaker id from 0 to numSpeakers - 1
+  std::optional<SpeakerId> speakerId;
+
+  // Extra silence
+  // Seconds of silence appended after every sentence
+  float sentenceSilenceSeconds{0.2f};
+  // Seconds of silence appended after each occurrence of a phoneme; a
+  // sentence is split into phrases at these phonemes
+  std::optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
+};
+
+// Properties of the loaded voice model
+struct ModelConfig {
+  // Number of speakers the model was trained with.
+  // Initialized so that a config that was never parsed cannot expose an
+  // indeterminate value; loadVoice() only treats values > 1 as multi-speaker.
+  int numSpeakers = 1;
+
+  // speaker name -> id
+  std::optional<std::map<std::string, SpeakerId>> speakerIdMap;
+};
+
+// ONNX Runtime objects needed to run a voice model
+struct ModelSession {
+  // Declared first so that it is constructed before, and destroyed after,
+  // every other ONNX Runtime object in this struct (members are destroyed in
+  // reverse declaration order).
+  Ort::Env env;
+  Ort::SessionOptions options;
+  Ort::AllocatorWithDefaultOptions allocator;
+
+  // Starts out empty (nullptr); the session is assigned once a model is loaded
+  Ort::Session onnx;
+
+  ModelSession() : onnx(nullptr) {}
+};
+
+// Timing information about a synthesis run.
+// All fields start at zero because textToAudio() adds to inferSeconds and
+// audioSeconds rather than assigning them.
+struct SynthesisResult {
+  // Seconds spent running the model
+  double inferSeconds = 0.0;
+
+  // Seconds of audio produced
+  double audioSeconds = 0.0;
+
+  // inferSeconds / audioSeconds (0 when no audio was produced)
+  double realTimeFactor = 0.0;
+};
+
+// Everything loadVoice() loads for one voice
+struct Voice {
+  // Parsed JSON voice config; the three config structs below are filled
+  // from it
+  json configRoot;
+  PhonemizeConfig phonemizeConfig;
+  SynthesisConfig synthesisConfig;
+  ModelConfig modelConfig;
+  // ONNX Runtime session for the voice model
+  ModelSession session;
+};
+
+// True if the string is a single UTF-8 codepoint
+bool isSingleCodepoint(std::string s);
+
+// Get the first UTF-8 codepoint of a string
+Phoneme getCodepoint(std::string s);
+
+// Get version of Piper
+std::string getVersion();
+
+// Must be called before using textTo* functions
+void initialize(PiperConfig &config);
+
+// Clean up
+void terminate(PiperConfig &config);
+
+// Load Onnx model and JSON config file
+//
+// Parses the JSON file at modelConfigPath into voice.configRoot, fills the
+// phonemize/synthesis/model configs from it, and loads the model at
+// modelPath into voice.session (useCuda is forwarded to the model loader).
+// For voices with more than one speaker, the synthesis speaker id is set to
+// speakerId, or to 0 when speakerId is empty.
+void loadVoice(PiperConfig &config, std::string modelPath,
+               std::string modelConfigPath, Voice &voice,
+               std::optional<SpeakerId> &speakerId, bool useCuda);
+
+// Phonemize text and synthesize audio
+//
+// Samples are appended to audioBuffer. If audioCallback is non-empty it is
+// called after each sentence and audioBuffer is cleared afterwards, so the
+// callback must copy the audio it wants to keep.
+// result.audioSeconds and result.inferSeconds are added to, not assigned;
+// pass a zero-initialized result.
+void textToAudio(PiperConfig &config, Voice &voice, std::string text,
+                 std::vector<int16_t> &audioBuffer, SynthesisResult &result,
+                 const std::function<void()> &audioCallback);
+
+// Phonemize text and synthesize audio to WAV file
+//
+// Writes a WAV header followed by the 16-bit samples of the whole text to
+// audioFile.
+void textToWavFile(PiperConfig &config, Voice &voice, std::string text,
+                   std::ostream &audioFile, SynthesisResult &result);
+
+} // namespace piper
+
+#endif // PIPER_H_
--- a/mlu_370-piper/piper/src/cpp/test.cpp
+++ b/mlu_370-piper/piper/src/cpp/test.cpp
@@ -0,0 +1,60 @@
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "json.hpp"
+#include "piper.hpp"
+
+using namespace std;
+using json = nlohmann::json;
+
+// Smoke test: loads a voice, synthesizes a fixed sentence to a WAV file and
+// checks that the file is not suspiciously small.
+//
+// Usage: <voice model path> <espeak-ng-data path> <output WAV path>
+// The voice config is expected at "<voice model path>.json".
+int main(int argc, char *argv[]) {
+  piper::PiperConfig piperConfig;
+  piper::Voice voice;
+
+  if (argc < 2) {
+    std::cerr << "Need voice model path" << std::endl;
+    return 1;
+  }
+
+  if (argc < 3) {
+    std::cerr << "Need espeak-ng-data path" << std::endl;
+    return 1;
+  }
+
+  if (argc < 4) {
+    std::cerr << "Need output WAV path" << std::endl;
+    return 1;
+  }
+
+  auto modelPath = std::string(argv[1]);
+  piperConfig.eSpeakDataPath = std::string(argv[2]);
+  auto outputPath = std::string(argv[3]);
+
+  optional<piper::SpeakerId> speakerId;
+  loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId,
+            false);
+  piper::initialize(piperConfig);
+
+  // Output audio to WAV file
+  ofstream audioFile(outputPath, ios::binary);
+  if (!audioFile) {
+    std::cerr << "ERROR: Could not open output file: " << outputPath
+              << std::endl;
+    piper::terminate(piperConfig);
+    return EXIT_FAILURE;
+  }
+
+  // Zero-initialized: textToAudio() adds to the timing fields, so reading
+  // them uninitialized would be undefined behavior.
+  piper::SynthesisResult result{};
+  piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile,
+                       result);
+  piper::terminate(piperConfig);
+
+  // Verify that file has some data
+  if (audioFile.tellp() < 10000) {
+    std::cerr << "ERROR: Output file is smaller than expected!" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::cout << "OK" << std::endl;
+
+  return EXIT_SUCCESS;
+}
--- a/mlu_370-piper/piper/src/cpp/utf8.h
+++ b/mlu_370-piper/piper/src/cpp/utf8.h
@@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
--- a/mlu_370-piper/piper/src/cpp/utf8/checked.h
+++ b/mlu_370-piper/piper/src/cpp/utf8/checked.h
@@ -0,0 +1,335 @@
+// Copyright 2006-2016 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+    // Common base of every exception type defined in this header; catching
+    // utf8::exception handles all of them.
+    class exception : public ::std::exception {};
+
+    // Exceptions that may be thrown from the library functions.
+    // Thrown when a value is not a valid Unicode code point; the rejected
+    // value is available through code_point().
+    class invalid_code_point : public exception {
+    public:
+        invalid_code_point(uint32_t codepoint) : rejected_cp(codepoint) {}
+        virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
+        {
+            return "Invalid code point";
+        }
+        uint32_t code_point() const { return rejected_cp; }
+    private:
+        uint32_t rejected_cp;
+    };
+
+    // Thrown when an octet sequence is not valid UTF-8; utf8_octet() returns
+    // the octet the exception was constructed with.
+    class invalid_utf8 : public exception {
+    public:
+        invalid_utf8 (uint8_t u) : bad_octet(u) {}
+        virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
+        {
+            return "Invalid UTF-8";
+        }
+        uint8_t utf8_octet() const { return bad_octet; }
+    private:
+        uint8_t bad_octet;
+    };
+
+    // Thrown when a 16-bit sequence is not valid UTF-16; utf16_word() returns
+    // the word the exception was constructed with.
+    class invalid_utf16 : public exception {
+    public:
+        invalid_utf16 (uint16_t u) : bad_word(u) {}
+        virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
+        {
+            return "Invalid UTF-16";
+        }
+        uint16_t utf16_word() const { return bad_word; }
+    private:
+        uint16_t bad_word;
+    };
+
+    // Thrown when a range ends before a complete sequence could be read, or
+    // when prior() is called at the start of the range.
+    class not_enough_room : public exception {
+    public:
+        virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
+        {
+            return "Not enough space";
+        }
+    };
+
+    /// The library API - functions intended to be called by the users
+
+    // Encodes the code point cp as UTF-8 and writes its one to four octets
+    // through result. Returns the iterator past the last octet written.
+    // Throws invalid_code_point if cp is not a valid code point.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!utf8::internal::is_code_point_valid(cp))
+            throw invalid_code_point(cp);
+
+        // one octet
+        if (cp < 0x80) {
+            *(result++) = static_cast<uint8_t>(cp);
+            return result;
+        }
+
+        // two octets
+        if (cp < 0x800) {
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+            return result;
+        }
+
+        // three octets
+        if (cp < 0x10000) {
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+            return result;
+        }
+
+        // four octets
+        *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
+        *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
+        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+        *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        return result;
+    }
+
+    // Copies [start, end) to out, writing the UTF-8 encoding of replacement in
+    // place of every invalid sequence. Returns the advanced output iterator.
+    template <typename octet_iterator, typename output_iterator>
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            const octet_iterator sequence_start = start;
+            const internal::utf_error status = utf8::internal::validate_next(start, end);
+
+            if (status == internal::UTF8_OK) {
+                // valid sequence: copy its octets unchanged
+                octet_iterator it = sequence_start;
+                while (it != start) {
+                    *out++ = *it;
+                    ++it;
+                }
+            }
+            else if (status == internal::NOT_ENOUGH_ROOM) {
+                // truncated sequence at the end of the input
+                out = utf8::append (replacement, out);
+                start = end;
+            }
+            else if (status == internal::INVALID_LEAD) {
+                out = utf8::append (replacement, out);
+                ++start;
+            }
+            else if (status == internal::INCOMPLETE_SEQUENCE ||
+                     status == internal::OVERLONG_SEQUENCE ||
+                     status == internal::INVALID_CODE_POINT) {
+                out = utf8::append (replacement, out);
+                ++start;
+                // just one replacement mark for the sequence
+                while (start != end && utf8::internal::is_trail(*start))
+                    ++start;
+            }
+        }
+        return out;
+    }
+
+    // Same as the four-argument overload, with U+FFFD as the replacement.
+    template <typename octet_iterator, typename output_iterator>
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+    {
+        static const uint32_t default_replacement = utf8::internal::mask16(0xfffd);
+        out = utf8::replace_invalid(start, end, out, default_replacement);
+        return out;
+    }
+
+    // Validates the sequence at it (bounded by end) and returns its code
+    // point; validate_next() is given it by reference to move it forward.
+    // Throws not_enough_room, invalid_utf8 or invalid_code_point according to
+    // the validation error.
+    template <typename octet_iterator>
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t cp = 0;
+        const internal::utf_error status = utf8::internal::validate_next(it, end, cp);
+
+        if (status == internal::NOT_ENOUGH_ROOM)
+            throw not_enough_room();
+
+        if (status == internal::INVALID_LEAD ||
+            status == internal::INCOMPLETE_SEQUENCE ||
+            status == internal::OVERLONG_SEQUENCE)
+            throw invalid_utf8(*it);
+
+        if (status == internal::INVALID_CODE_POINT)
+            throw invalid_code_point(cp);
+
+        return cp;
+    }
+
+    // Like next(), but it is taken by value, so the caller's iterator is not
+    // moved.
+    template <typename octet_iterator>
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
+    {
+        const uint32_t cp = utf8::next(it, end);
+        return cp;
+    }
+
+    // Moves it back to the start of the preceding sequence and returns that
+    // sequence's code point. start is the beginning of the range.
+    // Throws not_enough_room if it == start, and invalid_utf8 if only trail
+    // octets are found before reaching start.
+    template <typename octet_iterator>
+    uint32_t prior(octet_iterator& it, octet_iterator start)
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        const octet_iterator sequence_end = it;
+        // Go back until we hit either a lead octet or start
+        --it;
+        while (utf8::internal::is_trail(*it)) {
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+            --it;
+        }
+        return utf8::peek_next(it, sequence_end);
+    }
+
+    // Moves it by n code points. For n >= 0 it moves forward and end is the
+    // end of the range; for n < 0 it moves backward and end is passed to
+    // prior() as the start of the range.
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
+    {
+        const distance_type zero(0);
+        if (n < zero) {
+            // backward
+            for (distance_type remaining = n; remaining < zero; ++remaining)
+                utf8::prior(it, end);
+            return;
+        }
+
+        // forward
+        for (distance_type done = zero; done < n; ++done)
+            utf8::next(it, end);
+    }
+
+    // Counts the code points in [first, last). Exceptions thrown by next()
+    // for invalid input propagate to the caller.
+    template <typename octet_iterator>
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+        while (first < last) {
+            utf8::next(first, last);
+            ++count;
+        }
+        return count;
+    }
+
+    // Converts the UTF-16 range [start, end) to UTF-8 written through result.
+    // Throws invalid_utf16 for a lead surrogate that is not followed by a
+    // trail surrogate and for a lone trail surrogate.
+    template <typename u16bit_iterator, typename octet_iterator>
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::internal::mask16(*start++);
+
+            if (utf8::internal::is_lead_surrogate(cp)) {
+                // Take care of surrogate pairs first
+                if (start == end)
+                    throw invalid_utf16(static_cast<uint16_t>(cp));
+
+                const uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                if (!utf8::internal::is_trail_surrogate(trail_surrogate))
+                    throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
+
+                cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+            }
+            else if (utf8::internal::is_trail_surrogate(cp)) {
+                // Lone trail surrogate
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+            }
+
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    // Converts the UTF-8 range [start, end) to UTF-16 written through result;
+    // code points above 0xffff are written as a surrogate pair.
+    template <typename u16bit_iterator, typename octet_iterator>
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start < end) {
+            const uint32_t cp = utf8::next(start, end);
+            if (cp <= 0xffff) {
+                *result++ = static_cast<uint16_t>(cp);
+                continue;
+            }
+
+            // make a surrogate pair
+            *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+            *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+        }
+        return result;
+    }
+
+    // Encodes every code point in [start, end) as UTF-8 through result.
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+    {
+        for (; start != end; ++start)
+            result = utf8::append(*start, result);
+
+        return result;
+    }
+
+    // Decodes the UTF-8 range [start, end) and writes one code point per
+    // element through result.
+    template <typename octet_iterator, typename u32bit_iterator>
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+    {
+        while (start < end) {
+            *result = utf8::next(start, end);
+            ++result;
+        }
+
+        return result;
+    }
+
+    // The iterator class
+    //
+    // Bidirectional iterator over the code points of a UTF-8 range.
+    // Dereferencing decodes the sequence at the current position; moving
+    // uses next()/prior(), so invalid input raises their exceptions.
+    template <typename octet_iterator>
+    class iterator {
+      public:
+      typedef uint32_t value_type;
+      typedef uint32_t* pointer;
+      typedef uint32_t& reference;
+      typedef std::ptrdiff_t difference_type;
+      typedef std::bidirectional_iterator_tag iterator_category;
+
+      iterator () {}
+
+      // Throws std::out_of_range if octet_it lies outside
+      // [rangestart, rangeend]
+      explicit iterator (const octet_iterator& octet_it,
+                         const octet_iterator& rangestart,
+                         const octet_iterator& rangeend) :
+               it(octet_it), range_start(rangestart), range_end(rangeend)
+      {
+          const bool inside_range = !(it < range_start) && !(it > range_end);
+          if (!inside_range)
+              throw std::out_of_range("Invalid utf-8 iterator position");
+      }
+
+      // the default "big three" are OK
+
+      // Underlying octet iterator at the current position
+      octet_iterator base () const { return it; }
+
+      uint32_t operator * () const
+      {
+          octet_iterator cursor = it;
+          return utf8::next(cursor, range_end);
+      }
+
+      // Only iterators over the same range may be compared
+      bool operator == (const iterator& rhs) const
+      {
+          const bool same_range =
+              !(range_start != rhs.range_start || range_end != rhs.range_end);
+          if (!same_range)
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+          return (it == rhs.it);
+      }
+      bool operator != (const iterator& rhs) const
+      {
+          return !(*this == rhs);
+      }
+
+      iterator& operator ++ ()
+      {
+          utf8::next(it, range_end);
+          return *this;
+      }
+      iterator operator ++ (int)
+      {
+          iterator previous = *this;
+          ++(*this);
+          return previous;
+      }
+      iterator& operator -- ()
+      {
+          utf8::prior(it, range_start);
+          return *this;
+      }
+      iterator operator -- (int)
+      {
+          iterator previous = *this;
+          --(*this);
+          return previous;
+      }
+
+      private:
+      octet_iterator it;
+      octet_iterator range_start;
+      octet_iterator range_end;
+    }; // class iterator
+
+} // namespace utf8
+
+#if UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later
+#include "cpp17.h"
+#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
+#include "cpp11.h"
+#endif // C++ 11 or later
+
+#endif //header guard
+
--- a/mlu_370-piper/piper/src/cpp/utf8/core.h
+++ b/mlu_370-piper/piper/src/cpp/utf8/core.h
@@ -0,0 +1,338 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+// Determine the C++ standard version.
+// If the user defines UTF_CPP_CPLUSPLUS, use that.
+// Otherwise, fall back to the predefined macro __cplusplus, which is not
+// reliable on every compiler.
+// NOTE(review): MSVC is believed to report 199711L unless /Zc:__cplusplus is
+// passed - define UTF_CPP_CPLUSPLUS explicitly on such toolchains.
+
+#if !defined UTF_CPP_CPLUSPLUS
+    #define UTF_CPP_CPLUSPLUS __cplusplus
+#endif
+
+// UTF_CPP_OVERRIDE / UTF_CPP_NOEXCEPT expand to the C++11 keywords when the
+// detected standard allows it, and to C++98/03-compatible spellings otherwise.
+#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
+    #define UTF_CPP_OVERRIDE override
+    #define UTF_CPP_NOEXCEPT noexcept
+#else // C++ 98/03
+    #define UTF_CPP_OVERRIDE
+    #define UTF_CPP_NOEXCEPT throw()
+#endif // C++ 11 or later
+
+
+namespace utf8
+{
+    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
+    // You may need to change them to match your system.
+    // These typedefs have the same names as ones from cstdint, or boost/cstdint
+    // They live inside namespace utf8, so unqualified uint8_t/uint16_t/uint32_t
+    // used by the library code in this namespace refer to these definitions.
+    // NOTE(review): assumes short is 16 bits and int is 32 bits on the target
+    // platform - confirm for unusual ABIs.
+    typedef unsigned char   uint8_t;
+    typedef unsigned short  uint16_t;
+    typedef unsigned int    uint32_t;
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+    // Unicode constants
+    // Leading (high) surrogates: 0xd800 - 0xdbff
+    // Trailing (low) surrogates: 0xdc00 - 0xdfff
+    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
+    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
+    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+    // Added to (code point >> 10) to obtain the lead surrogate of a pair.
+    const uint16_t LEAD_OFFSET         = 0xd7c0u;       // LEAD_SURROGATE_MIN - (0x10000 >> 10)
+    // Added to (lead << 10) + trail to recover the code point of a pair;
+    // the value is the expression on the right reduced modulo 2^32.
+    const uint32_t SURROGATE_OFFSET    = 0xfca02400u;   // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
+
+    // Maximum valid value for a Unicode code point
+    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
+
+    // Reduces an octet value of any integral type (possibly signed or wider
+    // than 8 bits) to its low 8 bits.
+    template<typename octet_type>
+    inline uint8_t mask8(octet_type oc)
+    {
+        const uint8_t low_byte = static_cast<uint8_t>(oc & 0xff);
+        return low_byte;
+    }
+    // Reduces a code unit of any integral type to its low 16 bits.
+    template<typename u16_type>
+    inline uint16_t mask16(u16_type oc)
+    {
+        const uint16_t low_word = static_cast<uint16_t>(oc & 0xffff);
+        return low_word;
+    }
+    // True when oc is a UTF-8 continuation octet, i.e. its two top bits are 10.
+    template<typename octet_type>
+    inline bool is_trail(octet_type oc)
+    {
+        const uint8_t top_two_bits = static_cast<uint8_t>(utf8::internal::mask8(oc) >> 6);
+        return top_two_bits == 0x2;
+    }
+
+    // True when cp lies in the leading (high) surrogate range.
+    template <typename u16>
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        if (cp < LEAD_SURROGATE_MIN)
+            return false;
+        return cp <= LEAD_SURROGATE_MAX;
+    }
+
+    // True when cp lies in the trailing (low) surrogate range.
+    template <typename u16>
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        if (cp < TRAIL_SURROGATE_MIN)
+            return false;
+        return cp <= TRAIL_SURROGATE_MAX;
+    }
+
+    // True when cp is any surrogate, leading or trailing (the two ranges are
+    // tested as one span from LEAD_SURROGATE_MIN to TRAIL_SURROGATE_MAX).
+    template <typename u16>
+    inline bool is_surrogate(u16 cp)
+    {
+        if (cp < LEAD_SURROGATE_MIN)
+            return false;
+        return cp <= TRAIL_SURROGATE_MAX;
+    }
+
+    // A code point is valid when it does not exceed CODE_POINT_MAX and is not
+    // a surrogate.
+    template <typename u32>
+    inline bool is_code_point_valid(u32 cp)
+    {
+        if (cp > CODE_POINT_MAX)
+            return false;
+        return !utf8::internal::is_surrogate(cp);
+    }
+
+    // Returns the number of octets (1-4) announced by the lead octet at
+    // lead_it, or 0 when that octet is not a valid lead octet.
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        const uint8_t lead = utf8::internal::mask8(*lead_it);
+        if (lead < 0x80)              // 0xxxxxxx
+            return 1;
+        if ((lead & 0xe0) == 0xc0)    // 110xxxxx
+            return 2;
+        if ((lead & 0xf0) == 0xe0)    // 1110xxxx
+            return 3;
+        if ((lead & 0xf8) == 0xf0)    // 11110xxx
+            return 4;
+        return 0;
+    }
+
+    // Reports whether cp was encoded with a sequence length other than the
+    // one required for its value range (1 octet below 0x80, 2 below 0x800,
+    // 3 below 0x10000). Code points from 0x10000 upwards are never reported.
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        if (cp < 0x80)
+            return length != 1;
+        if (cp < 0x800)
+            return length != 2;
+        if (cp < 0x10000)
+            return length != 3;
+        return false;
+    }
+
+    // Result codes of the internal decoding/validation helpers:
+    //   UTF8_OK             - sequence decoded and accepted
+    //   NOT_ENOUGH_ROOM     - the end of the input was reached
+    //   INVALID_LEAD        - the first octet is not a valid lead octet
+    //   INCOMPLETE_SEQUENCE - a continuation octet was expected but not found
+    //   OVERLONG_SEQUENCE   - the code point was encoded with the wrong length
+    //   INVALID_CODE_POINT  - the decoded value is not a valid code point
+    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+    /// Helper for get_sequence_x
+    /// Advances it by one octet and checks that the new position holds a
+    /// continuation octet. Returns NOT_ENOUGH_ROOM when the end is reached and
+    /// INCOMPLETE_SEQUENCE when the octet is not a continuation octet.
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, octet_iterator end)
+    {
+        ++it;
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        return utf8::internal::is_trail(*it) ? UTF8_OK : INCOMPLETE_SEQUENCE;
+    }
+
+    // Advances IT with increase_safely and makes the *enclosing function*
+    // return the error code when that fails. Only used by the get_sequence_x
+    // helpers and undefined again right after them.
+    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}    
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+    /// On success the iterator is left on the last octet of the sequence.
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it != end) {
+            // A single-octet sequence is its own code point.
+            code_point = utf8::internal::mask8(*it);
+            return UTF8_OK;
+        }
+
+        return NOT_ENOUGH_ROOM;
+    }
+
+    // Decodes a 2-octet sequence starting at it into code_point.
+    // On success it is left on the last octet of the sequence (not past it).
+    // On failure the error from increase_safely is returned; code_point may
+    // then hold a partial value and it may already have been advanced.
+    template <typename octet_iterator>
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end) 
+            return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);
+
+        // Expands to an early return if the next octet is missing or not a trail octet.
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // The 0x7ff mask strips the 110 prefix of the lead octet after the
+        // shift; the low 6 bits come from the trail octet.
+        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+
+        return UTF8_OK;
+    }
+
+    // Decodes a 3-octet sequence starting at it into code_point.
+    // On success it is left on the last octet of the sequence (not past it).
+    // On failure the error from increase_safely is returned; code_point may
+    // then hold a partial value and it may already have been advanced.
+    template <typename octet_iterator>
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+            
+        code_point = utf8::internal::mask8(*it);
+
+        // Expands to an early return if the next octet is missing or not a trail octet.
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Lead octet payload goes to bits 12-15 (the 0xffff mask drops the
+        // 1110 prefix); first trail octet payload goes to bits 6-11.
+        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Second trail octet supplies the low 6 bits.
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    // Decodes a 4-octet sequence starting at it into code_point.
+    // On success it is left on the last octet of the sequence (not past it).
+    // On failure the error from increase_safely is returned; code_point may
+    // then hold a partial value and it may already have been advanced.
+    template <typename octet_iterator>
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+           return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);
+
+        // Expands to an early return if the next octet is missing or not a trail octet.
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Lead octet payload goes to bits 18-20 (the 0x1fffff mask drops the
+        // 11110 prefix); first trail octet payload goes to bits 12-17.
+        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Second trail octet payload goes to bits 6-11.
+        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        // Third trail octet supplies the low 6 bits.
+        code_point += (*it) & 0x3f;
+
+        return UTF8_OK;
+    }
+
+    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    // Validates and decodes the UTF-8 sequence that starts at it.
+    // On success the code point is stored in code_point, it is moved past the
+    // sequence and UTF8_OK is returned. On failure the error code is returned,
+    // it is put back to where it started and code_point is left untouched.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // Remember the starting position so the iterator can be rewound on
+        // failure (this is of little use with e.g. stream iterators).
+        const octet_iterator sequence_begin = it;
+
+        // Decode into a scratch value; the caller's variable is only written on success.
+        uint32_t decoded = 0;
+
+        // The lead octet announces how many octets the sequence should have.
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        const octet_difference_type length = utf8::internal::sequence_length(it);
+        if (length == 0)
+            return INVALID_LEAD;
+
+        // Read the trail octets and assemble the code point.
+        utf_error status = UTF8_OK;
+        if (length == 1)
+            status = utf8::internal::get_sequence_1(it, end, decoded);
+        else if (length == 2)
+            status = utf8::internal::get_sequence_2(it, end, decoded);
+        else if (length == 3)
+            status = utf8::internal::get_sequence_3(it, end, decoded);
+        else if (length == 4)
+            status = utf8::internal::get_sequence_4(it, end, decoded);
+
+        if (status == UTF8_OK) {
+            // Decoding succeeded. Now, security checks...
+            if (!utf8::internal::is_code_point_valid(decoded))
+                status = INVALID_CODE_POINT;
+            else if (utf8::internal::is_overlong_sequence(decoded, length))
+                status = OVERLONG_SEQUENCE;
+            else {
+                // Passed! The decoders stop on the last octet, so step past it.
+                code_point = decoded;
+                ++it;
+                return UTF8_OK;
+            }
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = sequence_begin;
+        return status;
+    }
+
+    // Convenience overload for callers that only need the validation result:
+    // the decoded code point is written to a local and discarded.
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        uint32_t unused_code_point;
+        const utf_error status = utf8::internal::validate_next(it, end, unused_code_point);
+        return status;
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // Byte order mark
+    // The three octets that starts_with_bom compares the beginning of a
+    // sequence against.
+    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+
+    // Returns an iterator to the start of the first invalid UTF-8 sequence in
+    // [start, end), or end when the whole range validates.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        // validate_next moves cursor past each good sequence and leaves it in
+        // place on a bad one, so the loop stops exactly at the offender.
+        while (cursor != end && utf8::internal::validate_next(cursor, end) == internal::UTF8_OK) {
+        }
+        return cursor;
+    }
+
+    // True when [start, end) contains no invalid UTF-8 sequence.
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end)
+    {
+        const octet_iterator first_bad = utf8::find_invalid(start, end);
+        return first_bad == end;
+    }
+
+    // True when the range [it, end) begins with the three octets of bom.
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+    {
+        // The first two octets are consumed as they are compared; the third is
+        // only read. This keeps the number of increments unchanged, which
+        // matters for single-pass iterators.
+        if (it == end)
+            return false;
+        if (utf8::internal::mask8(*it++) != bom[0])
+            return false;
+
+        if (it == end)
+            return false;
+        if (utf8::internal::mask8(*it++) != bom[1])
+            return false;
+
+        if (it == end)
+            return false;
+        return utf8::internal::mask8(*it) == bom[2];
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
--- a/mlu_370-piper/piper/src/cpp/utf8/cpp11.h
+++ b/mlu_370-piper/piper/src/cpp/utf8/cpp11.h
@@ -0,0 +1,103 @@
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+
+#include "checked.h"
+#include <string>
+
+namespace utf8
+{
+    // std::string convenience overloads (C++11) that forward to the
+    // iterator-based functions.
+
+    // Appends the UTF-8 encoding of cp to s.
+    inline void append(char32_t cp, std::string& s)
+    {
+        utf8::append(static_cast<uint32_t>(cp), std::back_inserter(s));
+    }
+
+    // Converts a UTF-16 string to a UTF-8 encoded std::string.
+    inline std::string utf16to8(const std::u16string& s)
+    {
+        std::string encoded;
+        utf8::utf16to8(s.begin(), s.end(), std::back_inserter(encoded));
+        return encoded;
+    }
+
+    // Converts a UTF-8 encoded string to UTF-16.
+    inline std::u16string utf8to16(const std::string& s)
+    {
+        std::u16string units;
+        utf8::utf8to16(s.begin(), s.end(), std::back_inserter(units));
+        return units;
+    }
+
+    // Converts a UTF-32 string to a UTF-8 encoded std::string.
+    inline std::string utf32to8(const std::u32string& s)
+    {
+        std::string encoded;
+        utf8::utf32to8(s.begin(), s.end(), std::back_inserter(encoded));
+        return encoded;
+    }
+
+    // Converts a UTF-8 encoded string to UTF-32.
+    inline std::u32string utf8to32(const std::string& s)
+    {
+        std::u32string code_points;
+        utf8::utf8to32(s.begin(), s.end(), std::back_inserter(code_points));
+        return code_points;
+    }
+
+    // Returns the byte offset of the first invalid sequence in s, or
+    // std::string::npos when s is entirely valid.
+    inline std::size_t find_invalid(const std::string& s)
+    {
+        const std::string::const_iterator first_bad = utf8::find_invalid(s.begin(), s.end());
+        if (first_bad == s.end())
+            return std::string::npos;
+        return static_cast<std::size_t>(first_bad - s.begin());
+    }
+
+    // True when s contains only valid UTF-8 sequences.
+    inline bool is_valid(const std::string& s)
+    {
+        return utf8::is_valid(s.begin(), s.end());
+    }
+
+    // Returns a copy of s with every invalid sequence replaced by replacement.
+    inline std::string replace_invalid(const std::string& s, char32_t replacement)
+    {
+        std::string cleaned;
+        utf8::replace_invalid(s.begin(), s.end(), std::back_inserter(cleaned), replacement);
+        return cleaned;
+    }
+
+    // Returns a copy of s with every invalid sequence replaced by the
+    // library's default replacement marker.
+    inline std::string replace_invalid(const std::string& s)
+    {
+        std::string cleaned;
+        utf8::replace_invalid(s.begin(), s.end(), std::back_inserter(cleaned));
+        return cleaned;
+    }
+
+    // True when s begins with the UTF-8 byte order mark.
+    inline bool starts_with_bom(const std::string& s)
+    {
+        return utf8::starts_with_bom(s.begin(), s.end());
+    }
+ 
+} // namespace utf8
+
+#endif // header guard
+
--- a/mlu_370-piper/piper/src/cpp/utf8/cpp17.h
+++ b/mlu_370-piper/piper/src/cpp/utf8/cpp17.h
@@ -0,0 +1,103 @@
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
+#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
+
+#include "checked.h"
+#include <string>
+
+namespace utf8
+{
+    // String-view convenience overloads (C++17) that forward to the
+    // iterator-based functions.
+
+    // Appends the UTF-8 encoding of cp to s.
+    inline void append(char32_t cp, std::string& s)
+    {
+        utf8::append(static_cast<uint32_t>(cp), std::back_inserter(s));
+    }
+
+    // Converts UTF-16 text to a UTF-8 encoded std::string.
+    inline std::string utf16to8(std::u16string_view s)
+    {
+        std::string encoded;
+        utf8::utf16to8(s.begin(), s.end(), std::back_inserter(encoded));
+        return encoded;
+    }
+
+    // Converts UTF-8 encoded text to UTF-16.
+    inline std::u16string utf8to16(std::string_view s)
+    {
+        std::u16string units;
+        utf8::utf8to16(s.begin(), s.end(), std::back_inserter(units));
+        return units;
+    }
+
+    // Converts UTF-32 text to a UTF-8 encoded std::string.
+    inline std::string utf32to8(std::u32string_view s)
+    {
+        std::string encoded;
+        utf8::utf32to8(s.begin(), s.end(), std::back_inserter(encoded));
+        return encoded;
+    }
+
+    // Converts UTF-8 encoded text to UTF-32.
+    inline std::u32string utf8to32(std::string_view s)
+    {
+        std::u32string code_points;
+        utf8::utf8to32(s.begin(), s.end(), std::back_inserter(code_points));
+        return code_points;
+    }
+
+    // Returns the byte offset of the first invalid sequence in s, or
+    // std::string_view::npos when s is entirely valid.
+    inline std::size_t find_invalid(std::string_view s)
+    {
+        const std::string_view::const_iterator first_bad = utf8::find_invalid(s.begin(), s.end());
+        if (first_bad == s.end())
+            return std::string_view::npos;
+        return static_cast<std::size_t>(first_bad - s.begin());
+    }
+
+    // True when s contains only valid UTF-8 sequences.
+    inline bool is_valid(std::string_view s)
+    {
+        return utf8::is_valid(s.begin(), s.end());
+    }
+
+    // Returns a copy of s with every invalid sequence replaced by replacement.
+    inline std::string replace_invalid(std::string_view s, char32_t replacement)
+    {
+        std::string cleaned;
+        utf8::replace_invalid(s.begin(), s.end(), std::back_inserter(cleaned), replacement);
+        return cleaned;
+    }
+
+    // Returns a copy of s with every invalid sequence replaced by the
+    // library's default replacement marker.
+    inline std::string replace_invalid(std::string_view s)
+    {
+        std::string cleaned;
+        utf8::replace_invalid(s.begin(), s.end(), std::back_inserter(cleaned));
+        return cleaned;
+    }
+
+    // True when s begins with the UTF-8 byte order mark.
+    inline bool starts_with_bom(std::string_view s)
+    {
+        return utf8::starts_with_bom(s.begin(), s.end());
+    }
+ 
+} // namespace utf8
+
+#endif // header guard
+
--- a/mlu_370-piper/piper/src/cpp/utf8/unchecked.h
+++ b/mlu_370-piper/piper/src/cpp/utf8/unchecked.h
@@ -0,0 +1,274 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked
+    {
+        // Encodes cp as UTF-8 and writes the octets through result.
+        // The code point is not validated. Returns the advanced output iterator.
+        template <typename octet_iterator>
+        octet_iterator append(uint32_t cp, octet_iterator result)
+        {
+            if (cp < 0x80) {                      // one octet
+                *(result++) = static_cast<uint8_t>(cp);
+                return result;
+            }
+            if (cp < 0x800) {                     // two octets
+                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+                return result;
+            }
+            if (cp < 0x10000) {                   // three octets
+                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+                return result;
+            }
+            // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
+            return result;
+        }
+
+        // Copies [start, end) to out, substituting the UTF-8 encoding of
+        // replacement for every invalid sequence. Returns the advanced output iterator.
+        template <typename octet_iterator, typename output_iterator>
+        output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+        {
+            while (start != end) {
+                const octet_iterator sequence_start = start;
+                const internal::utf_error err_code = utf8::internal::validate_next(start, end);
+
+                if (err_code == internal::UTF8_OK) {
+                    // Valid sequence: copy its octets unchanged.
+                    for (octet_iterator it = sequence_start; it != start; ++it)
+                        *out++ = *it;
+                }
+                else if (err_code == internal::NOT_ENOUGH_ROOM) {
+                    // Input ended inside a sequence: emit one marker and stop.
+                    out = utf8::unchecked::append (replacement, out);
+                    start = end;
+                }
+                else if (err_code == internal::INVALID_LEAD) {
+                    // Skip just the offending octet.
+                    out = utf8::unchecked::append (replacement, out);
+                    ++start;
+                }
+                else if (err_code == internal::INCOMPLETE_SEQUENCE ||
+                         err_code == internal::OVERLONG_SEQUENCE ||
+                         err_code == internal::INVALID_CODE_POINT) {
+                    out = utf8::unchecked::append (replacement, out);
+                    ++start;
+                    // just one replacement mark for the sequence
+                    while (start != end && utf8::internal::is_trail(*start))
+                        ++start;
+                }
+            }
+            return out;
+        }
+
+        // Overload that uses 0xfffd as the replacement code point.
+        template <typename octet_iterator, typename output_iterator>
+        inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+        {
+            static const uint32_t default_marker = utf8::internal::mask16(0xfffd);
+            out = utf8::unchecked::replace_invalid(start, end, out, default_marker);
+            return out;
+        }
+
+        // Decodes the code point starting at it and moves it past the sequence.
+        // Nothing is validated: the sequence length is taken from the lead
+        // octet and the trail octets are read without any bounds check.
+        template <typename octet_iterator>
+        uint32_t next(octet_iterator& it)
+        {
+            uint32_t cp = utf8::internal::mask8(*it);
+            const typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
+            switch (length) {
+                case 1:
+                    break;
+                case 2:
+                    cp = (cp << 6) & 0x7ff;
+                    cp += (*++it) & 0x3f;
+                    break;
+                case 3:
+                    cp = (cp << 12) & 0xffff;
+                    cp += (utf8::internal::mask8(*++it) << 6) & 0xfff;
+                    cp += (*++it) & 0x3f;
+                    break;
+                case 4:
+                    cp = (cp << 18) & 0x1fffff;
+                    cp += (utf8::internal::mask8(*++it) << 12) & 0x3ffff;
+                    cp += (utf8::internal::mask8(*++it) << 6) & 0xfff;
+                    cp += (*++it) & 0x3f;
+                    break;
+            }
+            // Step from the last octet of the sequence to the next lead octet.
+            ++it;
+            return cp;
+        }
+
+        // Decodes the code point at it without moving the caller's iterator
+        // (it is taken by value, so only the local copy advances).
+        template <typename octet_iterator>
+        uint32_t peek_next(octet_iterator it)
+        {
+            const uint32_t cp = utf8::unchecked::next(it);
+            return cp;
+        }
+
+        // Moves it back to the lead octet of the preceding sequence and returns
+        // that sequence's code point. No check is made against the start of the data.
+        template <typename octet_iterator>
+        uint32_t prior(octet_iterator& it)
+        {
+            // Step back over continuation octets until a lead octet is found.
+            do {
+                --it;
+            } while (utf8::internal::is_trail(*it));
+
+            // Decode on a copy so that it stays on the lead octet.
+            octet_iterator lead = it;
+            return utf8::unchecked::next(lead);
+        }
+
+        // Moves it by n code points: forward for positive n, backward for negative n.
+        template <typename octet_iterator, typename distance_type>
+        void advance (octet_iterator& it, distance_type n)
+        {
+            const distance_type zero(0);
+            if (!(n < zero)) {
+                // forward
+                for (distance_type step = zero; step < n; ++step)
+                    utf8::unchecked::next(it);
+                return;
+            }
+            // backward
+            for (distance_type step = n; step < zero; ++step)
+                utf8::unchecked::prior(it);
+        }
+
+        // Counts the code points decoded while walking from first until it is
+        // no longer less than last.
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance (octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+            while (first < last) {
+                utf8::unchecked::next(first);
+                ++count;
+            }
+            return count;
+        }
+
+        // Converts the UTF-16 code units in [start, end) to UTF-8 written
+        // through result. A lead surrogate is combined with the unit that
+        // follows it without checking that such a unit exists or is a trail surrogate.
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                uint32_t cp = utf8::internal::mask16(*start);
+                ++start;
+                // Take care of surrogate pairs first
+                if (utf8::internal::is_lead_surrogate(cp)) {
+                    const uint32_t trail_surrogate = utf8::internal::mask16(*start);
+                    ++start;
+                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                }
+                result = utf8::unchecked::append(cp, result);
+            }
+            return result;
+        }
+
+        // Converts the UTF-8 octets in [start, end) to UTF-16 code units written
+        // through result. Code points above 0xffff become a surrogate pair.
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                if (cp <= 0xffff) {
+                    *result++ = static_cast<uint16_t>(cp);
+                    continue;
+                }
+                // make a surrogate pair
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+            }
+            return result;
+        }
+
+        // Encodes every code point in [start, end) as UTF-8 written through result.
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            for (; start != end; ++start)
+                result = utf8::unchecked::append(*start, result);
+
+            return result;
+        }
+
+        // Decodes the UTF-8 octets in [start, end) into code points written through result.
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            while (start < end) {
+                *result = utf8::unchecked::next(start);
+                ++result;
+            }
+
+            return result;
+        }
+
+        // The iterator class
+        // Bidirectional iterator adaptor that walks an octet sequence one code
+        // point at a time. Nothing is validated or bounds-checked.
+        template <typename octet_iterator>
+          class iterator {
+            octet_iterator it;
+            public:
+            typedef uint32_t value_type;
+            typedef uint32_t* pointer;
+            typedef uint32_t& reference;
+            typedef std::ptrdiff_t difference_type;
+            typedef std::bidirectional_iterator_tag iterator_category;
+            // Value-initialize the wrapped iterator so that default-constructed
+            // iterators hold a determinate value (null for raw pointers) and can
+            // be compared with each other.
+            iterator () : it() {}
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+            // Returns a copy of the wrapped octet iterator.
+            octet_iterator base () const { return it; }
+            // Decodes the code point at the current position without moving.
+            uint32_t operator * () const
+            {
+                octet_iterator temp = it;
+                return utf8::unchecked::next(temp);
+            }
+            bool operator == (const iterator& rhs) const 
+            { 
+                return (it == rhs.it);
+            }
+            bool operator != (const iterator& rhs) const
+            {
+                return !(operator == (rhs));
+            }
+            // Advances by the octet count announced by the current lead octet.
+            iterator& operator ++ () 
+            {
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return *this;
+            }
+            iterator operator ++ (int)
+            {
+                iterator temp = *this;
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return temp;
+            }  
+            // Steps back to the lead octet of the previous sequence.
+            iterator& operator -- ()
+            {
+                utf8::unchecked::prior(it);
+                return *this;
+            }
+            iterator operator -- (int)
+            {
+                iterator temp = *this;
+                utf8::unchecked::prior(it);
+                return temp;
+            }
+          }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8 
+
+
+#endif // header guard
+
--- a/mlu_370-piper/piper/src/cpp/wavfile.hpp
+++ b/mlu_370-piper/piper/src/cpp/wavfile.hpp
@@ -0,0 +1,40 @@
+#ifndef WAVFILE_H_
+#define WAVFILE_H_
+
+#include <cstdint>
+#include <iostream>
+
+// 44-byte RIFF/WAVE header for uncompressed PCM audio.
+// writeWavHeader emits this struct byte-for-byte, so the member order and
+// sizes define the on-disk layout and the values are written in host byte order.
+struct WavHeader {
+  uint8_t RIFF[4] = {'R', 'I', 'F', 'F'};
+  uint32_t chunkSize = 0; // dataSize + header size - 8
+  uint8_t WAVE[4] = {'W', 'A', 'V', 'E'};
+
+  // fmt
+  uint8_t fmt[4] = {'f', 'm', 't', ' '};
+  uint32_t fmtSize = 16;        // bytes
+  uint16_t audioFormat = 1;     // PCM
+  uint16_t numChannels = 1;     // mono
+  uint32_t sampleRate = 0;      // Hertz
+  uint32_t bytesPerSec = 0;     // sampleRate * sampleWidth * channels
+  uint16_t blockAlign = 2;      // 16-bit mono
+  uint16_t bitsPerSample = 16;
+
+  // data
+  uint8_t data[4] = {'d', 'a', 't', 'a'};
+  uint32_t dataSize = 0;        // numSamples * sampleWidth * channels
+};
+
+// The header is written as raw memory, so any padding would corrupt the file.
+static_assert(sizeof(WavHeader) == 44,
+              "WavHeader must be exactly 44 bytes with no padding");
+
+// Write WAV file header only
+//
+// sampleRate:  samples per second (Hertz)
+// sampleWidth: bytes per sample for one channel (2 for 16-bit audio)
+// channels:    number of interleaved channels
+// numSamples:  number of samples per channel that will follow the header
+// audioFile:   stream that receives the 44 header bytes
+//
+// Declared inline because it is defined in a header; without it every
+// translation unit including this file would emit its own definition.
+inline void writeWavHeader(int sampleRate, int sampleWidth, int channels,
+                    uint32_t numSamples, std::ostream &audioFile) {
+  const uint32_t bytesPerFrame =
+      static_cast<uint32_t>(sampleWidth) * static_cast<uint32_t>(channels);
+
+  WavHeader header;
+  header.dataSize = numSamples * bytesPerFrame;
+  header.chunkSize =
+      header.dataSize + static_cast<uint32_t>(sizeof(WavHeader)) - 8;
+  header.sampleRate = static_cast<uint32_t>(sampleRate);
+  header.numChannels = static_cast<uint16_t>(channels);
+  header.bytesPerSec = static_cast<uint32_t>(sampleRate) * bytesPerFrame;
+  header.blockAlign = static_cast<uint16_t>(bytesPerFrame);
+  // Keep the bit depth consistent with blockAlign/bytesPerSec instead of
+  // always reporting 16 bits regardless of sampleWidth.
+  header.bitsPerSample = static_cast<uint16_t>(sampleWidth * 8);
+  audioFile.write(reinterpret_cast<const char *>(&header), sizeof(header));
+
+} /* writeWavHeader */
+
+#endif // WAVFILE_H_