init

2025-09-10 10:56:53 +08:00
commit 1df95ad2f6
606 changed files with 590904 additions and 0 deletions
--- a/mr_v100-piper/piper/src/python_run/.gitignore
+++ b/mr_v100-piper/piper/src/python_run/.gitignore
@@ -0,0 +1,3 @@
+build/
+dist/
+*.egg-info/
--- a/mr_v100-piper/piper/src/python_run/.isort.cfg
+++ b/mr_v100-piper/piper/src/python_run/.isort.cfg
@@ -0,0 +1,6 @@
+[settings]
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
--- a/mr_v100-piper/piper/src/python_run/MANIFEST.in
+++ b/mr_v100-piper/piper/src/python_run/MANIFEST.in
@@ -0,0 +1,2 @@
+include requirements.txt
+include piper/voices.json
--- a/mr_v100-piper/piper/src/python_run/README_http.md
+++ b/mr_v100-piper/piper/src/python_run/README_http.md
@@ -0,0 +1,27 @@
+# Piper HTTP Server
+
+Install the requirements into your virtual environment:
+
+```sh
+.venv/bin/pip3 install -r requirements_http.txt
+```
+
+Run the web server:
+
+```sh
+.venv/bin/python3 -m piper.http_server --model ...
+```
+
+See `--help` for more options.
+
+Using a `GET` request:
+
+```sh
+curl -G --data-urlencode 'text=This is a test.' -o test.wav 'localhost:5000'
+```
+
+Using a `POST` request:
+
+```sh
+curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
+```
--- a/mr_v100-piper/piper/src/python_run/mypy.ini
+++ b/mr_v100-piper/piper/src/python_run/mypy.ini
@@ -0,0 +1,7 @@
+[mypy]
+
+[mypy-onnxruntime.*]
+ignore_missing_imports = True
+
+[mypy-piper_phonemize.*]
+ignore_missing_imports = True
--- a/mr_v100-piper/piper/src/python_run/piper/__init__.py
+++ b/mr_v100-piper/piper/src/python_run/piper/__init__.py
@@ -0,0 +1,5 @@
+from .voice import PiperVoice
+
+__all__ = [
+    "PiperVoice",
+]
--- a/mr_v100-piper/piper/src/python_run/piper/__main__.py
+++ b/mr_v100-piper/piper/src/python_run/piper/__main__.py
@@ -0,0 +1,159 @@
+import argparse
+import logging
+import sys
+import time
+import wave
+from pathlib import Path
+from typing import Any, Dict
+
+from . import PiperVoice
+from .download import ensure_voice_exists, find_voice, get_voices
+
+_FILE = Path(__file__)  # path of this module's file
+_DIR = _FILE.parent  # directory containing this module
+_LOGGER = logging.getLogger(_FILE.stem)  # logger named after the file stem
+
+
+def main() -> None:
+    """Synthesize speech from text on stdin with a Piper voice.
+
+    Parses the command line, downloads the voice when ``--model`` is not an
+    existing path, loads it, and then writes audio in one of three modes:
+
+    * ``--output-raw``: stream raw audio to stdout, one input line at a time
+    * ``--output-dir``: write one WAV file per non-empty input line
+    * otherwise: read all of stdin and write a single WAV to
+      ``--output-file`` (stdout when omitted or ``-``)
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
+    cli.add_argument("-c", "--config", help="Path to model config file")
+
+    # Output destination
+    cli.add_argument(
+        "-f",
+        "--output-file",
+        "--output_file",
+        help="Path to output WAV file (default: stdout)",
+    )
+    cli.add_argument(
+        "-d",
+        "--output-dir",
+        "--output_dir",
+        help="Path to output directory (default: cwd)",
+    )
+    cli.add_argument(
+        "--output-raw",
+        "--output_raw",
+        action="store_true",
+        help="Stream raw audio to stdout",
+    )
+
+    # Synthesis settings (None means "use the value from the voice config")
+    cli.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
+    cli.add_argument(
+        "--length-scale", "--length_scale", type=float, help="Phoneme length"
+    )
+    cli.add_argument(
+        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
+    )
+    cli.add_argument("--noise-w", "--noise_w", type=float, help="Phoneme width noise")
+    cli.add_argument("--cuda", action="store_true", help="Use GPU")
+    cli.add_argument(
+        "--sentence-silence",
+        "--sentence_silence",
+        type=float,
+        default=0.0,
+        help="Seconds of silence after each sentence",
+    )
+
+    # Where voices are looked up and downloaded
+    cli.add_argument(
+        "--data-dir",
+        "--data_dir",
+        action="append",
+        default=[str(Path.cwd())],
+        help="Data directory to check for downloaded models (default: current directory)",
+    )
+    cli.add_argument(
+        "--download-dir",
+        "--download_dir",
+        help="Directory to download voices into (default: first data dir)",
+    )
+    cli.add_argument(
+        "--update-voices",
+        action="store_true",
+        help="Download latest voices.json during startup",
+    )
+
+    cli.add_argument(
+        "--debug", action="store_true", help="Print DEBUG messages to console"
+    )
+    args = cli.parse_args()
+
+    log_level = logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(level=log_level)
+    _LOGGER.debug(args)
+
+    # Voices are downloaded into the first data directory unless told otherwise
+    if not args.download_dir:
+        args.download_dir = args.data_dir[0]
+
+    # A model argument that is not an existing path is treated as a voice name
+    if not Path(args.model).exists():
+        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+
+        # Keep old voice names usable: every alias becomes an entry of its own
+        aliases_info: Dict[str, Any] = {
+            alias: {"_is_alias": True, **info}
+            for info in voices_info.values()
+            for alias in info.get("aliases", [])
+        }
+        voices_info.update(aliases_info)
+
+        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
+        args.model, args.config = find_voice(args.model, args.data_dir)
+
+    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
+    synthesize_args = {
+        "speaker_id": args.speaker,
+        "length_scale": args.length_scale,
+        "noise_scale": args.noise_scale,
+        "noise_w": args.noise_w,
+        "sentence_silence": args.sentence_silence,
+    }
+
+    if args.output_raw:
+        # One input line at a time; audio is flushed to stdout as it is produced
+        for sentence in filter(None, map(str.strip, sys.stdin)):
+            for chunk in voice.synthesize_stream_raw(sentence, **synthesize_args):
+                sys.stdout.buffer.write(chunk)
+                sys.stdout.buffer.flush()
+        return
+
+    if args.output_dir:
+        output_dir = Path(args.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # One WAV file per non-empty input line, named by a monotonic timestamp
+        for sentence in filter(None, map(str.strip, sys.stdin)):
+            wav_path = output_dir / f"{time.monotonic_ns()}.wav"
+            with wave.open(str(wav_path), "wb") as wav_file:
+                voice.synthesize(sentence, wav_file, **synthesize_args)
+
+            _LOGGER.info("Wrote %s", wav_path)
+        return
+
+    # Single WAV for the whole input, written to a file or to stdout
+    text = sys.stdin.read()
+    to_stdout = (not args.output_file) or (args.output_file == "-")
+    destination = sys.stdout.buffer if to_stdout else args.output_file
+    with wave.open(destination, "wb") as wav_file:
+        voice.synthesize(text, wav_file, **synthesize_args)
+
+
+if __name__ == "__main__":
+    main()
--- a/mr_v100-piper/piper/src/python_run/piper/config.py
+++ b/mr_v100-piper/piper/src/python_run/piper/config.py
@@ -0,0 +1,53 @@
+"""Piper configuration"""
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, Mapping, Sequence
+
+
+class PhonemeType(str, Enum):
+    """Kind of phonemization a voice uses; the values are plain strings."""
+
+    # Text is phonemized with phonemize_espeak (see PiperVoice.phonemize)
+    ESPEAK = "espeak"
+    # Text is phonemized with phonemize_codepoints (see PiperVoice.phonemize)
+    TEXT = "text"
+
+
+@dataclass
+class PiperConfig:
+    """Settings of a Piper voice, built from its parsed JSON config."""
+
+    # Number of phonemes
+    num_symbols: int
+
+    # Number of speakers
+    num_speakers: int
+
+    # Sample rate of output audio
+    sample_rate: int
+
+    # Name of espeak-ng voice or alphabet
+    espeak_voice: str
+
+    # Default inference scales, used when the caller does not override them
+    length_scale: float
+    noise_scale: float
+    noise_w: float
+
+    # Phoneme -> [id,]
+    phoneme_id_map: Mapping[str, Sequence[int]]
+
+    # espeak or text
+    phoneme_type: PhonemeType
+
+    @staticmethod
+    def from_dict(config: Dict[str, Any]) -> "PiperConfig":
+        """Build a PiperConfig from a parsed config dictionary.
+
+        Required keys (KeyError when absent): ``num_symbols``,
+        ``num_speakers``, ``audio.sample_rate``, ``espeak.voice`` and
+        ``phoneme_id_map``. The ``inference`` scales fall back to 0.667
+        (noise_scale), 1.0 (length_scale) and 0.8 (noise_w); ``phoneme_type``
+        falls back to espeak.
+        """
+        inference = config.get("inference", {})
+
+        # Keys are read in a fixed order so the first missing one is reported
+        settings: Dict[str, Any] = {
+            "num_symbols": config["num_symbols"],
+            "num_speakers": config["num_speakers"],
+            "sample_rate": config["audio"]["sample_rate"],
+            "noise_scale": inference.get("noise_scale", 0.667),
+            "length_scale": inference.get("length_scale", 1.0),
+            "noise_w": inference.get("noise_w", 0.8),
+            "espeak_voice": config["espeak"]["voice"],
+            "phoneme_id_map": config["phoneme_id_map"],
+            "phoneme_type": PhonemeType(
+                config.get("phoneme_type", PhonemeType.ESPEAK)
+            ),
+        }
+        return PiperConfig(**settings)
--- a/mr_v100-piper/piper/src/python_run/piper/const.py
+++ b/mr_v100-piper/piper/src/python_run/piper/const.py
@@ -0,0 +1,5 @@
+"""Constants"""
+
+PAD = "_"  # padding symbol (0); added after each phoneme's ids
+BOS = "^"  # beginning-of-sentence symbol
+EOS = "$"  # end-of-sentence symbol
--- a/mr_v100-piper/piper/src/python_run/piper/download.py
+++ b/mr_v100-piper/piper/src/python_run/piper/download.py
@@ -0,0 +1,139 @@
+"""Utility for downloading Piper voices."""
+import json
+import logging
+import shutil
+from pathlib import Path
+from typing import Any, Dict, Iterable, Set, Tuple, Union
+from urllib.request import urlopen
+
+from .file_hash import get_file_hash
+
+# Download URL template; {file} is replaced by a file path such as "voices.json"
+URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
+
+# Directory of this module; the embedded voices.json is looked up here
+_DIR = Path(__file__).parent
+_LOGGER = logging.getLogger(__name__)
+
+# File names that are neither verified nor downloaded
+_SKIP_FILES = {"MODEL_CARD"}
+
+
+class VoiceNotFoundError(Exception):
+    """Raised when a requested voice name is not present in the voices info."""
+
+
+def get_voices(
+    download_dir: Union[str, Path], update_voices: bool = False
+) -> Dict[str, Any]:
+    """Load available voices from the downloaded or embedded JSON file.
+
+    Args:
+        download_dir: Directory that holds (or receives) a downloaded
+            voices.json.
+        update_voices: When true, fetch the latest voices.json into
+            download_dir before loading.
+
+    Returns:
+        The parsed content of voices.json. A copy in download_dir takes
+        precedence over the one embedded next to this module.
+    """
+    download_dir = Path(download_dir)
+    voices_download = download_dir / "voices.json"
+
+    if update_voices:
+        # Download latest voices.json
+        voices_url = URL_FORMAT.format(file="voices.json")
+        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)
+
+        # The download directory may not exist yet on a first run
+        download_dir.mkdir(parents=True, exist_ok=True)
+        with urlopen(voices_url) as response, open(
+            voices_download, "wb"
+        ) as download_file:
+            shutil.copyfileobj(response, download_file)
+
+    # Prefer downloaded file to embedded
+    voices_embedded = _DIR / "voices.json"
+    voices_path = voices_download if voices_download.exists() else voices_embedded
+
+    _LOGGER.debug("Loading %s", voices_path)
+    with open(voices_path, "r", encoding="utf-8") as voices_file:
+        return json.load(voices_file)
+
+
+def _find_invalid_files(data_dir: Path, voice_files: Dict[str, Any]) -> Set[str]:
+    """Return the voice file paths that are missing or corrupt in data_dir."""
+    invalid_files: Set[str] = set()
+
+    for file_path, file_info in voice_files.items():
+        file_name = Path(file_path).name
+        if file_name in _SKIP_FILES:
+            continue
+
+        data_file_path = data_dir / file_name
+        _LOGGER.debug("Checking %s", data_file_path)
+        if not data_file_path.exists():
+            _LOGGER.debug("Missing %s", data_file_path)
+            invalid_files.add(file_path)
+            continue
+
+        # Compare sizes first: it is much cheaper than hashing the file
+        expected_size = file_info["size_bytes"]
+        actual_size = data_file_path.stat().st_size
+        if expected_size != actual_size:
+            _LOGGER.warning(
+                "Wrong size (expected=%s, actual=%s) for %s",
+                expected_size,
+                actual_size,
+                data_file_path,
+            )
+            invalid_files.add(file_path)
+            continue
+
+        expected_hash = file_info["md5_digest"]
+        actual_hash = get_file_hash(data_file_path)
+        if expected_hash != actual_hash:
+            _LOGGER.warning(
+                "Wrong hash (expected=%s, actual=%s) for %s",
+                expected_hash,
+                actual_hash,
+                data_file_path,
+            )
+            invalid_files.add(file_path)
+
+    return invalid_files
+
+
+def ensure_voice_exists(
+    name: str,
+    data_dirs: Iterable[Union[str, Path]],
+    download_dir: Union[str, Path],
+    voices_info: Dict[str, Any],
+):
+    """Make sure a complete, valid copy of a voice is available.
+
+    Nothing is downloaded when any data directory already holds every file
+    of the voice with the expected size and MD5 digest. Otherwise the files
+    that are missing or corrupt in download_dir are fetched into it.
+
+    Args:
+        name: Voice name, a key of voices_info.
+        data_dirs: Directories searched for an existing copy of the voice.
+        download_dir: Directory that receives downloaded files.
+        voices_info: Voice index; each entry has a "files" mapping from file
+            path to a dict with "size_bytes" and "md5_digest".
+
+    Raises:
+        ValueError: No data directories were given, or the voice lists no
+            files.
+        VoiceNotFoundError: name is not in voices_info.
+    """
+    # Materialize first: a generator is always truthy and can be read only once
+    search_dirs = [Path(data_dir) for data_dir in data_dirs]
+    if not search_dirs:
+        raise ValueError("No data dirs")
+
+    if name not in voices_info:
+        raise VoiceNotFoundError(name)
+
+    voice_files = voices_info[name]["files"]
+    if not voice_files:
+        raise ValueError(f"Unable to find or download voice: {name}")
+
+    # A voice is usable only when all of its files sit in the same directory
+    # (find_voice looks for the model and its config side by side), so each
+    # directory is judged as a whole.
+    invalid_by_dir: Dict[Path, Set[str]] = {}
+    for data_dir in search_dirs:
+        invalid_by_dir[data_dir] = _find_invalid_files(data_dir, voice_files)
+        if not invalid_by_dir[data_dir]:
+            # Complete and valid copy found
+            return
+
+    # Download missing files
+    download_dir = Path(download_dir)
+    if download_dir not in invalid_by_dir:
+        invalid_by_dir[download_dir] = _find_invalid_files(download_dir, voice_files)
+
+    for file_path in invalid_by_dir[download_dir]:
+        file_name = Path(file_path).name
+        file_url = URL_FORMAT.format(file=file_path)
+        download_file_path = download_dir / file_name
+        download_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
+        with urlopen(file_url) as response, open(
+            download_file_path, "wb"
+        ) as download_file:
+            shutil.copyfileobj(response, download_file)
+
+        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
+
+
+def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
+    """Locate the model and config files of a voice.
+
+    Returns the ``<name>.onnx`` and ``<name>.onnx.json`` paths from the first
+    data directory that contains both files.
+
+    Raises:
+        ValueError: No data directory contains both files.
+    """
+    for candidate_dir in map(Path, data_dirs):
+        model_file = candidate_dir / f"{name}.onnx"
+        config_file = candidate_dir / f"{name}.onnx.json"
+
+        if not (model_file.exists() and config_file.exists()):
+            continue
+
+        return model_file, config_file
+
+    raise ValueError(f"Missing files for voice {name}")
--- a/mr_v100-piper/piper/src/python_run/piper/file_hash.py
+++ b/mr_v100-piper/piper/src/python_run/piper/file_hash.py
@@ -0,0 +1,46 @@
+import argparse
+import hashlib
+import json
+import sys
+from pathlib import Path
+from typing import Union
+
+
+def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
+    """Return the hexadecimal MD5 digest of a file.
+
+    The file is read in binary mode, bytes_per_chunk bytes at a time.
+    """
+    digest = hashlib.md5()
+    with open(path, "rb") as hashed_file:
+        # read() returns b"" at end of file, which stops the iteration
+        for chunk in iter(lambda: hashed_file.read(bytes_per_chunk), b""):
+            digest.update(chunk)
+
+    return digest.hexdigest()
+
+
+# -----------------------------------------------------------------------------
+
+
+def main():
+    """Print a JSON object mapping each given file to its MD5 digest.
+
+    With ``--dir``, the keys are the file paths relative to that directory;
+    otherwise they are the paths as given on the command line.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("file", nargs="+")
+    parser.add_argument("--dir", help="Parent directory")
+    args = parser.parse_args()
+
+    parent_dir = Path(args.dir) if args.dir else None
+
+    hashes = {}
+    for file_arg in args.file:
+        file_path = Path(file_arg)
+        # Hash before relativizing: the relative path need not exist from cwd
+        digest = get_file_hash(file_path)
+        if parent_dir is not None:
+            file_path = file_path.relative_to(parent_dir)
+
+        hashes[str(file_path)] = digest
+
+    json.dump(hashes, sys.stdout)
+
+
+if __name__ == "__main__":
+    main()
--- a/mr_v100-piper/piper/src/python_run/piper/http_server.py
+++ b/mr_v100-piper/piper/src/python_run/piper/http_server.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+import argparse
+import io
+import logging
+import wave
+from pathlib import Path
+from typing import Any, Dict
+
+from flask import Flask, request
+
+from . import PiperVoice
+from .download import ensure_voice_exists, find_voice, get_voices
+
+_LOGGER = logging.getLogger()  # no name given, so this is the root logger
+
+
+def main() -> None:
+    """Run a small HTTP server that synthesizes speech with a Piper voice.
+
+    Parses the command line, downloads the voice when ``--model`` is not an
+    existing path, loads it once, and serves a single route ``/``:
+
+    * ``GET /?text=...`` takes the text from the query string
+    * ``POST /`` takes the text from the request body (UTF-8)
+
+    A successful request is answered with WAV audio (``audio/wav``); a
+    request without text is answered with HTTP 400.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
+    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
+    # Voice model
+    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
+    parser.add_argument("-c", "--config", help="Path to model config file")
+    # Synthesis settings (None means "use the value from the voice config")
+    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
+    parser.add_argument(
+        "--length-scale", "--length_scale", type=float, help="Phoneme length"
+    )
+    parser.add_argument(
+        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
+    )
+    parser.add_argument(
+        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
+    )
+    parser.add_argument("--cuda", action="store_true", help="Use GPU")
+    parser.add_argument(
+        "--sentence-silence",
+        "--sentence_silence",
+        type=float,
+        default=0.0,
+        help="Seconds of silence after each sentence",
+    )
+    # Where voices are looked up and downloaded
+    parser.add_argument(
+        "--data-dir",
+        "--data_dir",
+        action="append",
+        default=[str(Path.cwd())],
+        help="Data directory to check for downloaded models (default: current directory)",
+    )
+    parser.add_argument(
+        "--download-dir",
+        "--download_dir",
+        help="Directory to download voices into (default: first data dir)",
+    )
+    parser.add_argument(
+        "--update-voices",
+        action="store_true",
+        help="Download latest voices.json during startup",
+    )
+    parser.add_argument(
+        "--debug", action="store_true", help="Print DEBUG messages to console"
+    )
+    args = parser.parse_args()
+    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+    _LOGGER.debug(args)
+
+    if not args.download_dir:
+        # Download to first data directory by default
+        args.download_dir = args.data_dir[0]
+
+    # Download voice if file doesn't exist
+    model_path = Path(args.model)
+    if not model_path.exists():
+        # Load voice info
+        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+
+        # Resolve aliases for backwards compatibility with old voice names
+        aliases_info: Dict[str, Any] = {}
+        for voice_info in voices_info.values():
+            for voice_alias in voice_info.get("aliases", []):
+                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}
+
+        voices_info.update(aliases_info)
+        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
+        args.model, args.config = find_voice(args.model, args.data_dir)
+
+    # Load voice
+    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
+    synthesize_args = {
+        "speaker_id": args.speaker,
+        "length_scale": args.length_scale,
+        "noise_scale": args.noise_scale,
+        "noise_w": args.noise_w,
+        "sentence_silence": args.sentence_silence,
+    }
+
+    # Create web server
+    app = Flask(__name__)
+
+    @app.route("/", methods=["GET", "POST"])
+    def app_synthesize() -> Any:
+        """Synthesize the request's text; returns (body, status, headers)."""
+        if request.method == "POST":
+            text = request.data.decode("utf-8")
+        else:
+            text = request.args.get("text", "")
+
+        text = text.strip()
+        if not text:
+            # Missing text is a client error; raising here would surface as a 500
+            return (
+                "No text provided",
+                400,
+                {"Content-Type": "text/plain; charset=utf-8"},
+            )
+
+        _LOGGER.debug("Synthesizing text: %s", text)
+        with io.BytesIO() as wav_io:
+            with wave.open(wav_io, "wb") as wav_file:
+                voice.synthesize(text, wav_file, **synthesize_args)
+
+            # Without an explicit type Flask would label the audio as text/html
+            return wav_io.getvalue(), 200, {"Content-Type": "audio/wav"}
+
+    app.run(host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+    main()
--- a/mr_v100-piper/piper/src/python_run/piper/util.py
+++ b/mr_v100-piper/piper/src/python_run/piper/util.py
@@ -0,0 +1,12 @@
+"""Utilities"""
+import numpy as np
+
+
+def audio_float_to_int16(
+    audio: np.ndarray, max_wav_value: float = 32767.0
+) -> np.ndarray:
+    """Normalize audio and convert to int16 range.
+
+    The audio is scaled so that its largest absolute sample maps to
+    max_wav_value. The peak used for scaling is floored at 0.01, which
+    limits how much very quiet audio is amplified.
+
+    Args:
+        audio: Audio samples as a float array.
+        max_wav_value: Largest magnitude of the output samples.
+
+    Returns:
+        An int16 array with the same shape as audio; empty input gives an
+        empty array.
+    """
+    if audio.size == 0:
+        # np.max raises ValueError on a zero-size array
+        return audio.astype("int16")
+
+    scale = max_wav_value / max(0.01, np.max(np.abs(audio)))
+    return np.clip(audio * scale, -max_wav_value, max_wav_value).astype("int16")
--- a/mr_v100-piper/piper/src/python_run/piper/voice.py
+++ b/mr_v100-piper/piper/src/python_run/piper/voice.py
@@ -0,0 +1,185 @@
+import json
+import logging
+import wave
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+
+import numpy as np
+import onnxruntime
+from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run
+
+from .config import PhonemeType, PiperConfig
+from .const import BOS, EOS, PAD
+from .util import audio_float_to_int16
+
+_LOGGER = logging.getLogger(__name__)
+
+
+@dataclass
+class PiperVoice:
+    """A Piper voice: an ONNX inference session paired with its configuration."""
+
+    session: onnxruntime.InferenceSession
+    config: PiperConfig
+
+    @staticmethod
+    def load(
+        model_path: Union[str, Path],
+        config_path: Optional[Union[str, Path]] = None,
+        use_cuda: bool = False,
+    ) -> "PiperVoice":
+        """Create a voice from an ONNX model file and its JSON config.
+
+        When config_path is omitted, ``<model_path>.json`` is used. use_cuda
+        selects the CUDA execution provider instead of the CPU one.
+        """
+        if config_path is None:
+            config_path = f"{model_path}.json"
+
+        with open(config_path, "r", encoding="utf-8") as config_file:
+            voice_config = PiperConfig.from_dict(json.load(config_file))
+
+        providers: List[Union[str, Tuple[str, Dict[str, Any]]]]
+        providers = ["CPUExecutionProvider"]
+        if use_cuda:
+            cuda_options = {"cudnn_conv_algo_search": "HEURISTIC"}
+            providers = [("CUDAExecutionProvider", cuda_options)]
+
+        session = onnxruntime.InferenceSession(
+            str(model_path),
+            sess_options=onnxruntime.SessionOptions(),
+            providers=providers,
+        )
+        return PiperVoice(session=session, config=voice_config)
+
+    def phonemize(self, text: str) -> List[List[str]]:
+        """Convert text into phonemes, one list per sentence.
+
+        Raises:
+            ValueError: The configured phoneme type is neither espeak nor text.
+        """
+        phoneme_type = self.config.phoneme_type
+
+        if phoneme_type == PhonemeType.TEXT:
+            return phonemize_codepoints(text)
+
+        if phoneme_type != PhonemeType.ESPEAK:
+            raise ValueError(f"Unexpected phoneme type: {self.config.phoneme_type}")
+
+        espeak_voice = self.config.espeak_voice
+        if espeak_voice == "ar":
+            # Arabic text is diacritized before phonemization
+            # https://github.com/mush42/libtashkeel/
+            text = tashkeel_run(text)
+
+        return phonemize_espeak(text, espeak_voice)
+
+    def phonemes_to_ids(self, phonemes: List[str]) -> List[int]:
+        """Map one sentence's phonemes to model input ids.
+
+        The result starts with the BOS ids and ends with the EOS ids; the
+        ids of every known phoneme are followed by the PAD ids. Phonemes
+        missing from the id map are skipped with a warning.
+        """
+        id_map = self.config.phoneme_id_map
+        ids: List[int] = [*id_map[BOS]]
+
+        for phoneme in phonemes:
+            if phoneme in id_map:
+                ids.extend(id_map[phoneme])
+                ids.extend(id_map[PAD])
+            else:
+                _LOGGER.warning("Missing phoneme from id map: %s", phoneme)
+
+        ids.extend(id_map[EOS])
+        return ids
+
+    def synthesize(
+        self,
+        text: str,
+        wav_file: wave.Wave_write,
+        speaker_id: Optional[int] = None,
+        length_scale: Optional[float] = None,
+        noise_scale: Optional[float] = None,
+        noise_w: Optional[float] = None,
+        sentence_silence: float = 0.0,
+    ):
+        """Synthesize text and write it to wav_file as 16-bit mono audio.
+
+        The WAV parameters are set from the voice's sample rate before any
+        frame is written; the remaining arguments are passed through to
+        synthesize_stream_raw.
+        """
+        wav_file.setframerate(self.config.sample_rate)
+        wav_file.setsampwidth(2)  # 16-bit
+        wav_file.setnchannels(1)  # mono
+
+        sentence_audio = self.synthesize_stream_raw(
+            text,
+            speaker_id=speaker_id,
+            length_scale=length_scale,
+            noise_scale=noise_scale,
+            noise_w=noise_w,
+            sentence_silence=sentence_silence,
+        )
+        for audio_bytes in sentence_audio:
+            wav_file.writeframes(audio_bytes)
+
+    def synthesize_stream_raw(
+        self,
+        text: str,
+        speaker_id: Optional[int] = None,
+        length_scale: Optional[float] = None,
+        noise_scale: Optional[float] = None,
+        noise_w: Optional[float] = None,
+        sentence_silence: float = 0.0,
+    ) -> Iterable[bytes]:
+        """Yield raw 16-bit mono audio, one chunk per sentence of text.
+
+        Each chunk is followed by sentence_silence seconds of silence.
+        """
+        sentence_phonemes = self.phonemize(text)
+
+        # Two bytes per sample (16-bit mono)
+        silence_sample_count = int(sentence_silence * self.config.sample_rate)
+        silence_bytes = bytes(silence_sample_count * 2)
+
+        for phonemes in sentence_phonemes:
+            sentence_bytes = self.synthesize_ids_to_raw(
+                self.phonemes_to_ids(phonemes),
+                speaker_id=speaker_id,
+                length_scale=length_scale,
+                noise_scale=noise_scale,
+                noise_w=noise_w,
+            )
+            yield sentence_bytes + silence_bytes
+
+    def synthesize_ids_to_raw(
+        self,
+        phoneme_ids: List[int],
+        speaker_id: Optional[int] = None,
+        length_scale: Optional[float] = None,
+        noise_scale: Optional[float] = None,
+        noise_w: Optional[float] = None,
+    ) -> bytes:
+        """Run the model on phoneme ids and return raw 16-bit audio bytes.
+
+        Scales left as None fall back to the voice config. A speaker id is
+        only sent to multi-speaker models, defaulting to speaker 0.
+        """
+        config = self.config
+        length_scale = config.length_scale if length_scale is None else length_scale
+        noise_scale = config.noise_scale if noise_scale is None else noise_scale
+        noise_w = config.noise_w if noise_w is None else noise_w
+
+        # Batch of one sequence, plus its length
+        ids_batch = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
+        model_inputs = {
+            "input": ids_batch,
+            "input_lengths": np.array([ids_batch.shape[1]], dtype=np.int64),
+            "scales": np.array(
+                [noise_scale, length_scale, noise_w],
+                dtype=np.float32,
+            ),
+        }
+
+        if config.num_speakers > 1:
+            # Multi-speaker model: fall back to the first speaker
+            chosen_speaker = 0 if speaker_id is None else speaker_id
+            model_inputs["sid"] = np.array([chosen_speaker], dtype=np.int64)
+
+        # Synthesize through Onnx
+        outputs = self.session.run(None, model_inputs)
+        audio = outputs[0].squeeze((0, 1))
+        return audio_float_to_int16(audio.squeeze()).tobytes()
--- a/mr_v100-piper/piper/src/python_run/piper/voices.json
+++ b/mr_v100-piper/piper/src/python_run/piper/voices.json
--- a/mr_v100-piper/piper/src/python_run/py.typed
+++ b/mr_v100-piper/piper/src/python_run/py.typed
--- a/mr_v100-piper/piper/src/python_run/pylintrc
+++ b/mr_v100-piper/piper/src/python_run/pylintrc
@@ -0,0 +1,37 @@
+[MESSAGES CONTROL]
+disable=
+  format,
+  abstract-method,
+  cyclic-import,
+  duplicate-code,
+  global-statement,
+  import-outside-toplevel,
+  inconsistent-return-statements,
+  locally-disabled,
+  not-context-manager,
+  too-few-public-methods,
+  too-many-arguments,
+  too-many-branches,
+  too-many-instance-attributes,
+  too-many-lines,
+  too-many-locals,
+  too-many-public-methods,
+  too-many-return-statements,
+  too-many-statements,
+  too-many-boolean-expressions,
+  unnecessary-pass,
+  unused-argument,
+  broad-except,
+  too-many-nested-blocks,
+  invalid-name,
+  unused-import,
+  fixme,
+  useless-super-delegation,
+  missing-module-docstring,
+  missing-class-docstring,
+  missing-function-docstring,
+  import-error,
+  relative-beyond-top-level
+
+[FORMAT]
+expected-line-ending-format=LF
--- a/mr_v100-piper/piper/src/python_run/requirements.txt
+++ b/mr_v100-piper/piper/src/python_run/requirements.txt
@@ -0,0 +1,2 @@
+piper-phonemize~=1.1.0
+onnxruntime>=1.11.0,<2
--- a/mr_v100-piper/piper/src/python_run/requirements_dev.txt
+++ b/mr_v100-piper/piper/src/python_run/requirements_dev.txt
@@ -0,0 +1,5 @@
+black==22.12.0
+flake8==6.0.0
+isort==5.11.3
+mypy==0.991
+pylint==2.15.9
--- a/mr_v100-piper/piper/src/python_run/requirements_gpu.txt
+++ b/mr_v100-piper/piper/src/python_run/requirements_gpu.txt
@@ -0,0 +1 @@
+onnxruntime-gpu>=1.11.0,<2
--- a/mr_v100-piper/piper/src/python_run/requirements_http.txt
+++ b/mr_v100-piper/piper/src/python_run/requirements_http.txt
@@ -0,0 +1 @@
+flask>=3,<4
--- a/mr_v100-piper/piper/src/python_run/script/format
+++ b/mr_v100-piper/piper/src/python_run/script/format
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+_MODULE_DIR = _PROGRAM_DIR / "piper"
+
+# The context's env_exe is the Python interpreter of the project's .venv
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+
+# Reformat the package in place; a failing tool aborts the script
+for formatter in ("black", "isort"):
+    subprocess.check_call([context.env_exe, "-m", formatter, str(_MODULE_DIR)])
--- a/mr_v100-piper/piper/src/python_run/script/lint
+++ b/mr_v100-piper/piper/src/python_run/script/lint
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+_MODULE_DIR = _PROGRAM_DIR / "piper"
+
+# The context's env_exe is the Python interpreter of the project's .venv
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+
+# Checks run in this order; the first failing tool aborts the script
+for tool, extra_args in (
+    ("black", ["--check"]),
+    ("isort", ["--check"]),
+    ("flake8", []),
+    ("pylint", []),
+    ("mypy", []),
+):
+    subprocess.check_call([context.env_exe, "-m", tool, str(_MODULE_DIR), *extra_args])
--- a/mr_v100-piper/piper/src/python_run/script/piper
+++ b/mr_v100-piper/piper/src/python_run/script/piper
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+import sys
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+
+# Run "python -m piper" with the project's .venv interpreter, forwarding
+# every command-line argument unchanged
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+subprocess.check_call([context.env_exe, "-m", "piper", *sys.argv[1:]])
--- a/mr_v100-piper/piper/src/python_run/script/setup
+++ b/mr_v100-piper/piper/src/python_run/script/setup
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+
+
+# Create virtual environment
+builder = venv.EnvBuilder(with_pip=True)
+context = builder.ensure_directories(_VENV_DIR)
+builder.create(_VENV_DIR)
+
+pip = [context.env_exe, "-m", "pip"]
+find_links_url = "https://synesthesiam.github.io/prebuilt-apps/"
+requirements_file = str(_PROGRAM_DIR / "requirements.txt")
+
+# Upgrade the packaging tools first, then install the requirements; each
+# entry is one "pip install" invocation and a failure aborts the script
+for install_args in (
+    ["--upgrade", "pip"],
+    ["--upgrade", "setuptools", "wheel"],
+    ["-f", find_links_url, "-r", requirements_file],
+):
+    subprocess.check_call([*pip, "install", *install_args])
--- a/mr_v100-piper/piper/src/python_run/setup.cfg
+++ b/mr_v100-piper/piper/src/python_run/setup.cfg
@@ -0,0 +1,22 @@
+[flake8]
+# To work with Black
+max-line-length = 88
+# E501: line too long
+# W503: Line break occurred before a binary operator
+# E203: Whitespace before ':'
+# D202 No blank lines allowed after function docstring
+# W504 line break after binary operator
+ignore =
+    E501,
+    W503,
+    E203,
+    D202,
+    W504
+
+[isort]
+multi_line_output = 3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
+indent = "    "
--- a/mr_v100-piper/piper/src/python_run/setup.py
+++ b/mr_v100-piper/piper/src/python_run/setup.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+from pathlib import Path
+
+import setuptools
+from setuptools import setup
+
+this_dir = Path(__file__).parent
+module_dir = this_dir / "piper"
+
+# Runtime dependencies are read from requirements.txt when it is present
+requirements_path = this_dir / "requirements.txt"
+requirements = (
+    requirements_path.read_text(encoding="utf-8").splitlines()
+    if requirements_path.is_file()
+    else []
+)
+
+# Non-Python files shipped inside the package, listed relative to the package
+data_files = [module_dir / "voices.json"]
+package_data = {"piper": [str(path.relative_to(module_dir)) for path in data_files]}
+
+# -----------------------------------------------------------------------------
+
+setup(
+    name="piper-tts",
+    version="1.2.0",
+    description="A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.",
+    url="http://github.com/rhasspy/piper",
+    author="Michael Hansen",
+    author_email="mike@rhasspy.org",
+    license="MIT",
+    packages=setuptools.find_packages(),
+    package_data=package_data,
+    entry_points={
+        "console_scripts": [
+            "piper = piper.__main__:main",
+        ]
+    },
+    install_requires=requirements,
+    extras_require={"gpu": ["onnxruntime-gpu>=1.11.0,<2"], "http": ["flask>=3,<4"]},
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Topic :: Text Processing :: Linguistic",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+    ],
+    keywords="rhasspy piper tts",
+)