init
This commit is contained in:
3
mr_v100-piper/piper/src/python_run/.gitignore
vendored
Normal file
3
mr_v100-piper/piper/src/python_run/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
6
mr_v100-piper/piper/src/python_run/.isort.cfg
Normal file
6
mr_v100-piper/piper/src/python_run/.isort.cfg
Normal file
@@ -0,0 +1,6 @@
|
||||
[settings]
|
||||
multi_line_output=3
|
||||
include_trailing_comma=True
|
||||
force_grid_wrap=0
|
||||
use_parentheses=True
|
||||
line_length=88
|
||||
2
mr_v100-piper/piper/src/python_run/MANIFEST.in
Normal file
2
mr_v100-piper/piper/src/python_run/MANIFEST.in
Normal file
@@ -0,0 +1,2 @@
|
||||
include requirements.txt
|
||||
include piper/voices.json
|
||||
27
mr_v100-piper/piper/src/python_run/README_http.md
Normal file
27
mr_v100-piper/piper/src/python_run/README_http.md
Normal file
@@ -0,0 +1,27 @@
|
||||
# Piper HTTP Server
|
||||
|
||||
Install the requirements into your virtual environment:
|
||||
|
||||
```sh
|
||||
.venv/bin/pip3 install -r requirements_http.txt
|
||||
```
|
||||
|
||||
Run the web server:
|
||||
|
||||
```sh
|
||||
.venv/bin/python3 -m piper.http_server --model ...
|
||||
```
|
||||
|
||||
See `--help` for more options.
|
||||
|
||||
Using a `GET` request:
|
||||
|
||||
```sh
|
||||
curl -G --data-urlencode 'text=This is a test.' -o test.wav 'localhost:5000'
|
||||
```
|
||||
|
||||
Using a `POST` request:
|
||||
|
||||
```sh
|
||||
curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
|
||||
```
|
||||
7
mr_v100-piper/piper/src/python_run/mypy.ini
Normal file
7
mr_v100-piper/piper/src/python_run/mypy.ini
Normal file
@@ -0,0 +1,7 @@
|
||||
[mypy]
|
||||
|
||||
[mypy-onnxruntime.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-piper_phonemize.*]
|
||||
ignore_missing_imports = True
|
||||
5
mr_v100-piper/piper/src/python_run/piper/__init__.py
Normal file
5
mr_v100-piper/piper/src/python_run/piper/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .voice import PiperVoice
|
||||
|
||||
__all__ = [
|
||||
"PiperVoice",
|
||||
]
|
||||
159
mr_v100-piper/piper/src/python_run/piper/__main__.py
Normal file
159
mr_v100-piper/piper/src/python_run/piper/__main__.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from . import PiperVoice
|
||||
from .download import ensure_voice_exists, find_voice, get_voices
|
||||
|
||||
_FILE = Path(__file__)
|
||||
_DIR = _FILE.parent
|
||||
_LOGGER = logging.getLogger(_FILE.stem)
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: synthesize speech from text read on stdin.

    If ``--model`` is not an existing path it is treated as a voice name:
    the voice index is loaded, aliases are expanded, the voice files are
    ensured to exist and the model/config paths are located in the data
    directories.

    Output mode is selected by flags:

    * ``--output-raw``: raw audio for every non-empty input line is streamed
      to stdout.
    * ``--output-dir``: one WAV file per non-empty input line is written to
      the directory.
    * otherwise: all of stdin is synthesized into a single WAV written to
      ``--output-file`` (or stdout when missing or ``-``).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    cli.add_argument("-c", "--config", help="Path to model config file")
    cli.add_argument(
        "-f",
        "--output-file",
        "--output_file",
        help="Path to output WAV file (default: stdout)",
    )
    cli.add_argument(
        "-d",
        "--output-dir",
        "--output_dir",
        help="Path to output directory (default: cwd)",
    )
    cli.add_argument(
        "--output-raw",
        "--output_raw",
        action="store_true",
        help="Stream raw audio to stdout",
    )

    # Synthesis parameters
    cli.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    cli.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    cli.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    cli.add_argument("--noise-w", "--noise_w", type=float, help="Phoneme width noise")

    # Hardware
    cli.add_argument("--cuda", action="store_true", help="Use GPU")

    # Pauses
    cli.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )

    # Voice storage
    # NOTE: with action="append", argparse appends user-supplied values to the
    # default list, so the current directory stays the first entry.
    cli.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    cli.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )

    # Voice index
    cli.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )

    # Diagnostics
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )

    opts = cli.parse_args()
    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)
    _LOGGER.debug(opts)

    # Fall back to the first data directory as the download target
    if not opts.download_dir:
        opts.download_dir = opts.data_dir[0]

    # A model path that does not exist is interpreted as a voice name
    if not Path(opts.model).exists():
        voices_info = get_voices(opts.download_dir, update_voices=opts.update_voices)

        # Register aliases so that old voice names keep working
        alias_entries: Dict[str, Any] = {
            alias: {"_is_alias": True, **info}
            for info in voices_info.values()
            for alias in info.get("aliases", [])
        }
        voices_info.update(alias_entries)

        ensure_voice_exists(opts.model, opts.data_dir, opts.download_dir, voices_info)
        opts.model, opts.config = find_voice(opts.model, opts.data_dir)

    # Load voice
    voice = PiperVoice.load(opts.model, config_path=opts.config, use_cuda=opts.cuda)
    synthesize_args = dict(
        speaker_id=opts.speaker,
        length_scale=opts.length_scale,
        noise_scale=opts.noise_scale,
        noise_w=opts.noise_w,
        sentence_silence=opts.sentence_silence,
    )

    if opts.output_raw:
        # One utterance per non-empty input line
        stdout_bytes = sys.stdout.buffer
        for raw_line in sys.stdin:
            utterance = raw_line.strip()
            if utterance:
                # Write raw audio to stdout as it's produced
                for chunk in voice.synthesize_stream_raw(utterance, **synthesize_args):
                    stdout_bytes.write(chunk)
                    stdout_bytes.flush()
        return

    if opts.output_dir:
        target_dir = Path(opts.output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        # One WAV file per non-empty input line
        for raw_line in sys.stdin:
            utterance = raw_line.strip()
            if not utterance:
                continue

            # The monotonic clock value is used as the file name
            wav_path = target_dir / f"{time.monotonic_ns()}.wav"
            with wave.open(str(wav_path), "wb") as wav_file:
                voice.synthesize(utterance, wav_file, **synthesize_args)

            _LOGGER.info("Wrote %s", wav_path)
        return

    # Single WAV from the entire input
    text = sys.stdin.read()
    to_stdout = (not opts.output_file) or (opts.output_file == "-")
    destination = sys.stdout.buffer if to_stdout else opts.output_file
    with wave.open(destination, "wb") as wav_file:
        voice.synthesize(text, wav_file, **synthesize_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
53
mr_v100-piper/piper/src/python_run/piper/config.py
Normal file
53
mr_v100-piper/piper/src/python_run/piper/config.py
Normal file
@@ -0,0 +1,53 @@
|
||||
"""Piper configuration"""
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Mapping, Sequence
|
||||
|
||||
|
||||
class PhonemeType(str, Enum):
    """Kind of phonemes a voice uses (the ``phoneme_type`` config value)."""

    ESPEAK = "espeak"
    TEXT = "text"


@dataclass
class PiperConfig:
    """Piper configuration"""

    # Number of phonemes
    num_symbols: int

    # Number of speakers
    num_speakers: int

    # Sample rate of output audio
    sample_rate: int

    # Name of espeak-ng voice or alphabet
    espeak_voice: str

    # Default inference scales (overridable per synthesis call)
    length_scale: float
    noise_scale: float
    noise_w: float

    # Phoneme -> [id,]
    phoneme_id_map: Mapping[str, Sequence[int]]

    # espeak or text
    phoneme_type: PhonemeType

    @staticmethod
    def from_dict(config: Dict[str, Any]) -> "PiperConfig":
        """Build a PiperConfig from a parsed voice config dictionary.

        Required keys: ``num_symbols``, ``num_speakers``,
        ``audio.sample_rate``, ``espeak.voice`` and ``phoneme_id_map``.
        The optional ``inference`` section supplies ``noise_scale``
        (default 0.667), ``length_scale`` (default 1.0) and ``noise_w``
        (default 0.8). ``phoneme_type`` defaults to espeak.

        Raises:
            KeyError: if a required key is missing.
            ValueError: if ``phoneme_type`` is not a known PhonemeType value.
        """
        inference_section = config.get("inference", {})

        # Values are looked up in a fixed order so that the first missing
        # key is the one reported.
        symbol_count = config["num_symbols"]
        speaker_count = config["num_speakers"]
        rate = config["audio"]["sample_rate"]
        noise = inference_section.get("noise_scale", 0.667)
        length = inference_section.get("length_scale", 1.0)
        width_noise = inference_section.get("noise_w", 0.8)
        voice_name = config["espeak"]["voice"]
        id_map = config["phoneme_id_map"]
        kind = PhonemeType(config.get("phoneme_type", PhonemeType.ESPEAK))

        return PiperConfig(
            num_symbols=symbol_count,
            num_speakers=speaker_count,
            sample_rate=rate,
            espeak_voice=voice_name,
            length_scale=length,
            noise_scale=noise,
            noise_w=width_noise,
            phoneme_id_map=id_map,
            phoneme_type=kind,
        )
|
||||
5
mr_v100-piper/piper/src/python_run/piper/const.py
Normal file
5
mr_v100-piper/piper/src/python_run/piper/const.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Constants"""
|
||||
|
||||
PAD = "_" # padding (0)
|
||||
BOS = "^" # beginning of sentence
|
||||
EOS = "$" # end of sentence
|
||||
139
mr_v100-piper/piper/src/python_run/piper/download.py
Executable file
139
mr_v100-piper/piper/src/python_run/piper/download.py
Executable file
@@ -0,0 +1,139 @@
|
||||
"""Utility for downloading Piper voices."""
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, Set, Tuple, Union
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .file_hash import get_file_hash
|
||||
|
||||
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
|
||||
|
||||
_DIR = Path(__file__).parent
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
_SKIP_FILES = {"MODEL_CARD"}
|
||||
|
||||
|
||||
class VoiceNotFoundError(Exception):
    """Raised when a requested voice name is not in the voices index."""
|
||||
|
||||
|
||||
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Loads available voices from downloaded or embedded JSON file.

    Args:
        download_dir: Directory that holds (or will receive) ``voices.json``.
        update_voices: When true, download the latest ``voices.json`` into
            ``download_dir`` before loading.

    Returns:
        The parsed voices index. A ``voices.json`` in ``download_dir`` is
        preferred over the copy next to this module.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")
        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

        # The target directory may not exist yet (e.g. a fresh download
        # dir); without this, opening the file for writing would fail.
        download_dir.mkdir(parents=True, exist_ok=True)

        with urlopen(voices_url) as response, open(
            voices_download, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
|
||||
|
||||
|
||||
def _is_valid_voice_file(data_file_path: Path, file_info: Dict[str, Any]) -> bool:
    """Return True if the file exists and matches the expected size and MD5."""
    _LOGGER.debug("Checking %s", data_file_path)
    if not data_file_path.exists():
        _LOGGER.debug("Missing %s", data_file_path)
        return False

    # Cheap size check first; only hash when the size matches
    expected_size = file_info["size_bytes"]
    actual_size = data_file_path.stat().st_size
    if expected_size != actual_size:
        _LOGGER.warning(
            "Wrong size (expected=%s, actual=%s) for %s",
            expected_size,
            actual_size,
            data_file_path,
        )
        return False

    expected_hash = file_info["md5_digest"]
    actual_hash = get_file_hash(data_file_path)
    if expected_hash != actual_hash:
        _LOGGER.warning(
            "Wrong hash (expected=%s, actual=%s) for %s",
            expected_hash,
            actual_hash,
            data_file_path,
        )
        return False

    return True


def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
):
    """Make sure the files of voice ``name`` are available, downloading if needed.

    Each data directory is checked for the voice's files (by size and MD5).
    If any single directory already holds a complete, valid copy, nothing is
    downloaded. Otherwise every file that is missing or invalid in at least
    one data directory is downloaded into ``download_dir``.

    Args:
        name: Voice name; must be a key of ``voices_info``.
        data_dirs: Directories to search for existing voice files.
        download_dir: Directory that receives downloaded files.
        voices_info: Voices index; ``voices_info[name]["files"]`` maps a
            file path to a dict with ``size_bytes`` and ``md5_digest``.

    Raises:
        AssertionError: if ``data_dirs`` is empty.
        VoiceNotFoundError: if ``name`` is not in ``voices_info``.
        ValueError: if the voice lists no files.
    """
    # Materialize first so that the emptiness check also works for
    # generators and the directories can be scanned more than once.
    data_dir_paths = [Path(data_dir) for data_dir in data_dirs]
    assert data_dir_paths, "No data dirs"

    if name not in voices_info:
        raise VoiceNotFoundError(name)

    voice_info = voices_info[name]
    voice_files = voice_info["files"]
    files_to_download: Set[str] = set()

    for data_dir in data_dir_paths:
        # Check sizes/hashes of every non-skipped file in this directory
        missing_here = {
            file_path
            for file_path, file_info in voice_files.items()
            if (Path(file_path).name not in _SKIP_FILES)
            and (
                not _is_valid_voice_file(data_dir / Path(file_path).name, file_info)
            )
        }

        if not missing_here:
            # This directory already has a complete copy of the voice, so
            # files missing from other directories need not be fetched.
            _LOGGER.debug("Found all files for %s in %s", name, data_dir)
            files_to_download.clear()
            break

        files_to_download.update(missing_here)

    if (not voice_files) and (not files_to_download):
        raise ValueError(f"Unable to find or download voice: {name}")

    # Download missing files
    download_dir = Path(download_dir)

    for file_path in files_to_download:
        file_name = Path(file_path).name
        if file_name in _SKIP_FILES:
            continue

        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / file_name
        download_file_path.parent.mkdir(parents=True, exist_ok=True)

        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
        with urlopen(file_url) as response, open(
            download_file_path, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
|
||||
|
||||
|
||||
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of voice ``name``.

    Directories are searched in order; the first one that contains both
    ``<name>.onnx`` and ``<name>.onnx.json`` wins.

    Returns:
        ``(onnx_path, config_path)`` from the first matching directory.

    Raises:
        ValueError: if no directory contains both files.
    """
    candidates = (
        (Path(root) / f"{name}.onnx", Path(root) / f"{name}.onnx.json")
        for root in data_dirs
    )

    for model_file, config_file in candidates:
        if not model_file.exists():
            continue

        if config_file.exists():
            return model_file, config_file

    raise ValueError(f"Missing files for voice {name}")
|
||||
46
mr_v100-piper/piper/src/python_run/piper/file_hash.py
Normal file
46
mr_v100-piper/piper/src/python_run/piper/file_hash.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
|
||||
def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
    """Return the hex MD5 digest of a file, reading it chunk by chunk.

    Args:
        path: File to hash.
        bytes_per_chunk: Number of bytes requested per read.
    """
    digest = hashlib.md5()

    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file)
        for block in iter(lambda: stream.read(bytes_per_chunk), b""):
            digest.update(block)

    return digest.hexdigest()
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main():
    """Print a JSON object mapping each given file path to its MD5 digest.

    With ``--dir``, the keys are the file paths relative to that directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", nargs="+")
    cli.add_argument("--dir", help="Parent directory")
    opts = cli.parse_args()

    parent_dir = Path(opts.dir) if opts.dir else None

    digests = {}
    for file_arg in opts.file:
        file_path = Path(file_arg)
        digest = get_file_hash(file_path)

        # Report the path relative to the parent directory when one is given
        key_path = file_path if parent_dir is None else file_path.relative_to(parent_dir)
        digests[str(key_path)] = digest

    json.dump(digests, sys.stdout)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
127
mr_v100-piper/piper/src/python_run/piper/http_server.py
Normal file
127
mr_v100-piper/piper/src/python_run/piper/http_server.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import io
|
||||
import logging
|
||||
import wave
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from flask import Flask, request
|
||||
|
||||
from . import PiperVoice
|
||||
from .download import ensure_voice_exists, find_voice, get_voices
|
||||
|
||||
_LOGGER = logging.getLogger()
|
||||
|
||||
|
||||
def main() -> None:
    """Run a small HTTP server that synthesizes speech with a Piper voice.

    If ``--model`` is not an existing path it is treated as a voice name:
    the voice index is loaded, aliases are expanded, the voice files are
    ensured to exist and the model/config paths are located in the data
    directories.

    The server exposes a single route, ``/``:

    * ``GET``: text is taken from the ``text`` query parameter.
    * ``POST``: text is the UTF-8 decoded request body.

    The response is a WAV file (``audio/wav``). Requests with missing or
    blank text get a ``400`` response.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
    #
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")
    #
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )
    #
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    #
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )
    #
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    #
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    # Create web server
    app = Flask(__name__)

    @app.route("/", methods=["GET", "POST"])
    def app_synthesize() -> Any:
        """Synthesize the request's text and return it as a WAV response."""
        if request.method == "POST":
            text = request.data.decode("utf-8")
        else:
            text = request.args.get("text", "")

        text = text.strip()
        if not text:
            # Missing text is a client error; raising here would surface as
            # a 500 Internal Server Error instead.
            return "No text provided", 400

        _LOGGER.debug("Synthesizing text: %s", text)
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                voice.synthesize(text, wav_file, **synthesize_args)

            wav_bytes = wav_io.getvalue()

        # Set the content type explicitly; a bare bytes return value would
        # be served with Flask's default (HTML) mimetype.
        return wav_bytes, 200, {"Content-Type": "audio/wav"}

    app.run(host=args.host, port=args.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
12
mr_v100-piper/piper/src/python_run/piper/util.py
Normal file
12
mr_v100-piper/piper/src/python_run/piper/util.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Utilities"""
|
||||
import numpy as np
|
||||
|
||||
|
||||
def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio and convert to int16 range

    The signal is scaled so that its peak absolute value maps to
    ``max_wav_value``. The peak is floored at 0.01 so that silent or
    near-silent audio is not amplified without bound.

    Args:
        audio: Floating point audio samples.
        max_wav_value: Target peak magnitude after scaling.

    Returns:
        The scaled samples clipped to ``[-max_wav_value, max_wav_value]`` and
        cast to int16. Empty input yields an empty int16 array of the same
        shape.
    """
    if np.size(audio) == 0:
        # np.max raises on zero-size arrays; there is nothing to normalize
        return np.zeros(np.shape(audio), dtype="int16")

    audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio))))
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    audio_norm = audio_norm.astype("int16")
    return audio_norm
|
||||
185
mr_v100-piper/piper/src/python_run/piper/voice.py
Normal file
185
mr_v100-piper/piper/src/python_run/piper/voice.py
Normal file
@@ -0,0 +1,185 @@
|
||||
import json
|
||||
import logging
|
||||
import wave
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run
|
||||
|
||||
from .config import PhonemeType, PiperConfig
|
||||
from .const import BOS, EOS, PAD
|
||||
from .util import audio_float_to_int16
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PiperVoice:
    """A Piper voice: an ONNX inference session paired with its configuration."""

    # ONNX Runtime session used to run the voice model
    session: onnxruntime.InferenceSession

    # Settings read from the voice's JSON config file
    config: PiperConfig

    @staticmethod
    def load(
        model_path: Union[str, Path],
        config_path: Optional[Union[str, Path]] = None,
        use_cuda: bool = False,
    ) -> "PiperVoice":
        """Create a voice from an ONNX model file and its JSON config.

        Args:
            model_path: Path to the ONNX model.
            config_path: Path to the JSON config; defaults to
                ``<model_path>.json``.
            use_cuda: Run inference with the CUDA execution provider
                instead of the CPU provider.
        """
        resolved_config_path = (
            f"{model_path}.json" if config_path is None else config_path
        )

        with open(resolved_config_path, "r", encoding="utf-8") as config_file:
            raw_config = json.load(config_file)

        providers: List[Union[str, Tuple[str, Dict[str, Any]]]] = ["CPUExecutionProvider"]
        if use_cuda:
            cuda_options = {"cudnn_conv_algo_search": "HEURISTIC"}
            providers = [("CUDAExecutionProvider", cuda_options)]

        # Parse the config before creating the (more expensive) session
        voice_config = PiperConfig.from_dict(raw_config)
        onnx_session = onnxruntime.InferenceSession(
            str(model_path),
            sess_options=onnxruntime.SessionOptions(),
            providers=providers,
        )

        return PiperVoice(session=onnx_session, config=voice_config)

    def phonemize(self, text: str) -> List[List[str]]:
        """Convert text into phonemes, one list per sentence.

        Raises:
            ValueError: if the configured phoneme type is not supported.
        """
        phoneme_type = self.config.phoneme_type

        if phoneme_type == PhonemeType.ESPEAK:
            espeak_voice = self.config.espeak_voice
            if espeak_voice == "ar":
                # Arabic diacritization
                # https://github.com/mush42/libtashkeel/
                text = tashkeel_run(text)

            return phonemize_espeak(text, espeak_voice)

        if phoneme_type == PhonemeType.TEXT:
            return phonemize_codepoints(text)

        raise ValueError(f"Unexpected phoneme type: {phoneme_type}")

    def phonemes_to_ids(self, phonemes: List[str]) -> List[int]:
        """Map one sentence's phonemes to model input ids.

        The sequence starts with the BOS ids and ends with the EOS ids; the
        PAD ids follow every known phoneme. Phonemes that are not in the id
        map are logged and skipped.
        """
        symbol_ids = self.config.phoneme_id_map
        sequence: List[int] = [*symbol_ids[BOS]]

        for symbol in phonemes:
            if symbol in symbol_ids:
                sequence.extend(symbol_ids[symbol])
                sequence.extend(symbol_ids[PAD])
            else:
                _LOGGER.warning("Missing phoneme from id map: %s", symbol)

        sequence.extend(symbol_ids[EOS])
        return sequence

    def synthesize(
        self,
        text: str,
        wav_file: wave.Wave_write,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
        sentence_silence: float = 0.0,
    ):
        """Synthesize text and write it to an open WAV writer.

        The WAV header is set to the configured sample rate, 16-bit samples
        and one channel before any audio is written.
        """
        wav_file.setframerate(self.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono

        sentence_audio = self.synthesize_stream_raw(
            text,
            speaker_id=speaker_id,
            length_scale=length_scale,
            noise_scale=noise_scale,
            noise_w=noise_w,
            sentence_silence=sentence_silence,
        )
        for chunk in sentence_audio:
            wav_file.writeframes(chunk)

    def synthesize_stream_raw(
        self,
        text: str,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
        sentence_silence: float = 0.0,
    ) -> Iterable[bytes]:
        """Yield raw audio bytes for each sentence of the text.

        Each yielded chunk is followed by ``sentence_silence`` seconds of
        zero-valued samples.
        """
        sentences = self.phonemize(text)

        # Two bytes per sample (16-bit mono)
        silence_sample_count = int(sentence_silence * self.config.sample_rate)
        trailing_silence = bytes(silence_sample_count * 2)

        for sentence in sentences:
            sentence_ids = self.phonemes_to_ids(sentence)
            sentence_audio = self.synthesize_ids_to_raw(
                sentence_ids,
                speaker_id=speaker_id,
                length_scale=length_scale,
                noise_scale=noise_scale,
                noise_w=noise_w,
            )
            yield sentence_audio + trailing_silence

    def synthesize_ids_to_raw(
        self,
        phoneme_ids: List[int],
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
    ) -> bytes:
        """Run the model on phoneme ids and return the audio as int16 bytes.

        Scales left as ``None`` fall back to the values from the config.
        ``speaker_id`` is ignored when the config has at most one speaker
        and defaults to 0 otherwise.
        """
        config = self.config
        length_scale = config.length_scale if length_scale is None else length_scale
        noise_scale = config.noise_scale if noise_scale is None else noise_scale
        noise_w = config.noise_w if noise_w is None else noise_w

        # Add a leading axis of size one in front of the ids
        id_vector = np.array(phoneme_ids, dtype=np.int64)
        id_batch = np.expand_dims(id_vector, 0)
        id_batch_lengths = np.array([id_batch.shape[1]], dtype=np.int64)
        scales = np.array([noise_scale, length_scale, noise_w], dtype=np.float32)

        session_inputs = {
            "input": id_batch,
            "input_lengths": id_batch_lengths,
            "scales": scales,
        }

        if config.num_speakers <= 1:
            speaker_id = None
        elif speaker_id is None:
            # Default speaker
            speaker_id = 0

        if speaker_id is not None:
            session_inputs["sid"] = np.array([speaker_id], dtype=np.int64)

        # Synthesize through Onnx
        first_output = self.session.run(None, session_inputs)[0]
        audio = first_output.squeeze((0, 1))
        pcm = audio_float_to_int16(audio.squeeze())
        return pcm.tobytes()
|
||||
4222
mr_v100-piper/piper/src/python_run/piper/voices.json
Normal file
4222
mr_v100-piper/piper/src/python_run/piper/voices.json
Normal file
File diff suppressed because it is too large
Load Diff
0
mr_v100-piper/piper/src/python_run/py.typed
Normal file
0
mr_v100-piper/piper/src/python_run/py.typed
Normal file
37
mr_v100-piper/piper/src/python_run/pylintrc
Normal file
37
mr_v100-piper/piper/src/python_run/pylintrc
Normal file
@@ -0,0 +1,37 @@
|
||||
[MESSAGES CONTROL]
|
||||
disable=
|
||||
format,
|
||||
abstract-method,
|
||||
cyclic-import,
|
||||
duplicate-code,
|
||||
global-statement,
|
||||
import-outside-toplevel,
|
||||
inconsistent-return-statements,
|
||||
locally-disabled,
|
||||
not-context-manager,
|
||||
too-few-public-methods,
|
||||
too-many-arguments,
|
||||
too-many-branches,
|
||||
too-many-instance-attributes,
|
||||
too-many-lines,
|
||||
too-many-locals,
|
||||
too-many-public-methods,
|
||||
too-many-return-statements,
|
||||
too-many-statements,
|
||||
too-many-boolean-expressions,
|
||||
unnecessary-pass,
|
||||
unused-argument,
|
||||
broad-except,
|
||||
too-many-nested-blocks,
|
||||
invalid-name,
|
||||
unused-import,
|
||||
fixme,
|
||||
useless-super-delegation,
|
||||
missing-module-docstring,
|
||||
missing-class-docstring,
|
||||
missing-function-docstring,
|
||||
import-error,
|
||||
relative-beyond-top-level
|
||||
|
||||
[FORMAT]
|
||||
expected-line-ending-format=LF
|
||||
2
mr_v100-piper/piper/src/python_run/requirements.txt
Normal file
2
mr_v100-piper/piper/src/python_run/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
piper-phonemize~=1.1.0
|
||||
onnxruntime>=1.11.0,<2
|
||||
5
mr_v100-piper/piper/src/python_run/requirements_dev.txt
Normal file
5
mr_v100-piper/piper/src/python_run/requirements_dev.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
black==22.12.0
|
||||
flake8==6.0.0
|
||||
isort==5.11.3
|
||||
mypy==0.991
|
||||
pylint==2.15.9
|
||||
1
mr_v100-piper/piper/src/python_run/requirements_gpu.txt
Normal file
1
mr_v100-piper/piper/src/python_run/requirements_gpu.txt
Normal file
@@ -0,0 +1 @@
|
||||
onnxruntime-gpu>=1.11.0,<2
|
||||
1
mr_v100-piper/piper/src/python_run/requirements_http.txt
Normal file
1
mr_v100-piper/piper/src/python_run/requirements_http.txt
Normal file
@@ -0,0 +1 @@
|
||||
flask>=3,<4
|
||||
13
mr_v100-piper/piper/src/python_run/script/format
Executable file
13
mr_v100-piper/piper/src/python_run/script/format
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import venv
|
||||
from pathlib import Path
|
||||
|
||||
# Project layout, relative to this script
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE_DIR = _PROGRAM_DIR / "piper"

# Resolve the virtual environment's Python executable
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Run each formatter over the package; stop at the first failure
for _tool in ("black", "isort"):
    subprocess.check_call([context.env_exe, "-m", _tool, str(_MODULE_DIR)])
|
||||
16
mr_v100-piper/piper/src/python_run/script/lint
Executable file
16
mr_v100-piper/piper/src/python_run/script/lint
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import venv
|
||||
from pathlib import Path
|
||||
|
||||
# Project layout, relative to this script
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE_DIR = _PROGRAM_DIR / "piper"

# Resolve the virtual environment's Python executable
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# (tool, extra arguments) pairs, run in order; the first failure aborts
_CHECKS = (
    ("black", ["--check"]),
    ("isort", ["--check"]),
    ("flake8", []),
    ("pylint", []),
    ("mypy", []),
)

for _tool, _extra_args in _CHECKS:
    subprocess.check_call(
        [context.env_exe, "-m", _tool, str(_MODULE_DIR), *_extra_args]
    )
|
||||
12
mr_v100-piper/piper/src/python_run/script/piper
Executable file
12
mr_v100-piper/piper/src/python_run/script/piper
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import subprocess
|
||||
import venv
|
||||
from pathlib import Path
|
||||
|
||||
# Project layout, relative to this script
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Run the piper module with the virtual environment's Python, forwarding
# all command-line arguments unchanged
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
subprocess.check_call([context.env_exe, "-m", "piper", *sys.argv[1:]])
|
||||
31
mr_v100-piper/piper/src/python_run/script/setup
Executable file
31
mr_v100-piper/piper/src/python_run/script/setup
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python3
|
||||
import subprocess
|
||||
import venv
|
||||
from pathlib import Path
|
||||
|
||||
# Project layout, relative to this script
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Create the virtual environment (with pip available inside it)
builder = venv.EnvBuilder(with_pip=True)
context = builder.ensure_directories(_VENV_DIR)
builder.create(_VENV_DIR)

# Always invoke pip through the environment's own interpreter
pip = [context.env_exe, "-m", "pip"]

# Upgrade the packaging tools first
for _packages in (["pip"], ["setuptools", "wheel"]):
    subprocess.check_call([*pip, "install", "--upgrade", *_packages])

# Install requirements, also looking for packages at the prebuilt-apps page
subprocess.check_call(
    [
        *pip,
        "install",
        "-f",
        "https://synesthesiam.github.io/prebuilt-apps/",
        "-r",
        str(_PROGRAM_DIR / "requirements.txt"),
    ]
)
|
||||
22
mr_v100-piper/piper/src/python_run/setup.cfg
Normal file
22
mr_v100-piper/piper/src/python_run/setup.cfg
Normal file
@@ -0,0 +1,22 @@
|
||||
[flake8]
|
||||
# To work with Black
|
||||
max-line-length = 88
|
||||
# E501: line too long
|
||||
# W503: Line break occurred before a binary operator
|
||||
# E203: Whitespace before ':'
|
||||
# D202 No blank lines allowed after function docstring
|
||||
# W504 line break after binary operator
|
||||
ignore =
|
||||
E501,
|
||||
W503,
|
||||
E203,
|
||||
D202,
|
||||
W504
|
||||
|
||||
[isort]
|
||||
multi_line_output = 3
|
||||
include_trailing_comma=True
|
||||
force_grid_wrap=0
|
||||
use_parentheses=True
|
||||
line_length=88
|
||||
indent = " "
|
||||
48
mr_v100-piper/piper/src/python_run/setup.py
Normal file
48
mr_v100-piper/piper/src/python_run/setup.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
from pathlib import Path
|
||||
|
||||
import setuptools
|
||||
from setuptools import setup
|
||||
|
||||
this_dir = Path(__file__).parent
module_dir = this_dir / "piper"

# Install requirements come from requirements.txt when it is present
requirements_path = this_dir / "requirements.txt"
requirements = (
    requirements_path.read_text(encoding="utf-8").splitlines()
    if requirements_path.is_file()
    else []
)

# Non-Python files shipped inside the package
data_files = [module_dir / "voices.json"]
package_data_names = [str(path.relative_to(module_dir)) for path in data_files]

# -----------------------------------------------------------------------------

setup(
    name="piper-tts",
    version="1.2.0",
    description="A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.",
    url="http://github.com/rhasspy/piper",
    author="Michael Hansen",
    author_email="mike@rhasspy.org",
    license="MIT",
    packages=setuptools.find_packages(),
    package_data={"piper": package_data_names},
    entry_points={"console_scripts": ["piper = piper.__main__:main"]},
    install_requires=requirements,
    extras_require={
        "gpu": ["onnxruntime-gpu>=1.11.0,<2"],
        "http": ["flask>=3,<4"],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Text Processing :: Linguistic",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
    keywords="rhasspy piper tts",
)
|
||||
Reference in New Issue
Block a user