This commit is contained in:
2025-09-10 10:56:53 +08:00
commit 1df95ad2f6
606 changed files with 590904 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
build/
dist/
*.egg-info/

View File

@@ -0,0 +1,6 @@
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88

View File

@@ -0,0 +1,2 @@
include requirements.txt
include piper/voices.json

View File

@@ -0,0 +1,27 @@
# Piper HTTP Server
Install the requirements into your virtual environment:
```sh
.venv/bin/pip3 install -r requirements_http.txt
```
Run the web server:
```sh
.venv/bin/python3 -m piper.http_server --model ...
```
See `--help` for more options.
Using a `GET` request:
```sh
curl -G --data-urlencode 'text=This is a test.' -o test.wav 'localhost:5000'
```
Using a `POST` request:
```sh
curl -X POST -H 'Content-Type: text/plain' --data 'This is a test.' -o test.wav 'localhost:5000'
```

View File

@@ -0,0 +1,7 @@
[mypy]
[mypy-onnxruntime.*]
ignore_missing_imports = True
[mypy-piper_phonemize.*]
ignore_missing_imports = True

View File

@@ -0,0 +1,5 @@
from .voice import PiperVoice
__all__ = [
"PiperVoice",
]

View File

@@ -0,0 +1,159 @@
import argparse
import logging
import sys
import time
import wave
from pathlib import Path
from typing import Any, Dict
from . import PiperVoice
from .download import ensure_voice_exists, find_voice, get_voices
# Path of this module file and the directory that contains it.
_FILE = Path(__file__)
_DIR = _FILE.parent
# Logger named after this file's stem (the file name without its extension).
_LOGGER = logging.getLogger(_FILE.stem)
def main() -> None:
    """Run the Piper command-line interface.

    Parses command-line options, downloads the requested voice when
    ``--model`` is not an existing path, loads the voice, and synthesizes
    text read from standard input. The output mode is chosen in this order:

    * ``--output-raw``: every non-empty input line is synthesized and its raw
      audio bytes are streamed to stdout as they are produced.
    * ``--output-dir``: every non-empty input line is written to its own WAV
      file inside that directory.
    * otherwise: all of stdin is synthesized into one WAV, written to
      ``--output-file`` or, when that is unset or ``-``, to stdout.
    """
    # Local import: only needed to buffer WAV data that is destined for stdout.
    import io

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")
    parser.add_argument(
        "-f",
        "--output-file",
        "--output_file",
        help="Path to output WAV file (default: stdout)",
    )
    parser.add_argument(
        "-d",
        "--output-dir",
        "--output_dir",
        help="Path to output directory (default: cwd)",
    )
    parser.add_argument(
        "--output-raw",
        "--output_raw",
        action="store_true",
        help="Stream raw audio to stdout",
    )
    #
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )
    #
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    #
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )
    #
    # NOTE: with action="append", directories given on the command line are
    # added after this default entry rather than replacing it.
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    #
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    if args.output_raw:
        # Read line-by-line
        for line in sys.stdin:
            line = line.strip()
            if not line:
                continue

            # Write raw audio to stdout as it's produced
            audio_stream = voice.synthesize_stream_raw(line, **synthesize_args)
            for audio_bytes in audio_stream:
                sys.stdout.buffer.write(audio_bytes)
                sys.stdout.buffer.flush()
    elif args.output_dir:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Read line-by-line
        for line in sys.stdin:
            line = line.strip()
            if not line:
                continue

            # One file per line, named after the monotonic clock in nanoseconds
            wav_path = output_dir / f"{time.monotonic_ns()}.wav"
            with wave.open(str(wav_path), "wb") as wav_file:
                voice.synthesize(line, wav_file, **synthesize_args)

            _LOGGER.info("Wrote %s", wav_path)
    else:
        # Read entire input
        text = sys.stdin.read()
        if (not args.output_file) or (args.output_file == "-"):
            # Write to stdout. Frames are written once per sentence, and the
            # wave module has to seek back to patch the frame count, which
            # fails when stdout is a pipe. Build the WAV in memory first.
            with io.BytesIO() as wav_io:
                with wave.open(wav_io, "wb") as wav_file:
                    voice.synthesize(text, wav_file, **synthesize_args)

                sys.stdout.buffer.write(wav_io.getvalue())

            sys.stdout.buffer.flush()
        else:
            # Write to file
            with wave.open(args.output_file, "wb") as wav_file:
                voice.synthesize(text, wav_file, **synthesize_args)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,53 @@
"""Piper configuration"""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Mapping, Sequence
class PhonemeType(str, Enum):
    """Kind of phonemization a voice expects; members are also plain strings."""

    # Text is phonemized with espeak
    ESPEAK = "espeak"
    # Text is phonemized as codepoints
    TEXT = "text"
@dataclass
class PiperConfig:
    """Piper configuration"""

    num_symbols: int
    """Number of phonemes"""

    num_speakers: int
    """Number of speakers"""

    sample_rate: int
    """Sample rate of output audio"""

    espeak_voice: str
    """Name of espeak-ng voice or alphabet"""

    # Default synthesis scales, taken from the "inference" config section
    length_scale: float
    noise_scale: float
    noise_w: float

    phoneme_id_map: Mapping[str, Sequence[int]]
    """Phoneme -> [id,]"""

    phoneme_type: PhonemeType
    """espeak or text"""

    @staticmethod
    def from_dict(config: Dict[str, Any]) -> "PiperConfig":
        """Build a PiperConfig from a parsed voice config dictionary.

        The keys ``num_symbols``, ``num_speakers``, ``audio.sample_rate``,
        ``espeak.voice`` and ``phoneme_id_map`` are required (a missing one
        raises ``KeyError``). The ``inference`` section and ``phoneme_type``
        are optional and fall back to built-in defaults.
        """
        inference_section = config.get("inference", {})

        # Collected in the same order the values are looked up, so the first
        # missing required key is the one reported.
        field_values: Dict[str, Any] = {
            "num_symbols": config["num_symbols"],
            "num_speakers": config["num_speakers"],
            "sample_rate": config["audio"]["sample_rate"],
            "noise_scale": inference_section.get("noise_scale", 0.667),
            "length_scale": inference_section.get("length_scale", 1.0),
            "noise_w": inference_section.get("noise_w", 0.8),
            "espeak_voice": config["espeak"]["voice"],
            "phoneme_id_map": config["phoneme_id_map"],
        }
        raw_phoneme_type = config.get("phoneme_type", PhonemeType.ESPEAK)
        field_values["phoneme_type"] = PhonemeType(raw_phoneme_type)

        return PiperConfig(**field_values)

View File

@@ -0,0 +1,5 @@
"""Constants"""
PAD = "_"  # padding symbol; the original "(0)" note presumably is its id (TODO confirm)
BOS = "^"  # beginning-of-sentence marker
EOS = "$"  # end-of-sentence marker

View File

@@ -0,0 +1,139 @@
"""Utility for downloading Piper voices."""
import json
import logging
import shutil
from pathlib import Path
from typing import Any, Dict, Iterable, Set, Tuple, Union
from urllib.request import urlopen
from .file_hash import get_file_hash
# Download URL template; "{file}" is filled with a path such as "voices.json"
# or a voice file path taken from the voices listing.
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
# Directory of this module; the embedded voices.json is looked up here.
_DIR = Path(__file__).parent
_LOGGER = logging.getLogger(__name__)
# File names that are never checked or downloaded for a voice.
_SKIP_FILES = {"MODEL_CARD"}
class VoiceNotFoundError(Exception):
    """Raised when a requested voice name is not present in the voices listing."""

    pass
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Load the available voices from a downloaded or embedded JSON file.

    Args:
        download_dir: Directory that may hold a downloaded ``voices.json``.
        update_voices: When true, download the latest ``voices.json`` into
            ``download_dir`` first.

    Returns:
        The parsed contents of ``voices.json``. A copy in ``download_dir`` is
        preferred over the one shipped next to this module.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")
        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

        # Create the target directory on demand, as voice downloads do.
        voices_download.parent.mkdir(parents=True, exist_ok=True)

        # Download to a temporary name and swap it in afterwards, so an
        # interrupted transfer cannot leave a truncated voices.json behind.
        partial_path = voices_download.with_name(voices_download.name + ".tmp")
        try:
            with urlopen(voices_url) as response, open(
                partial_path, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)

            partial_path.replace(voices_download)
        finally:
            # Only present here if the download or the swap failed.
            if partial_path.exists():
                partial_path.unlink()

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
def _voice_file_is_valid(data_file_path: Path, file_info: Dict[str, Any]) -> bool:
    """Return True if the file exists and matches the expected size and MD5."""
    _LOGGER.debug("Checking %s", data_file_path)
    if not data_file_path.exists():
        _LOGGER.debug("Missing %s", data_file_path)
        return False

    expected_size = file_info["size_bytes"]
    actual_size = data_file_path.stat().st_size
    if expected_size != actual_size:
        _LOGGER.warning(
            "Wrong size (expected=%s, actual=%s) for %s",
            expected_size,
            actual_size,
            data_file_path,
        )
        return False

    # Hash only after the cheap size check has passed
    expected_hash = file_info["md5_digest"]
    actual_hash = get_file_hash(data_file_path)
    if expected_hash != actual_hash:
        _LOGGER.warning(
            "Wrong hash (expected=%s, actual=%s) for %s",
            expected_hash,
            actual_hash,
            data_file_path,
        )
        return False

    return True


def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
):
    """Make sure the files of a voice are available, downloading if needed.

    Each data directory is checked for the voice's files (by file name, size
    and MD5 digest). If one directory already holds a complete, valid copy,
    nothing is downloaded. Otherwise every file that was missing or invalid
    in a checked directory is downloaded into ``download_dir``.

    Args:
        name: Voice name; must be a key of ``voices_info``.
        data_dirs: Directories to search for existing voice files.
        download_dir: Directory that downloaded files are written to.
        voices_info: Voices listing, as returned by ``get_voices``.

    Raises:
        AssertionError: If ``data_dirs`` is empty.
        VoiceNotFoundError: If ``name`` is not in ``voices_info``.
        ValueError: If the voice lists no files at all.
    """
    # Materialize once so a one-shot iterable can be both checked and looped.
    data_dirs = [Path(data_dir) for data_dir in data_dirs]
    assert data_dirs, "No data dirs"

    if name not in voices_info:
        raise VoiceNotFoundError(name)

    voice_info = voices_info[name]
    voice_files = voice_info["files"]
    if not voice_files:
        raise ValueError(f"Unable to find or download voice: {name}")

    # Files that are actually required (model card etc. are skipped)
    required_files = {
        file_path: file_info
        for file_path, file_info in voice_files.items()
        if Path(file_path).name not in _SKIP_FILES
    }

    files_to_download: Set[str] = set()
    for data_dir in data_dirs:
        # Check sizes/hashes
        invalid_here = {
            file_path
            for file_path, file_info in required_files.items()
            if not _voice_file_is_valid(data_dir / Path(file_path).name, file_info)
        }
        if not invalid_here:
            # This directory has a complete, valid copy: no download needed,
            # even if an earlier directory was missing files.
            return

        files_to_download.update(invalid_here)

    # Download missing files
    download_dir = Path(download_dir)
    for file_path in files_to_download:
        file_name = Path(file_path).name
        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / file_name
        download_file_path.parent.mkdir(parents=True, exist_ok=True)

        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
        with urlopen(file_url) as response, open(
            download_file_path, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of a voice.

    Returns the ``<name>.onnx`` and ``<name>.onnx.json`` paths from the first
    data directory that contains both files.

    Raises:
        ValueError: If no data directory contains both files.
    """
    for candidate_dir in map(Path, data_dirs):
        model_file = candidate_dir / f"{name}.onnx"
        settings_file = candidate_dir / f"{name}.onnx.json"
        if not (model_file.exists() and settings_file.exists()):
            continue

        return model_file, settings_file

    raise ValueError(f"Missing files for voice {name}")

View File

@@ -0,0 +1,46 @@
import argparse
import hashlib
import json
import sys
from pathlib import Path
from typing import Union
def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
    """Return the hexadecimal MD5 digest of a file, read chunk by chunk."""
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        # The two-argument iter() stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(bytes_per_chunk), b""):
            digest.update(block)

    return digest.hexdigest()
# -----------------------------------------------------------------------------
def main():
    """Print a JSON object mapping each given file path to its MD5 digest.

    With ``--dir``, each path is made relative to that directory before it is
    used as a key.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("file", nargs="+")
    parser.add_argument("--dir", help="Parent directory")
    args = parser.parse_args()

    parent_dir = Path(args.dir) if args.dir else None

    digests = {}
    for file_arg in args.file:
        file_path = Path(file_arg)
        digest = get_file_hash(file_path)
        if parent_dir is not None:
            file_path = file_path.relative_to(parent_dir)

        digests[str(file_path)] = digest

    json.dump(digests, sys.stdout)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import argparse
import io
import logging
import wave
from pathlib import Path
from typing import Any, Dict
from flask import Flask, request
from . import PiperVoice
from .download import ensure_voice_exists, find_voice, get_voices
# getLogger() without a name returns the root logger.
_LOGGER = logging.getLogger()
def main() -> None:
    """Run a small HTTP server that synthesizes speech with one Piper voice.

    Parses command-line options, downloads the requested voice when
    ``--model`` is not an existing path, loads the voice, and serves ``/``:

    * ``GET /?text=...`` synthesizes the ``text`` query parameter.
    * ``POST /`` synthesizes the UTF-8 request body.

    A successful response is a WAV file (``audio/wav``). A request without
    text is answered with HTTP 400.
    """
    parser = argparse.ArgumentParser()
    # Default host 0.0.0.0 listens on all network interfaces.
    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
    #
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")
    #
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )
    #
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    #
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )
    #
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    #
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    # Create web server
    app = Flask(__name__)

    @app.route("/", methods=["GET", "POST"])
    def app_synthesize() -> Any:
        """Synthesize the request's text and return it as a WAV response."""
        if request.method == "POST":
            text = request.data.decode("utf-8")
        else:
            text = request.args.get("text", "")

        text = text.strip()
        if not text:
            # Missing text is the client's mistake: answer 400 rather than
            # raising, which would surface as a 500 server error.
            return "No text provided", 400

        _LOGGER.debug("Synthesizing text: %s", text)
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                voice.synthesize(text, wav_file, **synthesize_args)

            wav_bytes = wav_io.getvalue()

        # Label the body as WAV; a bare bytes return would be sent as HTML.
        return wav_bytes, 200, {"Content-Type": "audio/wav"}

    app.run(host=args.host, port=args.port)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,12 @@
"""Utilities"""
import numpy as np
def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio and convert to int16 range.

    The signal is scaled so that its largest absolute sample maps to
    ``max_wav_value``. The peak is floored at 0.01, which limits how much a
    near-silent signal is amplified. An empty array yields an empty int16
    array.

    Args:
        audio: Floating-point audio samples.
        max_wav_value: Magnitude that the peak sample is scaled to.

    Returns:
        The scaled samples, clipped to ``±max_wav_value``, as int16.
    """
    if audio.size == 0:
        # np.max() raises on an empty array; there is nothing to normalize.
        return audio.astype("int16")

    peak = max(0.01, np.max(np.abs(audio)))
    audio_norm = audio * (max_wav_value / peak)
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    audio_norm = audio_norm.astype("int16")
    return audio_norm

View File

@@ -0,0 +1,185 @@
import json
import logging
import wave
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np
import onnxruntime
from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run
from .config import PhonemeType, PiperConfig
from .const import BOS, EOS, PAD
from .util import audio_float_to_int16
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
@dataclass
class PiperVoice:
    """A loaded voice: an ONNX inference session together with its config."""

    session: onnxruntime.InferenceSession
    config: PiperConfig

    @staticmethod
    def load(
        model_path: Union[str, Path],
        config_path: Optional[Union[str, Path]] = None,
        use_cuda: bool = False,
    ) -> "PiperVoice":
        """Load an ONNX model and config.

        When ``config_path`` is not given, the config is read from
        ``<model_path>.json``. ``use_cuda`` selects the CUDA execution
        provider instead of the CPU one.
        """
        if config_path is None:
            config_path = f"{model_path}.json"

        with open(config_path, "r", encoding="utf-8") as config_stream:
            raw_config = json.load(config_stream)

        providers: List[Union[str, Tuple[str, Dict[str, Any]]]]
        if not use_cuda:
            providers = ["CPUExecutionProvider"]
        else:
            providers = [
                (
                    "CUDAExecutionProvider",
                    {"cudnn_conv_algo_search": "HEURISTIC"},
                )
            ]

        # Parse the config before creating the session, so a bad config
        # fails before the model is loaded.
        voice_config = PiperConfig.from_dict(raw_config)
        onnx_session = onnxruntime.InferenceSession(
            str(model_path),
            sess_options=onnxruntime.SessionOptions(),
            providers=providers,
        )
        return PiperVoice(config=voice_config, session=onnx_session)

    def phonemize(self, text: str) -> List[List[str]]:
        """Text to phonemes grouped by sentence."""
        kind = self.config.phoneme_type

        if kind == PhonemeType.ESPEAK:
            espeak_voice = self.config.espeak_voice
            if espeak_voice == "ar":
                # Arabic diacritization
                # https://github.com/mush42/libtashkeel/
                text = tashkeel_run(text)

            return phonemize_espeak(text, espeak_voice)

        if kind == PhonemeType.TEXT:
            return phonemize_codepoints(text)

        raise ValueError(f"Unexpected phoneme type: {self.config.phoneme_type}")

    def phonemes_to_ids(self, phonemes: List[str]) -> List[int]:
        """Phonemes to ids.

        The result starts with the BOS ids and ends with the EOS ids; the ids
        of each known phoneme are followed by the PAD ids. Phonemes missing
        from the id map are skipped with a warning.
        """
        symbol_ids = self.config.phoneme_id_map
        result: List[int] = list(symbol_ids[BOS])

        for symbol in phonemes:
            if symbol in symbol_ids:
                result.extend(symbol_ids[symbol])
                result.extend(symbol_ids[PAD])
            else:
                _LOGGER.warning("Missing phoneme from id map: %s", symbol)

        result.extend(symbol_ids[EOS])
        return result

    def synthesize(
        self,
        text: str,
        wav_file: wave.Wave_write,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
        sentence_silence: float = 0.0,
    ):
        """Synthesize WAV audio from text.

        Sets the WAV parameters (the voice's sample rate, 16-bit, mono) on
        ``wav_file`` and writes the audio of each sentence as it is produced.
        """
        wav_file.setframerate(self.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono

        sentence_chunks = self.synthesize_stream_raw(
            text,
            speaker_id=speaker_id,
            length_scale=length_scale,
            noise_scale=noise_scale,
            noise_w=noise_w,
            sentence_silence=sentence_silence,
        )
        for chunk in sentence_chunks:
            wav_file.writeframes(chunk)

    def synthesize_stream_raw(
        self,
        text: str,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
        sentence_silence: float = 0.0,
    ) -> Iterable[bytes]:
        """Synthesize raw audio per sentence from text.

        Yields one chunk of raw 16-bit mono audio bytes per sentence, each
        followed by ``sentence_silence`` seconds of silence.
        """
        sentences = self.phonemize(text)

        # 16-bit mono: two zero bytes per sample of silence
        silence_sample_count = int(sentence_silence * self.config.sample_rate)
        trailing_silence = bytes(silence_sample_count * 2)

        for sentence in sentences:
            sentence_ids = self.phonemes_to_ids(sentence)
            sentence_audio = self.synthesize_ids_to_raw(
                sentence_ids,
                speaker_id=speaker_id,
                length_scale=length_scale,
                noise_scale=noise_scale,
                noise_w=noise_w,
            )
            yield sentence_audio + trailing_silence

    def synthesize_ids_to_raw(
        self,
        phoneme_ids: List[int],
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
    ) -> bytes:
        """Synthesize raw audio from phoneme ids.

        Scales left as ``None`` fall back to the voice config. The speaker id
        is ignored for voices with at most one speaker and defaults to 0 for
        multi-speaker voices.
        """
        # Fall back to the voice's configured scales
        if length_scale is None:
            length_scale = self.config.length_scale
        if noise_scale is None:
            noise_scale = self.config.noise_scale
        if noise_w is None:
            noise_w = self.config.noise_w

        # A batch holding a single phoneme id sequence
        ids_batch = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
        ids_lengths = np.array([ids_batch.shape[1]], dtype=np.int64)
        scales = np.array(
            [noise_scale, length_scale, noise_w],
            dtype=np.float32,
        )
        args = {
            "input": ids_batch,
            "input_lengths": ids_lengths,
            "scales": scales,
        }

        if self.config.num_speakers <= 1:
            speaker_id = None
        elif speaker_id is None:
            # Default speaker
            speaker_id = 0

        if speaker_id is not None:
            args["sid"] = np.array([speaker_id], dtype=np.int64)

        # Synthesize through Onnx
        outputs = self.session.run(None, args)
        audio = outputs[0].squeeze((0, 1))
        audio = audio_float_to_int16(audio.squeeze())
        return audio.tobytes()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
[MESSAGES CONTROL]
disable=
format,
abstract-method,
cyclic-import,
duplicate-code,
global-statement,
import-outside-toplevel,
inconsistent-return-statements,
locally-disabled,
not-context-manager,
too-few-public-methods,
too-many-arguments,
too-many-branches,
too-many-instance-attributes,
too-many-lines,
too-many-locals,
too-many-public-methods,
too-many-return-statements,
too-many-statements,
too-many-boolean-expressions,
unnecessary-pass,
unused-argument,
broad-except,
too-many-nested-blocks,
invalid-name,
unused-import,
fixme,
useless-super-delegation,
missing-module-docstring,
missing-class-docstring,
missing-function-docstring,
import-error,
relative-beyond-top-level
[FORMAT]
expected-line-ending-format=LF

View File

@@ -0,0 +1,2 @@
piper-phonemize~=1.1.0
onnxruntime>=1.11.0,<2

View File

@@ -0,0 +1,5 @@
black==22.12.0
flake8==6.0.0
isort==5.11.3
mypy==0.991
pylint==2.15.9

View File

@@ -0,0 +1 @@
onnxruntime-gpu>=1.11.0,<2

View File

@@ -0,0 +1 @@
flask>=3,<4

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path
# Directory layout: this script's directory sits directly under the program
# directory, which holds the virtual environment and the "piper" package.
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE_DIR = _PROGRAM_DIR / "piper"

# Used to look up the virtual environment's Python executable
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Run the formatters over the package, stopping at the first failure
for _formatter in ("black", "isort"):
    subprocess.check_call([context.env_exe, "-m", _formatter, str(_MODULE_DIR)])

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path
# Directory layout: this script's directory sits directly under the program
# directory, which holds the virtual environment and the "piper" package.
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE_DIR = _PROGRAM_DIR / "piper"

# Used to look up the virtual environment's Python executable
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Tool module followed by the flags passed after the package path. The
# formatters run in check-only mode; the first failing tool stops the script.
_CHECKS = (
    ("black", "--check"),
    ("isort", "--check"),
    ("flake8",),
    ("pylint",),
    ("mypy",),
)
for _tool, *_flags in _CHECKS:
    subprocess.check_call([context.env_exe, "-m", _tool, str(_MODULE_DIR), *_flags])

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env python3
import sys
import subprocess
import venv
from pathlib import Path
# The virtual environment lives in the program directory, one level up
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Used to look up the virtual environment's Python executable
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Run the piper module with this script's own command-line arguments
subprocess.check_call([context.env_exe, "-m", "piper", *sys.argv[1:]])

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path
# The virtual environment lives in the program directory, one level up
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Create virtual environment
builder = venv.EnvBuilder(with_pip=True)
context = builder.ensure_directories(_VENV_DIR)
builder.create(_VENV_DIR)

# Upgrade dependencies
pip = [context.env_exe, "-m", "pip"]
subprocess.check_call([*pip, "install", "--upgrade", "pip"])
subprocess.check_call([*pip, "install", "--upgrade", "setuptools", "wheel"])

# Install requirements; -f adds an extra location to look for packages
_requirements_file = str(_PROGRAM_DIR / "requirements.txt")
subprocess.check_call(
    [
        *pip,
        "install",
        "-f",
        "https://synesthesiam.github.io/prebuilt-apps/",
        "-r",
        _requirements_file,
    ]
)

View File

@@ -0,0 +1,22 @@
[flake8]
# To work with Black
max-line-length = 88
# E501: line too long
# W503: Line break occurred before a binary operator
# E203: Whitespace before ':'
# D202: No blank lines allowed after function docstring
# W504: Line break occurred after a binary operator
ignore =
E501,
W503,
E203,
D202,
W504
[isort]
multi_line_output = 3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
indent = " "

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python3
from pathlib import Path
import setuptools
from setuptools import setup
this_dir = Path(__file__).parent
module_dir = this_dir / "piper"

# Install requirements are the lines of requirements.txt, when it is present
requirements_path = this_dir / "requirements.txt"
requirements = (
    requirements_path.read_text(encoding="utf-8").splitlines()
    if requirements_path.is_file()
    else []
)

# Non-Python files shipped inside the package
data_files = [module_dir / "voices.json"]
package_data_names = [str(data_file.relative_to(module_dir)) for data_file in data_files]

# -----------------------------------------------------------------------------

setup(
    name="piper-tts",
    version="1.2.0",
    description="A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.",
    url="http://github.com/rhasspy/piper",
    author="Michael Hansen",
    author_email="mike@rhasspy.org",
    license="MIT",
    packages=setuptools.find_packages(),
    package_data={"piper": package_data_names},
    entry_points={
        "console_scripts": [
            "piper = piper.__main__:main",
        ]
    },
    install_requires=requirements,
    # Optional extras: GPU inference and the HTTP server
    extras_require={
        "gpu": ["onnxruntime-gpu>=1.11.0,<2"],
        "http": ["flask>=3,<4"],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Text Processing :: Linguistic",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
    keywords="rhasspy piper tts",
)