import argparse

import uvicorn

from fastapi_sherpa import app

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # --model_dir is required, so the old default of "/model" could never apply; drop it.
    parser.add_argument("--model_dir", type=str, required=True, help="model directory")
    parser.add_argument("--model_type", type=str, default=None, help="model type, e.g. sensevoice")
    # store_true combined with default=True made --use_gpu a no-op (it could
    # never be switched off); BooleanOptionalAction (Python 3.9+) keeps GPU on
    # by default and adds a --no-use_gpu switch.
    parser.add_argument("--use_gpu", action=argparse.BooleanOptionalAction, default=True, help="run inference on the GPU (disable with --no-use_gpu)")
    parser.add_argument("--warmup", action="store_true", help="warm up the model right after it is initialized")
    parser.add_argument("--model_name", type=str, default=None, help="full model name (optional), used to determine the model type")
    parser.add_argument("--num_threads", type=int, default=2, help="number of threads for model inference")
    parser.add_argument("--offline_model", action="store_true", help="treat the model as non-streaming (offline)")
    parser.add_argument("--port", type=int, default=8000, help="service port")
|
    args = parser.parse_args()

    # Attach the parsed arguments to app.state so the FastAPI app can read them.
    app.state.config = {
        "model_dir": args.model_dir,
        "model_type": args.model_type,
        "model_name": args.model_name,
        "num_threads": args.num_threads,
        "offline_model": args.offline_model,
        "use_gpu": args.use_gpu,
        "warmup": args.warmup,
    }

|
    # With workers=1 and no reload, uvicorn resolves the import string in this
    # same process, so it picks up the app object configured above; with more
    # workers each subprocess would re-import the module and lose app.state.
    uvicorn.run("fastapi_sherpa:app",
                host="0.0.0.0",
                port=args.port,
                workers=1)
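
For context, here is a minimal sketch of the consuming side: how fastapi_sherpa's startup code might read app.state.config. The real module's internals are not shown in this listing, so the load_model stand-in and the lifespan wiring below are assumptions, not the actual implementation.

from contextlib import asynccontextmanager

from fastapi import FastAPI


def load_model(model_dir, **kwargs):
    """Stand-in for the real sherpa model construction, which is elided here."""
    print(f"loading model from {model_dir} with {kwargs}")
    return object()


@asynccontextmanager
async def lifespan(app: FastAPI):
    # The launcher sets app.state.config before calling uvicorn.run(), so the
    # config is already in place when this startup hook fires.
    cfg = app.state.config
    app.state.recognizer = load_model(
        cfg["model_dir"],
        model_type=cfg["model_type"],
        num_threads=cfg["num_threads"],
        use_gpu=cfg["use_gpu"],
    )
    yield


app = FastAPI(lifespan=lifespan)

Assuming the launcher above is saved as main.py (name hypothetical), the service could then be started with something like: python main.py --model_dir /path/to/model --model_type sensevoice --offline_model --warmup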