Add torchao quant (int4/int8/fp8) to llama models (#1341)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
Jerry Zhang
2024-09-09 05:32:41 -07:00
committed by GitHub
parent e4d68afcf0
commit a7c47e0f02
10 changed files with 151 additions and 12 deletions

View File

@@ -22,7 +22,7 @@ dependencies = [
[project.optional-dependencies]
srt = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hub", "interegular",
"packaging", "pillow", "psutil", "pydantic", "python-multipart",
-"torch", "uvicorn", "uvloop", "zmq",
+"torch", "torchao", "uvicorn", "uvloop", "zmq",
"vllm==0.5.5", "outlines>=0.0.44"]
openai = ["openai>=1.0", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]