Add integration with gemlite weight only quant (#2528)

This commit is contained in:
Jerry Zhang
2024-12-20 08:25:25 -08:00
committed by GitHub
parent d95a5f5bf5
commit feb2b768ba
4 changed files with 61 additions and 1 deletion

View File

@@ -21,7 +21,7 @@ runtime_common = ["aiohttp", "decord", "fastapi",
"orjson", "outlines>=0.0.44,<0.1.0",
"packaging", "pillow", "prometheus-client>=0.20.0",
"psutil", "pydantic", "python-multipart",
-"pyzmq>=25.1.2", "torchao>=0.7.0", "uvicorn", "uvloop",
+"pyzmq>=25.1.2", "torchao>=0.7.0", "gemlite", "uvicorn", "uvloop",
"xgrammar>=0.1.6"]
srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1", "cuda-python", "flashinfer==0.1.6"]