Integration of TurboMind AWQ (#2828)

Co-authored-by: root <bjmsong@126.com>
This commit is contained in:
bjmsong
2025-01-13 20:14:16 +08:00
committed by GitHub
parent 51ab3ccf47
commit 17de02f98d
8 changed files with 411 additions and 2 deletions

View File

@@ -28,7 +28,7 @@ runtime_common = [
srt = [
"sglang[runtime_common]", "cuda-python",
"sgl-kernel>=0.0.2.post11", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1",
-"flashinfer==0.1.6"
+"flashinfer==0.1.6", "turbomind"
]
# HIP (Heterogeneous-computing Interface for Portability) for AMD