Support compressed tensors fp8w8a8 (#4743)

This commit is contained in:
Xiaoyu Zhang
2025-03-27 04:21:25 +08:00
committed by GitHub
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions

View File

@@ -29,3 +29,5 @@ pip install cuda-python nvidia-cuda-nvrtc-cu12
pip install timm
pip install sgl-kernel==0.0.5.post3 --force-reinstall
pip uninstall vllm -y || true