[1/n] chore: decouple quantization implementation from vLLM dependency (#7992)

This commit is contained in:
Peng Zhang
2025-07-17 06:56:26 +08:00
committed by GitHub
parent 570d33437b
commit c28ad1990d
13 changed files with 1498 additions and 636 deletions

View File

@@ -2,10 +2,11 @@ import functools
from typing import Optional
import torch
from sgl_kernel.scalar_type import scalar_types
def get_scalar_type(num_bits: int, has_zp: bool):
from sglang.srt.layers.quantization.scalar_type import scalar_types
if has_zp:
assert num_bits == 4
return scalar_types.uint4