[1/n] chore: decouple quantization implementation from vLLM dependency (#7992)

This commit is contained in:
Peng Zhang
2025-07-17 06:56:26 +08:00
committed by GitHub
parent 570d33437b
commit c28ad1990d
13 changed files with 1498 additions and 636 deletions

View File

@@ -1,12 +1,10 @@
 import math
 import numpy as np
 import pytest
 import torch
 from sgl_kernel import awq_marlin_repack
-from sgl_kernel.scalar_type import scalar_types
-from sglang.srt.layers.quantization.quant_utils import (
+from sglang.srt.layers.quantization.scalar_type import scalar_types
+from sglang.srt.layers.quantization.utils import (
 get_pack_factor,
 pack_cols,
 quantize_weights,