Support compressed tensors fp8w8a8 (#4743)

This commit is contained in:
Xiaoyu Zhang
2025-03-27 04:21:25 +08:00
committed by GitHub
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions

View File

@@ -53,6 +53,7 @@ from sglang.srt.disaggregation.utils import (
from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder
from sglang.srt.managers.io_struct import (
AbortReq,
CloseSessionReqInput,
@@ -106,7 +107,7 @@ from sglang.srt.managers.scheduler_output_processor_mixin import (
from sglang.srt.managers.session_controller import Session
from sglang.srt.managers.tp_worker import TpModelWorker
from sglang.srt.managers.tp_worker_overlap_thread import TpModelWorkerClient
from sglang.srt.managers.utils import ExpertDistributionRecorder, validate_input_length
from sglang.srt.managers.utils import validate_input_length
from sglang.srt.mem_cache.chunk_cache import ChunkCache
from sglang.srt.mem_cache.hiradix_cache import HiRadixCache
from sglang.srt.mem_cache.radix_cache import RadixCache