Support compressed tensors fp8w8a8 (#4743)

This commit is contained in:
Xiaoyu Zhang
2025-03-27 04:21:25 +08:00
committed by GitHub
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions

View File

@@ -67,8 +67,8 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
)
from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.managers.utils import ExpertDistributionRecorder
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.utils import add_prefix, is_cuda, is_cuda_available, is_hip