Improve linear.py to load sharded weights & remove the dependency on vllm's Parameters (#2784)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-01-07 23:29:10 -08:00
committed by GitHub
parent 694e41925e
commit 8a6906127a
15 changed files with 655 additions and 88 deletions

View File

@@ -12,8 +12,8 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
)
-from vllm.model_executor.parameter import BasevLLMParameter
+from sglang.srt.layers.parameter import BasevLLMParameter
from sglang.srt.layers.quantization.base_config import (
QuantizationConfig,
QuantizeMethodBase,