Improve linear.py to load sharded weights & remove the dependency of Parameters from vllm (#2784)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -14,7 +14,7 @@ from sglang.srt.speculative.build_eagle_tree import build_tree_kernel
 from sglang.srt.speculative.spec_info import SpecInfo
 
 if TYPE_CHECKING:
-    from python.sglang.srt.managers.schedule_batch import ScheduleBatch
+    from sglang.srt.managers.schedule_batch import ScheduleBatch
     from sglang.srt.server_args import ServerArgs
 
 
||||
Reference in New Issue
Block a user