Improve linear.py to load sharded weights & remove the dependency of Parameters from vllm (#2784)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -205,7 +205,7 @@ class ModelRunner:
|
||||
if self.device == "cuda":
|
||||
backend = "nccl"
|
||||
elif self.device == "xpu":
|
||||
# TODO(liangan1):Just use gloo to bypass the initilization fail
|
||||
# TODO(liangan1): Just use gloo to bypass the initialization failure
|
||||
# Need to use xccl for xpu backend in the future
|
||||
backend = "gloo"
|
||||
elif self.device == "hpu":
|
||||
@@ -634,7 +634,6 @@ class ModelRunner:
|
||||
)
|
||||
|
||||
def init_double_sparsity_channel_config(self, selected_channel):
|
||||
|
||||
selected_channel = "." + selected_channel + "_proj"
|
||||
self.sorted_channels = []
|
||||
# load channel config
|
||||
|
||||
Reference in New Issue
Block a user