[dist] fix communicator patch (#58)
### What this PR does / why we need it? fix communicator patch so parallel could work. see #52 Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
2
setup.py
2
setup.py
@@ -95,7 +95,7 @@ setup(
|
|||||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||||
"Topic :: Scientific/Engineering :: Information Analysis",
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
||||||
],
|
],
|
||||||
packages=find_packages(exclude=("docs", "examples", "tests*", "patch")),
|
packages=find_packages(exclude=("docs", "examples", "tests*")),
|
||||||
python_requires=">=3.9",
|
python_requires=">=3.9",
|
||||||
install_requires=get_requirements(),
|
install_requires=get_requirements(),
|
||||||
extras_require={},
|
extras_require={},
|
||||||
|
|||||||
@@ -19,11 +19,11 @@
|
|||||||
# https://github.com/vllm-project/vllm/pull/11324.
|
# https://github.com/vllm-project/vllm/pull/11324.
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm.distributed.parallel_state import GroupCoordinator
|
import vllm
|
||||||
from vllm.utils import resolve_obj_by_qualname
|
from vllm.utils import resolve_obj_by_qualname
|
||||||
|
|
||||||
|
|
||||||
class GroupCoordinatorPatch(GroupCoordinator):
|
class GroupCoordinatorPatch(vllm.distributed.parallel_state.GroupCoordinator):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
@@ -66,4 +66,4 @@ class GroupCoordinatorPatch(GroupCoordinator):
|
|||||||
return self.communicator.all_gather(input_, dim)
|
return self.communicator.all_gather(input_, dim)
|
||||||
|
|
||||||
|
|
||||||
GroupCoordinator = GroupCoordinatorPatch
|
vllm.distributed.parallel_state.GroupCoordinator = GroupCoordinatorPatch
|
||||||
|
|||||||
@@ -88,9 +88,8 @@ class NPUPlatform(Platform):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
|
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
|
||||||
# Register ops and patch when setup.
|
# Register ops when setup.
|
||||||
from vllm_ascend import ops # noqa: F401
|
from vllm_ascend import ops # noqa: F401
|
||||||
from vllm_ascend import patch # noqa: F401
|
|
||||||
|
|
||||||
parallel_config = vllm_config.parallel_config
|
parallel_config = vllm_config.parallel_config
|
||||||
if parallel_config.worker_cls == "auto":
|
if parallel_config.worker_cls == "auto":
|
||||||
|
|||||||
@@ -457,6 +457,8 @@ def init_worker_distributed_environment(
|
|||||||
backend: str = "hccl") -> None:
|
backend: str = "hccl") -> None:
|
||||||
"""Initialize the distributed environment."""
|
"""Initialize the distributed environment."""
|
||||||
set_custom_all_reduce(not parallel_config.disable_custom_all_reduce)
|
set_custom_all_reduce(not parallel_config.disable_custom_all_reduce)
|
||||||
|
# register communicator patch before init dist env
|
||||||
|
from vllm_ascend import patch # noqa: F401
|
||||||
|
|
||||||
init_distributed_environment(parallel_config.world_size, rank,
|
init_distributed_environment(parallel_config.world_size, rank,
|
||||||
distributed_init_method, local_rank, backend)
|
distributed_init_method, local_rank, backend)
|
||||||
|
|||||||
Reference in New Issue
Block a user