Add VLLM_ASCEND_ACL_OP_INIT_MODE and export it as ACL_OP_INIT_MODE.

envs.py: registers VLLM_ASCEND_ACL_OP_INIT_MODE, read from the environment
as a string with default '1'.
platform.py: imports vllm_ascend.envs as ascend_envs and, at module import
time, assigns that value to os.environ["ACL_OP_INIT_MODE"].

diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py
index 2fd7041..2d11e3d 100644
--- a/vllm_ascend/envs.py
+++ b/vllm_ascend/envs.py
@@ -112,6 +112,15 @@ env_variables: Dict[str, Callable[[], Any]] = {
     "VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE":
     lambda: bool(int(os.getenv("VLLM_ASCEND_MODEL_EXECUTE_TIME_OBSERVE", '0'))
                  ),
+    # VLLM_ASCEND_ACL_OP_INIT_MODE:
+    #   0: default, normal init.
+    #   1: delay init until launch aclops.
+    #   2: forbid aclops init and launch.
+    # Find more details at https://gitee.com/ascend/pytorch/pulls/18094
+    # We set this var default to `1` in vllm-ascend to avoid segment fault when
+    # enable `pin_memory` while creating a tensor using `torch.tensor`.
+    "VLLM_ASCEND_ACL_OP_INIT_MODE":
+    lambda: os.getenv("VLLM_ASCEND_ACL_OP_INIT_MODE", '1'),
 }
 
 # end-env-vars-definition
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 96d4a9b..bbe9511 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -24,6 +24,7 @@ import vllm.envs as envs
 from vllm.logger import logger
 from vllm.platforms import Platform, PlatformEnum
 
+import vllm_ascend.envs as ascend_envs
 from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
 from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD, update_aclgraph_sizes
 
@@ -46,6 +47,7 @@ else:
     FlexibleArgumentParser = None
 
 os.environ["RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES"] = "1"
+os.environ["ACL_OP_INIT_MODE"] = ascend_envs.VLLM_ASCEND_ACL_OP_INIT_MODE
 
 
 class NPUPlatform(Platform):