add qwen3

This commit is contained in:
Chranos
2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
这个文件夹里包含 ray 适配 mlu 所需的内容. 其 diff 同时放在了 `diff.patch` 里.
原始适配基于官方 commit: 457d6e930e2d87354c9462b150be26a592508ea1, 其对应的 wheel 包在:
`https://s3-us-west-2.amazonaws.com/ray-wheels/master/457d6e930e2d87354c9462b150be26a592508ea1/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl`
安装 ray 的 mlu 适配的步骤为:
1. 安装官方commit的wheel包:
```bash
pip install https://s3-us-west-2.amazonaws.com/ray-wheels/master/457d6e930e2d87354c9462b150be26a592508ea1/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl
```
2. 把 `__init__.py` 和 `mlu.py` 拷到包安装的地方. 其中,
`__init__.py` 会把已有的 `__init__.py` 覆盖, `mlu.py` 会是一个全新的文件. 比如这样:
```bash
cp python/ray/_private/accelerators/__init__.py /path/to/python3.10/site-packages/ray/_private/accelerators/__init__.py && \
cp python/ray/_private/accelerators/mlu.py /path/to/python3.10/site-packages/ray/_private/accelerators/
```

View File

@@ -0,0 +1,82 @@
from typing import Set, Optional
from ray._private.accelerators.accelerator import AcceleratorManager
from ray._private.accelerators.nvidia_gpu import NvidiaGPUAcceleratorManager
from ray._private.accelerators.intel_gpu import IntelGPUAcceleratorManager
from ray._private.accelerators.amd_gpu import AMDGPUAcceleratorManager
from ray._private.accelerators.tpu import TPUAcceleratorManager
from ray._private.accelerators.neuron import NeuronAcceleratorManager
from ray._private.accelerators.hpu import HPUAcceleratorManager
from ray._private.accelerators.npu import NPUAcceleratorManager
from ray._private.accelerators.mlu import MLUAcceleratorManager
def get_all_accelerator_managers() -> Set[AcceleratorManager]:
    """Return every accelerator manager class supported by Ray.

    The Cambricon ``MLUAcceleratorManager`` is listed alongside the other
    managers imported by this module.
    """
    manager_classes = (
        NvidiaGPUAcceleratorManager,
        IntelGPUAcceleratorManager,
        AMDGPUAcceleratorManager,
        TPUAcceleratorManager,
        NeuronAcceleratorManager,
        HPUAcceleratorManager,
        NPUAcceleratorManager,
        MLUAcceleratorManager,
    )
    return set(manager_classes)
def get_all_accelerator_resource_names() -> Set[str]:
    """Return the distinct resource names reported by all accelerator managers.

    Managers that report the same resource name contribute a single entry,
    because the names are collected into a set.
    """
    resource_names: Set[str] = set()
    for manager in get_all_accelerator_managers():
        resource_names.add(manager.get_resource_name())
    return resource_names
def get_accelerator_manager_for_resource(
    resource_name: str,
) -> Optional[AcceleratorManager]:
    """Look up the accelerator manager registered for a resource name.

    E.g., TPUAcceleratorManager is returned if resource name is "TPU".

    The name-to-manager table is built on the first call and stored as an
    attribute of this function; later calls only perform a dictionary lookup.

    Args:
        resource_name: Accelerator resource name to look up.

    Returns:
        The matching accelerator manager, or None when no manager is
        registered under ``resource_name``.
    """
    this_func = get_accelerator_manager_for_resource
    cached_table = getattr(this_func, "_resource_name_to_accelerator_manager", None)
    if cached_table is not None:
        return cached_table.get(resource_name, None)

    # Lazy initialization: first call, so build the lookup table.
    table = {}
    for manager in get_all_accelerator_managers():
        table[manager.get_resource_name()] = manager

    # Several managers report the "GPU" resource name, so the entry is chosen
    # explicitly: the first of AMD, Intel, MLU that detects at least one device
    # on this node wins, and NVIDIA is the fallback.
    gpu_manager = NvidiaGPUAcceleratorManager
    for candidate in (
        AMDGPUAcceleratorManager,
        IntelGPUAcceleratorManager,
        MLUAcceleratorManager,
    ):
        if candidate.get_current_node_num_accelerators() > 0:
            gpu_manager = candidate
            break
    table["GPU"] = gpu_manager

    this_func._resource_name_to_accelerator_manager = table
    return table.get(resource_name, None)
# Public names exported by this package: the accelerator manager classes
# (including the Cambricon MLU manager) followed by the lookup helpers
# defined in this module.
__all__ = [
"NvidiaGPUAcceleratorManager",
"IntelGPUAcceleratorManager",
"AMDGPUAcceleratorManager",
"TPUAcceleratorManager",
"NeuronAcceleratorManager",
"HPUAcceleratorManager",
"NPUAcceleratorManager",
"MLUAcceleratorManager",
"get_all_accelerator_managers",
"get_all_accelerator_resource_names",
"get_accelerator_manager_for_resource",
]

View File

@@ -0,0 +1,243 @@
commit 7376225d16e381ecae5cc07d84db9eed043ed06a
Author: tanhaojue <tanhaojue@cambricon.com>
Date: Thu Mar 7 15:54:09 2024 +0800
support mlu
diff --git a/python/ray/_private/accelerators/__init__.py b/python/ray/_private/accelerators/__init__.py
index 71550bc..07bdcd6 100644
--- a/python/ray/_private/accelerators/__init__.py
+++ b/python/ray/_private/accelerators/__init__.py
@@ -8,6 +8,7 @@ from ray._private.accelerators.tpu import TPUAcceleratorManager
from ray._private.accelerators.neuron import NeuronAcceleratorManager
from ray._private.accelerators.hpu import HPUAcceleratorManager
from ray._private.accelerators.npu import NPUAcceleratorManager
+from ray._private.accelerators.mlu import MLUAcceleratorManager
def get_all_accelerator_managers() -> Set[AcceleratorManager]:
@@ -20,6 +21,7 @@ def get_all_accelerator_managers() -> Set[AcceleratorManager]:
NeuronAcceleratorManager,
HPUAcceleratorManager,
NPUAcceleratorManager,
+ MLUAcceleratorManager,
}
@@ -55,6 +57,8 @@ def get_accelerator_manager_for_resource(
resource_name_to_accelerator_manager["GPU"] = AMDGPUAcceleratorManager
elif IntelGPUAcceleratorManager.get_current_node_num_accelerators() > 0:
resource_name_to_accelerator_manager["GPU"] = IntelGPUAcceleratorManager
+ elif MLUAcceleratorManager.get_current_node_num_accelerators() > 0:
+ resource_name_to_accelerator_manager["GPU"] = MLUAcceleratorManager
else:
resource_name_to_accelerator_manager["GPU"] = NvidiaGPUAcceleratorManager
get_accelerator_manager_for_resource._resource_name_to_accelerator_manager = (
@@ -71,6 +75,7 @@ __all__ = [
"NeuronAcceleratorManager",
"HPUAcceleratorManager",
"NPUAcceleratorManager",
+ "MLUAcceleratorManager",
"get_all_accelerator_managers",
"get_all_accelerator_resource_names",
"get_accelerator_manager_for_resource",
diff --git a/python/ray/_private/accelerators/mlu.py b/python/ray/_private/accelerators/mlu.py
new file mode 100755
index 0000000..21a5771
--- /dev/null
+++ b/python/ray/_private/accelerators/mlu.py
@@ -0,0 +1,92 @@
+import os
+import glob
+import logging
+from typing import Optional, List, Tuple
+import torch
+import torch_mlu
+from ray._private.accelerators.accelerator import AcceleratorManager
+
+logger = logging.getLogger(__name__)
+
+MLU_VISIBLE_DEVICES_ENV_VAR = "MLU_VISIBLE_DEVICES"
+NOSET_MLU_VISIBLE_DEVICES_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_MLU_VISIBLE_DEVICES"
+
+
+class MLUAcceleratorManager(AcceleratorManager):
+ """Cambricon MLU accelerators."""
+
+ @staticmethod
+ def get_resource_name() -> str:
+ return "GPU"
+
+ @staticmethod
+ def get_visible_accelerator_ids_env_var() -> str:
+ return MLU_VISIBLE_DEVICES_ENV_VAR
+
+ @staticmethod
+ def get_current_process_visible_accelerator_ids() -> Optional[List[str]]:
+ mlu_visible_devices = os.environ.get(
+ MLUAcceleratorManager.get_visible_accelerator_ids_env_var(), None
+ )
+
+ if mlu_visible_devices is None:
+ return None
+
+ if mlu_visible_devices == "":
+ return []
+
+ if mlu_visible_devices == "NoDevFiles":
+ return []
+
+ return list(mlu_visible_devices.split(","))
+
+ @staticmethod
+ def get_current_node_num_accelerators() -> int:
+ """Attempt to detect the number of MLUs on this machine.
+
+ MLU chips are represented as devices within `/dev/`, either as `/dev/davinci?`.
+
+ Returns:
+ The number of MLUs if any were detected, otherwise 0.
+ """
+ try:
+ return torch.mlu.device_count()
+ except Exception as e:
+ logger.debug("Could not import CambriconCL: %s", e)
+
+ try:
+ mlu_files = glob.glob("/dev/cambricon_dev?")
+ return len(mlu_files)
+ except Exception as e:
+ logger.debug("Failed to detect number of MLUs: %s", e)
+ return 0
+
+ @staticmethod
+ def get_current_node_accelerator_type() -> Optional[str]:
+ """Get the type of the Cambricon MLU on the current node.
+
+ Returns:
+ A string of the type, such as "MLU370".
+ """
+ try:
+ return torch.mlu.get_device_name(0)
+ except Exception:
+ logger.exception("Failed to detect MLU type.")
+ return None
+
+ @staticmethod
+ def validate_resource_request_quantity(
+ quantity: float,
+ ) -> Tuple[bool, Optional[str]]:
+ return (True, None)
+
+ @staticmethod
+ def set_current_process_visible_accelerator_ids(
+ visible_mlu_devices: List[str],
+ ) -> None:
+ if os.environ.get(NOSET_MLU_VISIBLE_DEVICES_ENV_VAR):
+ return
+
+ os.environ[
+ MLUAcceleratorManager.get_visible_accelerator_ids_env_var()
+ ] = ",".join([str(i) for i in visible_mlu_devices])
diff --git a/python/ray/tests/accelerators/test_mlu.py b/python/ray/tests/accelerators/test_mlu.py
new file mode 100755
index 0000000..70e81f7
--- /dev/null
+++ b/python/ray/tests/accelerators/test_mlu.py
@@ -0,0 +1,92 @@
+import os
+import sys
+import pytest
+from unittest.mock import patch
+
+import ray
+from ray._private.accelerators import MLUAcceleratorManager as Accelerator
+
+
+@patch("glob.glob")
+@patch("os.listdir")
+def test_autodetect_num_mlus(mock_list, mock_glob):
+ mock_glob.return_value = [f"/dev/davinci{i}" for i in range(4)]
+ # mock_list.return_value = []
+ assert Accelerator.get_current_node_num_accelerators() == 4
+
+
+@patch("glob.glob")
+@patch("os.listdir")
+def test_autodetect_num_mlus_without_devices(mock_list, mock_glob):
+ mock_glob.side_effect = Exception
+ # mock_list.return_value = []
+ assert Accelerator.get_current_node_num_accelerators() == 0
+
+
+def test_mlu_accelerator_manager_api():
+ assert Accelerator.get_resource_name() == "MLU"
+ assert Accelerator.get_visible_accelerator_ids_env_var() == "MLU_VISIBLE_DEVICES"
+ assert Accelerator.validate_resource_request_quantity(0.5) == (True, None)
+ assert Accelerator.validate_resource_request_quantity(1) == (True, None)
+
+
+def test_visible_mlu_type(monkeypatch, shutdown_only):
+ with patch.object(
+ Accelerator, "get_current_node_num_accelerators", return_value=4
+ ), patch.object(
+ Accelerator, "get_current_node_accelerator_type", return_value="MLU370"
+ ):
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
+ manager = ray._private.accelerators.get_accelerator_manager_for_resource("MLU")
+ assert manager.get_current_node_accelerator_type() == "MLU370"
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Not supported mock on Windows")
+def test_visible_mlu_ids(monkeypatch, shutdown_only):
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
+ with patch.object(Accelerator, "get_current_node_num_accelerators", return_value=4):
+
+ ray.init()
+ manager = ray._private.accelerators.get_accelerator_manager_for_resource("MLU")
+ assert manager.get_current_node_num_accelerators() == 4
+ assert manager.__name__ == "MLUAcceleratorManager"
+ assert ray.available_resources()["MLU"] == 3
+
+def test_get_current_process_visible_accelerator_ids(monkeypatch, shutdown_only):
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
+ assert Accelerator.get_current_process_visible_accelerator_ids() == ["0", "1", "2"]
+
+ monkeypatch.delenv("MLU_VISIBLE_DEVICES")
+ assert Accelerator.get_current_process_visible_accelerator_ids() is None
+
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "")
+ assert Accelerator.get_current_process_visible_accelerator_ids() == []
+
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "NoDevFiles")
+ assert Accelerator.get_current_process_visible_accelerator_ids() == []
+
+
+def test_set_current_process_visible_accelerator_ids(shutdown_only):
+ Accelerator.set_current_process_visible_accelerator_ids(["0"])
+ assert os.environ["MLU_VISIBLE_DEVICES"] == "0"
+
+ Accelerator.set_current_process_visible_accelerator_ids(["0", "1"])
+ assert os.environ["MLU_VISIBLE_DEVICES"] == "0,1"
+
+ Accelerator.set_current_process_visible_accelerator_ids(["0", "1", "2"])
+ assert os.environ["MLU_VISIBLE_DEVICES"] == "0,1,2"
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Not supported mock on Windows")
+def test_auto_detected_more_than_visible(monkeypatch, shutdown_only):
+ with patch.object(Accelerator, "get_current_node_num_accelerators", return_value=4):
+ # If more MLUs are detected than visible.
+ monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
+
+ ray.init()
+ assert ray.available_resources()["MLU"] == 3
+
+if __name__ == "__main__":
+ if os.environ.get("PARALLEL_CI"):
+ sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__]))
+ else:
+ sys.exit(pytest.main(["-sv", __file__]))
diff --git a/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl b/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl
new file mode 100644
index 0000000..8628a88
Binary files /dev/null and b/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl differ

View File

@@ -0,0 +1,11 @@
diff --git a/ray_mlu/mlu.py b/ray_mlu/mlu.py
index 21a57719..2c63fd5b 100755
--- a/ray_mlu/mlu.py
+++ b/ray_mlu/mlu.py
@@ -87,6 +87,3 @@ class MLUAcceleratorManager(AcceleratorManager):
if os.environ.get(NOSET_MLU_VISIBLE_DEVICES_ENV_VAR):
return
- os.environ[
- MLUAcceleratorManager.get_visible_accelerator_ids_env_var()
- ] = ",".join([str(i) for i in visible_mlu_devices])

92
vllm-v0.6.2/ray_mlu/mlu.py Executable file
View File

@@ -0,0 +1,92 @@
import os
import glob
import logging
from typing import Optional, List, Tuple
import torch
import torch_mlu
from ray._private.accelerators.accelerator import AcceleratorManager
# Module-level logger used for the device-detection diagnostics below.
logger = logging.getLogger(__name__)
# Environment variable holding the comma-separated MLU device IDs visible to
# the current process.
MLU_VISIBLE_DEVICES_ENV_VAR = "MLU_VISIBLE_DEVICES"
# When this environment variable is set to a non-empty value,
# set_current_process_visible_accelerator_ids() returns without touching
# MLU_VISIBLE_DEVICES.
NOSET_MLU_VISIBLE_DEVICES_ENV_VAR = "RAY_EXPERIMENTAL_NOSET_MLU_VISIBLE_DEVICES"
class MLUAcceleratorManager(AcceleratorManager):
    """Cambricon MLU accelerators."""

    @staticmethod
    def get_resource_name() -> str:
        """Return the Ray resource name under which MLUs are reported.

        MLUs are reported under the generic "GPU" resource name rather than a
        dedicated "MLU" one.
        """
        return "GPU"

    @staticmethod
    def get_visible_accelerator_ids_env_var() -> str:
        """Return the name of the env var listing the visible MLU device IDs."""
        return MLU_VISIBLE_DEVICES_ENV_VAR

    @staticmethod
    def get_current_process_visible_accelerator_ids() -> Optional[List[str]]:
        """Read the MLU device IDs visible to the current process.

        Returns:
            None when the env var is unset, an empty list when it is empty or
            set to "NoDevFiles", otherwise the comma-separated IDs as strings.
        """
        mlu_visible_devices = os.environ.get(
            MLUAcceleratorManager.get_visible_accelerator_ids_env_var(), None
        )

        if mlu_visible_devices is None:
            return None

        if mlu_visible_devices == "":
            return []

        if mlu_visible_devices == "NoDevFiles":
            return []

        return list(mlu_visible_devices.split(","))

    @staticmethod
    def get_current_node_num_accelerators() -> int:
        """Attempt to detect the number of MLUs on this machine.

        ``torch.mlu.device_count()`` is tried first. If that call raises, the
        device nodes matching ``/dev/cambricon_dev<N>`` are counted instead.

        Returns:
            The number of MLUs if any were detected, otherwise 0.
        """
        try:
            return torch.mlu.device_count()
        except Exception as e:
            logger.debug("Could not query the MLU count through torch.mlu: %s", e)

        try:
            # "[0-9]*" rather than "?": "?" matches exactly one character, so
            # device nodes numbered 10 and above would not be counted.
            mlu_files = glob.glob("/dev/cambricon_dev[0-9]*")
            return len(mlu_files)
        except Exception as e:
            logger.debug("Failed to detect number of MLUs: %s", e)
            return 0

    @staticmethod
    def get_current_node_accelerator_type() -> Optional[str]:
        """Get the type of the Cambricon MLU on the current node.

        Returns:
            A string of the type, such as "MLU370", or None when the device
            name cannot be queried (the failure is logged).
        """
        try:
            return torch.mlu.get_device_name(0)
        except Exception:
            logger.exception("Failed to detect MLU type.")
            return None

    @staticmethod
    def validate_resource_request_quantity(
        quantity: float,
    ) -> Tuple[bool, Optional[str]]:
        """Accept any requested quantity.

        Returns:
            Always ``(True, None)``: the request is valid, with no error message.
        """
        return (True, None)

    @staticmethod
    def set_current_process_visible_accelerator_ids(
        visible_mlu_devices: List[str],
    ) -> None:
        """Export the given device IDs through the MLU visibility env var.

        Nothing is written when RAY_EXPERIMENTAL_NOSET_MLU_VISIBLE_DEVICES is
        set to a non-empty value.

        Args:
            visible_mlu_devices: Device IDs to expose; they are joined with
                commas.
        """
        if os.environ.get(NOSET_MLU_VISIBLE_DEVICES_ENV_VAR):
            return

        os.environ[
            MLUAcceleratorManager.get_visible_accelerator_ids_env_var()
        ] = ",".join([str(i) for i in visible_mlu_devices])

1825
vllm-v0.6.2/ray_mlu/node.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,154 @@
import os
import sys
import logging
import asyncio
import subprocess
import copy
from pathlib import Path
from typing import Tuple, List, Dict, Optional
from ray._private.runtime_env.context import RuntimeEnvContext
from ray._private.runtime_env.plugin import RuntimeEnvPlugin
from ray._private.utils import (
try_to_create_directory,
)
from ray.exceptions import RuntimeEnvSetupError
# Logger used by the plugin hooks when the caller does not pass one.
default_logger = logging.getLogger(__name__)
# Profiler options used when runtime_env={"_nsight": "default"}.
# Each entry is turned into a command-line flag by parse_nsight_config().
NSIGHT_DEFAULT_CONFIG = {
"o": "'worker_process_%p'",
# Options below are disabled in this configuration:
# "cudabacktrace": "all",
# "stop-on-exit": "true",
"force_overwrite": "false"
}
def parse_nsight_config(nsight_config: Dict[str, str]) -> List[str]:
    """Translate a dictionary of profiler options into a command line.

    Option names longer than one character become a single
    ``--option=value`` item; all other names become two items,
    ``-option`` followed by the value. The convention follows
    https://www.gnu.org/software/libc/manual/html_node/Argument-Syntax.html

    Args:
        nsight_config: Mapping from option name to option value.

    Returns:
        The ``cnperf-cli record`` command line split into a list of strings.
    """
    cmd_parts = ["cnperf-cli", "record"]
    for name, value in nsight_config.items():
        if len(name) <= 1:
            # Short option: flag and value are separate arguments.
            cmd_parts.extend([f"-{name}", value])
        else:
            cmd_parts.append(f"--{name}={value}")
    return cmd_parts
class NsightPlugin(RuntimeEnvPlugin):
    """Runtime-env plugin that runs worker processes under the profiler.

    The profiler command line is produced by ``parse_nsight_config()``
    (``cnperf-cli record ...``) and is prepended to the worker's Python
    executable in ``modify_context()``.
    """

    name = "_nsight"

    def __init__(self, resources_dir: str):
        """Create the ``logs/nsight`` directory next to ``resources_dir``.

        Args:
            resources_dir: Runtime resources directory; its parent directory
                is used as the base for ``logs/nsight``.
        """
        self.nsight_cmd = []
        # replace this with better way to get logs dir
        session_dir, _ = os.path.split(resources_dir)
        self._nsight_dir = Path(session_dir) / "logs" / "nsight"
        try_to_create_directory(self._nsight_dir)

    async def _check_nsight_script(
        self, nsight_config: Dict[str, str]
    ) -> Tuple[bool, Optional[str]]:
        """Validate the profiler options by profiling an empty Python command.

        Args:
            nsight_config: dictionary mapping profiler option to its value

        Returns:
            A tuple of a boolean indicating whether the options are valid and
            an error message (None when they are valid).
        """
        # Use "empty" under the plugin's log directory as the throw-away
        # output location for the trial run; the caller's dict is not mutated.
        nsight_config_copy = copy.deepcopy(nsight_config)
        try_to_create_directory(Path(self._nsight_dir) / "empty")
        nsight_config_copy["o"] = str(Path(self._nsight_dir) / "empty")
        nsight_cmd = parse_nsight_config(nsight_config_copy)
        try:
            nsight_cmd = nsight_cmd + ["python", "-c", '""']
            process = await asyncio.create_subprocess_exec(
                *nsight_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout, stderr = await process.communicate()
            # communicate() returns bytes. Comparing bytes with "" is always
            # unequal, so rely on truthiness to fall back to stdout when
            # stderr is empty, then decode to get a readable message.
            raw_msg = stderr.strip() or stdout.strip()
            error_msg = raw_msg.decode(errors="replace")

            # Remove the throw-away output produced by the trial run.
            clean_up_cmd = ["rm", f"{nsight_config_copy['o']}", "-rf"]
            cleanup_process = await asyncio.create_subprocess_exec(
                *clean_up_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            _, _ = await cleanup_process.communicate()
            if process.returncode == 0:
                return True, None
            else:
                return False, error_msg
        except FileNotFoundError:
            # Raised when an executable in the command cannot be found.
            return False, ("nsight is not installed")

    async def create(
        self,
        uri: Optional[str],
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: logging.Logger = default_logger,
    ) -> int:
        """Validate the requested profiler config and store the command line.

        Args:
            uri: Unused by this plugin.
            runtime_env: Runtime environment whose ``nsight()`` value is
                either falsy, the string "default", or a dict of options.
            context: Unused here; see ``modify_context()``.
            logger: Logger used to report validation failures.

        Returns:
            Always 0.

        Raises:
            RuntimeEnvSetupError: On non-Linux platforms, for an unsupported
                string config, or when the trial profiler run fails.
        """
        nsight_config = runtime_env.nsight()
        if not nsight_config:
            return 0

        # nsight_config is known to be truthy from here on.
        if sys.platform != "linux":
            raise RuntimeEnvSetupError(
                "Cnperf CLI is only available in Linux.\n"
                "More information can be found in "
                "https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html"
            )

        if isinstance(nsight_config, str):
            if nsight_config == "default":
                nsight_config = NSIGHT_DEFAULT_CONFIG
            else:
                raise RuntimeEnvSetupError(
                    f"Unsupported nsight config: {nsight_config}. "
                    "The supported config is 'default' or "
                    "Dictionary of nsight options"
                )

        is_valid_nsight_cmd, error_msg = await self._check_nsight_script(nsight_config)
        if not is_valid_nsight_cmd:
            logger.warning(error_msg)
            raise RuntimeEnvSetupError(
                "cnperf profile failed to run with the following "
                f"error message:\n {error_msg}"
            )

        # Create the output directory only when an "o" option was supplied;
        # a user-provided dict without it must not fail with KeyError.
        output_dir = nsight_config.get("o")
        if output_dir:
            try_to_create_directory(output_dir)
        self.nsight_cmd = parse_nsight_config(nsight_config)
        return 0

    def modify_context(
        self,
        uris: List[str],
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        """Prefix the context's Python executable with the profiler command.

        Args:
            uris: Unused by this plugin.
            runtime_env: Unused by this plugin.
            context: Runtime-env context whose ``py_executable`` is replaced.
            logger: Logger used for the informational message.
        """
        logger.info("Running nsight profiler")
        context.py_executable = " ".join(self.nsight_cmd) + " python"

92
vllm-v0.6.2/ray_mlu/test_mlu.py Executable file
View File

@@ -0,0 +1,92 @@
import os
import sys
import pytest
from unittest.mock import patch
import ray
from ray._private.accelerators import MLUAcceleratorManager as Accelerator
@patch("glob.glob")
@patch("os.listdir")
def test_autodetect_num_mlus(mock_list, mock_glob):
    """The /dev fallback reports one MLU per matching device node."""
    mock_glob.return_value = [f"/dev/cambricon_dev{i}" for i in range(4)]
    # The manager asks torch.mlu.device_count() first. Make that call fail so
    # the assertion checks the glob fallback rather than the number of MLUs
    # physically present on the test machine.
    with patch("torch.mlu.device_count", side_effect=RuntimeError("no MLU runtime")):
        assert Accelerator.get_current_node_num_accelerators() == 4
@patch("glob.glob")
@patch("os.listdir")
def test_autodetect_num_mlus_without_devices(mock_list, mock_glob):
    """Zero MLUs are reported when both detection paths fail."""
    mock_glob.side_effect = Exception
    # The manager asks torch.mlu.device_count() first. Make that call fail as
    # well, otherwise a machine with real MLUs would report its actual count.
    with patch("torch.mlu.device_count", side_effect=RuntimeError("no MLU runtime")):
        assert Accelerator.get_current_node_num_accelerators() == 0
def test_mlu_accelerator_manager_api():
    """Check the static values exposed by the MLU accelerator manager."""
    # The MLU manager reports itself under the "GPU" resource name.
    assert Accelerator.get_resource_name() == "GPU"
    assert Accelerator.get_visible_accelerator_ids_env_var() == "MLU_VISIBLE_DEVICES"
    assert Accelerator.validate_resource_request_quantity(0.5) == (True, None)
    assert Accelerator.validate_resource_request_quantity(1) == (True, None)
def test_visible_mlu_type(monkeypatch, shutdown_only):
    """The manager found for the MLU resource reports the patched type."""
    with patch.object(
        Accelerator, "get_current_node_num_accelerators", return_value=4
    ), patch.object(
        Accelerator, "get_current_node_accelerator_type", return_value="MLU370"
    ):
        monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
        # The MLU manager is registered under "GPU"; looking up "MLU" would
        # return None.
        manager = ray._private.accelerators.get_accelerator_manager_for_resource("GPU")
        assert manager.get_current_node_accelerator_type() == "MLU370"
@pytest.mark.skipif(sys.platform == "win32", reason="Not supported mock on Windows")
def test_visible_mlu_ids(monkeypatch, shutdown_only):
    """Only the devices listed in MLU_VISIBLE_DEVICES become Ray resources."""
    monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
    with patch.object(Accelerator, "get_current_node_num_accelerators", return_value=4):
        ray.init()
        # The MLU manager is registered under the "GPU" resource name, so both
        # the lookup and the resource key use "GPU".
        manager = ray._private.accelerators.get_accelerator_manager_for_resource("GPU")
        assert manager.get_current_node_num_accelerators() == 4
        assert manager.__name__ == "MLUAcceleratorManager"
        assert ray.available_resources()["GPU"] == 3
def test_get_current_process_visible_accelerator_ids(monkeypatch, shutdown_only):
    """Check how each MLU_VISIBLE_DEVICES state maps to visible device IDs."""
    env_name = "MLU_VISIBLE_DEVICES"
    read_visible_ids = Accelerator.get_current_process_visible_accelerator_ids

    # A comma-separated value yields one ID string per entry.
    monkeypatch.setenv(env_name, "0,1,2")
    assert read_visible_ids() == ["0", "1", "2"]

    # An unset variable is reported as None.
    monkeypatch.delenv(env_name)
    assert read_visible_ids() is None

    # An empty value and the "NoDevFiles" marker both mean no visible devices.
    for no_device_value in ("", "NoDevFiles"):
        monkeypatch.setenv(env_name, no_device_value)
        assert read_visible_ids() == []
def test_set_current_process_visible_accelerator_ids(shutdown_only):
    """Setting visible IDs writes them comma-joined to MLU_VISIBLE_DEVICES."""
    for device_ids in (["0"], ["0", "1"], ["0", "1", "2"]):
        Accelerator.set_current_process_visible_accelerator_ids(device_ids)
        assert os.environ["MLU_VISIBLE_DEVICES"] == ",".join(device_ids)
@pytest.mark.skipif(sys.platform == "win32", reason="Not supported mock on Windows")
def test_auto_detected_more_than_visible(monkeypatch, shutdown_only):
    """The visible-device list caps the resources when more MLUs are detected."""
    with patch.object(Accelerator, "get_current_node_num_accelerators", return_value=4):
        # If more MLUs are detected than visible.
        monkeypatch.setenv("MLU_VISIBLE_DEVICES", "0,1,2")
        ray.init()
        # The MLU manager reports its devices under the "GPU" resource name.
        assert ray.available_resources()["GPU"] == 3
if __name__ == "__main__":
    # Choose the pytest arguments from the PARALLEL_CI environment variable,
    # then run this file and exit with pytest's status code.
    if os.environ.get("PARALLEL_CI"):
        pytest_args = ["-n", "auto", "--boxed", "-vs", __file__]
    else:
        pytest_args = ["-sv", __file__]
    sys.exit(pytest.main(pytest_args))