From 389030a8f81f9d95e1b82bb3bef95c82d09d8ca3 Mon Sep 17 00:00:00 2001 From: starkwj Date: Wed, 11 Feb 2026 06:27:58 +0000 Subject: [PATCH] add env vars & misc --- CMakeLists.txt | 6 +-- Dockerfile | 4 +- Dockerfile.310p | 4 +- Dockerfile.310p.openEuler | 4 +- Dockerfile.a3 | 4 +- Dockerfile.a3.openEuler | 4 +- Dockerfile.openEuler | 4 +- README.md | 4 ++ csrc/camem_allocator.cpp | 14 ++++--- .../{idle_offload => vnpu_offload}/.gitignore | 0 csrc/{idle_offload => vnpu_offload}/Makefile | 2 +- .../include/spdlog/async.h | 0 .../include/spdlog/async_logger-inl.h | 0 .../include/spdlog/async_logger.h | 0 .../include/spdlog/cfg/argv.h | 0 .../include/spdlog/cfg/env.h | 0 .../include/spdlog/cfg/helpers-inl.h | 0 .../include/spdlog/cfg/helpers.h | 0 .../include/spdlog/common-inl.h | 0 .../include/spdlog/common.h | 0 .../include/spdlog/details/backtracer-inl.h | 0 .../include/spdlog/details/backtracer.h | 0 .../include/spdlog/details/circular_q.h | 0 .../include/spdlog/details/console_globals.h | 0 .../include/spdlog/details/file_helper-inl.h | 0 .../include/spdlog/details/file_helper.h | 0 .../include/spdlog/details/fmt_helper.h | 0 .../include/spdlog/details/log_msg-inl.h | 0 .../include/spdlog/details/log_msg.h | 0 .../spdlog/details/log_msg_buffer-inl.h | 0 .../include/spdlog/details/log_msg_buffer.h | 0 .../include/spdlog/details/mpmc_blocking_q.h | 0 .../include/spdlog/details/null_mutex.h | 0 .../include/spdlog/details/os-inl.h | 0 .../include/spdlog/details/os.h | 0 .../spdlog/details/periodic_worker-inl.h | 0 .../include/spdlog/details/periodic_worker.h | 0 .../include/spdlog/details/registry-inl.h | 0 .../include/spdlog/details/registry.h | 0 .../spdlog/details/synchronous_factory.h | 0 .../spdlog/details/tcp_client-windows.h | 0 .../include/spdlog/details/tcp_client.h | 0 .../include/spdlog/details/thread_pool-inl.h | 0 .../include/spdlog/details/thread_pool.h | 0 .../spdlog/details/udp_client-windows.h | 0 .../include/spdlog/details/udp_client.h | 0 
.../include/spdlog/details/windows_include.h | 0 .../include/spdlog/fmt/bin_to_hex.h | 0 .../include/spdlog/fmt/bundled/args.h | 0 .../include/spdlog/fmt/bundled/base.h | 0 .../include/spdlog/fmt/bundled/chrono.h | 0 .../include/spdlog/fmt/bundled/color.h | 0 .../include/spdlog/fmt/bundled/compile.h | 0 .../include/spdlog/fmt/bundled/core.h | 0 .../spdlog/fmt/bundled/fmt.license.rst | 0 .../include/spdlog/fmt/bundled/format-inl.h | 0 .../include/spdlog/fmt/bundled/format.h | 0 .../include/spdlog/fmt/bundled/os.h | 0 .../include/spdlog/fmt/bundled/ostream.h | 0 .../include/spdlog/fmt/bundled/printf.h | 0 .../include/spdlog/fmt/bundled/ranges.h | 0 .../include/spdlog/fmt/bundled/std.h | 0 .../include/spdlog/fmt/bundled/xchar.h | 0 .../include/spdlog/fmt/chrono.h | 0 .../include/spdlog/fmt/compile.h | 0 .../include/spdlog/fmt/fmt.h | 0 .../include/spdlog/fmt/ostr.h | 0 .../include/spdlog/fmt/ranges.h | 0 .../include/spdlog/fmt/std.h | 0 .../include/spdlog/fmt/xchar.h | 0 .../include/spdlog/formatter.h | 0 .../include/spdlog/fwd.h | 0 .../include/spdlog/logger-inl.h | 0 .../include/spdlog/logger.h | 0 .../include/spdlog/mdc.h | 0 .../include/spdlog/pattern_formatter-inl.h | 0 .../include/spdlog/pattern_formatter.h | 0 .../include/spdlog/sinks/android_sink.h | 0 .../include/spdlog/sinks/ansicolor_sink-inl.h | 0 .../include/spdlog/sinks/ansicolor_sink.h | 0 .../include/spdlog/sinks/base_sink-inl.h | 0 .../include/spdlog/sinks/base_sink.h | 0 .../spdlog/sinks/basic_file_sink-inl.h | 0 .../include/spdlog/sinks/basic_file_sink.h | 0 .../include/spdlog/sinks/callback_sink.h | 0 .../include/spdlog/sinks/daily_file_sink.h | 0 .../include/spdlog/sinks/dist_sink.h | 0 .../include/spdlog/sinks/dup_filter_sink.h | 0 .../include/spdlog/sinks/hourly_file_sink.h | 0 .../include/spdlog/sinks/kafka_sink.h | 0 .../include/spdlog/sinks/mongo_sink.h | 0 .../include/spdlog/sinks/msvc_sink.h | 0 .../include/spdlog/sinks/null_sink.h | 0 .../include/spdlog/sinks/ostream_sink.h | 0 
.../include/spdlog/sinks/qt_sinks.h | 0 .../include/spdlog/sinks/ringbuffer_sink.h | 0 .../spdlog/sinks/rotating_file_sink-inl.h | 0 .../include/spdlog/sinks/rotating_file_sink.h | 0 .../include/spdlog/sinks/sink-inl.h | 0 .../include/spdlog/sinks/sink.h | 0 .../spdlog/sinks/stdout_color_sinks-inl.h | 0 .../include/spdlog/sinks/stdout_color_sinks.h | 0 .../include/spdlog/sinks/stdout_sinks-inl.h | 0 .../include/spdlog/sinks/stdout_sinks.h | 0 .../include/spdlog/sinks/syslog_sink.h | 0 .../include/spdlog/sinks/systemd_sink.h | 0 .../include/spdlog/sinks/tcp_sink.h | 0 .../include/spdlog/sinks/udp_sink.h | 0 .../include/spdlog/sinks/win_eventlog_sink.h | 0 .../include/spdlog/sinks/wincolor_sink-inl.h | 0 .../include/spdlog/sinks/wincolor_sink.h | 0 .../include/spdlog/spdlog-inl.h | 0 .../include/spdlog/spdlog.h | 0 .../include/spdlog/stopwatch.h | 0 .../include/spdlog/tweakme.h | 0 .../include/spdlog/version.h | 0 .../npu_helper.h | 4 +- .../shm_helper.h | 8 ++-- .../shm_manager.cpp | 10 ++++- .../shm_manager.h | 0 .../shm_worker.cpp | 1 - .../shm_worker.h | 0 .../vnpu_daemon.cpp} | 38 ++++++++++++++----- vllm_ascend/device_allocator/camem.py | 11 +++--- vllm_ascend/envs.py | 2 +- vllm_ascend/patch/platform/patch_core.py | 10 ++--- vllm_ascend/patch/platform/patch_executor.py | 6 +-- vllm_ascend/worker/worker_v1.py | 8 ++-- 128 files changed, 89 insertions(+), 59 deletions(-) rename csrc/{idle_offload => vnpu_offload}/.gitignore (100%) rename csrc/{idle_offload => vnpu_offload}/Makefile (93%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/async.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/async_logger-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/async_logger.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/cfg/argv.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/cfg/env.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/cfg/helpers-inl.h (100%) rename 
csrc/{idle_offload => vnpu_offload}/include/spdlog/cfg/helpers.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/common-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/common.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/backtracer-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/backtracer.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/circular_q.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/console_globals.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/file_helper-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/file_helper.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/fmt_helper.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/log_msg-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/log_msg.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/log_msg_buffer-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/log_msg_buffer.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/mpmc_blocking_q.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/null_mutex.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/os-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/os.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/periodic_worker-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/periodic_worker.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/registry-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/registry.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/synchronous_factory.h (100%) rename 
csrc/{idle_offload => vnpu_offload}/include/spdlog/details/tcp_client-windows.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/tcp_client.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/thread_pool-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/thread_pool.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/udp_client-windows.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/udp_client.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/details/windows_include.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bin_to_hex.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/args.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/base.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/chrono.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/color.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/compile.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/core.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/fmt.license.rst (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/format-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/format.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/os.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/ostream.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/printf.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/ranges.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/std.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/bundled/xchar.h (100%) rename 
csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/chrono.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/compile.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/fmt.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/ostr.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/ranges.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/std.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fmt/xchar.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/formatter.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/fwd.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/logger-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/logger.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/mdc.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/pattern_formatter-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/pattern_formatter.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/android_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/ansicolor_sink-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/ansicolor_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/base_sink-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/base_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/basic_file_sink-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/basic_file_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/callback_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/daily_file_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/dist_sink.h (100%) rename csrc/{idle_offload => 
vnpu_offload}/include/spdlog/sinks/dup_filter_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/hourly_file_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/kafka_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/mongo_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/msvc_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/null_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/ostream_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/qt_sinks.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/ringbuffer_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/rotating_file_sink-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/rotating_file_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/sink-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/stdout_color_sinks-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/stdout_color_sinks.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/stdout_sinks-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/stdout_sinks.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/syslog_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/systemd_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/tcp_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/udp_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/win_eventlog_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/sinks/wincolor_sink-inl.h (100%) rename csrc/{idle_offload => 
vnpu_offload}/include/spdlog/sinks/wincolor_sink.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/spdlog-inl.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/spdlog.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/stopwatch.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/tweakme.h (100%) rename csrc/{idle_offload => vnpu_offload}/include/spdlog/version.h (100%) rename csrc/{idle_offload => vnpu_offload}/npu_helper.h (95%) rename csrc/{idle_offload => vnpu_offload}/shm_helper.h (92%) rename csrc/{idle_offload => vnpu_offload}/shm_manager.cpp (93%) rename csrc/{idle_offload => vnpu_offload}/shm_manager.h (100%) rename csrc/{idle_offload => vnpu_offload}/shm_worker.cpp (99%) rename csrc/{idle_offload => vnpu_offload}/shm_worker.h (100%) rename csrc/{idle_offload/offload_daemon.cpp => vnpu_offload/vnpu_daemon.cpp} (86%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 561c99a..a054112 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,11 +78,11 @@ message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}") if(SOC_VERSION STREQUAL "ASCEND310P3") file(GLOB VLLM_ASCEND_SRC ${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/csrc/idle_offload/shm_worker.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp) else() file(GLOB VLLM_ASCEND_SRC ${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/csrc/idle_offload/shm_worker.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp) endif() @@ -95,7 +95,7 @@ include_directories( ${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform ${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform ${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host - ${CMAKE_CURRENT_SOURCE_DIR}/csrc/idle_offload/include + ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/include ) set( diff --git a/Dockerfile b/Dockerfile index 
18f016d..e18e3d6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,14 +59,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ diff --git a/Dockerfile.310p b/Dockerfile.310p index 710db19..622dc58 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -51,14 +51,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi source /usr/local/Ascend/nnal/atb/set_env.sh && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ export SOC_VERSION=ASCEND310P3 && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 
bebed5d..5fe6b05 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -49,14 +49,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \ export SOC_VERSION=ASCEND310P3 && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \ diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 1037ce0..e2f177f 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -60,14 +60,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ source /usr/local/Ascend/nnal/atb/set_env.sh && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 
&& \ diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index 21c0631..29bbde4 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -63,14 +63,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi source /usr/local/Ascend/nnal/atb/set_env.sh && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index d13855d..4b93a5b 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -62,14 +62,14 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi source /usr/local/Ascend/nnal/atb/set_env.sh && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \ - cd /vllm-workspace/vllm-ascend/csrc/idle_offload && \ + cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \ make install && make clean && \ python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \ python3 -m pip cache purge ENV VLLM_ASCEND_ENABLE_NZ=0 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ - 
VLLM_ASCEND_ENABLE_IDLE_OFFLOAD=1 + VLLM_ASCEND_ENABLE_VNPU=1 # Install modelscope (for fast download) and ray (for multinode) RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \ diff --git a/README.md b/README.md index 78aeca5..eff8589 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,10 @@ docker build -t vllm-ascend-multi-llm:latest -f ./Dockerfile . 2. Start LLM services with this image, following the official usage instructions. 3. Due to the limited stream resource of Ascend NPU, you may need to restrict graph capture sizes or disable ACLgraph by setting `--enforce-eager`, especially when launching multiple LLMs. Refer to the [link](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html#how-to-troubleshoot-and-resolve-size-capture-failures-resulting-from-stream-resource-exhaustion-and-what-are-the-underlying-causes). +### Environment Variables +- `VNPU_RESERVED_VRAM_SIZE_GB`: The amount of reserved GPU memory (in GB) for other miscellaneous usage. Only needs to be set for `vllm_vnpu_daemon`. Try increasing the variable if you launch multiple LLM services and encounter OOM. Default: `8`. +- `VLLM_VNPU_SHM_NAME`: The name of the shm file. Needs to be set for all containers of the shared vNPU group. Default: `/vllm_acl_vnpu_offload_shm`. + ## Limitations diff --git a/csrc/camem_allocator.cpp b/csrc/camem_allocator.cpp index e88c695..eb76b15 100644 --- a/csrc/camem_allocator.cpp +++ b/csrc/camem_allocator.cpp @@ -19,8 +19,8 @@ #include #include -#include "idle_offload/shm_worker.h" -#include "idle_offload/npu_helper.h" +#include "vnpu_offload/shm_worker.h" +#include "vnpu_offload/npu_helper.h" extern "C" { @@ -312,8 +312,9 @@ my_malloc_offload(ssize_t size, int device, aclrtStream stream) { (aclrtDrvMemHandle*)malloc(sizeof(aclrtDrvMemHandle)); if (!g_python_malloc_callback) { - throw std::runtime_error("my_malloc ERROR: g_python_malloc_callback not set." 
+ - std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__)); + throw std::runtime_error( + "my_malloc ERROR: g_python_malloc_callback not set." + + std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__)); } // Acquire GIL (not in stable ABI officially, but often works) @@ -346,8 +347,9 @@ __attribute__((visibility("default"))) void my_free_offload(void *ptr, ssize_t size, int device, aclrtStream stream) { // get memory handle from the pointer if (!g_python_free_callback) { - throw std::runtime_error("aclrtDrvMemHandle ERROR: g_python_malloc_callback not set." + - std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__)); + throw std::runtime_error( + "my_free ERROR: g_python_malloc_callback not set." + std::string(" ") + + __FILE__ + ":" + std::to_string(__LINE__)); } // Acquire GIL (not in stable ABI officially, but often works) diff --git a/csrc/idle_offload/.gitignore b/csrc/vnpu_offload/.gitignore similarity index 100% rename from csrc/idle_offload/.gitignore rename to csrc/vnpu_offload/.gitignore diff --git a/csrc/idle_offload/Makefile b/csrc/vnpu_offload/Makefile similarity index 93% rename from csrc/idle_offload/Makefile rename to csrc/vnpu_offload/Makefile index 3cea2c3..efcadee 100644 --- a/csrc/idle_offload/Makefile +++ b/csrc/vnpu_offload/Makefile @@ -1,6 +1,6 @@ CXX := g++ TARGET := vllm_vnpu_daemon -SRCS := offload_daemon.cpp shm_manager.cpp +SRCS := vnpu_daemon.cpp shm_manager.cpp ASCEND_HOME := /usr/local/Ascend/ascend-toolkit/latest INCLUDES := -I$(ASCEND_HOME)/include -Iinclude diff --git a/csrc/idle_offload/include/spdlog/async.h b/csrc/vnpu_offload/include/spdlog/async.h similarity index 100% rename from csrc/idle_offload/include/spdlog/async.h rename to csrc/vnpu_offload/include/spdlog/async.h diff --git a/csrc/idle_offload/include/spdlog/async_logger-inl.h b/csrc/vnpu_offload/include/spdlog/async_logger-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/async_logger-inl.h rename to 
csrc/vnpu_offload/include/spdlog/async_logger-inl.h diff --git a/csrc/idle_offload/include/spdlog/async_logger.h b/csrc/vnpu_offload/include/spdlog/async_logger.h similarity index 100% rename from csrc/idle_offload/include/spdlog/async_logger.h rename to csrc/vnpu_offload/include/spdlog/async_logger.h diff --git a/csrc/idle_offload/include/spdlog/cfg/argv.h b/csrc/vnpu_offload/include/spdlog/cfg/argv.h similarity index 100% rename from csrc/idle_offload/include/spdlog/cfg/argv.h rename to csrc/vnpu_offload/include/spdlog/cfg/argv.h diff --git a/csrc/idle_offload/include/spdlog/cfg/env.h b/csrc/vnpu_offload/include/spdlog/cfg/env.h similarity index 100% rename from csrc/idle_offload/include/spdlog/cfg/env.h rename to csrc/vnpu_offload/include/spdlog/cfg/env.h diff --git a/csrc/idle_offload/include/spdlog/cfg/helpers-inl.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/cfg/helpers-inl.h rename to csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h diff --git a/csrc/idle_offload/include/spdlog/cfg/helpers.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers.h similarity index 100% rename from csrc/idle_offload/include/spdlog/cfg/helpers.h rename to csrc/vnpu_offload/include/spdlog/cfg/helpers.h diff --git a/csrc/idle_offload/include/spdlog/common-inl.h b/csrc/vnpu_offload/include/spdlog/common-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/common-inl.h rename to csrc/vnpu_offload/include/spdlog/common-inl.h diff --git a/csrc/idle_offload/include/spdlog/common.h b/csrc/vnpu_offload/include/spdlog/common.h similarity index 100% rename from csrc/idle_offload/include/spdlog/common.h rename to csrc/vnpu_offload/include/spdlog/common.h diff --git a/csrc/idle_offload/include/spdlog/details/backtracer-inl.h b/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/backtracer-inl.h rename to 
csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/backtracer.h b/csrc/vnpu_offload/include/spdlog/details/backtracer.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/backtracer.h rename to csrc/vnpu_offload/include/spdlog/details/backtracer.h diff --git a/csrc/idle_offload/include/spdlog/details/circular_q.h b/csrc/vnpu_offload/include/spdlog/details/circular_q.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/circular_q.h rename to csrc/vnpu_offload/include/spdlog/details/circular_q.h diff --git a/csrc/idle_offload/include/spdlog/details/console_globals.h b/csrc/vnpu_offload/include/spdlog/details/console_globals.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/console_globals.h rename to csrc/vnpu_offload/include/spdlog/details/console_globals.h diff --git a/csrc/idle_offload/include/spdlog/details/file_helper-inl.h b/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/file_helper-inl.h rename to csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/file_helper.h b/csrc/vnpu_offload/include/spdlog/details/file_helper.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/file_helper.h rename to csrc/vnpu_offload/include/spdlog/details/file_helper.h diff --git a/csrc/idle_offload/include/spdlog/details/fmt_helper.h b/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/fmt_helper.h rename to csrc/vnpu_offload/include/spdlog/details/fmt_helper.h diff --git a/csrc/idle_offload/include/spdlog/details/log_msg-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/log_msg-inl.h rename to 
csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/log_msg.h b/csrc/vnpu_offload/include/spdlog/details/log_msg.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/log_msg.h rename to csrc/vnpu_offload/include/spdlog/details/log_msg.h diff --git a/csrc/idle_offload/include/spdlog/details/log_msg_buffer-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/log_msg_buffer-inl.h rename to csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/log_msg_buffer.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/log_msg_buffer.h rename to csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h diff --git a/csrc/idle_offload/include/spdlog/details/mpmc_blocking_q.h b/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/mpmc_blocking_q.h rename to csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h diff --git a/csrc/idle_offload/include/spdlog/details/null_mutex.h b/csrc/vnpu_offload/include/spdlog/details/null_mutex.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/null_mutex.h rename to csrc/vnpu_offload/include/spdlog/details/null_mutex.h diff --git a/csrc/idle_offload/include/spdlog/details/os-inl.h b/csrc/vnpu_offload/include/spdlog/details/os-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/os-inl.h rename to csrc/vnpu_offload/include/spdlog/details/os-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/os.h b/csrc/vnpu_offload/include/spdlog/details/os.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/os.h rename to csrc/vnpu_offload/include/spdlog/details/os.h diff --git 
a/csrc/idle_offload/include/spdlog/details/periodic_worker-inl.h b/csrc/vnpu_offload/include/spdlog/details/periodic_worker-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/periodic_worker-inl.h rename to csrc/vnpu_offload/include/spdlog/details/periodic_worker-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/periodic_worker.h b/csrc/vnpu_offload/include/spdlog/details/periodic_worker.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/periodic_worker.h rename to csrc/vnpu_offload/include/spdlog/details/periodic_worker.h diff --git a/csrc/idle_offload/include/spdlog/details/registry-inl.h b/csrc/vnpu_offload/include/spdlog/details/registry-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/registry-inl.h rename to csrc/vnpu_offload/include/spdlog/details/registry-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/registry.h b/csrc/vnpu_offload/include/spdlog/details/registry.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/registry.h rename to csrc/vnpu_offload/include/spdlog/details/registry.h diff --git a/csrc/idle_offload/include/spdlog/details/synchronous_factory.h b/csrc/vnpu_offload/include/spdlog/details/synchronous_factory.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/synchronous_factory.h rename to csrc/vnpu_offload/include/spdlog/details/synchronous_factory.h diff --git a/csrc/idle_offload/include/spdlog/details/tcp_client-windows.h b/csrc/vnpu_offload/include/spdlog/details/tcp_client-windows.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/tcp_client-windows.h rename to csrc/vnpu_offload/include/spdlog/details/tcp_client-windows.h diff --git a/csrc/idle_offload/include/spdlog/details/tcp_client.h b/csrc/vnpu_offload/include/spdlog/details/tcp_client.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/tcp_client.h rename to 
csrc/vnpu_offload/include/spdlog/details/tcp_client.h diff --git a/csrc/idle_offload/include/spdlog/details/thread_pool-inl.h b/csrc/vnpu_offload/include/spdlog/details/thread_pool-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/thread_pool-inl.h rename to csrc/vnpu_offload/include/spdlog/details/thread_pool-inl.h diff --git a/csrc/idle_offload/include/spdlog/details/thread_pool.h b/csrc/vnpu_offload/include/spdlog/details/thread_pool.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/thread_pool.h rename to csrc/vnpu_offload/include/spdlog/details/thread_pool.h diff --git a/csrc/idle_offload/include/spdlog/details/udp_client-windows.h b/csrc/vnpu_offload/include/spdlog/details/udp_client-windows.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/udp_client-windows.h rename to csrc/vnpu_offload/include/spdlog/details/udp_client-windows.h diff --git a/csrc/idle_offload/include/spdlog/details/udp_client.h b/csrc/vnpu_offload/include/spdlog/details/udp_client.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/udp_client.h rename to csrc/vnpu_offload/include/spdlog/details/udp_client.h diff --git a/csrc/idle_offload/include/spdlog/details/windows_include.h b/csrc/vnpu_offload/include/spdlog/details/windows_include.h similarity index 100% rename from csrc/idle_offload/include/spdlog/details/windows_include.h rename to csrc/vnpu_offload/include/spdlog/details/windows_include.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bin_to_hex.h b/csrc/vnpu_offload/include/spdlog/fmt/bin_to_hex.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bin_to_hex.h rename to csrc/vnpu_offload/include/spdlog/fmt/bin_to_hex.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/args.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/args.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/args.h rename to 
csrc/vnpu_offload/include/spdlog/fmt/bundled/args.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/base.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/base.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/base.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/base.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/chrono.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/chrono.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/chrono.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/chrono.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/color.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/color.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/color.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/color.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/compile.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/compile.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/compile.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/compile.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/core.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/core.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/core.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/core.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/fmt.license.rst b/csrc/vnpu_offload/include/spdlog/fmt/bundled/fmt.license.rst similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/fmt.license.rst rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/fmt.license.rst diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/format-inl.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/format-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/format-inl.h rename to 
csrc/vnpu_offload/include/spdlog/fmt/bundled/format-inl.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/format.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/format.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/format.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/format.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/os.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/os.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/os.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/os.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/ostream.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/ostream.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/ostream.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/ostream.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/printf.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/printf.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/printf.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/printf.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/ranges.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/ranges.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/ranges.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/ranges.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/std.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/std.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/std.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/std.h diff --git a/csrc/idle_offload/include/spdlog/fmt/bundled/xchar.h b/csrc/vnpu_offload/include/spdlog/fmt/bundled/xchar.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/bundled/xchar.h rename to csrc/vnpu_offload/include/spdlog/fmt/bundled/xchar.h diff --git 
a/csrc/idle_offload/include/spdlog/fmt/chrono.h b/csrc/vnpu_offload/include/spdlog/fmt/chrono.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/chrono.h rename to csrc/vnpu_offload/include/spdlog/fmt/chrono.h diff --git a/csrc/idle_offload/include/spdlog/fmt/compile.h b/csrc/vnpu_offload/include/spdlog/fmt/compile.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/compile.h rename to csrc/vnpu_offload/include/spdlog/fmt/compile.h diff --git a/csrc/idle_offload/include/spdlog/fmt/fmt.h b/csrc/vnpu_offload/include/spdlog/fmt/fmt.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/fmt.h rename to csrc/vnpu_offload/include/spdlog/fmt/fmt.h diff --git a/csrc/idle_offload/include/spdlog/fmt/ostr.h b/csrc/vnpu_offload/include/spdlog/fmt/ostr.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/ostr.h rename to csrc/vnpu_offload/include/spdlog/fmt/ostr.h diff --git a/csrc/idle_offload/include/spdlog/fmt/ranges.h b/csrc/vnpu_offload/include/spdlog/fmt/ranges.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/ranges.h rename to csrc/vnpu_offload/include/spdlog/fmt/ranges.h diff --git a/csrc/idle_offload/include/spdlog/fmt/std.h b/csrc/vnpu_offload/include/spdlog/fmt/std.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/std.h rename to csrc/vnpu_offload/include/spdlog/fmt/std.h diff --git a/csrc/idle_offload/include/spdlog/fmt/xchar.h b/csrc/vnpu_offload/include/spdlog/fmt/xchar.h similarity index 100% rename from csrc/idle_offload/include/spdlog/fmt/xchar.h rename to csrc/vnpu_offload/include/spdlog/fmt/xchar.h diff --git a/csrc/idle_offload/include/spdlog/formatter.h b/csrc/vnpu_offload/include/spdlog/formatter.h similarity index 100% rename from csrc/idle_offload/include/spdlog/formatter.h rename to csrc/vnpu_offload/include/spdlog/formatter.h diff --git a/csrc/idle_offload/include/spdlog/fwd.h b/csrc/vnpu_offload/include/spdlog/fwd.h 
similarity index 100% rename from csrc/idle_offload/include/spdlog/fwd.h rename to csrc/vnpu_offload/include/spdlog/fwd.h diff --git a/csrc/idle_offload/include/spdlog/logger-inl.h b/csrc/vnpu_offload/include/spdlog/logger-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/logger-inl.h rename to csrc/vnpu_offload/include/spdlog/logger-inl.h diff --git a/csrc/idle_offload/include/spdlog/logger.h b/csrc/vnpu_offload/include/spdlog/logger.h similarity index 100% rename from csrc/idle_offload/include/spdlog/logger.h rename to csrc/vnpu_offload/include/spdlog/logger.h diff --git a/csrc/idle_offload/include/spdlog/mdc.h b/csrc/vnpu_offload/include/spdlog/mdc.h similarity index 100% rename from csrc/idle_offload/include/spdlog/mdc.h rename to csrc/vnpu_offload/include/spdlog/mdc.h diff --git a/csrc/idle_offload/include/spdlog/pattern_formatter-inl.h b/csrc/vnpu_offload/include/spdlog/pattern_formatter-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/pattern_formatter-inl.h rename to csrc/vnpu_offload/include/spdlog/pattern_formatter-inl.h diff --git a/csrc/idle_offload/include/spdlog/pattern_formatter.h b/csrc/vnpu_offload/include/spdlog/pattern_formatter.h similarity index 100% rename from csrc/idle_offload/include/spdlog/pattern_formatter.h rename to csrc/vnpu_offload/include/spdlog/pattern_formatter.h diff --git a/csrc/idle_offload/include/spdlog/sinks/android_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/android_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/android_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/android_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/ansicolor_sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/ansicolor_sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/ansicolor_sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/ansicolor_sink-inl.h diff --git 
a/csrc/idle_offload/include/spdlog/sinks/ansicolor_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/ansicolor_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/ansicolor_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/ansicolor_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/base_sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/base_sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/base_sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/base_sink-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/base_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/base_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/base_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/base_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/basic_file_sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/basic_file_sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/basic_file_sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/basic_file_sink-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/basic_file_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/basic_file_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/basic_file_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/basic_file_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/callback_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/callback_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/callback_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/callback_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/daily_file_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/daily_file_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/daily_file_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/daily_file_sink.h diff --git 
a/csrc/idle_offload/include/spdlog/sinks/dist_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/dist_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/dist_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/dist_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/dup_filter_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/dup_filter_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/dup_filter_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/dup_filter_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/hourly_file_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/hourly_file_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/hourly_file_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/hourly_file_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/kafka_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/kafka_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/kafka_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/kafka_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/mongo_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/mongo_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/mongo_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/mongo_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/msvc_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/msvc_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/msvc_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/msvc_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/null_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/null_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/null_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/null_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/ostream_sink.h 
b/csrc/vnpu_offload/include/spdlog/sinks/ostream_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/ostream_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/ostream_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/qt_sinks.h b/csrc/vnpu_offload/include/spdlog/sinks/qt_sinks.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/qt_sinks.h rename to csrc/vnpu_offload/include/spdlog/sinks/qt_sinks.h diff --git a/csrc/idle_offload/include/spdlog/sinks/ringbuffer_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/ringbuffer_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/ringbuffer_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/ringbuffer_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/rotating_file_sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/rotating_file_sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/rotating_file_sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/rotating_file_sink-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/rotating_file_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/rotating_file_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/rotating_file_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/rotating_file_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/sink-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/sink.h b/csrc/vnpu_offload/include/spdlog/sinks/sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/stdout_color_sinks-inl.h 
b/csrc/vnpu_offload/include/spdlog/sinks/stdout_color_sinks-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/stdout_color_sinks-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/stdout_color_sinks-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/stdout_color_sinks.h b/csrc/vnpu_offload/include/spdlog/sinks/stdout_color_sinks.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/stdout_color_sinks.h rename to csrc/vnpu_offload/include/spdlog/sinks/stdout_color_sinks.h diff --git a/csrc/idle_offload/include/spdlog/sinks/stdout_sinks-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/stdout_sinks-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/stdout_sinks-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/stdout_sinks-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/stdout_sinks.h b/csrc/vnpu_offload/include/spdlog/sinks/stdout_sinks.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/stdout_sinks.h rename to csrc/vnpu_offload/include/spdlog/sinks/stdout_sinks.h diff --git a/csrc/idle_offload/include/spdlog/sinks/syslog_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/syslog_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/syslog_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/syslog_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/systemd_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/systemd_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/systemd_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/systemd_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/tcp_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/tcp_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/tcp_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/tcp_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/udp_sink.h 
b/csrc/vnpu_offload/include/spdlog/sinks/udp_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/udp_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/udp_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/win_eventlog_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/win_eventlog_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/win_eventlog_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/win_eventlog_sink.h diff --git a/csrc/idle_offload/include/spdlog/sinks/wincolor_sink-inl.h b/csrc/vnpu_offload/include/spdlog/sinks/wincolor_sink-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/wincolor_sink-inl.h rename to csrc/vnpu_offload/include/spdlog/sinks/wincolor_sink-inl.h diff --git a/csrc/idle_offload/include/spdlog/sinks/wincolor_sink.h b/csrc/vnpu_offload/include/spdlog/sinks/wincolor_sink.h similarity index 100% rename from csrc/idle_offload/include/spdlog/sinks/wincolor_sink.h rename to csrc/vnpu_offload/include/spdlog/sinks/wincolor_sink.h diff --git a/csrc/idle_offload/include/spdlog/spdlog-inl.h b/csrc/vnpu_offload/include/spdlog/spdlog-inl.h similarity index 100% rename from csrc/idle_offload/include/spdlog/spdlog-inl.h rename to csrc/vnpu_offload/include/spdlog/spdlog-inl.h diff --git a/csrc/idle_offload/include/spdlog/spdlog.h b/csrc/vnpu_offload/include/spdlog/spdlog.h similarity index 100% rename from csrc/idle_offload/include/spdlog/spdlog.h rename to csrc/vnpu_offload/include/spdlog/spdlog.h diff --git a/csrc/idle_offload/include/spdlog/stopwatch.h b/csrc/vnpu_offload/include/spdlog/stopwatch.h similarity index 100% rename from csrc/idle_offload/include/spdlog/stopwatch.h rename to csrc/vnpu_offload/include/spdlog/stopwatch.h diff --git a/csrc/idle_offload/include/spdlog/tweakme.h b/csrc/vnpu_offload/include/spdlog/tweakme.h similarity index 100% rename from csrc/idle_offload/include/spdlog/tweakme.h rename to 
csrc/vnpu_offload/include/spdlog/tweakme.h diff --git a/csrc/idle_offload/include/spdlog/version.h b/csrc/vnpu_offload/include/spdlog/version.h similarity index 100% rename from csrc/idle_offload/include/spdlog/version.h rename to csrc/vnpu_offload/include/spdlog/version.h diff --git a/csrc/idle_offload/npu_helper.h b/csrc/vnpu_offload/npu_helper.h similarity index 95% rename from csrc/idle_offload/npu_helper.h rename to csrc/vnpu_offload/npu_helper.h index 7280b64..a61eaea 100644 --- a/csrc/idle_offload/npu_helper.h +++ b/csrc/vnpu_offload/npu_helper.h @@ -8,7 +8,7 @@ #include "acl/acl.h" -std::vector get_available_devices() { +static inline std::vector get_available_devices() { namespace fs = std::filesystem; std::vector devices; @@ -46,7 +46,7 @@ std::vector get_available_devices() { return devices; } -std::vector get_npu_ids() { +static inline std::vector get_npu_ids() { std::vector available_devices = get_available_devices(); std::vector npu_ids; uint32_t device_count = 0; diff --git a/csrc/idle_offload/shm_helper.h b/csrc/vnpu_offload/shm_helper.h similarity index 92% rename from csrc/idle_offload/shm_helper.h rename to csrc/vnpu_offload/shm_helper.h index 74aa979..f6d9b3c 100644 --- a/csrc/idle_offload/shm_helper.h +++ b/csrc/vnpu_offload/shm_helper.h @@ -10,23 +10,21 @@ #include #include #include -#include #include #include -#include #include "spdlog/spdlog.h" #define MAX_WORKERS 60 #define MAX_DEVICES 16 -// static constexpr const char *SHM_NAME = "/vllm_acl_vnpu_offload_shm"; + static inline std::string get_shm_name() { - const char *env_shm_name = getenv("VLLM_IDLE_OFFLOAD_SHM_NAME"); + const char *env_shm_name = getenv("VLLM_VNPU_SHM_NAME"); if (env_shm_name) { if (env_shm_name[0] != '/') { spdlog::error( - "The shm name specified by VLLM_IDLE_OFFLOAD_SHM_NAME should start " + "The shm name specified by VLLM_VNPU_SHM_NAME should start " "with '/'"); exit(-1); } diff --git a/csrc/idle_offload/shm_manager.cpp b/csrc/vnpu_offload/shm_manager.cpp similarity 
index 93% rename from csrc/idle_offload/shm_manager.cpp rename to csrc/vnpu_offload/shm_manager.cpp index bfeea96..cb2e99f 100644 --- a/csrc/idle_offload/shm_manager.cpp +++ b/csrc/vnpu_offload/shm_manager.cpp @@ -67,7 +67,6 @@ void ShmManager::run_busy_loop() { spdlog::info("ShmManager busy loop stopped"); } - void ShmManager::process_requests() { uint64_t req_status = shm_helper->req_ready.load(std::memory_order_acquire); if (req_status == ShmHelper::READY_STATE_REQUEST_READY) { @@ -167,6 +166,15 @@ void ShmManager::check_heart_beats() { std::memory_order_release); } } + // check request lock + if (shm_helper->req_ready.load(std::memory_order_acquire) != + ShmHelper::READY_STATE_NO_REQUEST && + shm_helper->request.tgid == tgid) { + spdlog::warn("Releasing request lock held by dead worker TGID {}", + tgid); + shm_helper->req_ready.store(ShmHelper::READY_STATE_NO_REQUEST, + std::memory_order_release); + } local_worker_tgids[i] = 0; alive_worker_tgids.erase(std::remove(alive_worker_tgids.begin(), alive_worker_tgids.end(), tgid), diff --git a/csrc/idle_offload/shm_manager.h b/csrc/vnpu_offload/shm_manager.h similarity index 100% rename from csrc/idle_offload/shm_manager.h rename to csrc/vnpu_offload/shm_manager.h diff --git a/csrc/idle_offload/shm_worker.cpp b/csrc/vnpu_offload/shm_worker.cpp similarity index 99% rename from csrc/idle_offload/shm_worker.cpp rename to csrc/vnpu_offload/shm_worker.cpp index e1dbc25..3a73a78 100644 --- a/csrc/idle_offload/shm_worker.cpp +++ b/csrc/vnpu_offload/shm_worker.cpp @@ -20,7 +20,6 @@ ShmWorker::ShmWorker() { shm_helper = static_cast(ptr); } - ShmWorker::~ShmWorker() { stop_heart_beat.store(true, std::memory_order_release); heart_beat_thread.join(); diff --git a/csrc/idle_offload/shm_worker.h b/csrc/vnpu_offload/shm_worker.h similarity index 100% rename from csrc/idle_offload/shm_worker.h rename to csrc/vnpu_offload/shm_worker.h diff --git a/csrc/idle_offload/offload_daemon.cpp b/csrc/vnpu_offload/vnpu_daemon.cpp similarity 
index 86% rename from csrc/idle_offload/offload_daemon.cpp rename to csrc/vnpu_offload/vnpu_daemon.cpp index 8a4536d..5a29d4a 100644 --- a/csrc/idle_offload/offload_daemon.cpp +++ b/csrc/vnpu_offload/vnpu_daemon.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "acl/acl.h" @@ -17,8 +18,6 @@ #include "spdlog/spdlog.h" -static constexpr size_t reserved_mem_size = 8ul * 1024 * 1024 * 1024; // 8GB - static ShmManager *shm_manager = nullptr; void handle_signal(int sig) { @@ -28,14 +27,34 @@ void handle_signal(int sig) { } void install_signal_handlers() { - struct sigaction sa{}; - sa.sa_handler = handle_signal; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; + struct sigaction sa{}; + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; - sigaction(SIGINT, &sa, nullptr); - sigaction(SIGTERM, &sa, nullptr); - sigaction(SIGHUP, &sa, nullptr); + sigaction(SIGINT, &sa, nullptr); + sigaction(SIGTERM, &sa, nullptr); + sigaction(SIGHUP, &sa, nullptr); +} + +size_t get_reserved_vram_size() { + static std::once_flag flag; + static size_t reserved_vram_size = 8ul * 1024 * 1024 * 1024; // default 8GB + + std::call_once(flag, []() { + const char *env_p = std::getenv("VNPU_RESERVED_VRAM_SIZE_GB"); + if (env_p) { + try { + size_t size_gb = std::stoul(env_p); + reserved_vram_size = size_gb * 1024 * 1024 * 1024; + } catch (const std::exception &e) { + spdlog::warn("Failed to parse VNPU_RESERVED_VRAM_SIZE_GB: {}, using " + "default 8GB", + e.what()); + } + } + }); + return reserved_vram_size; } void ensure_context(unsigned long long device) { @@ -113,6 +132,7 @@ void alloc_physical(uint32_t device_id, aclrtDrvMemHandle &out_mem_handle, spdlog::info("aclrtMemGetAllocationGranularity succeeded, granularity: {}", granularity); } + size_t reserved_mem_size = get_reserved_vram_size(); if (free_mem < reserved_mem_size) { spdlog::error("Not enough free memory to reserve: {}, free_mem: {}", reserved_mem_size, free_mem); diff --git 
a/vllm_ascend/device_allocator/camem.py b/vllm_ascend/device_allocator/camem.py index f6f451b..1890839 100644 --- a/vllm_ascend/device_allocator/camem.py +++ b/vllm_ascend/device_allocator/camem.py @@ -58,7 +58,7 @@ def find_loaded_library(lib_name) -> Optional[str]: camem_available = False try: - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: from vllm_ascend.vllm_ascend_C import ( # type: ignore # noqa: F401 init_module_offload as init_module, python_create_and_map_offload as python_create_and_map,python_unmap_and_release_offload as python_unmap_and_release, @@ -109,7 +109,7 @@ def get_pluggable_allocator( python_malloc_fn: Callable[[tuple[int, int, int, int]], None], python_free_func: Callable[[int], tuple[int, int, int, int]] ) -> torch.npu.memory.NPUPluggableAllocator: - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: current_device = torch.npu.current_device() init_module(python_malloc_fn, python_free_func, current_device) new_alloc = torch.npu.memory.NPUPluggableAllocator( @@ -281,7 +281,7 @@ class CaMemAllocator: # see https://github.com/pytorch/pytorch/issues/146431 . self.allocator_and_pools[tag] = data # lock gpu - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: self._vnpu_lock_gpu() yield # PyTorch's bug, calling torch.cuda.empty_cache() will error @@ -294,7 +294,7 @@ class CaMemAllocator: # allocate memory. # TODO: we need to find a way to release the memory, # i.e. 
calling torch.cuda.empty_cache() - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: self.vnpu_unlock_gpu() self.current_tag = old_tag @@ -321,12 +321,11 @@ class CaMemAllocator: return True time.sleep(0.001) - def vnpu_unlock_gpu(self): if python_unlock_gpu: python_unlock_gpu() - def get_pool_mem_info(self) -> int: + def get_pool_mem_info(self) -> tuple[int, int]: """ get available memory in reserved pool.""" return python_get_mem_info() diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 50bd6c3..8672995 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -167,7 +167,7 @@ env_variables: Dict[str, Callable[[], Any]] = { # Whether to enable transpose weight and cast format to FRACTAL_NZ. "VLLM_ASCEND_ENABLE_NZ": lambda: int(os.getenv("VLLM_ASCEND_ENABLE_NZ", 0)), - "VLLM_ASCEND_ENABLE_IDLE_OFFLOAD": lambda: int(os.getenv("VLLM_ASCEND_ENABLE_IDLE_OFFLOAD", 1)), + "VLLM_ASCEND_ENABLE_VNPU": lambda: int(os.getenv("VLLM_ASCEND_ENABLE_VNPU", 1)), } # end-env-vars-definition diff --git a/vllm_ascend/patch/platform/patch_core.py b/vllm_ascend/patch/platform/patch_core.py index d3e9bcd..fa32028 100644 --- a/vllm_ascend/patch/platform/patch_core.py +++ b/vllm_ascend/patch/platform/patch_core.py @@ -84,12 +84,12 @@ def run_busy_loop(self): # 1) Poll the input queue until there is work to do. self._process_input_queue() # 2) Step the engine core and return the outputs. 
- if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD and self.scheduler.has_requests() and self.model_executor.is_offloaded: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU and self.scheduler.has_requests() and self.model_executor.is_offloaded: prev_is_self = self.model_executor.reload_vram() if not prev_is_self: self.reset_prefix_cache() self._process_engine_step() - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD and not self.scheduler.has_requests() and not self.model_executor.is_offloaded: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU and not self.scheduler.has_requests() and not self.model_executor.is_offloaded: self.model_executor.offload_vram() def _process_input_queue(self): @@ -101,7 +101,7 @@ def _process_input_queue(self): if logger.isEnabledFor(DEBUG) and self.input_queue.empty(): logger.debug("EngineCore waiting for work.") waited = True - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD and not self.model_executor.is_offloaded: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU and not self.model_executor.is_offloaded: self.model_executor.offload_vram() req = self.input_queue.get() self._handle_client_request(*req) @@ -128,10 +128,10 @@ def _initialize_kv_caches( has_kv_cache = any(kv_cache_spec for kv_cache_spec in kv_cache_specs) if has_kv_cache: - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: # get available memory in idle offload mode available_gpu_memory = ( - self.model_executor.determine_available_memory_idle_offload_mode()) + self.model_executor.determine_available_memory_vnpu_offload_mode()) self.available_gpu_memory_for_kv_cache = \ available_gpu_memory[0] elif os.environ.get("VLLM_ELASTIC_EP_SCALE_UP_LAUNCH") == "1": diff --git a/vllm_ascend/patch/platform/patch_executor.py b/vllm_ascend/patch/platform/patch_executor.py index e3a6a32..ace312c 100644 --- a/vllm_ascend/patch/platform/patch_executor.py +++ b/vllm_ascend/patch/platform/patch_executor.py @@ -43,11 +43,11 @@ def reload_vram(self) -> bool: time.sleep(0.001) -def 
determine_available_memory_idle_offload_mode(self) -> int: - return self.collective_rpc("determine_available_memory_idle_offload_mode") +def determine_available_memory_vnpu_offload_mode(self) -> int: + return self.collective_rpc("determine_available_memory_vnpu_offload_mode") ExecutorBase.__init__ = init ExecutorBase.offload_vram = offload_vram ExecutorBase.reload_vram = reload_vram -ExecutorBase.determine_available_memory_idle_offload_mode = determine_available_memory_idle_offload_mode +ExecutorBase.determine_available_memory_vnpu_offload_mode = determine_available_memory_vnpu_offload_mode diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index 1c1ab34..e38e16f 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -258,7 +258,7 @@ class NPUWorker(WorkerBase): ) return available_kv_cache_memory - def determine_available_memory_idle_offload_mode(self) -> int: + def determine_available_memory_vnpu_offload_mode(self) -> int: allocator = CaMemAllocator.get_instance() free, total = allocator.get_pool_mem_info() available_kv_cache_memory = int( @@ -317,7 +317,7 @@ class NPUWorker(WorkerBase): "Sleep mode can only be " "used for one instance per process.") context = allocator.use_memory_pool(tag="weights") - elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU: if not sleep_mode_enabled(): raise ValueError( "Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1." 
@@ -336,7 +336,7 @@ class NPUWorker(WorkerBase): context = nullcontext() # type: ignore with context: self.model_runner.load_model() - if envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + if envs_ascend.VLLM_ASCEND_ENABLE_VNPU: # save memory to host with lock self.offload_vram() succ, _ = self.try_reload_vram() @@ -402,7 +402,7 @@ class NPUWorker(WorkerBase): if self.vllm_config.model_config.enable_sleep_mode: allocator = CaMemAllocator.get_instance() context = allocator.use_memory_pool(tag="kv_cache") - elif envs_ascend.VLLM_ASCEND_ENABLE_IDLE_OFFLOAD: + elif envs_ascend.VLLM_ASCEND_ENABLE_VNPU: allocator = CaMemAllocator.get_instance() context = allocator.use_memory_pool(tag="kv_cache") else: