diff --git a/CMakeLists.txt b/CMakeLists.txt
index 811ad5f8..3235facd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,10 +96,12 @@ message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
if(SOC_VERSION MATCHES "ascend310p.*")
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
else()
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp)
endif()
@@ -113,6 +115,7 @@ include_directories(
${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/include
)
set(
diff --git a/Dockerfile b/Dockerfile
index 0afde8f8..c164951a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang-15 (for triton-ascend)
RUN apt-get update -y && \
apt-get -y install clang-15 && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index a275c798..b9c5b8ba 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -54,6 +54,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
@@ -68,6 +70,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 0c95f133..edb155b8 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -50,6 +50,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
@@ -64,6 +66,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 2c31d878..2bf2d95b 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -64,6 +64,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -83,6 +85,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang-15 (for triton-ascend)
RUN apt-get update -y && \
apt-get -y install clang-15 && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 4ed461a4..4c511e5c 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang (for triton-ascend)
RUN yum update -y && \
yum install -y clang && \
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 4bb86318..2c815f53 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang (for triton-ascend)
RUN yum update -y && \
yum install -y clang && \
diff --git a/README-vllm-ascend.md b/README-vllm-ascend.md
new file mode 100644
index 00000000..fce5d824
--- /dev/null
+++ b/README-vllm-ascend.md
@@ -0,0 +1,106 @@
+
+
+
+
+
+
+
+
+vLLM Ascend Plugin
+
+
+
+
+[](https://deepwiki.com/vllm-project/vllm-ascend)
+
+
+
+
+| About Ascend | Documentation | #SIG-Ascend | Users Forum | Weekly Meeting |
+
+
+
+English | 中文
+
+
+---
+*Latest News* 🔥
+
+- [2026/02] We released the new official version [v0.13.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.13.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to start using vLLM Ascend Plugin on Ascend.
+- [2025/12] We released the new official version [v0.11.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.11.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.11.0/) to start using vLLM Ascend Plugin on Ascend.
+- [2025/09] We released the new official version [v0.9.1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.9.1)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/large_scale_ep.html) to start deploying large-scale Expert Parallelism (EP) on Ascend.
+- [2025/08] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/7n8OYNrCC_I9SJaybHA_-Q) with vLLM and Tencent! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
+- [2025/06] [User stories](https://docs.vllm.ai/projects/ascend/en/latest/community/user_stories/index.html) page is now live! It kicks off with LLaMA-Factory/verl/TRL/GPUStack to demonstrate how vLLM Ascend assists Ascend users in enhancing their experience across fine-tuning, evaluation, reinforcement learning (RL), and deployment scenarios.
+- [2025/06] [Contributors](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html) page is now live! All contributions deserve to be recorded, thanks for all contributors.
+- [2025/05] We've released the first official version [v0.7.3](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3)! We collaborated with the vLLM community to publish a blog post sharing our practice: [Introducing vLLM Hardware Plugin, Best Practice from Ascend NPU](https://blog.vllm.ai/2025/05/12/hardware-plugin.html).
+- [2025/03] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/VtxO9WXa5fC-mKqlxNUJUQ) with vLLM team! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
+- [2025/02] vLLM community officially created [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-ascend) repo for running vLLM seamlessly on the Ascend NPU.
+- [2024/12] We are working with the vLLM community to support [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162).
+
+---
+
+## Overview
+
+vLLM Ascend (`vllm-ascend`) is a community maintained hardware plugin for running vLLM seamlessly on the Ascend NPU.
+
+It is the recommended approach for supporting the Ascend backend within the vLLM community. It adheres to the principles outlined in the [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162), providing a hardware-pluggable interface that decouples the integration of the Ascend NPU with vLLM.
+
+By using vLLM Ascend plugin, popular open-source models, including Transformer-like, Mixture-of-Experts (MoE), Embedding, Multi-modal LLMs can run seamlessly on the Ascend NPU.
+
+## Prerequisites
+
+- Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series, Atlas 800I A3 Inference series, Atlas A3 Training series, Atlas 300I Duo (Experimental)
+- OS: Linux
+- Software:
+ - Python >= 3.10, < 3.12
+ - CANN == 8.5.0 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC2/releasenote/releasenote_0000.html))
+ - PyTorch == 2.9.0, torch-npu == 2.9.0
+ - vLLM (the same version as vllm-ascend)
+
+## Getting Started
+
+Please use the following recommended versions to get started quickly:
+
+| Version | Release type | Doc |
+|------------|--------------|--------------------------------------|
+| v0.17.0rc1 | Latest release candidate | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/latest/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) for more details |
+| v0.13.0 | Latest stable version | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/v0.13.0/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/v0.13.0/installation.html) for more details |
+
+## Contributing
+
+See [CONTRIBUTING](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/contribution/index.html) for more details, which is a step-by-step guide to help you set up the development environment, build and test.
+
+We welcome and value any contributions and collaborations:
+
+- Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues)
+- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help.
+
+## Branch
+
+vllm-ascend has a main branch and a dev branch.
+
+- **main**: main branch, corresponds to the vLLM main branch, and is continuously monitored for quality through Ascend CI.
+- **releases/vX.Y.Z**: development branch, created alongside new releases of vLLM. For example, `releases/v0.13.0` is the dev branch for vLLM `v0.13.0` version.
+
+Below are the maintained branches:
+
+| Branch | Status | Note |
+|------------|--------------|--------------------------------------|
+| main | Maintained | CI commitment for vLLM main branch and vLLM v0.17.0 tag |
+| v0.7.1-dev | Unmaintained | Only doc fixes are allowed |
+| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version, only bug fixes are allowed, and no new release tags anymore. |
+| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version |
+| v0.11.0-dev | Maintained | CI commitment for vLLM 0.11.0 version |
+| releases/v0.13.0 | Maintained | CI commitment for vLLM 0.13.0 version |
+| rfc/feature-name | Maintained | [Feature branches](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#feature-branches) for collaboration |
+
+Please refer to [Versioning policy](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html) for more details.
+
+## Weekly Meeting
+
+- vLLM Ascend Weekly Meeting:
+- Wednesday, 15:00 - 16:00 (UTC+8, [Convert to your timezone](https://dateful.com/convert/gmt8?t=15))
+
+## License
+
+Apache License 2.0, as found in the [LICENSE](./LICENSE) file.
diff --git a/README.zh.md b/README-vllm-ascend.zh.md
similarity index 100%
rename from README.zh.md
rename to README-vllm-ascend.zh.md
diff --git a/README.md b/README.md
index fce5d824..5db909ce 100644
--- a/README.md
+++ b/README.md
@@ -1,106 +1,38 @@
-
-
-
-
-
-
-
-
-vLLM Ascend Plugin
-
-
-
-
-[](https://deepwiki.com/vllm-project/vllm-ascend)
-
-
-
-
-| About Ascend | Documentation | #SIG-Ascend | Users Forum | Weekly Meeting |
-
-
-
-English | 中文
-
-
----
-*Latest News* 🔥
-
-- [2026/02] We released the new official version [v0.13.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.13.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to start using vLLM Ascend Plugin on Ascend.
-- [2025/12] We released the new official version [v0.11.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.11.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.11.0/) to start using vLLM Ascend Plugin on Ascend.
-- [2025/09] We released the new official version [v0.9.1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.9.1)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/large_scale_ep.html) to start deploying large-scale Expert Parallelism (EP) on Ascend.
-- [2025/08] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/7n8OYNrCC_I9SJaybHA_-Q) with vLLM and Tencent! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
-- [2025/06] [User stories](https://docs.vllm.ai/projects/ascend/en/latest/community/user_stories/index.html) page is now live! It kicks off with LLaMA-Factory/verl/TRL/GPUStack to demonstrate how vLLM Ascend assists Ascend users in enhancing their experience across fine-tuning, evaluation, reinforcement learning (RL), and deployment scenarios.
-- [2025/06] [Contributors](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html) page is now live! All contributions deserve to be recorded, thanks for all contributors.
-- [2025/05] We've released the first official version [v0.7.3](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3)! We collaborated with the vLLM community to publish a blog post sharing our practice: [Introducing vLLM Hardware Plugin, Best Practice from Ascend NPU](https://blog.vllm.ai/2025/05/12/hardware-plugin.html).
-- [2025/03] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/VtxO9WXa5fC-mKqlxNUJUQ) with vLLM team! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
-- [2025/02] vLLM community officially created [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-ascend) repo for running vLLM seamlessly on the Ascend NPU.
-- [2024/12] We are working with the vLLM community to support [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162).
-
----
+# XC-LLM: A Specially Optimized LLM Inference Engine for ModelHub XC
## Overview
-vLLM Ascend (`vllm-ascend`) is a community maintained hardware plugin for running vLLM seamlessly on the Ascend NPU.
+The project is optimized based on the popular LLM inference project vLLM. This repo is for Ascend NPU.
-It is the recommended approach for supporting the Ascend backend within the vLLM community. It adheres to the principles outlined in the [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162), providing a hardware-pluggable interface that decouples the integration of the Ascend NPU with vLLM.
+One of the key features of this project is efficient memory coordination, enabling multiple vLLM instances to share and dynamically hold the Ascend NPU's physical memory. When an instance is idle, model parameters are offloaded to host memory. Upon a new inference request, the model parameters are quickly restored to the NPU’s memory (if not already present), without the need to initialize the engine and load the model from scratch. As a result, from the application’s perspective, multiple LLM inference engines can run on the NPU even when their total memory requirements exceed the physical memory limit. This technique is referred to as `InfiniVRAM`.
-By using vLLM Ascend plugin, popular open-source models, including Transformer-like, Mixture-of-Experts (MoE), Embedding, Multi-modal LLMs can run seamlessly on the Ascend NPU.
-## Prerequisites
+## Installation
-- Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series, Atlas 800I A3 Inference series, Atlas A3 Training series, Atlas 300I Duo (Experimental)
-- OS: Linux
-- Software:
- - Python >= 3.10, < 3.12
- - CANN == 8.5.0 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC2/releasenote/releasenote_0000.html))
- - PyTorch == 2.9.0, torch-npu == 2.9.0
- - vLLM (the same version as vllm-ascend)
+### Build from Dockerfile
-## Getting Started
+Clone this repository:
-Please use the following recommended versions to get started quickly:
+```bash
+docker build -t $build_image -f ./Dockerfile .
+```
-| Version | Release type | Doc |
-|------------|--------------|--------------------------------------|
-| v0.17.0rc1 | Latest release candidate | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/latest/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) for more details |
-| v0.13.0 | Latest stable version | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/v0.13.0/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/v0.13.0/installation.html) for more details |
+## Usage
-## Contributing
+> [!NOTE]
+> Some platforms may not allow multiple containers to share the same Ascend NPU. You may try to use a privileged container to bypass this restriction and mount all NPUs, and set the env ASCEND_RT_VISIBLE_DEVICES to specify the target device to use.
-See [CONTRIBUTING](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/contribution/index.html) for more details, which is a step-by-step guide to help you set up the development environment, build and test.
+0. To share an NPU, processes coordinate via shm, so you need to start all containers with `ipc=host`.
+1. Start a daemon process in a standalone container, by running `vllm_vnpu_daemon` installed inside the image.
+2. Start LLM services with this image, following the official usage instructions.
+3. Due to the limited stream resource of Ascend NPU, you may need to restrict graph capture sizes or disable ACLgraph by setting `--enforce-eager`, especially when launching multiple LLMs. Refer to the [link](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html#how-to-troubleshoot-and-resolve-size-capture-failures-resulting-from-stream-resource-exhaustion-and-what-are-the-underlying-causes).
-We welcome and value any contributions and collaborations:
+### Environment Variables
+- `VNPU_RESERVED_VRAM_SIZE_GB`: The amount of reserved NPU memory for other miscellaneous usage. Only needs to be set for `vllm_vnpu_daemon`. Try increasing the variable if you launch multiple LLM services and encounter OOM. Default: `8`.
+- `VLLM_VNPU_SHM_NAME`: The name of the shm file. Needs to be set for all containers of the shared vNPU group. Default: `/vllm_acl_vnpu_offload_shm`.
-- Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues)
-- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help.
-## Branch
+## Limitations
-vllm-ascend has a main branch and a dev branch.
-
-- **main**: main branch, corresponds to the vLLM main branch, and is continuously monitored for quality through Ascend CI.
-- **releases/vX.Y.Z**: development branch, created alongside new releases of vLLM. For example, `releases/v0.13.0` is the dev branch for vLLM `v0.13.0` version.
-
-Below are the maintained branches:
-
-| Branch | Status | Note |
-|------------|--------------|--------------------------------------|
-| main | Maintained | CI commitment for vLLM main branch and vLLM v0.17.0 tag |
-| v0.7.1-dev | Unmaintained | Only doc fixes are allowed |
-| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version, only bug fixes are allowed, and no new release tags anymore. |
-| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version |
-| v0.11.0-dev | Maintained | CI commitment for vLLM 0.11.0 version |
-| releases/v0.13.0 | Maintained | CI commitment for vLLM 0.13.0 version |
-| rfc/feature-name | Maintained | [Feature branches](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#feature-branches) for collaboration |
-
-Please refer to [Versioning policy](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html) for more details.
-
-## Weekly Meeting
-
-- vLLM Ascend Weekly Meeting:
-- Wednesday, 15:00 - 16:00 (UTC+8, [Convert to your timezone](https://dateful.com/convert/gmt8?t=15))
-
-## License
-
-Apache License 2.0, as found in the [LICENSE](./LICENSE) file.
+- Because HCCL resources cannot be shared, deploying more than one model with multi-NPU parallelism (e.g., TP) is currently not feasible.
+- The prefix cache will be reset when the LLM is restored, since we simply discard the KV cache when the LLM is offloaded.
diff --git a/csrc/camem_allocator.cpp b/csrc/camem_allocator.cpp
index aaeb3b0f..eb76b15d 100644
--- a/csrc/camem_allocator.cpp
+++ b/csrc/camem_allocator.cpp
@@ -17,6 +17,10 @@
#include
#include
#include
+#include
+
+#include "vnpu_offload/shm_worker.h"
+#include "vnpu_offload/npu_helper.h"
extern "C" {
@@ -26,6 +30,13 @@ extern "C" {
#include
#include "acl/acl.h"
+// idle offload
+static std::atomic g_initialized(false);
+static void *g_d_mem = nullptr;
+static size_t g_size = 0;
+static std::atomic_uint_fast64_t g_allocated_offset(0);
+ShmWorker *shm_worker = nullptr;
+
// Global references to Python callables
// NOTE: this is borrowed reference, so we don't need to DECREF them.
// This brings the limitation that the allocator needs to be singleton.
@@ -248,6 +259,146 @@ __attribute__ ((visibility("default"))) void my_free(void* ptr, ssize_t size, in
free(p_memHandle);
}
+__attribute__((visibility("default"))) void *
+my_malloc_offload(ssize_t size, int device, aclrtStream stream) {
+ ensure_context(device);
+
+ // first allocation, align the size, and reserve an address, and also allocate
+ // a aclrtDrvMemHandle
+
+ // Define memory allocation properties
+ aclrtPhysicalMemProp prop = {};
+ prop.handleType = ACL_MEM_HANDLE_TYPE_NONE ;
+ prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
+ prop.memAttr = ACL_HBM_MEM_HUGE;
+ prop.location.id = device;
+ prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
+ prop.reserve = 0;
+
+ // Check if the allocation is supported
+ size_t granularity;
+ aclError error_code = aclrtMemGetAllocationGranularity(&prop,
+ ACL_RT_MEM_ALLOC_GRANULARITY_MINIMUM,
+ &granularity);
+ if (error_code != 0) {
+ throw std::runtime_error("aclrtMemGetAllocationGranularity failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ size_t alignedSize = ((size + granularity - 1) / granularity) * granularity;
+ void *d_mem;
+ // error_code = aclrtReserveMemAddress(&d_mem, alignedSize, 0, nullptr, 0);
+ // if (error_code != 0) {
+ // if (error_code == ACL_ERROR_RT_MEMORY_ALLOCATION) {
+ // throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + "(OOM: Out of Memory, allocation failed) " +
+ // __FILE__ + ":" + std::to_string(__LINE__));
+ // } else {
+ // throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ // }
+ // }
+
+ // allocate from the reserved pool
+ size_t alloc_offset = g_allocated_offset.fetch_add(alignedSize);
+ if (alloc_offset + alignedSize > g_size) {
+ throw std::runtime_error(
+ "my_malloc ERROR: Out of memory in the reserved pool." +
+ std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ d_mem = (void *)((char *)g_d_mem + alloc_offset);
+
+ // allocate the aclrtDrvMemHandle
+ aclrtDrvMemHandle* p_memHandle =
+ (aclrtDrvMemHandle*)malloc(sizeof(aclrtDrvMemHandle));
+
+ if (!g_python_malloc_callback) {
+ throw std::runtime_error(
+ "my_malloc ERROR: g_python_malloc_callback not set." +
+ std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+
+ // Acquire GIL (not in stable ABI officially, but often works)
+ PyGILState_STATE gstate = PyGILState_Ensure();
+
+ PyObject* arg_tuple = create_tuple_from_c_integers(
+ (unsigned long long)device, (unsigned long long)alignedSize,
+ (unsigned long long)d_mem, (unsigned long long)p_memHandle);
+
+ // Call g_python_malloc_callback
+ PyObject* py_result =
+ PyObject_CallFunctionObjArgs(g_python_malloc_callback, arg_tuple, NULL);
+ Py_DECREF(arg_tuple);
+
+ if (!py_result) {
+ PyErr_Print();
+ PyGILState_Release(gstate);
+ return nullptr;
+ }
+
+ PyGILState_Release(gstate);
+
+ // // do the final mapping
+ // create_and_map(device, alignedSize, d_mem, p_memHandle);
+
+ return (void*)d_mem;
+}
+
+__attribute__((visibility("default"))) void
+my_free_offload(void *ptr, ssize_t size, int device, aclrtStream stream) {
+ // get memory handle from the pointer
+ if (!g_python_free_callback) {
+ throw std::runtime_error(
+ "my_free ERROR: g_python_malloc_callback not set." + std::string(" ") +
+ __FILE__ + ":" + std::to_string(__LINE__));
+ }
+
+ // Acquire GIL (not in stable ABI officially, but often works)
+ PyGILState_STATE gstate = PyGILState_Ensure();
+
+ PyObject* py_ptr =
+ PyLong_FromUnsignedLongLong(reinterpret_cast(ptr));
+
+ PyObject* py_result =
+ PyObject_CallFunctionObjArgs(g_python_free_callback, py_ptr, NULL);
+
+ if (!py_result || !PyTuple_Check(py_result) || PyTuple_Size(py_result) != 4) {
+ PyErr_SetString(PyExc_TypeError, "Expected a tuple of size 4");
+ return;
+ }
+
+ unsigned long long recv_device, recv_size;
+ unsigned long long recv_d_mem, recv_p_memHandle;
+ // Unpack the tuple into four C integers
+ if (!PyArg_ParseTuple(py_result, "KKKK", &recv_device, &recv_size,
+ &recv_d_mem, &recv_p_memHandle)) {
+ // PyArg_ParseTuple sets an error if it fails
+ return;
+ }
+
+ PyGILState_Release(gstate);
+
+ // recv_size == size
+ // recv_device == device
+
+ // Free memory
+
+ // nothing to do
+
+ // void *d_mem = (void*)recv_d_mem;
+ // // allocate the aclrtDrvMemHandle
+ // aclrtDrvMemHandle* p_memHandle =
+ // (aclrtDrvMemHandle*)recv_p_memHandle;
+ // unmap_and_release(device, size, d_mem, p_memHandle);
+
+ // // free address and the handle
+ // aclError error_code = aclrtReleaseMemAddress(d_mem);
+ // if (error_code != 0) {
+ // throw std::runtime_error("aclrtReleaseMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ // }
+ // free(p_memHandle);
+}
+
// ---------------------------------------------------------------------------
// Python extension boilerplate:
@@ -322,6 +473,132 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
Py_RETURN_NONE;
}
+
+static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
+ PyObject* malloc_callback = nullptr;
+ PyObject* free_callback = nullptr;
+ unsigned long long device = 0;
+
+ if (!PyArg_ParseTuple(args, "OOK", &malloc_callback, &free_callback,
+ &device)) {
+ return nullptr;
+ }
+
+ if (!PyCallable_Check(malloc_callback) || !PyCallable_Check(free_callback)) {
+ PyErr_SetString(PyExc_TypeError, "Both arguments must be callables");
+ return nullptr;
+ }
+
+ // Save the Python callables
+ // This module does not handle GC of these objects, so they must be kept alive
+ // outside of this module.
+ g_python_malloc_callback = malloc_callback;
+ g_python_free_callback = free_callback;
+
+ // init idle
+ if (g_initialized.load()) {
+ printf("Module already initialized.\n");
+ Py_RETURN_NONE;
+ }
+ g_initialized.store(true);
+
+ std::vector gpu_ids = get_npu_ids();
+ if (device >= gpu_ids.size()) {
+ throw std::runtime_error("Invalid device id: " + std::to_string(device) +
+ " " + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ int gpu_id = gpu_ids[device];
+
+ // get pid
+ aclError error_code;
+ int32_t pid;
+ error_code = aclrtDeviceGetBareTgid(&pid);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtDeviceGetBareTgid failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ shm_worker = new ShmWorker();
+ uint64_t shareable_handle;
+ shm_worker->register_worker(pid, gpu_id, &shareable_handle, &g_size);
+
+ // import shareable handle
+ aclrtDrvMemHandle memHandle;
+ error_code =
+ aclrtMemImportFromShareableHandle(shareable_handle, device, &memHandle);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtMemImportFromShareableHandle failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ // reserve virtual address
+ error_code = aclrtReserveMemAddress(&g_d_mem, g_size, 0, nullptr, 0);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtReserveMemAddress failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+ // map
+ error_code = aclrtMapMem(g_d_mem, g_size, 0, memHandle, 0);
+ if (error_code != 0) {
+ throw std::runtime_error("aclrtMapMem failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ Py_RETURN_NONE;
+}
+
// Intentional no-op counterpart of python_unmap_and_release for the offload
// pool: the pool stays mapped for the whole process lifetime, so there is
// nothing to unmap per allocation. Kept so the offload API mirrors the
// non-offload method table.
static PyObject *python_unmap_and_release_offload(PyObject *self,
                                                  PyObject *args) {
  // nothing to do
  Py_RETURN_NONE;
}
+
// Intentional no-op counterpart of python_create_and_map for the offload
// pool: the shared pool is created and mapped once at init, so per-allocation
// create/map is unnecessary. Kept for API symmetry with the non-offload path.
static PyObject *python_create_and_map_offload(PyObject *self, PyObject *args) {
  // nothing to do
  Py_RETURN_NONE;
}
+
+static PyObject* python_get_mem_info_offload(PyObject* self, PyObject* args) {
+ size_t allocated_bytes = g_allocated_offset.load();
+ size_t free_mem = 0;
+ if (allocated_bytes >= g_size) {
+ free_mem = 0;
+ } else {
+ free_mem = g_size - allocated_bytes;
+ }
+ PyObject* tuple = PyTuple_New(2);
+ if (!tuple) {
+ return nullptr;
+ }
+ PyTuple_SetItem(tuple, 0, PyLong_FromSize_t(free_mem));
+ PyTuple_SetItem(tuple, 1, PyLong_FromSize_t(g_size));
+ return tuple;
+}
+
+static PyObject* python_try_lock_gpu_offload(PyObject* self, PyObject* args) {
+ bool prev_is_self = false;
+ bool success = shm_worker->try_lock_gpu(prev_is_self);
+ PyObject* tuple = PyTuple_New(2);
+ if (!tuple) {
+ return nullptr;
+ }
+ PyTuple_SetItem(tuple, 0, PyBool_FromLong(success));
+ PyTuple_SetItem(tuple, 1, PyBool_FromLong(prev_is_self));
+ return tuple;
+}
+
+static PyObject* python_unlock_gpu_offload(PyObject* self, PyObject* args) {
+ shm_worker->unlock_gpu();
+ Py_RETURN_NONE;
+}
+
static PyMethodDef module_methods[] = {
{"init_module", (PyCFunction)py_init_module, METH_VARARGS,
"Initialize module with python_malloc and python_free callables."},
@@ -329,7 +606,21 @@ static PyMethodDef module_methods[] = {
"Create and map memory on the device."},
{"python_unmap_and_release", (PyCFunction)python_unmap_and_release,
METH_VARARGS, "Unmap and release memory on the device."},
- {NULL, NULL, 0, NULL} // sentinel
+ {"init_module_offload", (PyCFunction)py_init_module_offload, METH_VARARGS,
+ "Initialize module with python_malloc and python_free callables."},
+ {"python_create_and_map_offload",
+ (PyCFunction)python_create_and_map_offload, METH_VARARGS,
+ "Create and map memory on the device."},
+ {"python_unmap_and_release_offload",
+ (PyCFunction)python_unmap_and_release_offload, METH_VARARGS,
+ "Unmap and release memory on the device."},
+ {"python_get_mem_info_offload", (PyCFunction)python_get_mem_info_offload,
+ METH_NOARGS, "Get mem info in the reserved pool."},
+ {"python_try_lock_gpu_offload", (PyCFunction)python_try_lock_gpu_offload,
+ METH_NOARGS, "Lock GPU."},
+ {"python_unlock_gpu_offload", (PyCFunction)python_unlock_gpu_offload,
+ METH_NOARGS, "Unlock GPU."},
+ {NULL, NULL, 0, NULL} // sentinel
};
static struct PyModuleDef camem_allocator_module = {
diff --git a/csrc/vnpu_offload/.gitignore b/csrc/vnpu_offload/.gitignore
new file mode 100644
index 00000000..24f458a3
--- /dev/null
+++ b/csrc/vnpu_offload/.gitignore
@@ -0,0 +1 @@
+vllm_vnpu_daemon
\ No newline at end of file
diff --git a/csrc/vnpu_offload/Makefile b/csrc/vnpu_offload/Makefile
new file mode 100644
index 00000000..efcadee9
--- /dev/null
+++ b/csrc/vnpu_offload/Makefile
@@ -0,0 +1,30 @@
# Build and install the standalone vNPU offload daemon (vllm_vnpu_daemon).
CXX := g++
TARGET := vllm_vnpu_daemon
SRCS := vnpu_daemon.cpp shm_manager.cpp

# Ascend CANN toolkit location: headers under include/, libascendcl in lib64/.
ASCEND_HOME := /usr/local/Ascend/ascend-toolkit/latest
INCLUDES := -I$(ASCEND_HOME)/include -Iinclude
LIBS := -L$(ASCEND_HOME)/lib64 -lascendcl

CXXFLAGS := $(INCLUDES) -O2
LDFLAGS := $(LIBS)

# GNU-style staged-install variables; override PREFIX / DESTDIR / BINDIR
# on the command line (e.g. `make install DESTDIR=/tmp/stage`).
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

.PHONY: all clean install uninstall

all: $(TARGET)

# Single compile+link step straight from sources (no intermediate objects);
# relinks whenever any listed source changes.
$(TARGET): $(SRCS)
	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

install: $(TARGET)
	install -d $(DESTDIR)$(BINDIR)
	install -m 0755 $(TARGET) $(DESTDIR)$(BINDIR)/$(TARGET)

uninstall:
	rm -f $(DESTDIR)$(BINDIR)/$(TARGET)

clean:
	rm -f $(TARGET)
\ No newline at end of file
diff --git a/csrc/vnpu_offload/include/spdlog/async.h b/csrc/vnpu_offload/include/spdlog/async.h
new file mode 100644
index 00000000..92fcd9a7
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async.h
@@ -0,0 +1,99 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+//
+// Async logging using global thread pool
+// All loggers created here share same global thread pool.
+// Each log message is pushed to a queue along with a shared pointer to the
+// logger.
+// If a logger deleted while having pending messages in the queue, it's actual
+// destruction will defer
+// until all its messages are processed by the thread pool.
+// This is because each message in the queue holds a shared_ptr to the
+// originating logger.
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace spdlog {
+
+namespace details {
+static const size_t default_async_q_size = 8192;
+}
+
+// async logger factory - creates async loggers backed with thread pool.
+// if a global thread pool doesn't already exist, create it with default queue
+// size of 8192 items and single thread.
+template
+struct async_factory_impl {
+ template
+ static std::shared_ptr create(std::string logger_name, SinkArgs &&...args) {
+ auto ®istry_inst = details::registry::instance();
+
+ // create global thread pool if not already exists..
+
+ auto &mutex = registry_inst.tp_mutex();
+ std::lock_guard tp_lock(mutex);
+ auto tp = registry_inst.get_tp();
+ if (tp == nullptr) {
+ tp = std::make_shared(details::default_async_q_size, 1U);
+ registry_inst.set_tp(tp);
+ }
+
+ auto sink = std::make_shared(std::forward(args)...);
+ auto new_logger = std::make_shared(std::move(logger_name), std::move(sink),
+ std::move(tp), OverflowPolicy);
+ registry_inst.initialize_logger(new_logger);
+ return new_logger;
+ }
+};
+
+using async_factory = async_factory_impl;
+using async_factory_nonblock = async_factory_impl;
+
+template
+inline std::shared_ptr create_async(std::string logger_name,
+ SinkArgs &&...sink_args) {
+ return async_factory::create(std::move(logger_name),
+ std::forward(sink_args)...);
+}
+
+template
+inline std::shared_ptr create_async_nb(std::string logger_name,
+ SinkArgs &&...sink_args) {
+ return async_factory_nonblock::create(std::move(logger_name),
+ std::forward(sink_args)...);
+}
+
+// set global thread pool.
+inline void init_thread_pool(size_t q_size,
+ size_t thread_count,
+ std::function on_thread_start,
+ std::function on_thread_stop) {
+ auto tp = std::make_shared(q_size, thread_count, on_thread_start,
+ on_thread_stop);
+ details::registry::instance().set_tp(std::move(tp));
+}
+
+inline void init_thread_pool(size_t q_size,
+ size_t thread_count,
+ std::function on_thread_start) {
+ init_thread_pool(q_size, thread_count, on_thread_start, [] {});
+}
+
+inline void init_thread_pool(size_t q_size, size_t thread_count) {
+ init_thread_pool(q_size, thread_count, [] {}, [] {});
+}
+
+// get the global thread pool.
+inline std::shared_ptr thread_pool() {
+ return details::registry::instance().get_tp();
+}
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/async_logger-inl.h b/csrc/vnpu_offload/include/spdlog/async_logger-inl.h
new file mode 100644
index 00000000..cde73f90
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async_logger-inl.h
@@ -0,0 +1,84 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+
+SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
+ sinks_init_list sinks_list,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy)
+ : async_logger(std::move(logger_name),
+ sinks_list.begin(),
+ sinks_list.end(),
+ std::move(tp),
+ overflow_policy) {}
+
+SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
+ sink_ptr single_sink,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy)
+ : async_logger(
+ std::move(logger_name), {std::move(single_sink)}, std::move(tp), overflow_policy) {}
+
+// send the log message to the thread pool
+SPDLOG_INLINE void spdlog::async_logger::sink_it_(const details::log_msg &msg){
+ SPDLOG_TRY{if (auto pool_ptr = thread_pool_.lock()){
+ pool_ptr -> post_log(shared_from_this(), msg, overflow_policy_);
+}
+else {
+ throw_spdlog_ex("async log: thread pool doesn't exist anymore");
+}
+}
+SPDLOG_LOGGER_CATCH(msg.source)
+}
+
+// send flush request to the thread pool
+SPDLOG_INLINE void spdlog::async_logger::flush_(){
+ SPDLOG_TRY{if (auto pool_ptr = thread_pool_.lock()){
+ pool_ptr -> post_flush(shared_from_this(), overflow_policy_);
+}
+else {
+ throw_spdlog_ex("async flush: thread pool doesn't exist anymore");
+}
+}
+SPDLOG_LOGGER_CATCH(source_loc())
+}
+
+//
+// backend functions - called from the thread pool to do the actual job
+//
+SPDLOG_INLINE void spdlog::async_logger::backend_sink_it_(const details::log_msg &msg) {
+ for (auto &sink : sinks_) {
+ if (sink->should_log(msg.level)) {
+ SPDLOG_TRY { sink->log(msg); }
+ SPDLOG_LOGGER_CATCH(msg.source)
+ }
+ }
+
+ if (should_flush_(msg)) {
+ backend_flush_();
+ }
+}
+
+SPDLOG_INLINE void spdlog::async_logger::backend_flush_() {
+ for (auto &sink : sinks_) {
+ SPDLOG_TRY { sink->flush(); }
+ SPDLOG_LOGGER_CATCH(source_loc())
+ }
+}
+
+SPDLOG_INLINE std::shared_ptr spdlog::async_logger::clone(std::string new_name) {
+ auto cloned = std::make_shared(*this);
+ cloned->name_ = std::move(new_name);
+ return cloned;
+}
diff --git a/csrc/vnpu_offload/include/spdlog/async_logger.h b/csrc/vnpu_offload/include/spdlog/async_logger.h
new file mode 100644
index 00000000..be361538
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async_logger.h
@@ -0,0 +1,74 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+// Fast asynchronous logger.
+// Uses pre allocated queue.
+// Creates a single back thread to pop messages from the queue and log them.
+//
+// Upon each log write the logger:
+// 1. Checks if its log level is enough to log the message
+// 2. Push a new copy of the message to a queue (or block the caller until
+// space is available in the queue)
+// Upon destruction, logs all remaining messages in the queue before
+// destructing..
+
+#include
+
+namespace spdlog {
+
+// Async overflow policy - block by default.
+enum class async_overflow_policy {
+ block, // Block until message can be enqueued
+ overrun_oldest, // Discard oldest message in the queue if full when trying to
+ // add new item.
+ discard_new // Discard new message if the queue is full when trying to add new item.
+};
+
+namespace details {
+class thread_pool;
+}
+
+class SPDLOG_API async_logger final : public std::enable_shared_from_this,
+ public logger {
+ friend class details::thread_pool;
+
+public:
+ template
+ async_logger(std::string logger_name,
+ It begin,
+ It end,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block)
+ : logger(std::move(logger_name), begin, end),
+ thread_pool_(std::move(tp)),
+ overflow_policy_(overflow_policy) {}
+
+ async_logger(std::string logger_name,
+ sinks_init_list sinks_list,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block);
+
+ async_logger(std::string logger_name,
+ sink_ptr single_sink,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block);
+
+ std::shared_ptr clone(std::string new_name) override;
+
+protected:
+ void sink_it_(const details::log_msg &msg) override;
+ void flush_() override;
+ void backend_sink_it_(const details::log_msg &incoming_log_msg);
+ void backend_flush_();
+
+private:
+ std::weak_ptr thread_pool_;
+ async_overflow_policy overflow_policy_;
+};
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "async_logger-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/argv.h b/csrc/vnpu_offload/include/spdlog/cfg/argv.h
new file mode 100644
index 00000000..7de2f83e
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/argv.h
@@ -0,0 +1,40 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+#include
+#include
+
+//
+// Init log levels using each argv entry that starts with "SPDLOG_LEVEL="
+//
+// set all loggers to debug level:
+// example.exe "SPDLOG_LEVEL=debug"
+
+// set logger1 to trace level
+// example.exe "SPDLOG_LEVEL=logger1=trace"
+
+// turn off all logging except for logger1 and logger2:
+// example.exe "SPDLOG_LEVEL=off,logger1=debug,logger2=info"
+
+namespace spdlog {
+namespace cfg {
+
+// search for SPDLOG_LEVEL= in the args and use it to init the levels
+inline void load_argv_levels(int argc, const char **argv) {
+ const std::string spdlog_level_prefix = "SPDLOG_LEVEL=";
+ for (int i = 1; i < argc; i++) {
+ std::string arg = argv[i];
+ if (arg.find(spdlog_level_prefix) == 0) {
+ auto levels_string = arg.substr(spdlog_level_prefix.size());
+ helpers::load_levels(levels_string);
+ }
+ }
+}
+
+inline void load_argv_levels(int argc, char **argv) {
+ load_argv_levels(argc, const_cast(argv));
+}
+
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/env.h b/csrc/vnpu_offload/include/spdlog/cfg/env.h
new file mode 100644
index 00000000..47bf61c7
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/env.h
@@ -0,0 +1,36 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+#include
+#include
+#include
+
+//
+// Init levels and patterns from env variables SPDLOG_LEVEL
+// Inspired from Rust's "env_logger" crate (https://crates.io/crates/env_logger).
+// Note - fallback to "info" level on unrecognized levels
+//
+// Examples:
+//
+// set global level to debug:
+// export SPDLOG_LEVEL=debug
+//
+// turn off all logging except for logger1:
+// export SPDLOG_LEVEL="*=off,logger1=debug"
+//
+
+// turn off all logging except for logger1 and logger2:
+// export SPDLOG_LEVEL="off,logger1=debug,logger2=info"
+
+namespace spdlog {
+namespace cfg {
+inline void load_env_levels(const char* var = "SPDLOG_LEVEL") {
+ auto env_val = details::os::getenv(var);
+ if (!env_val.empty()) {
+ helpers::load_levels(env_val);
+ }
+}
+
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h
new file mode 100644
index 00000000..6ed86955
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h
@@ -0,0 +1,106 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace spdlog {
+namespace cfg {
+namespace helpers {
+
+// inplace convert to lowercase
+inline std::string &to_lower_(std::string &str) {
+ std::transform(str.begin(), str.end(), str.begin(), [](char ch) {
+ return static_cast((ch >= 'A' && ch <= 'Z') ? ch + ('a' - 'A') : ch);
+ });
+ return str;
+}
+
+// inplace trim spaces
+inline std::string &trim_(std::string &str) {
+ const char *spaces = " \n\r\t";
+ str.erase(str.find_last_not_of(spaces) + 1);
+ str.erase(0, str.find_first_not_of(spaces));
+ return str;
+}
+
+// return (name,value) trimmed pair from the given "name = value" string.
+// return empty string on missing parts
+// "key=val" => ("key", "val")
+// " key = val " => ("key", "val")
+// "key=" => ("key", "")
+// "val" => ("", "val")
+
+inline std::pair extract_kv_(char sep, const std::string &str) {
+ auto n = str.find(sep);
+ std::string k, v;
+ if (n == std::string::npos) {
+ v = str;
+ } else {
+ k = str.substr(0, n);
+ v = str.substr(n + 1);
+ }
+ return std::make_pair(trim_(k), trim_(v));
+}
+
+// return vector of key/value pairs from a sequence of "K1=V1,K2=V2,.."
+// "a=AAA,b=BBB,c=CCC,.." => {("a","AAA"),("b","BBB"),("c", "CCC"),...}
+inline std::unordered_map extract_key_vals_(const std::string &str) {
+ std::string token;
+ std::istringstream token_stream(str);
+ std::unordered_map rv{};
+ while (std::getline(token_stream, token, ',')) {
+ if (token.empty()) {
+ continue;
+ }
+ auto kv = extract_kv_('=', token);
+ rv[kv.first] = kv.second;
+ }
+ return rv;
+}
+
+SPDLOG_INLINE void load_levels(const std::string &input) {
+ if (input.empty() || input.size() >= 32768) {
+ return;
+ }
+
+ auto key_vals = extract_key_vals_(input);
+ std::unordered_map levels;
+ level::level_enum global_level = level::info;
+ bool global_level_found = false;
+
+ for (auto &name_level : key_vals) {
+ const auto &logger_name = name_level.first;
+ const auto &level_name = to_lower_(name_level.second);
+ auto level = level::from_str(level_name);
+ // ignore unrecognized level names
+ if (level == level::off && level_name != "off") {
+ continue;
+ }
+ if (logger_name.empty()) // no logger name indicates global level
+ {
+ global_level_found = true;
+ global_level = level;
+ } else {
+ levels[logger_name] = level;
+ }
+ }
+
+ details::registry::instance().set_levels(std::move(levels),
+ global_level_found ? &global_level : nullptr);
+}
+
+} // namespace helpers
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/helpers.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers.h
new file mode 100644
index 00000000..d09a1e97
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/helpers.h
@@ -0,0 +1,29 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace cfg {
+namespace helpers {
+//
+// Init levels from given string
+//
+// Examples:
+//
+// set global level to debug: "debug"
+// turn off all logging except for logger1: "off,logger1=debug"
+// turn off all logging except for logger1 and logger2: "off,logger1=debug,logger2=info"
+//
+SPDLOG_API void load_levels(const std::string &txt);
+} // namespace helpers
+
+} // namespace cfg
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "helpers-inl.h"
+#endif // SPDLOG_HEADER_ONLY
diff --git a/csrc/vnpu_offload/include/spdlog/common-inl.h b/csrc/vnpu_offload/include/spdlog/common-inl.h
new file mode 100644
index 00000000..f35901c5
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/common-inl.h
@@ -0,0 +1,68 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+namespace spdlog {
+namespace level {
+
+#if __cplusplus >= 201703L
+constexpr
+#endif
+ static string_view_t level_string_views[] SPDLOG_LEVEL_NAMES;
+
+static const char *short_level_names[] SPDLOG_SHORT_LEVEL_NAMES;
+
+SPDLOG_INLINE const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
+ return level_string_views[l];
+}
+
+SPDLOG_INLINE const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
+ return short_level_names[l];
+}
+
+SPDLOG_INLINE spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT {
+ auto it = std::find(std::begin(level_string_views), std::end(level_string_views), name);
+ if (it != std::end(level_string_views))
+ return static_cast(std::distance(std::begin(level_string_views), it));
+
+ // check also for "warn" and "err" before giving up..
+ if (name == "warn") {
+ return level::warn;
+ }
+ if (name == "err") {
+ return level::err;
+ }
+ return level::off;
+}
+} // namespace level
+
+SPDLOG_INLINE spdlog_ex::spdlog_ex(std::string msg)
+ : msg_(std::move(msg)) {}
+
+SPDLOG_INLINE spdlog_ex::spdlog_ex(const std::string &msg, int last_errno) {
+#ifdef SPDLOG_USE_STD_FORMAT
+ msg_ = std::system_error(std::error_code(last_errno, std::generic_category()), msg).what();
+#else
+ memory_buf_t outbuf;
+ fmt::format_system_error(outbuf, last_errno, msg.c_str());
+ msg_ = fmt::to_string(outbuf);
+#endif
+}
+
+SPDLOG_INLINE const char *spdlog_ex::what() const SPDLOG_NOEXCEPT { return msg_.c_str(); }
+
+SPDLOG_INLINE void throw_spdlog_ex(const std::string &msg, int last_errno) {
+ SPDLOG_THROW(spdlog_ex(msg, last_errno));
+}
+
+SPDLOG_INLINE void throw_spdlog_ex(std::string msg) { SPDLOG_THROW(spdlog_ex(std::move(msg))); }
+
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/common.h b/csrc/vnpu_offload/include/spdlog/common.h
new file mode 100644
index 00000000..20dbca45
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/common.h
@@ -0,0 +1,406 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SPDLOG_USE_STD_FORMAT
+#include
+#if __cpp_lib_format >= 202207L
+#include
+#else
+#include
+#endif
+#endif
+
+#ifdef SPDLOG_COMPILED_LIB
+#undef SPDLOG_HEADER_ONLY
+#if defined(SPDLOG_SHARED_LIB)
+#if defined(_WIN32)
+#ifdef spdlog_EXPORTS
+#define SPDLOG_API __declspec(dllexport)
+#else // !spdlog_EXPORTS
+#define SPDLOG_API __declspec(dllimport)
+#endif
+#else // !defined(_WIN32)
+#define SPDLOG_API __attribute__((visibility("default")))
+#endif
+#else // !defined(SPDLOG_SHARED_LIB)
+#define SPDLOG_API
+#endif
+#define SPDLOG_INLINE
+#else // !defined(SPDLOG_COMPILED_LIB)
+#define SPDLOG_API
+#define SPDLOG_HEADER_ONLY
+#define SPDLOG_INLINE inline
+#endif // #ifdef SPDLOG_COMPILED_LIB
+
+#include
+
+#if !defined(SPDLOG_USE_STD_FORMAT) && \
+ FMT_VERSION >= 80000 // backward compatibility with fmt versions older than 8
+#define SPDLOG_FMT_RUNTIME(format_string) fmt::runtime(format_string)
+#define SPDLOG_FMT_STRING(format_string) FMT_STRING(format_string)
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+#include
+#endif
+#else
+#define SPDLOG_FMT_RUNTIME(format_string) format_string
+#define SPDLOG_FMT_STRING(format_string) format_string
+#endif
+
+// visual studio up to 2013 does not support noexcept nor constexpr
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define SPDLOG_NOEXCEPT _NOEXCEPT
+#define SPDLOG_CONSTEXPR
+#else
+#define SPDLOG_NOEXCEPT noexcept
+#define SPDLOG_CONSTEXPR constexpr
+#endif
+
+// If building with std::format, can just use constexpr, otherwise if building with fmt
+// SPDLOG_CONSTEXPR_FUNC needs to be set the same as FMT_CONSTEXPR to avoid situations where
+// a constexpr function in spdlog could end up calling a non-constexpr function in fmt
+// depending on the compiler
+// If fmt determines it can't use constexpr, we should inline the function instead
+#ifdef SPDLOG_USE_STD_FORMAT
+#define SPDLOG_CONSTEXPR_FUNC constexpr
+#else // Being built with fmt
+#if FMT_USE_CONSTEXPR
+#define SPDLOG_CONSTEXPR_FUNC FMT_CONSTEXPR
+#else
+#define SPDLOG_CONSTEXPR_FUNC inline
+#endif
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define SPDLOG_DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)
+#define SPDLOG_DEPRECATED __declspec(deprecated)
+#else
+#define SPDLOG_DEPRECATED
+#endif
+
+// disable thread local on msvc 2013
+#ifndef SPDLOG_NO_TLS
+#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(__cplusplus_winrt)
+#define SPDLOG_NO_TLS 1
+#endif
+#endif
+
+#ifndef SPDLOG_FUNCTION
+#define SPDLOG_FUNCTION static_cast(__FUNCTION__)
+#endif
+
+#ifdef SPDLOG_NO_EXCEPTIONS
+#define SPDLOG_TRY
+#define SPDLOG_THROW(ex) \
+ do { \
+ printf("spdlog fatal error: %s\n", ex.what()); \
+ std::abort(); \
+ } while (0)
+#define SPDLOG_CATCH_STD
+#else
+#define SPDLOG_TRY try
+#define SPDLOG_THROW(ex) throw(ex)
+#define SPDLOG_CATCH_STD \
+ catch (const std::exception &) { \
+ }
+#endif
+
+namespace spdlog {
+
+class formatter;
+
+namespace sinks {
+class sink;
+}
+
+#if defined(_WIN32) && defined(SPDLOG_WCHAR_FILENAMES)
+using filename_t = std::wstring;
+// allow macro expansion to occur in SPDLOG_FILENAME_T
+#define SPDLOG_FILENAME_T_INNER(s) L##s
+#define SPDLOG_FILENAME_T(s) SPDLOG_FILENAME_T_INNER(s)
+#else
+using filename_t = std::string;
+#define SPDLOG_FILENAME_T(s) s
+#endif
+
+using log_clock = std::chrono::system_clock;
+using sink_ptr = std::shared_ptr;
+using sinks_init_list = std::initializer_list;
+using err_handler = std::function;
+#ifdef SPDLOG_USE_STD_FORMAT
+namespace fmt_lib = std;
+
+using string_view_t = std::string_view;
+using memory_buf_t = std::string;
+
+template
+#if __cpp_lib_format >= 202207L
+using format_string_t = std::format_string;
+#else
+using format_string_t = std::string_view;
+#endif
+
+template
+struct is_convertible_to_basic_format_string
+ : std::integral_constant>::value> {};
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+using wstring_view_t = std::wstring_view;
+using wmemory_buf_t = std::wstring;
+
+template
+#if __cpp_lib_format >= 202207L
+using wformat_string_t = std::wformat_string;
+#else
+using wformat_string_t = std::wstring_view;
+#endif
+#endif
+#define SPDLOG_BUF_TO_STRING(x) x
+#else // use fmt lib instead of std::format
+namespace fmt_lib = fmt;
+
+using string_view_t = fmt::basic_string_view;
+using memory_buf_t = fmt::basic_memory_buffer;
+
+template
+using format_string_t = fmt::format_string;
+
+template
+using remove_cvref_t = typename std::remove_cv::type>::type;
+
+template
+#if FMT_VERSION >= 90101
+using fmt_runtime_string = fmt::runtime_format_string;
+#else
+using fmt_runtime_string = fmt::basic_runtime;
+#endif
+
+// clang doesn't like SFINAE disabled constructor in std::is_convertible<> so have to repeat the
+// condition from basic_format_string here, in addition, fmt::basic_runtime is only
+// convertible to basic_format_string but not basic_string_view
+template
+struct is_convertible_to_basic_format_string
+ : std::integral_constant>::value ||
+ std::is_same, fmt_runtime_string>::value> {
+};
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+using wstring_view_t = fmt::basic_string_view;
+using wmemory_buf_t = fmt::basic_memory_buffer;
+
+template
+using wformat_string_t = fmt::wformat_string;
+#endif
+#define SPDLOG_BUF_TO_STRING(x) fmt::to_string(x)
+#endif
+
+#ifdef SPDLOG_WCHAR_TO_UTF8_SUPPORT
+#ifndef _WIN32
+#error SPDLOG_WCHAR_TO_UTF8_SUPPORT only supported on windows
+#endif // _WIN32
+#endif // SPDLOG_WCHAR_TO_UTF8_SUPPORT
+
+template
+struct is_convertible_to_any_format_string
+ : std::integral_constant::value ||
+ is_convertible_to_basic_format_string::value> {};
+
+#if defined(SPDLOG_NO_ATOMIC_LEVELS)
+using level_t = details::null_atomic_int;
+#else
+using level_t = std::atomic;
+#endif
+
+#define SPDLOG_LEVEL_TRACE 0
+#define SPDLOG_LEVEL_DEBUG 1
+#define SPDLOG_LEVEL_INFO 2
+#define SPDLOG_LEVEL_WARN 3
+#define SPDLOG_LEVEL_ERROR 4
+#define SPDLOG_LEVEL_CRITICAL 5
+#define SPDLOG_LEVEL_OFF 6
+
+#if !defined(SPDLOG_ACTIVE_LEVEL)
+#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO
+#endif
+
+// Log level enum
+namespace level {
+enum level_enum : int {
+ trace = SPDLOG_LEVEL_TRACE,
+ debug = SPDLOG_LEVEL_DEBUG,
+ info = SPDLOG_LEVEL_INFO,
+ warn = SPDLOG_LEVEL_WARN,
+ err = SPDLOG_LEVEL_ERROR,
+ critical = SPDLOG_LEVEL_CRITICAL,
+ off = SPDLOG_LEVEL_OFF,
+ n_levels
+};
+
+#define SPDLOG_LEVEL_NAME_TRACE spdlog::string_view_t("trace", 5)
+#define SPDLOG_LEVEL_NAME_DEBUG spdlog::string_view_t("debug", 5)
+#define SPDLOG_LEVEL_NAME_INFO spdlog::string_view_t("info", 4)
+#define SPDLOG_LEVEL_NAME_WARNING spdlog::string_view_t("warning", 7)
+#define SPDLOG_LEVEL_NAME_ERROR spdlog::string_view_t("error", 5)
+#define SPDLOG_LEVEL_NAME_CRITICAL spdlog::string_view_t("critical", 8)
+#define SPDLOG_LEVEL_NAME_OFF spdlog::string_view_t("off", 3)
+
+#if !defined(SPDLOG_LEVEL_NAMES)
+#define SPDLOG_LEVEL_NAMES \
+ { \
+ SPDLOG_LEVEL_NAME_TRACE, SPDLOG_LEVEL_NAME_DEBUG, SPDLOG_LEVEL_NAME_INFO, \
+ SPDLOG_LEVEL_NAME_WARNING, SPDLOG_LEVEL_NAME_ERROR, SPDLOG_LEVEL_NAME_CRITICAL, \
+ SPDLOG_LEVEL_NAME_OFF \
+ }
+#endif
+
+#if !defined(SPDLOG_SHORT_LEVEL_NAMES)
+
+#define SPDLOG_SHORT_LEVEL_NAMES \
+ { "T", "D", "I", "W", "E", "C", "O" }
+#endif
+
+SPDLOG_API const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
+SPDLOG_API const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
+SPDLOG_API spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT;
+
+} // namespace level
+
+//
+// Color mode used by sinks with color support.
+//
+enum class color_mode { always, automatic, never };
+
+//
+// Pattern time - specific time getting to use for pattern_formatter.
+// local time by default
+//
+enum class pattern_time_type {
+ local, // log localtime
+ utc // log utc
+};
+
+//
+// Log exception
+//
+class SPDLOG_API spdlog_ex : public std::exception {
+public:
+ explicit spdlog_ex(std::string msg);
+ spdlog_ex(const std::string &msg, int last_errno);
+ const char *what() const SPDLOG_NOEXCEPT override;
+
+private:
+ std::string msg_;
+};
+
+[[noreturn]] SPDLOG_API void throw_spdlog_ex(const std::string &msg, int last_errno);
+[[noreturn]] SPDLOG_API void throw_spdlog_ex(std::string msg);
+
+struct source_loc {
+ SPDLOG_CONSTEXPR source_loc() = default;
+ SPDLOG_CONSTEXPR source_loc(const char *filename_in, int line_in, const char *funcname_in)
+ : filename{filename_in},
+ line{line_in},
+ funcname{funcname_in} {}
+
+ SPDLOG_CONSTEXPR bool empty() const SPDLOG_NOEXCEPT { return line <= 0; }
+ const char *filename{nullptr};
+ int line{0};
+ const char *funcname{nullptr};
+};
+
+struct file_event_handlers {
+ file_event_handlers()
+ : before_open(nullptr),
+ after_open(nullptr),
+ before_close(nullptr),
+ after_close(nullptr) {}
+
+ std::function before_open;
+ std::function after_open;
+ std::function before_close;
+ std::function after_close;
+};
+
+namespace details {
+
+// to_string_view
+
+SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(const memory_buf_t &buf)
+ SPDLOG_NOEXCEPT {
+ return spdlog::string_view_t{buf.data(), buf.size()};
+}
+
+SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(spdlog::string_view_t str)
+ SPDLOG_NOEXCEPT {
+ return str;
+}
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(const wmemory_buf_t &buf)
+ SPDLOG_NOEXCEPT {
+ return spdlog::wstring_view_t{buf.data(), buf.size()};
+}
+
+SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(spdlog::wstring_view_t str)
+ SPDLOG_NOEXCEPT {
+ return str;
+}
+#endif
+
+#if defined(SPDLOG_USE_STD_FORMAT) && __cpp_lib_format >= 202207L
+template
+SPDLOG_CONSTEXPR_FUNC std::basic_string_view to_string_view(
+ std::basic_format_string fmt) SPDLOG_NOEXCEPT {
+ return fmt.get();
+}
+#endif
+
+// make_unique support for pre c++14
+#if __cplusplus >= 201402L // C++14 and beyond
+using std::enable_if_t;
+using std::make_unique;
+#else
+template
+using enable_if_t = typename std::enable_if::type;
+
+template
+std::unique_ptr make_unique(Args &&...args) {
+ static_assert(!std::is_array::value, "arrays not supported");
+ return std::unique_ptr(new T(std::forward(args)...));
+}
+#endif
+
+// to avoid useless casts (see https://github.com/nlohmann/json/issues/2893#issuecomment-889152324)
+template ::value, int> = 0>
+constexpr T conditional_static_cast(U value) {
+ return static_cast(value);
+}
+
+template ::value, int> = 0>
+constexpr T conditional_static_cast(U value) {
+ return value;
+}
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "common-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h b/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h
new file mode 100644
index 00000000..baa06b6a
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h
@@ -0,0 +1,63 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+namespace spdlog {
+namespace details {
+SPDLOG_INLINE backtracer::backtracer(const backtracer &other) {
+ std::lock_guard lock(other.mutex_);
+ enabled_ = other.enabled();
+ messages_ = other.messages_;
+}
+
+SPDLOG_INLINE backtracer::backtracer(backtracer &&other) SPDLOG_NOEXCEPT {
+ std::lock_guard lock(other.mutex_);
+ enabled_ = other.enabled();
+ messages_ = std::move(other.messages_);
+}
+
+SPDLOG_INLINE backtracer &backtracer::operator=(backtracer other) {
+ std::lock_guard lock(mutex_);
+ enabled_ = other.enabled();
+ messages_ = std::move(other.messages_);
+ return *this;
+}
+
+SPDLOG_INLINE void backtracer::enable(size_t size) {
+ std::lock_guard lock{mutex_};
+ enabled_.store(true, std::memory_order_relaxed);
+ messages_ = circular_q{size};
+}
+
+SPDLOG_INLINE void backtracer::disable() {
+ std::lock_guard lock{mutex_};
+ enabled_.store(false, std::memory_order_relaxed);
+}
+
+SPDLOG_INLINE bool backtracer::enabled() const { return enabled_.load(std::memory_order_relaxed); }
+
+SPDLOG_INLINE void backtracer::push_back(const log_msg &msg) {
+ std::lock_guard lock{mutex_};
+ messages_.push_back(log_msg_buffer{msg});
+}
+
+SPDLOG_INLINE bool backtracer::empty() const {
+ std::lock_guard lock{mutex_};
+ return messages_.empty();
+}
+
+// pop all items in the q and apply the given fun on each of them.
+SPDLOG_INLINE void backtracer::foreach_pop(std::function fun) {
+ std::lock_guard lock{mutex_};
+ while (!messages_.empty()) {
+ auto &front_msg = messages_.front();
+ fun(front_msg);
+ messages_.pop_front();
+ }
+}
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/backtracer.h b/csrc/vnpu_offload/include/spdlog/details/backtracer.h
new file mode 100644
index 00000000..f9eb4b43
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/backtracer.h
@@ -0,0 +1,45 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+
+// Store log messages in circular buffer.
+// Useful for storing debug data in case of error/warning happens.
+
+namespace spdlog {
+namespace details {
+class SPDLOG_API backtracer {
+ mutable std::mutex mutex_;
+ std::atomic enabled_{false};
+ circular_q messages_;
+
+public:
+ backtracer() = default;
+ backtracer(const backtracer &other);
+
+ backtracer(backtracer &&other) SPDLOG_NOEXCEPT;
+ backtracer &operator=(backtracer other);
+
+ void enable(size_t size);
+ void disable();
+ bool enabled() const;
+ void push_back(const log_msg &msg);
+ bool empty() const;
+
+ // pop all items in the q and apply the given fun on each of them.
+ void foreach_pop(std::function fun);
+};
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "backtracer-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/circular_q.h b/csrc/vnpu_offload/include/spdlog/details/circular_q.h
new file mode 100644
index 00000000..29e9d255
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/circular_q.h
@@ -0,0 +1,115 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+// circular q view of std::vector.
+#pragma once
+
+#include
+#include
+
+#include "spdlog/common.h"
+
+namespace spdlog {
+namespace details {
+template
+class circular_q {
+ size_t max_items_ = 0;
+ typename std::vector::size_type head_ = 0;
+ typename std::vector::size_type tail_ = 0;
+ size_t overrun_counter_ = 0;
+ std::vector v_;
+
+public:
+ using value_type = T;
+
+ // empty ctor - create a disabled queue with no elements allocated at all
+ circular_q() = default;
+
+ explicit circular_q(size_t max_items)
+ : max_items_(max_items + 1) // one item is reserved as marker for full q
+ ,
+ v_(max_items_) {}
+
+ circular_q(const circular_q &) = default;
+ circular_q &operator=(const circular_q &) = default;
+
+ // move cannot be default,
+ // since we need to reset head_, tail_, etc to zero in the moved object
+ circular_q(circular_q &&other) SPDLOG_NOEXCEPT { copy_moveable(std::move(other)); }
+
+ circular_q &operator=(circular_q &&other) SPDLOG_NOEXCEPT {
+ copy_moveable(std::move(other));
+ return *this;
+ }
+
+ // push back, overrun (oldest) item if no room left
+ void push_back(T &&item) {
+ if (max_items_ > 0) {
+ v_[tail_] = std::move(item);
+ tail_ = (tail_ + 1) % max_items_;
+
+ if (tail_ == head_) // overrun last item if full
+ {
+ head_ = (head_ + 1) % max_items_;
+ ++overrun_counter_;
+ }
+ }
+ }
+
+ // Return reference to the front item.
+ // If there are no elements in the container, the behavior is undefined.
+ const T &front() const { return v_[head_]; }
+
+ T &front() { return v_[head_]; }
+
+ // Return number of elements actually stored
+ size_t size() const {
+ if (tail_ >= head_) {
+ return tail_ - head_;
+ } else {
+ return max_items_ - (head_ - tail_);
+ }
+ }
+
+ // Return const reference to item by index.
+ // If index is out of range 0…size()-1, the behavior is undefined.
+ const T &at(size_t i) const {
+ assert(i < size());
+ return v_[(head_ + i) % max_items_];
+ }
+
+ // Pop item from front.
+ // If there are no elements in the container, the behavior is undefined.
+ void pop_front() { head_ = (head_ + 1) % max_items_; }
+
+ bool empty() const { return tail_ == head_; }
+
+ bool full() const {
+ // head is ahead of the tail by 1
+ if (max_items_ > 0) {
+ return ((tail_ + 1) % max_items_) == head_;
+ }
+ return false;
+ }
+
+ size_t overrun_counter() const { return overrun_counter_; }
+
+ void reset_overrun_counter() { overrun_counter_ = 0; }
+
+private:
+ // copy from other&& and reset it to disabled state
+ void copy_moveable(circular_q &&other) SPDLOG_NOEXCEPT {
+ max_items_ = other.max_items_;
+ head_ = other.head_;
+ tail_ = other.tail_;
+ overrun_counter_ = other.overrun_counter_;
+ v_ = std::move(other.v_);
+
+ // put &&other in disabled, but valid state
+ other.max_items_ = 0;
+ other.head_ = other.tail_ = 0;
+ other.overrun_counter_ = 0;
+ }
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/console_globals.h b/csrc/vnpu_offload/include/spdlog/details/console_globals.h
new file mode 100644
index 00000000..9c552106
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/console_globals.h
@@ -0,0 +1,28 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+struct console_mutex {
+ using mutex_t = std::mutex;
+ static mutex_t &mutex() {
+ static mutex_t s_mutex;
+ return s_mutex;
+ }
+};
+
+struct console_nullmutex {
+ using mutex_t = null_mutex;
+ static mutex_t &mutex() {
+ static mutex_t s_mutex;
+ return s_mutex;
+ }
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h b/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h
new file mode 100644
index 00000000..c7260ec3
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h
@@ -0,0 +1,151 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE file_helper::file_helper(const file_event_handlers &event_handlers)
+ : event_handlers_(event_handlers) {}
+
+SPDLOG_INLINE file_helper::~file_helper() { close(); }
+
+SPDLOG_INLINE void file_helper::open(const filename_t &fname, bool truncate) {
+ close();
+ filename_ = fname;
+
+ auto *mode = SPDLOG_FILENAME_T("ab");
+ auto *trunc_mode = SPDLOG_FILENAME_T("wb");
+
+ if (event_handlers_.before_open) {
+ event_handlers_.before_open(filename_);
+ }
+ for (int tries = 0; tries < open_tries_; ++tries) {
+ // create containing folder if not exists already.
+ os::create_dir(os::dir_name(fname));
+ if (truncate) {
+ // Truncate by opening-and-closing a tmp file in "wb" mode, always
+ // opening the actual log-we-write-to in "ab" mode, since that
+ // interacts more politely with eternal processes that might
+ // rotate/truncate the file underneath us.
+ std::FILE *tmp;
+ if (os::fopen_s(&tmp, fname, trunc_mode)) {
+ continue;
+ }
+ std::fclose(tmp);
+ }
+ if (!os::fopen_s(&fd_, fname, mode)) {
+ if (event_handlers_.after_open) {
+ event_handlers_.after_open(filename_, fd_);
+ }
+ return;
+ }
+
+ details::os::sleep_for_millis(open_interval_);
+ }
+
+ throw_spdlog_ex("Failed opening file " + os::filename_to_str(filename_) + " for writing",
+ errno);
+}
+
+SPDLOG_INLINE void file_helper::reopen(bool truncate) {
+ if (filename_.empty()) {
+ throw_spdlog_ex("Failed re opening file - was not opened before");
+ }
+ this->open(filename_, truncate);
+}
+
+SPDLOG_INLINE void file_helper::flush() {
+ if (std::fflush(fd_) != 0) {
+ throw_spdlog_ex("Failed flush to file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE void file_helper::sync() {
+ if (!os::fsync(fd_)) {
+ throw_spdlog_ex("Failed to fsync file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE void file_helper::close() {
+ if (fd_ != nullptr) {
+ if (event_handlers_.before_close) {
+ event_handlers_.before_close(filename_, fd_);
+ }
+
+ std::fclose(fd_);
+ fd_ = nullptr;
+
+ if (event_handlers_.after_close) {
+ event_handlers_.after_close(filename_);
+ }
+ }
+}
+
+SPDLOG_INLINE void file_helper::write(const memory_buf_t &buf) {
+ if (fd_ == nullptr) return;
+ size_t msg_size = buf.size();
+ auto data = buf.data();
+
+ if (!details::os::fwrite_bytes(data, msg_size, fd_)) {
+ throw_spdlog_ex("Failed writing to file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE size_t file_helper::size() const {
+ if (fd_ == nullptr) {
+ throw_spdlog_ex("Cannot use size() on closed file " + os::filename_to_str(filename_));
+ }
+ return os::filesize(fd_);
+}
+
+SPDLOG_INLINE const filename_t &file_helper::filename() const { return filename_; }
+
+//
+// return file path and its extension:
+//
+// "mylog.txt" => ("mylog", ".txt")
+// "mylog" => ("mylog", "")
+// "mylog." => ("mylog.", "")
+// "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
+//
+// the starting dot in filenames is ignored (hidden files):
+//
+// ".mylog" => (".mylog". "")
+// "my_folder/.mylog" => ("my_folder/.mylog", "")
+// "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
+SPDLOG_INLINE std::tuple file_helper::split_by_extension(
+ const filename_t &fname) {
+ auto ext_index = fname.rfind('.');
+
+ // no valid extension found - return whole path and empty string as
+ // extension
+ if (ext_index == filename_t::npos || ext_index == 0 || ext_index == fname.size() - 1) {
+ return std::make_tuple(fname, filename_t());
+ }
+
+ // treat cases like "/etc/rc.d/somelogfile or "/abc/.hiddenfile"
+ auto folder_index = fname.find_last_of(details::os::folder_seps_filename);
+ if (folder_index != filename_t::npos && folder_index >= ext_index - 1) {
+ return std::make_tuple(fname, filename_t());
+ }
+
+ // finally - return a valid base and extension tuple
+ return std::make_tuple(fname.substr(0, ext_index), fname.substr(ext_index));
+}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/file_helper.h b/csrc/vnpu_offload/include/spdlog/details/file_helper.h
new file mode 100644
index 00000000..aba0be47
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/file_helper.h
@@ -0,0 +1,61 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+// Helper class for file sinks.
+// When failing to open a file, retry several times(5) with a delay interval(10 ms).
+// Throw spdlog_ex exception on errors.
+
+class SPDLOG_API file_helper {
+public:
+ file_helper() = default;
+ explicit file_helper(const file_event_handlers &event_handlers);
+
+ file_helper(const file_helper &) = delete;
+ file_helper &operator=(const file_helper &) = delete;
+ ~file_helper();
+
+ void open(const filename_t &fname, bool truncate = false);
+ void reopen(bool truncate);
+ void flush();
+ void sync();
+ void close();
+ void write(const memory_buf_t &buf);
+ size_t size() const;
+ const filename_t &filename() const;
+
+ //
+ // return file path and its extension:
+ //
+ // "mylog.txt" => ("mylog", ".txt")
+ // "mylog" => ("mylog", "")
+ // "mylog." => ("mylog.", "")
+ // "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
+ //
+ // the starting dot in filenames is ignored (hidden files):
+ //
+ // ".mylog" => (".mylog". "")
+ // "my_folder/.mylog" => ("my_folder/.mylog", "")
+ // "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
+ static std::tuple split_by_extension(const filename_t &fname);
+
+private:
+ const int open_tries_ = 5;
+ const unsigned int open_interval_ = 10;
+ std::FILE *fd_{nullptr};
+ filename_t filename_;
+ file_event_handlers event_handlers_;
+};
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "file_helper-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h b/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h
new file mode 100644
index 00000000..b629b894
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h
@@ -0,0 +1,141 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SPDLOG_USE_STD_FORMAT
+#include
+#include
+#endif
+
+// Some fmt helpers to efficiently format and pad ints and strings
+namespace spdlog {
+namespace details {
+namespace fmt_helper {
+
+inline void append_string_view(spdlog::string_view_t view, memory_buf_t &dest) {
+ auto *buf_ptr = view.data();
+ dest.append(buf_ptr, buf_ptr + view.size());
+}
+
+#ifdef SPDLOG_USE_STD_FORMAT
+template
+inline void append_int(T n, memory_buf_t &dest) {
+ // Buffer should be large enough to hold all digits (digits10 + 1) and a sign
+ SPDLOG_CONSTEXPR const auto BUF_SIZE = std::numeric_limits::digits10 + 2;
+ char buf[BUF_SIZE];
+
+ auto [ptr, ec] = std::to_chars(buf, buf + BUF_SIZE, n, 10);
+ if (ec == std::errc()) {
+ dest.append(buf, ptr);
+ } else {
+ throw_spdlog_ex("Failed to format int", static_cast(ec));
+ }
+}
+#else
+template
+inline void append_int(T n, memory_buf_t &dest) {
+ fmt::format_int i(n);
+ dest.append(i.data(), i.data() + i.size());
+}
+#endif
+
+template
+SPDLOG_CONSTEXPR_FUNC unsigned int count_digits_fallback(T n) {
+ // taken from fmt: https://github.com/fmtlib/fmt/blob/8.0.1/include/fmt/format.h#L899-L912
+ unsigned int count = 1;
+ for (;;) {
+ // Integer division is slow so do it for a group of four digits instead
+ // of for every digit. The idea comes from the talk by Alexandrescu
+ // "Three Optimization Tips for C++". See speed-test for a comparison.
+ if (n < 10) return count;
+ if (n < 100) return count + 1;
+ if (n < 1000) return count + 2;
+ if (n < 10000) return count + 3;
+ n /= 10000u;
+ count += 4;
+ }
+}
+
+template
+inline unsigned int count_digits(T n) {
+ using count_type =
+ typename std::conditional<(sizeof(T) > sizeof(uint32_t)), uint64_t, uint32_t>::type;
+#ifdef SPDLOG_USE_STD_FORMAT
+ return count_digits_fallback(static_cast(n));
+#else
+ return static_cast(fmt::
+// fmt 7.0.0 renamed the internal namespace to detail.
+// See: https://github.com/fmtlib/fmt/issues/1538
+#if FMT_VERSION < 70000
+ internal
+#else
+ detail
+#endif
+ ::count_digits(static_cast(n)));
+#endif
+}
+
+inline void pad2(int n, memory_buf_t &dest) {
+ if (n >= 0 && n < 100) // 0-99
+ {
+ dest.push_back(static_cast('0' + n / 10));
+ dest.push_back(static_cast('0' + n % 10));
+ } else // unlikely, but just in case, let fmt deal with it
+ {
+ fmt_lib::format_to(std::back_inserter(dest), SPDLOG_FMT_STRING("{:02}"), n);
+ }
+}
+
+template
+inline void pad_uint(T n, unsigned int width, memory_buf_t &dest) {
+ static_assert(std::is_unsigned::value, "pad_uint must get unsigned T");
+ for (auto digits = count_digits(n); digits < width; digits++) {
+ dest.push_back('0');
+ }
+ append_int(n, dest);
+}
+
+template
+inline void pad3(T n, memory_buf_t &dest) {
+ static_assert(std::is_unsigned::value, "pad3 must get unsigned T");
+ if (n < 1000) {
+ dest.push_back(static_cast(n / 100 + '0'));
+ n = n % 100;
+ dest.push_back(static_cast((n / 10) + '0'));
+ dest.push_back(static_cast((n % 10) + '0'));
+ } else {
+ append_int(n, dest);
+ }
+}
+
+template
+inline void pad6(T n, memory_buf_t &dest) {
+ pad_uint(n, 6, dest);
+}
+
+template
+inline void pad9(T n, memory_buf_t &dest) {
+ pad_uint(n, 9, dest);
+}
+
+// return fraction of a second of the given time_point.
+// e.g.
+// fraction(tp) -> will return the millis part of the second
+template
+inline ToDuration time_fraction(log_clock::time_point tp) {
+ using std::chrono::duration_cast;
+ using std::chrono::seconds;
+ auto duration = tp.time_since_epoch();
+ auto secs = duration_cast(duration);
+ return duration_cast(duration) - duration_cast(secs);
+}
+
+} // namespace fmt_helper
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h
new file mode 100644
index 00000000..3a23dcf1
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h
@@ -0,0 +1,44 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE log_msg::log_msg(spdlog::log_clock::time_point log_time,
+ spdlog::source_loc loc,
+ string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : logger_name(a_logger_name),
+ level(lvl),
+ time(log_time)
+#ifndef SPDLOG_NO_THREAD_ID
+ ,
+ thread_id(os::thread_id())
+#endif
+ ,
+ source(loc),
+ payload(msg) {
+}
+
+SPDLOG_INLINE log_msg::log_msg(spdlog::source_loc loc,
+ string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : log_msg(os::now(), loc, a_logger_name, lvl, msg) {}
+
+SPDLOG_INLINE log_msg::log_msg(string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : log_msg(os::now(), source_loc{}, a_logger_name, lvl, msg) {}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg.h b/csrc/vnpu_offload/include/spdlog/details/log_msg.h
new file mode 100644
index 00000000..64b4bf6f
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg.h
@@ -0,0 +1,40 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+struct SPDLOG_API log_msg {
+ log_msg() = default;
+ log_msg(log_clock::time_point log_time,
+ source_loc loc,
+ string_view_t logger_name,
+ level::level_enum lvl,
+ string_view_t msg);
+ log_msg(source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg);
+ log_msg(string_view_t logger_name, level::level_enum lvl, string_view_t msg);
+ log_msg(const log_msg &other) = default;
+ log_msg &operator=(const log_msg &other) = default;
+
+ string_view_t logger_name;
+ level::level_enum level{level::off};
+ log_clock::time_point time;
+ size_t thread_id{0};
+
+ // wrapping the formatted text with color (updated by pattern_formatter).
+ mutable size_t color_range_start{0};
+ mutable size_t color_range_end{0};
+
+ source_loc source;
+ string_view_t payload;
+};
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "log_msg-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h
new file mode 100644
index 00000000..45c33835
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h
@@ -0,0 +1,54 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg &orig_msg)
+ : log_msg{orig_msg} {
+ buffer.append(logger_name.begin(), logger_name.end());
+ buffer.append(payload.begin(), payload.end());
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg_buffer &other)
+ : log_msg{other} {
+ buffer.append(logger_name.begin(), logger_name.end());
+ buffer.append(payload.begin(), payload.end());
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(log_msg_buffer &&other) SPDLOG_NOEXCEPT
+ : log_msg{other},
+ buffer{std::move(other.buffer)} {
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer &log_msg_buffer::operator=(const log_msg_buffer &other) {
+ log_msg::operator=(other);
+ buffer.clear();
+ buffer.append(other.buffer.data(), other.buffer.data() + other.buffer.size());
+ update_string_views();
+ return *this;
+}
+
+SPDLOG_INLINE log_msg_buffer &log_msg_buffer::operator=(log_msg_buffer &&other) SPDLOG_NOEXCEPT {
+ log_msg::operator=(other);
+ buffer = std::move(other.buffer);
+ update_string_views();
+ return *this;
+}
+
+SPDLOG_INLINE void log_msg_buffer::update_string_views() {
+ logger_name = string_view_t{buffer.data(), logger_name.size()};
+ payload = string_view_t{buffer.data() + logger_name.size(), payload.size()};
+}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h
new file mode 100644
index 00000000..c926cfaa
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h
@@ -0,0 +1,32 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+
+namespace spdlog {
+namespace details {
+
+// Extend log_msg with internal buffer to store its payload.
+// This is needed since log_msg holds string_views that points to stack data.
+
+class SPDLOG_API log_msg_buffer : public log_msg {
+ memory_buf_t buffer;
+ void update_string_views();
+
+public:
+ log_msg_buffer() = default;
+ explicit log_msg_buffer(const log_msg &orig_msg);
+ log_msg_buffer(const log_msg_buffer &other);
+ log_msg_buffer(log_msg_buffer &&other) SPDLOG_NOEXCEPT;
+ log_msg_buffer &operator=(const log_msg_buffer &other);
+ log_msg_buffer &operator=(log_msg_buffer &&other) SPDLOG_NOEXCEPT;
+};
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "log_msg_buffer-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h b/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h
new file mode 100644
index 00000000..f153f6c2
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h
@@ -0,0 +1,177 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+// multi producer-multi consumer blocking queue.
+// enqueue(..) - will block until room found to put the new message.
+// enqueue_nowait(..) - enqueue immediately. overruns oldest message if no
+// room left.
+// dequeue_for(..) - will block until the queue is not empty or timeout have
+// passed.
+
+#include
+
+#include
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+template
+class mpmc_blocking_queue {
+public:
+ using item_type = T;
+ explicit mpmc_blocking_queue(size_t max_items)
+ : q_(max_items) {}
+
+#ifndef __MINGW32__
+ // try to enqueue and block if no room left
+ void enqueue(T &&item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ pop_cv_.wait(lock, [this] { return !this->q_.full(); });
+ q_.push_back(std::move(item));
+ }
+ push_cv_.notify_one();
+ }
+
+ // enqueue immediately. overrun oldest message in the queue if no room left.
+ void enqueue_nowait(T &&item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ q_.push_back(std::move(item));
+ }
+ push_cv_.notify_one();
+ }
+
+ void enqueue_if_have_room(T &&item) {
+ bool pushed = false;
+ {
+ std::unique_lock lock(queue_mutex_);
+ if (!q_.full()) {
+ q_.push_back(std::move(item));
+ pushed = true;
+ }
+ }
+
+ if (pushed) {
+ push_cv_.notify_one();
+ } else {
+ ++discard_counter_;
+ }
+ }
+
+ // dequeue with a timeout.
+ // Return true, if succeeded dequeue item, false otherwise
+ bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) {
+ return false;
+ }
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ }
+ pop_cv_.notify_one();
+ return true;
+ }
+
+ // blocking dequeue without a timeout.
+ void dequeue(T &popped_item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ push_cv_.wait(lock, [this] { return !this->q_.empty(); });
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ }
+ pop_cv_.notify_one();
+ }
+
+#else
+ // apparently mingw deadlocks if the mutex is released before cv.notify_one(),
+ // so release the mutex at the very end each function.
+
+ // try to enqueue and block if no room left
+ void enqueue(T &&item) {
+ std::unique_lock lock(queue_mutex_);
+ pop_cv_.wait(lock, [this] { return !this->q_.full(); });
+ q_.push_back(std::move(item));
+ push_cv_.notify_one();
+ }
+
+ // enqueue immediately. overrun oldest message in the queue if no room left.
+ void enqueue_nowait(T &&item) {
+ std::unique_lock lock(queue_mutex_);
+ q_.push_back(std::move(item));
+ push_cv_.notify_one();
+ }
+
+ void enqueue_if_have_room(T &&item) {
+ bool pushed = false;
+ std::unique_lock lock(queue_mutex_);
+ if (!q_.full()) {
+ q_.push_back(std::move(item));
+ pushed = true;
+ }
+
+ if (pushed) {
+ push_cv_.notify_one();
+ } else {
+ ++discard_counter_;
+ }
+ }
+
+ // dequeue with a timeout.
+ // Return true, if succeeded dequeue item, false otherwise
+ bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) {
+ std::unique_lock lock(queue_mutex_);
+ if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) {
+ return false;
+ }
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ pop_cv_.notify_one();
+ return true;
+ }
+
+ // blocking dequeue without a timeout.
+ void dequeue(T &popped_item) {
+ std::unique_lock lock(queue_mutex_);
+ push_cv_.wait(lock, [this] { return !this->q_.empty(); });
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ pop_cv_.notify_one();
+ }
+
+#endif
+
+ size_t overrun_counter() {
+ std::lock_guard lock(queue_mutex_);
+ return q_.overrun_counter();
+ }
+
+ size_t discard_counter() { return discard_counter_.load(std::memory_order_relaxed); }
+
+ size_t size() {
+ std::lock_guard lock(queue_mutex_);
+ return q_.size();
+ }
+
+ void reset_overrun_counter() {
+ std::lock_guard lock(queue_mutex_);
+ q_.reset_overrun_counter();
+ }
+
+ void reset_discard_counter() { discard_counter_.store(0, std::memory_order_relaxed); }
+
+private:
+ std::mutex queue_mutex_;
+ std::condition_variable push_cv_;
+ std::condition_variable pop_cv_;
+ spdlog::details::circular_q q_;
+ std::atomic discard_counter_{0};
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/null_mutex.h b/csrc/vnpu_offload/include/spdlog/details/null_mutex.h
new file mode 100644
index 00000000..e3b32204
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/null_mutex.h
@@ -0,0 +1,35 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+// null, no cost dummy "mutex" and dummy "atomic" int
+
+namespace spdlog {
+namespace details {
+struct null_mutex {
+ void lock() const {}
+ void unlock() const {}
+};
+
+struct null_atomic_int {
+ int value;
+ null_atomic_int() = default;
+
+ explicit null_atomic_int(int new_value)
+ : value(new_value) {}
+
+ int load(std::memory_order = std::memory_order_relaxed) const { return value; }
+
+ void store(int new_value, std::memory_order = std::memory_order_relaxed) { value = new_value; }
+
+ int exchange(int new_value, std::memory_order = std::memory_order_relaxed) {
+ std::swap(new_value, value);
+ return new_value; // return value before the call
+ }
+};
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/os-inl.h b/csrc/vnpu_offload/include/spdlog/details/os-inl.h
new file mode 100644
index 00000000..2acded09
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/os-inl.h
@@ -0,0 +1,572 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include