diff --git a/CMakeLists.txt b/CMakeLists.txt
index 811ad5f8..3235facd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,10 +96,12 @@ message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
if(SOC_VERSION MATCHES "ascend310p.*")
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
else()
file(GLOB VLLM_ASCEND_SRC
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/shm_worker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp)
endif()
@@ -113,6 +115,7 @@ include_directories(
${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host
+ ${CMAKE_CURRENT_SOURCE_DIR}/csrc/vnpu_offload/include
)
set(
diff --git a/Dockerfile b/Dockerfile
index 0afde8f8..c164951a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang-15 (for triton-ascend)
RUN apt-get update -y && \
apt-get -y install clang-15 && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index a275c798..b9c5b8ba 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -54,6 +54,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
@@ -68,6 +70,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 0c95f133..edb155b8 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -50,6 +50,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
PYTHON_TAG=$(python3 -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')") && \
@@ -64,6 +66,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install modelscope (for fast download) and ray (for multinode)
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
python3 -m pip cache purge
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 2c31d878..2bf2d95b 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -64,6 +64,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -83,6 +85,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang-15 (for triton-ascend)
RUN apt-get update -y && \
apt-get -y install clang-15 && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 4ed461a4..4c511e5c 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang (for triton-ascend)
RUN yum update -y && \
yum install -y clang && \
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 4bb86318..2c815f53 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -65,6 +65,8 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/$(uname -i)-openEuler-linux && \
\
+ cd /vllm-workspace/vllm-ascend/csrc/vnpu_offload && \
+ make install && make clean && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
\
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
@@ -84,6 +86,9 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/${PTA_WHEEL}" && \
python3 -m pip cache purge
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn \
+ VLLM_ASCEND_ENABLE_VNPU=1
+
# Install clang (for triton-ascend)
RUN yum update -y && \
yum install -y clang && \
diff --git a/README-vllm-ascend.md b/README-vllm-ascend.md
new file mode 100644
index 00000000..fce5d824
--- /dev/null
+++ b/README-vllm-ascend.md
@@ -0,0 +1,106 @@
+
+
+
+
+
+
+
+
+vLLM Ascend Plugin
+
+
+
+
+[](https://deepwiki.com/vllm-project/vllm-ascend)
+
+
+
+
+| About Ascend | Documentation | #SIG-Ascend | Users Forum | Weekly Meeting |
+
+
+
+English | 中文
+
+
+---
+*Latest News* 🔥
+
+- [2026/02] We released the new official version [v0.13.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.13.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to start using vLLM Ascend Plugin on Ascend.
+- [2025/12] We released the new official version [v0.11.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.11.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.11.0/) to start using vLLM Ascend Plugin on Ascend.
+- [2025/09] We released the new official version [v0.9.1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.9.1)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/large_scale_ep.html) to start deploying large-scale Expert Parallelism (EP) on Ascend.
+- [2025/08] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/7n8OYNrCC_I9SJaybHA_-Q) with vLLM and Tencent! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
+- [2025/06] [User stories](https://docs.vllm.ai/projects/ascend/en/latest/community/user_stories/index.html) page is now live! It kicks off with LLaMA-Factory/verl/TRL/GPUStack to demonstrate how vLLM Ascend assists Ascend users in enhancing their experience across fine-tuning, evaluation, reinforcement learning (RL), and deployment scenarios.
+- [2025/06] [Contributors](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html) page is now live! All contributions deserve to be recorded, thanks for all contributors.
+- [2025/05] We've released the first official version [v0.7.3](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3)! We collaborated with the vLLM community to publish a blog post sharing our practice: [Introducing vLLM Hardware Plugin, Best Practice from Ascend NPU](https://blog.vllm.ai/2025/05/12/hardware-plugin.html).
+- [2025/03] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/VtxO9WXa5fC-mKqlxNUJUQ) with vLLM team! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
+- [2025/02] vLLM community officially created [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-ascend) repo for running vLLM seamlessly on the Ascend NPU.
+- [2024/12] We are working with the vLLM community to support [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162).
+
+---
+
+## Overview
+
+vLLM Ascend (`vllm-ascend`) is a community maintained hardware plugin for running vLLM seamlessly on the Ascend NPU.
+
+It is the recommended approach for supporting the Ascend backend within the vLLM community. It adheres to the principles outlined in the [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162), providing a hardware-pluggable interface that decouples the integration of the Ascend NPU with vLLM.
+
+By using vLLM Ascend plugin, popular open-source models, including Transformer-like, Mixture-of-Experts (MoE), Embedding, Multi-modal LLMs can run seamlessly on the Ascend NPU.
+
+## Prerequisites
+
+- Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series, Atlas 800I A3 Inference series, Atlas A3 Training series, Atlas 300I Duo (Experimental)
+- OS: Linux
+- Software:
+ - Python >= 3.10, < 3.12
+ - CANN == 8.5.0 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC2/releasenote/releasenote_0000.html))
+ - PyTorch == 2.9.0, torch-npu == 2.9.0
+ - vLLM (the same version as vllm-ascend)
+
+## Getting Started
+
+Please use the following recommended versions to get started quickly:
+
+| Version | Release type | Doc |
+|------------|--------------|--------------------------------------|
+| v0.17.0rc1 | Latest release candidate | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/latest/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) for more details |
+| v0.13.0 | Latest stable version | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/v0.13.0/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/v0.13.0/installation.html) for more details |
+
+## Contributing
+
+See [CONTRIBUTING](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/contribution/index.html) for more details, which is a step-by-step guide to help you set up the development environment, build and test.
+
+We welcome and value any contributions and collaborations:
+
+- Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues)
+- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help.
+
+## Branch
+
+vllm-ascend has a main branch and a dev branch.
+
+- **main**: main branch, corresponds to the vLLM main branch, and is continuously monitored for quality through Ascend CI.
+- **releases/vX.Y.Z**: development branch, created alongside new releases of vLLM. For example, `releases/v0.13.0` is the dev branch for vLLM `v0.13.0` version.
+
+Below are the maintained branches:
+
+| Branch | Status | Note |
+|------------|--------------|--------------------------------------|
+| main | Maintained | CI commitment for vLLM main branch and vLLM v0.17.0 tag |
+| v0.7.1-dev | Unmaintained | Only doc fixes are allowed |
+| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version, only bug fixes are allowed, and no new release tags anymore. |
+| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version |
+| v0.11.0-dev | Maintained | CI commitment for vLLM 0.11.0 version |
+| releases/v0.13.0 | Maintained | CI commitment for vLLM 0.13.0 version |
+| rfc/feature-name | Maintained | [Feature branches](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#feature-branches) for collaboration |
+
+Please refer to [Versioning policy](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html) for more details.
+
+## Weekly Meeting
+
+- vLLM Ascend Weekly Meeting:
+- Wednesday, 15:00 - 16:00 (UTC+8, [Convert to your timezone](https://dateful.com/convert/gmt8?t=15))
+
+## License
+
+Apache License 2.0, as found in the [LICENSE](./LICENSE) file.
diff --git a/README.zh.md b/README-vllm-ascend.zh.md
similarity index 100%
rename from README.zh.md
rename to README-vllm-ascend.zh.md
diff --git a/README.md b/README.md
index fce5d824..5db909ce 100644
--- a/README.md
+++ b/README.md
@@ -1,106 +1,38 @@
-
-
-
-
-
-
-
-
-vLLM Ascend Plugin
-
-
-
-
-[](https://deepwiki.com/vllm-project/vllm-ascend)
-
-
-
-
-| About Ascend | Documentation | #SIG-Ascend | Users Forum | Weekly Meeting |
-
-
-
-English | 中文
-
-
----
-*Latest News* 🔥
-
-- [2026/02] We released the new official version [v0.13.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.13.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to start using vLLM Ascend Plugin on Ascend.
-- [2025/12] We released the new official version [v0.11.0](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.11.0)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.11.0/) to start using vLLM Ascend Plugin on Ascend.
-- [2025/09] We released the new official version [v0.9.1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.9.1)! Please follow the [official guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/large_scale_ep.html) to start deploying large-scale Expert Parallelism (EP) on Ascend.
-- [2025/08] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/7n8OYNrCC_I9SJaybHA_-Q) with vLLM and Tencent! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
-- [2025/06] [User stories](https://docs.vllm.ai/projects/ascend/en/latest/community/user_stories/index.html) page is now live! It kicks off with LLaMA-Factory/verl/TRL/GPUStack to demonstrate how vLLM Ascend assists Ascend users in enhancing their experience across fine-tuning, evaluation, reinforcement learning (RL), and deployment scenarios.
-- [2025/06] [Contributors](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html) page is now live! All contributions deserve to be recorded, thanks for all contributors.
-- [2025/05] We've released the first official version [v0.7.3](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3)! We collaborated with the vLLM community to publish a blog post sharing our practice: [Introducing vLLM Hardware Plugin, Best Practice from Ascend NPU](https://blog.vllm.ai/2025/05/12/hardware-plugin.html).
-- [2025/03] We hosted the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/VtxO9WXa5fC-mKqlxNUJUQ) with vLLM team! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Pid6NSFLU43DZRi0EaTcPgXsAzDvbBqF).
-- [2025/02] vLLM community officially created [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-ascend) repo for running vLLM seamlessly on the Ascend NPU.
-- [2024/12] We are working with the vLLM community to support [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162).
-
----
+# XC-LLM: A Specially Optimized LLM Inference Engine for ModelHub XC
## Overview
-vLLM Ascend (`vllm-ascend`) is a community maintained hardware plugin for running vLLM seamlessly on the Ascend NPU.
+The project is optimized based on the popular LLM inference project vLLM. This repo is for Ascend NPU.
-It is the recommended approach for supporting the Ascend backend within the vLLM community. It adheres to the principles outlined in the [[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/issues/11162), providing a hardware-pluggable interface that decouples the integration of the Ascend NPU with vLLM.
+One of the key features of this project is efficient memory coordination, enabling multiple vLLM instances to share and dynamically hold the Ascend NPU's physical memory. When an instance is idle, model parameters are offloaded to host memory. Upon a new inference request, the model parameters are quickly restored to the NPU’s memory (if not already present), without the need to initialize the engine and load the model from scratch. As a result, from the application’s perspective, multiple LLM inference engines can run on the NPU even when their total memory requirements exceed the physical memory limit. This technique is referred to as `InfiniVRAM`.
-By using vLLM Ascend plugin, popular open-source models, including Transformer-like, Mixture-of-Experts (MoE), Embedding, Multi-modal LLMs can run seamlessly on the Ascend NPU.
-## Prerequisites
+## Installation
-- Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series, Atlas 800I A3 Inference series, Atlas A3 Training series, Atlas 300I Duo (Experimental)
-- OS: Linux
-- Software:
- - Python >= 3.10, < 3.12
- - CANN == 8.5.0 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC2/releasenote/releasenote_0000.html))
- - PyTorch == 2.9.0, torch-npu == 2.9.0
- - vLLM (the same version as vllm-ascend)
+### Build from Dockerfile
-## Getting Started
+Clone this repository:
-Please use the following recommended versions to get started quickly:
+```bash
+docker build -t $build_image -f ./Dockerfile .
+```
-| Version | Release type | Doc |
-|------------|--------------|--------------------------------------|
-| v0.17.0rc1 | Latest release candidate | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/latest/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) for more details |
-| v0.13.0 | Latest stable version | See [QuickStart](https://docs.vllm.ai/projects/ascend/en/v0.13.0/quick_start.html) and [Installation](https://docs.vllm.ai/projects/ascend/en/v0.13.0/installation.html) for more details |
+## Usage
-## Contributing
+> [!NOTE]
+> Some platforms may not allow multiple containers to share the same Ascend NPU. You may try to use a privileged container to bypass this restriction and mount all NPUs, and set the env ASCEND_RT_VISIBLE_DEVICES to specify the target device to use.
-See [CONTRIBUTING](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/contribution/index.html) for more details, which is a step-by-step guide to help you set up the development environment, build and test.
+0. To share an NPU, processes coordinate via shm, so you need to start all containers with `ipc=host`.
+1. Start a daemon process in a standalone container, by running `vllm_vnpu_daemon` installed inside the image.
+2. Start LLM services with this image, following the official usage instructions.
+3. Due to the limited stream resource of Ascend NPU, you may need to restrict graph capture sizes or disable ACLgraph by setting `--enforce-eager`, especially when launching multiple LLMs. Refer to the [link](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html#how-to-troubleshoot-and-resolve-size-capture-failures-resulting-from-stream-resource-exhaustion-and-what-are-the-underlying-causes).
-We welcome and value any contributions and collaborations:
+### Environment Variables
+- `VNPU_RESERVED_VRAM_SIZE_GB`: The amount of reserved NPU memory for other miscellaneous usage. Only needs to be set for `vllm_vnpu_daemon`. Try increasing the variable if you launch multiple LLM services and encounter OOM. Default: `8`.
+- `VLLM_VNPU_SHM_NAME`: The name of the shm file. Needs to be set for all containers of the shared vNPU group. Default: `/vllm_acl_vnpu_offload_shm`.
-- Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues)
-- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help.
-## Branch
+## Limitations
-vllm-ascend has a main branch and a dev branch.
-
-- **main**: main branch, corresponds to the vLLM main branch, and is continuously monitored for quality through Ascend CI.
-- **releases/vX.Y.Z**: development branch, created alongside new releases of vLLM. For example, `releases/v0.13.0` is the dev branch for vLLM `v0.13.0` version.
-
-Below are the maintained branches:
-
-| Branch | Status | Note |
-|------------|--------------|--------------------------------------|
-| main | Maintained | CI commitment for vLLM main branch and vLLM v0.17.0 tag |
-| v0.7.1-dev | Unmaintained | Only doc fixes are allowed |
-| v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version, only bug fixes are allowed, and no new release tags anymore. |
-| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version |
-| v0.11.0-dev | Maintained | CI commitment for vLLM 0.11.0 version |
-| releases/v0.13.0 | Maintained | CI commitment for vLLM 0.13.0 version |
-| rfc/feature-name | Maintained | [Feature branches](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#feature-branches) for collaboration |
-
-Please refer to [Versioning policy](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html) for more details.
-
-## Weekly Meeting
-
-- vLLM Ascend Weekly Meeting:
-- Wednesday, 15:00 - 16:00 (UTC+8, [Convert to your timezone](https://dateful.com/convert/gmt8?t=15))
-
-## License
-
-Apache License 2.0, as found in the [LICENSE](./LICENSE) file.
+- Because HCCL resources cannot be shared, deploying more than one model with multi-NPU parallelism (e.g., TP) is currently not feasible.
+- The prefix cache will be reset when the LLM is restored, since we simply discard the KV cache when the LLM is offloaded.
diff --git a/csrc/camem_allocator.cpp b/csrc/camem_allocator.cpp
index aaeb3b0f..eb76b15d 100644
--- a/csrc/camem_allocator.cpp
+++ b/csrc/camem_allocator.cpp
@@ -17,6 +17,10 @@
#include
#include
#include
+#include
+
+#include "vnpu_offload/shm_worker.h"
+#include "vnpu_offload/npu_helper.h"
extern "C" {
@@ -26,6 +30,13 @@ extern "C" {
#include
#include "acl/acl.h"
+// idle offload
+static std::atomic g_initialized(false);
+static void *g_d_mem = nullptr;
+static size_t g_size = 0;
+static std::atomic_uint_fast64_t g_allocated_offset(0);
+ShmWorker *shm_worker = nullptr;
+
// Global references to Python callables
// NOTE: this is borrowed reference, so we don't need to DECREF them.
// This brings the limitation that the allocator needs to be singleton.
@@ -248,6 +259,146 @@ __attribute__ ((visibility("default"))) void my_free(void* ptr, ssize_t size, in
free(p_memHandle);
}
+__attribute__((visibility("default"))) void *
+my_malloc_offload(ssize_t size, int device, aclrtStream stream) {
+ ensure_context(device);
+
+ // first allocation, align the size, and reserve an address, and also allocate
+ // a aclrtDrvMemHandle
+
+ // Define memory allocation properties
+ aclrtPhysicalMemProp prop = {};
+ prop.handleType = ACL_MEM_HANDLE_TYPE_NONE ;
+ prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED;
+ prop.memAttr = ACL_HBM_MEM_HUGE;
+ prop.location.id = device;
+ prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
+ prop.reserve = 0;
+
+ // Check if the allocation is supported
+ size_t granularity;
+ aclError error_code = aclrtMemGetAllocationGranularity(&prop,
+ ACL_RT_MEM_ALLOC_GRANULARITY_MINIMUM,
+ &granularity);
+ if (error_code != 0) {
+ throw std::runtime_error("aclrtMemGetAllocationGranularity failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ size_t alignedSize = ((size + granularity - 1) / granularity) * granularity;
+ void *d_mem;
+ // error_code = aclrtReserveMemAddress(&d_mem, alignedSize, 0, nullptr, 0);
+ // if (error_code != 0) {
+ // if (error_code == ACL_ERROR_RT_MEMORY_ALLOCATION) {
+ // throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + "(OOM: Out of Memory, allocation failed) " +
+ // __FILE__ + ":" + std::to_string(__LINE__));
+ // } else {
+ // throw std::runtime_error("aclrtReserveMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ // }
+ // }
+
+ // allocate from the reserved pool
+ size_t alloc_offset = g_allocated_offset.fetch_add(alignedSize);
+ if (alloc_offset + alignedSize > g_size) {
+ throw std::runtime_error(
+ "my_malloc ERROR: Out of memory in the reserved pool." +
+ std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ d_mem = (void *)((char *)g_d_mem + alloc_offset);
+
+ // allocate the aclrtDrvMemHandle
+ aclrtDrvMemHandle* p_memHandle =
+ (aclrtDrvMemHandle*)malloc(sizeof(aclrtDrvMemHandle));
+
+ if (!g_python_malloc_callback) {
+ throw std::runtime_error(
+ "my_malloc ERROR: g_python_malloc_callback not set." +
+ std::string(" ") + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+
+ // Acquire GIL (not in stable ABI officially, but often works)
+ PyGILState_STATE gstate = PyGILState_Ensure();
+
+ PyObject* arg_tuple = create_tuple_from_c_integers(
+ (unsigned long long)device, (unsigned long long)alignedSize,
+ (unsigned long long)d_mem, (unsigned long long)p_memHandle);
+
+ // Call g_python_malloc_callback
+ PyObject* py_result =
+ PyObject_CallFunctionObjArgs(g_python_malloc_callback, arg_tuple, NULL);
+ Py_DECREF(arg_tuple);
+
+ if (!py_result) {
+ PyErr_Print();
+ PyGILState_Release(gstate);
+ return nullptr;
+ }
+
+ PyGILState_Release(gstate);
+
+ // // do the final mapping
+ // create_and_map(device, alignedSize, d_mem, p_memHandle);
+
+ return (void*)d_mem;
+}
+
+__attribute__((visibility("default"))) void
+my_free_offload(void *ptr, ssize_t size, int device, aclrtStream stream) {
+ // get memory handle from the pointer
+ if (!g_python_free_callback) {
+ throw std::runtime_error(
+ "my_free ERROR: g_python_malloc_callback not set." + std::string(" ") +
+ __FILE__ + ":" + std::to_string(__LINE__));
+ }
+
+ // Acquire GIL (not in stable ABI officially, but often works)
+ PyGILState_STATE gstate = PyGILState_Ensure();
+
+ PyObject* py_ptr =
+ PyLong_FromUnsignedLongLong(reinterpret_cast(ptr));
+
+ PyObject* py_result =
+ PyObject_CallFunctionObjArgs(g_python_free_callback, py_ptr, NULL);
+
+ if (!py_result || !PyTuple_Check(py_result) || PyTuple_Size(py_result) != 4) {
+ PyErr_SetString(PyExc_TypeError, "Expected a tuple of size 4");
+ return;
+ }
+
+ unsigned long long recv_device, recv_size;
+ unsigned long long recv_d_mem, recv_p_memHandle;
+ // Unpack the tuple into four C integers
+ if (!PyArg_ParseTuple(py_result, "KKKK", &recv_device, &recv_size,
+ &recv_d_mem, &recv_p_memHandle)) {
+ // PyArg_ParseTuple sets an error if it fails
+ return;
+ }
+
+ PyGILState_Release(gstate);
+
+ // recv_size == size
+ // recv_device == device
+
+ // Free memory
+
+ // nothing to do
+
+ // void *d_mem = (void*)recv_d_mem;
+ // // allocate the aclrtDrvMemHandle
+ // aclrtDrvMemHandle* p_memHandle =
+ // (aclrtDrvMemHandle*)recv_p_memHandle;
+ // unmap_and_release(device, size, d_mem, p_memHandle);
+
+ // // free address and the handle
+ // aclError error_code = aclrtReleaseMemAddress(d_mem);
+ // if (error_code != 0) {
+ // throw std::runtime_error("aclrtReleaseMemAddress failed with acl error code: " +
+ // std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__));
+ // }
+ // free(p_memHandle);
+}
+
// ---------------------------------------------------------------------------
// Python extension boilerplate:
@@ -322,6 +473,132 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
Py_RETURN_NONE;
}
+
+static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
+ PyObject* malloc_callback = nullptr;
+ PyObject* free_callback = nullptr;
+ unsigned long long device = 0;
+
+ if (!PyArg_ParseTuple(args, "OOK", &malloc_callback, &free_callback,
+ &device)) {
+ return nullptr;
+ }
+
+ if (!PyCallable_Check(malloc_callback) || !PyCallable_Check(free_callback)) {
+ PyErr_SetString(PyExc_TypeError, "Both arguments must be callables");
+ return nullptr;
+ }
+
+ // Save the Python callables
+ // This module does not handle GC of these objects, so they must be kept alive
+ // outside of this module.
+ g_python_malloc_callback = malloc_callback;
+ g_python_free_callback = free_callback;
+
+ // init idle
+ if (g_initialized.load()) {
+ printf("Module already initialized.\n");
+ Py_RETURN_NONE;
+ }
+ g_initialized.store(true);
+
+ std::vector gpu_ids = get_npu_ids();
+ if (device >= gpu_ids.size()) {
+ throw std::runtime_error("Invalid device id: " + std::to_string(device) +
+ " " + __FILE__ + ":" + std::to_string(__LINE__));
+ }
+ int gpu_id = gpu_ids[device];
+
+ // get pid
+ aclError error_code;
+ int32_t pid;
+ error_code = aclrtDeviceGetBareTgid(&pid);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtDeviceGetBareTgid failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ shm_worker = new ShmWorker();
+ uint64_t shareable_handle;
+ shm_worker->register_worker(pid, gpu_id, &shareable_handle, &g_size);
+
+ // import shareable handle
+ aclrtDrvMemHandle memHandle;
+ error_code =
+ aclrtMemImportFromShareableHandle(shareable_handle, device, &memHandle);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtMemImportFromShareableHandle failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ // reserve virtual address
+ error_code = aclrtReserveMemAddress(&g_d_mem, g_size, 0, nullptr, 0);
+ if (error_code != 0) {
+ throw std::runtime_error(
+ "aclrtReserveMemAddress failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+ // map
+ error_code = aclrtMapMem(g_d_mem, g_size, 0, memHandle, 0);
+ if (error_code != 0) {
+ throw std::runtime_error("aclrtMapMem failed with acl error code: " +
+ std::to_string(error_code) + " " + __FILE__ + ":" +
+ std::to_string(__LINE__));
+ }
+
+ Py_RETURN_NONE;
+}
+
// Intentional no-op counterpart of python_unmap_and_release for the offload
// pool: the pool stays mapped for the whole process lifetime, so there is
// nothing to unmap per allocation. Kept so the offload API mirrors the
// non-offload method table.
static PyObject *python_unmap_and_release_offload(PyObject *self,
                                                  PyObject *args) {
  // nothing to do
  Py_RETURN_NONE;
}
+
// Intentional no-op counterpart of python_create_and_map for the offload
// pool: the shared pool is created and mapped once at init, so per-allocation
// create/map is unnecessary. Kept for API symmetry with the non-offload path.
static PyObject *python_create_and_map_offload(PyObject *self, PyObject *args) {
  // nothing to do
  Py_RETURN_NONE;
}
+
+static PyObject* python_get_mem_info_offload(PyObject* self, PyObject* args) {
+ size_t allocated_bytes = g_allocated_offset.load();
+ size_t free_mem = 0;
+ if (allocated_bytes >= g_size) {
+ free_mem = 0;
+ } else {
+ free_mem = g_size - allocated_bytes;
+ }
+ PyObject* tuple = PyTuple_New(2);
+ if (!tuple) {
+ return nullptr;
+ }
+ PyTuple_SetItem(tuple, 0, PyLong_FromSize_t(free_mem));
+ PyTuple_SetItem(tuple, 1, PyLong_FromSize_t(g_size));
+ return tuple;
+}
+
+static PyObject* python_try_lock_gpu_offload(PyObject* self, PyObject* args) {
+ bool prev_is_self = false;
+ bool success = shm_worker->try_lock_gpu(prev_is_self);
+ PyObject* tuple = PyTuple_New(2);
+ if (!tuple) {
+ return nullptr;
+ }
+ PyTuple_SetItem(tuple, 0, PyBool_FromLong(success));
+ PyTuple_SetItem(tuple, 1, PyBool_FromLong(prev_is_self));
+ return tuple;
+}
+
+static PyObject* python_unlock_gpu_offload(PyObject* self, PyObject* args) {
+ shm_worker->unlock_gpu();
+ Py_RETURN_NONE;
+}
+
static PyMethodDef module_methods[] = {
{"init_module", (PyCFunction)py_init_module, METH_VARARGS,
"Initialize module with python_malloc and python_free callables."},
@@ -329,7 +606,21 @@ static PyMethodDef module_methods[] = {
"Create and map memory on the device."},
{"python_unmap_and_release", (PyCFunction)python_unmap_and_release,
METH_VARARGS, "Unmap and release memory on the device."},
- {NULL, NULL, 0, NULL} // sentinel
+ {"init_module_offload", (PyCFunction)py_init_module_offload, METH_VARARGS,
+ "Initialize module with python_malloc and python_free callables."},
+ {"python_create_and_map_offload",
+ (PyCFunction)python_create_and_map_offload, METH_VARARGS,
+ "Create and map memory on the device."},
+ {"python_unmap_and_release_offload",
+ (PyCFunction)python_unmap_and_release_offload, METH_VARARGS,
+ "Unmap and release memory on the device."},
+ {"python_get_mem_info_offload", (PyCFunction)python_get_mem_info_offload,
+ METH_NOARGS, "Get mem info in the reserved pool."},
+ {"python_try_lock_gpu_offload", (PyCFunction)python_try_lock_gpu_offload,
+ METH_NOARGS, "Lock GPU."},
+ {"python_unlock_gpu_offload", (PyCFunction)python_unlock_gpu_offload,
+ METH_NOARGS, "Unlock GPU."},
+ {NULL, NULL, 0, NULL} // sentinel
};
static struct PyModuleDef camem_allocator_module = {
diff --git a/csrc/vnpu_offload/.gitignore b/csrc/vnpu_offload/.gitignore
new file mode 100644
index 00000000..24f458a3
--- /dev/null
+++ b/csrc/vnpu_offload/.gitignore
@@ -0,0 +1 @@
+vllm_vnpu_daemon
\ No newline at end of file
diff --git a/csrc/vnpu_offload/Makefile b/csrc/vnpu_offload/Makefile
new file mode 100644
index 00000000..efcadee9
--- /dev/null
+++ b/csrc/vnpu_offload/Makefile
@@ -0,0 +1,30 @@
# Build and install the standalone vNPU offload daemon (vllm_vnpu_daemon).
CXX := g++
TARGET := vllm_vnpu_daemon
SRCS := vnpu_daemon.cpp shm_manager.cpp

# Ascend CANN toolkit location: headers under include/, libascendcl in lib64/.
ASCEND_HOME := /usr/local/Ascend/ascend-toolkit/latest
INCLUDES := -I$(ASCEND_HOME)/include -Iinclude
LIBS := -L$(ASCEND_HOME)/lib64 -lascendcl

CXXFLAGS := $(INCLUDES) -O2
LDFLAGS := $(LIBS)

# GNU-style staged-install variables; override PREFIX / DESTDIR / BINDIR
# on the command line (e.g. `make install DESTDIR=/tmp/stage`).
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

.PHONY: all clean install uninstall

all: $(TARGET)

# Single compile+link step straight from sources (no intermediate objects);
# relinks whenever any listed source changes.
$(TARGET): $(SRCS)
	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

install: $(TARGET)
	install -d $(DESTDIR)$(BINDIR)
	install -m 0755 $(TARGET) $(DESTDIR)$(BINDIR)/$(TARGET)

uninstall:
	rm -f $(DESTDIR)$(BINDIR)/$(TARGET)

clean:
	rm -f $(TARGET)
\ No newline at end of file
diff --git a/csrc/vnpu_offload/include/spdlog/async.h b/csrc/vnpu_offload/include/spdlog/async.h
new file mode 100644
index 00000000..92fcd9a7
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async.h
@@ -0,0 +1,99 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+//
+// Async logging using global thread pool
+// All loggers created here share same global thread pool.
+// Each log message is pushed to a queue along with a shared pointer to the
+// logger.
+// If a logger deleted while having pending messages in the queue, it's actual
+// destruction will defer
+// until all its messages are processed by the thread pool.
+// This is because each message in the queue holds a shared_ptr to the
+// originating logger.
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace spdlog {
+
+namespace details {
+static const size_t default_async_q_size = 8192;
+}
+
+// async logger factory - creates async loggers backed with thread pool.
+// if a global thread pool doesn't already exist, create it with default queue
+// size of 8192 items and single thread.
+template
+struct async_factory_impl {
+ template
+ static std::shared_ptr create(std::string logger_name, SinkArgs &&...args) {
+ auto ®istry_inst = details::registry::instance();
+
+ // create global thread pool if not already exists..
+
+ auto &mutex = registry_inst.tp_mutex();
+ std::lock_guard tp_lock(mutex);
+ auto tp = registry_inst.get_tp();
+ if (tp == nullptr) {
+ tp = std::make_shared(details::default_async_q_size, 1U);
+ registry_inst.set_tp(tp);
+ }
+
+ auto sink = std::make_shared(std::forward(args)...);
+ auto new_logger = std::make_shared(std::move(logger_name), std::move(sink),
+ std::move(tp), OverflowPolicy);
+ registry_inst.initialize_logger(new_logger);
+ return new_logger;
+ }
+};
+
+using async_factory = async_factory_impl;
+using async_factory_nonblock = async_factory_impl;
+
+template
+inline std::shared_ptr create_async(std::string logger_name,
+ SinkArgs &&...sink_args) {
+ return async_factory::create(std::move(logger_name),
+ std::forward(sink_args)...);
+}
+
+template
+inline std::shared_ptr create_async_nb(std::string logger_name,
+ SinkArgs &&...sink_args) {
+ return async_factory_nonblock::create(std::move(logger_name),
+ std::forward(sink_args)...);
+}
+
+// set global thread pool.
+inline void init_thread_pool(size_t q_size,
+ size_t thread_count,
+ std::function on_thread_start,
+ std::function on_thread_stop) {
+ auto tp = std::make_shared(q_size, thread_count, on_thread_start,
+ on_thread_stop);
+ details::registry::instance().set_tp(std::move(tp));
+}
+
+inline void init_thread_pool(size_t q_size,
+ size_t thread_count,
+ std::function on_thread_start) {
+ init_thread_pool(q_size, thread_count, on_thread_start, [] {});
+}
+
+inline void init_thread_pool(size_t q_size, size_t thread_count) {
+ init_thread_pool(q_size, thread_count, [] {}, [] {});
+}
+
+// get the global thread pool.
+inline std::shared_ptr thread_pool() {
+ return details::registry::instance().get_tp();
+}
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/async_logger-inl.h b/csrc/vnpu_offload/include/spdlog/async_logger-inl.h
new file mode 100644
index 00000000..cde73f90
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async_logger-inl.h
@@ -0,0 +1,84 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+
+SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
+ sinks_init_list sinks_list,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy)
+ : async_logger(std::move(logger_name),
+ sinks_list.begin(),
+ sinks_list.end(),
+ std::move(tp),
+ overflow_policy) {}
+
+SPDLOG_INLINE spdlog::async_logger::async_logger(std::string logger_name,
+ sink_ptr single_sink,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy)
+ : async_logger(
+ std::move(logger_name), {std::move(single_sink)}, std::move(tp), overflow_policy) {}
+
+// send the log message to the thread pool
+SPDLOG_INLINE void spdlog::async_logger::sink_it_(const details::log_msg &msg){
+ SPDLOG_TRY{if (auto pool_ptr = thread_pool_.lock()){
+ pool_ptr -> post_log(shared_from_this(), msg, overflow_policy_);
+}
+else {
+ throw_spdlog_ex("async log: thread pool doesn't exist anymore");
+}
+}
+SPDLOG_LOGGER_CATCH(msg.source)
+}
+
+// send flush request to the thread pool
+SPDLOG_INLINE void spdlog::async_logger::flush_(){
+ SPDLOG_TRY{if (auto pool_ptr = thread_pool_.lock()){
+ pool_ptr -> post_flush(shared_from_this(), overflow_policy_);
+}
+else {
+ throw_spdlog_ex("async flush: thread pool doesn't exist anymore");
+}
+}
+SPDLOG_LOGGER_CATCH(source_loc())
+}
+
+//
+// backend functions - called from the thread pool to do the actual job
+//
+SPDLOG_INLINE void spdlog::async_logger::backend_sink_it_(const details::log_msg &msg) {
+ for (auto &sink : sinks_) {
+ if (sink->should_log(msg.level)) {
+ SPDLOG_TRY { sink->log(msg); }
+ SPDLOG_LOGGER_CATCH(msg.source)
+ }
+ }
+
+ if (should_flush_(msg)) {
+ backend_flush_();
+ }
+}
+
+SPDLOG_INLINE void spdlog::async_logger::backend_flush_() {
+ for (auto &sink : sinks_) {
+ SPDLOG_TRY { sink->flush(); }
+ SPDLOG_LOGGER_CATCH(source_loc())
+ }
+}
+
+SPDLOG_INLINE std::shared_ptr spdlog::async_logger::clone(std::string new_name) {
+ auto cloned = std::make_shared(*this);
+ cloned->name_ = std::move(new_name);
+ return cloned;
+}
diff --git a/csrc/vnpu_offload/include/spdlog/async_logger.h b/csrc/vnpu_offload/include/spdlog/async_logger.h
new file mode 100644
index 00000000..be361538
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/async_logger.h
@@ -0,0 +1,74 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+// Fast asynchronous logger.
+// Uses pre allocated queue.
+// Creates a single back thread to pop messages from the queue and log them.
+//
+// Upon each log write the logger:
+// 1. Checks if its log level is enough to log the message
+// 2. Push a new copy of the message to a queue (or block the caller until
+// space is available in the queue)
+// Upon destruction, logs all remaining messages in the queue before
+// destructing..
+
+#include
+
+namespace spdlog {
+
+// Async overflow policy - block by default.
+enum class async_overflow_policy {
+ block, // Block until message can be enqueued
+ overrun_oldest, // Discard oldest message in the queue if full when trying to
+ // add new item.
+ discard_new // Discard new message if the queue is full when trying to add new item.
+};
+
+namespace details {
+class thread_pool;
+}
+
+class SPDLOG_API async_logger final : public std::enable_shared_from_this,
+ public logger {
+ friend class details::thread_pool;
+
+public:
+ template
+ async_logger(std::string logger_name,
+ It begin,
+ It end,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block)
+ : logger(std::move(logger_name), begin, end),
+ thread_pool_(std::move(tp)),
+ overflow_policy_(overflow_policy) {}
+
+ async_logger(std::string logger_name,
+ sinks_init_list sinks_list,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block);
+
+ async_logger(std::string logger_name,
+ sink_ptr single_sink,
+ std::weak_ptr tp,
+ async_overflow_policy overflow_policy = async_overflow_policy::block);
+
+ std::shared_ptr clone(std::string new_name) override;
+
+protected:
+ void sink_it_(const details::log_msg &msg) override;
+ void flush_() override;
+ void backend_sink_it_(const details::log_msg &incoming_log_msg);
+ void backend_flush_();
+
+private:
+ std::weak_ptr thread_pool_;
+ async_overflow_policy overflow_policy_;
+};
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "async_logger-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/argv.h b/csrc/vnpu_offload/include/spdlog/cfg/argv.h
new file mode 100644
index 00000000..7de2f83e
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/argv.h
@@ -0,0 +1,40 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+#include
+#include
+
+//
+// Init log levels using each argv entry that starts with "SPDLOG_LEVEL="
+//
+// set all loggers to debug level:
+// example.exe "SPDLOG_LEVEL=debug"
+
+// set logger1 to trace level
+// example.exe "SPDLOG_LEVEL=logger1=trace"
+
+// turn off all logging except for logger1 and logger2:
+// example.exe "SPDLOG_LEVEL=off,logger1=debug,logger2=info"
+
+namespace spdlog {
+namespace cfg {
+
+// search for SPDLOG_LEVEL= in the args and use it to init the levels
+inline void load_argv_levels(int argc, const char **argv) {
+ const std::string spdlog_level_prefix = "SPDLOG_LEVEL=";
+ for (int i = 1; i < argc; i++) {
+ std::string arg = argv[i];
+ if (arg.find(spdlog_level_prefix) == 0) {
+ auto levels_string = arg.substr(spdlog_level_prefix.size());
+ helpers::load_levels(levels_string);
+ }
+ }
+}
+
+inline void load_argv_levels(int argc, char **argv) {
+ load_argv_levels(argc, const_cast(argv));
+}
+
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/env.h b/csrc/vnpu_offload/include/spdlog/cfg/env.h
new file mode 100644
index 00000000..47bf61c7
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/env.h
@@ -0,0 +1,36 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+#include
+#include
+#include
+
+//
+// Init levels and patterns from env variables SPDLOG_LEVEL
+// Inspired from Rust's "env_logger" crate (https://crates.io/crates/env_logger).
+// Note - fallback to "info" level on unrecognized levels
+//
+// Examples:
+//
+// set global level to debug:
+// export SPDLOG_LEVEL=debug
+//
+// turn off all logging except for logger1:
+// export SPDLOG_LEVEL="*=off,logger1=debug"
+//
+
+// turn off all logging except for logger1 and logger2:
+// export SPDLOG_LEVEL="off,logger1=debug,logger2=info"
+
+namespace spdlog {
+namespace cfg {
+inline void load_env_levels(const char* var = "SPDLOG_LEVEL") {
+ auto env_val = details::os::getenv(var);
+ if (!env_val.empty()) {
+ helpers::load_levels(env_val);
+ }
+}
+
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h
new file mode 100644
index 00000000..6ed86955
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/helpers-inl.h
@@ -0,0 +1,106 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace spdlog {
+namespace cfg {
+namespace helpers {
+
+// inplace convert to lowercase
+inline std::string &to_lower_(std::string &str) {
+ std::transform(str.begin(), str.end(), str.begin(), [](char ch) {
+ return static_cast((ch >= 'A' && ch <= 'Z') ? ch + ('a' - 'A') : ch);
+ });
+ return str;
+}
+
+// inplace trim spaces
+inline std::string &trim_(std::string &str) {
+ const char *spaces = " \n\r\t";
+ str.erase(str.find_last_not_of(spaces) + 1);
+ str.erase(0, str.find_first_not_of(spaces));
+ return str;
+}
+
+// return (name,value) trimmed pair from the given "name = value" string.
+// return empty string on missing parts
+// "key=val" => ("key", "val")
+// " key = val " => ("key", "val")
+// "key=" => ("key", "")
+// "val" => ("", "val")
+
+inline std::pair extract_kv_(char sep, const std::string &str) {
+ auto n = str.find(sep);
+ std::string k, v;
+ if (n == std::string::npos) {
+ v = str;
+ } else {
+ k = str.substr(0, n);
+ v = str.substr(n + 1);
+ }
+ return std::make_pair(trim_(k), trim_(v));
+}
+
+// return vector of key/value pairs from a sequence of "K1=V1,K2=V2,.."
+// "a=AAA,b=BBB,c=CCC,.." => {("a","AAA"),("b","BBB"),("c", "CCC"),...}
+inline std::unordered_map extract_key_vals_(const std::string &str) {
+ std::string token;
+ std::istringstream token_stream(str);
+ std::unordered_map rv{};
+ while (std::getline(token_stream, token, ',')) {
+ if (token.empty()) {
+ continue;
+ }
+ auto kv = extract_kv_('=', token);
+ rv[kv.first] = kv.second;
+ }
+ return rv;
+}
+
+SPDLOG_INLINE void load_levels(const std::string &input) {
+ if (input.empty() || input.size() >= 32768) {
+ return;
+ }
+
+ auto key_vals = extract_key_vals_(input);
+ std::unordered_map levels;
+ level::level_enum global_level = level::info;
+ bool global_level_found = false;
+
+ for (auto &name_level : key_vals) {
+ const auto &logger_name = name_level.first;
+ const auto &level_name = to_lower_(name_level.second);
+ auto level = level::from_str(level_name);
+ // ignore unrecognized level names
+ if (level == level::off && level_name != "off") {
+ continue;
+ }
+ if (logger_name.empty()) // no logger name indicates global level
+ {
+ global_level_found = true;
+ global_level = level;
+ } else {
+ levels[logger_name] = level;
+ }
+ }
+
+ details::registry::instance().set_levels(std::move(levels),
+ global_level_found ? &global_level : nullptr);
+}
+
+} // namespace helpers
+} // namespace cfg
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/cfg/helpers.h b/csrc/vnpu_offload/include/spdlog/cfg/helpers.h
new file mode 100644
index 00000000..d09a1e97
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/cfg/helpers.h
@@ -0,0 +1,29 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace cfg {
+namespace helpers {
+//
+// Init levels from given string
+//
+// Examples:
+//
+// set global level to debug: "debug"
+// turn off all logging except for logger1: "off,logger1=debug"
+// turn off all logging except for logger1 and logger2: "off,logger1=debug,logger2=info"
+//
+SPDLOG_API void load_levels(const std::string &txt);
+} // namespace helpers
+
+} // namespace cfg
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "helpers-inl.h"
+#endif // SPDLOG_HEADER_ONLY
diff --git a/csrc/vnpu_offload/include/spdlog/common-inl.h b/csrc/vnpu_offload/include/spdlog/common-inl.h
new file mode 100644
index 00000000..f35901c5
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/common-inl.h
@@ -0,0 +1,68 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+namespace spdlog {
+namespace level {
+
+#if __cplusplus >= 201703L
+constexpr
+#endif
+ static string_view_t level_string_views[] SPDLOG_LEVEL_NAMES;
+
+static const char *short_level_names[] SPDLOG_SHORT_LEVEL_NAMES;
+
+SPDLOG_INLINE const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
+ return level_string_views[l];
+}
+
+SPDLOG_INLINE const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT {
+ return short_level_names[l];
+}
+
+SPDLOG_INLINE spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT {
+ auto it = std::find(std::begin(level_string_views), std::end(level_string_views), name);
+ if (it != std::end(level_string_views))
+ return static_cast(std::distance(std::begin(level_string_views), it));
+
+ // check also for "warn" and "err" before giving up..
+ if (name == "warn") {
+ return level::warn;
+ }
+ if (name == "err") {
+ return level::err;
+ }
+ return level::off;
+}
+} // namespace level
+
+SPDLOG_INLINE spdlog_ex::spdlog_ex(std::string msg)
+ : msg_(std::move(msg)) {}
+
+SPDLOG_INLINE spdlog_ex::spdlog_ex(const std::string &msg, int last_errno) {
+#ifdef SPDLOG_USE_STD_FORMAT
+ msg_ = std::system_error(std::error_code(last_errno, std::generic_category()), msg).what();
+#else
+ memory_buf_t outbuf;
+ fmt::format_system_error(outbuf, last_errno, msg.c_str());
+ msg_ = fmt::to_string(outbuf);
+#endif
+}
+
+SPDLOG_INLINE const char *spdlog_ex::what() const SPDLOG_NOEXCEPT { return msg_.c_str(); }
+
+SPDLOG_INLINE void throw_spdlog_ex(const std::string &msg, int last_errno) {
+ SPDLOG_THROW(spdlog_ex(msg, last_errno));
+}
+
+SPDLOG_INLINE void throw_spdlog_ex(std::string msg) { SPDLOG_THROW(spdlog_ex(std::move(msg))); }
+
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/common.h b/csrc/vnpu_offload/include/spdlog/common.h
new file mode 100644
index 00000000..20dbca45
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/common.h
@@ -0,0 +1,406 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SPDLOG_USE_STD_FORMAT
+#include
+#if __cpp_lib_format >= 202207L
+#include
+#else
+#include
+#endif
+#endif
+
+#ifdef SPDLOG_COMPILED_LIB
+#undef SPDLOG_HEADER_ONLY
+#if defined(SPDLOG_SHARED_LIB)
+#if defined(_WIN32)
+#ifdef spdlog_EXPORTS
+#define SPDLOG_API __declspec(dllexport)
+#else // !spdlog_EXPORTS
+#define SPDLOG_API __declspec(dllimport)
+#endif
+#else // !defined(_WIN32)
+#define SPDLOG_API __attribute__((visibility("default")))
+#endif
+#else // !defined(SPDLOG_SHARED_LIB)
+#define SPDLOG_API
+#endif
+#define SPDLOG_INLINE
+#else // !defined(SPDLOG_COMPILED_LIB)
+#define SPDLOG_API
+#define SPDLOG_HEADER_ONLY
+#define SPDLOG_INLINE inline
+#endif // #ifdef SPDLOG_COMPILED_LIB
+
+#include
+
+#if !defined(SPDLOG_USE_STD_FORMAT) && \
+ FMT_VERSION >= 80000 // backward compatibility with fmt versions older than 8
+#define SPDLOG_FMT_RUNTIME(format_string) fmt::runtime(format_string)
+#define SPDLOG_FMT_STRING(format_string) FMT_STRING(format_string)
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+#include
+#endif
+#else
+#define SPDLOG_FMT_RUNTIME(format_string) format_string
+#define SPDLOG_FMT_STRING(format_string) format_string
+#endif
+
+// visual studio up to 2013 does not support noexcept nor constexpr
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define SPDLOG_NOEXCEPT _NOEXCEPT
+#define SPDLOG_CONSTEXPR
+#else
+#define SPDLOG_NOEXCEPT noexcept
+#define SPDLOG_CONSTEXPR constexpr
+#endif
+
+// If building with std::format, can just use constexpr, otherwise if building with fmt
+// SPDLOG_CONSTEXPR_FUNC needs to be set the same as FMT_CONSTEXPR to avoid situations where
+// a constexpr function in spdlog could end up calling a non-constexpr function in fmt
+// depending on the compiler
+// If fmt determines it can't use constexpr, we should inline the function instead
+#ifdef SPDLOG_USE_STD_FORMAT
+#define SPDLOG_CONSTEXPR_FUNC constexpr
+#else // Being built with fmt
+#if FMT_USE_CONSTEXPR
+#define SPDLOG_CONSTEXPR_FUNC FMT_CONSTEXPR
+#else
+#define SPDLOG_CONSTEXPR_FUNC inline
+#endif
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define SPDLOG_DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)
+#define SPDLOG_DEPRECATED __declspec(deprecated)
+#else
+#define SPDLOG_DEPRECATED
+#endif
+
+// disable thread local on msvc 2013
+#ifndef SPDLOG_NO_TLS
+#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(__cplusplus_winrt)
+#define SPDLOG_NO_TLS 1
+#endif
+#endif
+
+#ifndef SPDLOG_FUNCTION
+#define SPDLOG_FUNCTION static_cast(__FUNCTION__)
+#endif
+
+#ifdef SPDLOG_NO_EXCEPTIONS
+#define SPDLOG_TRY
+#define SPDLOG_THROW(ex) \
+ do { \
+ printf("spdlog fatal error: %s\n", ex.what()); \
+ std::abort(); \
+ } while (0)
+#define SPDLOG_CATCH_STD
+#else
+#define SPDLOG_TRY try
+#define SPDLOG_THROW(ex) throw(ex)
+#define SPDLOG_CATCH_STD \
+ catch (const std::exception &) { \
+ }
+#endif
+
+namespace spdlog {
+
+class formatter;
+
+namespace sinks {
+class sink;
+}
+
+#if defined(_WIN32) && defined(SPDLOG_WCHAR_FILENAMES)
+using filename_t = std::wstring;
+// allow macro expansion to occur in SPDLOG_FILENAME_T
+#define SPDLOG_FILENAME_T_INNER(s) L##s
+#define SPDLOG_FILENAME_T(s) SPDLOG_FILENAME_T_INNER(s)
+#else
+using filename_t = std::string;
+#define SPDLOG_FILENAME_T(s) s
+#endif
+
+using log_clock = std::chrono::system_clock;
+using sink_ptr = std::shared_ptr;
+using sinks_init_list = std::initializer_list;
+using err_handler = std::function;
+#ifdef SPDLOG_USE_STD_FORMAT
+namespace fmt_lib = std;
+
+using string_view_t = std::string_view;
+using memory_buf_t = std::string;
+
+template
+#if __cpp_lib_format >= 202207L
+using format_string_t = std::format_string;
+#else
+using format_string_t = std::string_view;
+#endif
+
+template
+struct is_convertible_to_basic_format_string
+ : std::integral_constant>::value> {};
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+using wstring_view_t = std::wstring_view;
+using wmemory_buf_t = std::wstring;
+
+template
+#if __cpp_lib_format >= 202207L
+using wformat_string_t = std::wformat_string;
+#else
+using wformat_string_t = std::wstring_view;
+#endif
+#endif
+#define SPDLOG_BUF_TO_STRING(x) x
+#else // use fmt lib instead of std::format
+namespace fmt_lib = fmt;
+
+using string_view_t = fmt::basic_string_view;
+using memory_buf_t = fmt::basic_memory_buffer;
+
+template
+using format_string_t = fmt::format_string;
+
+template
+using remove_cvref_t = typename std::remove_cv::type>::type;
+
+template
+#if FMT_VERSION >= 90101
+using fmt_runtime_string = fmt::runtime_format_string;
+#else
+using fmt_runtime_string = fmt::basic_runtime;
+#endif
+
+// clang doesn't like SFINAE disabled constructor in std::is_convertible<> so have to repeat the
+// condition from basic_format_string here, in addition, fmt::basic_runtime is only
+// convertible to basic_format_string but not basic_string_view
+template
+struct is_convertible_to_basic_format_string
+ : std::integral_constant>::value ||
+ std::is_same, fmt_runtime_string>::value> {
+};
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+using wstring_view_t = fmt::basic_string_view;
+using wmemory_buf_t = fmt::basic_memory_buffer;
+
+template
+using wformat_string_t = fmt::wformat_string;
+#endif
+#define SPDLOG_BUF_TO_STRING(x) fmt::to_string(x)
+#endif
+
+#ifdef SPDLOG_WCHAR_TO_UTF8_SUPPORT
+#ifndef _WIN32
+#error SPDLOG_WCHAR_TO_UTF8_SUPPORT only supported on windows
+#endif // _WIN32
+#endif // SPDLOG_WCHAR_TO_UTF8_SUPPORT
+
+template
+struct is_convertible_to_any_format_string
+ : std::integral_constant::value ||
+ is_convertible_to_basic_format_string::value> {};
+
+#if defined(SPDLOG_NO_ATOMIC_LEVELS)
+using level_t = details::null_atomic_int;
+#else
+using level_t = std::atomic;
+#endif
+
+#define SPDLOG_LEVEL_TRACE 0
+#define SPDLOG_LEVEL_DEBUG 1
+#define SPDLOG_LEVEL_INFO 2
+#define SPDLOG_LEVEL_WARN 3
+#define SPDLOG_LEVEL_ERROR 4
+#define SPDLOG_LEVEL_CRITICAL 5
+#define SPDLOG_LEVEL_OFF 6
+
+#if !defined(SPDLOG_ACTIVE_LEVEL)
+#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO
+#endif
+
+// Log level enum
+namespace level {
+enum level_enum : int {
+ trace = SPDLOG_LEVEL_TRACE,
+ debug = SPDLOG_LEVEL_DEBUG,
+ info = SPDLOG_LEVEL_INFO,
+ warn = SPDLOG_LEVEL_WARN,
+ err = SPDLOG_LEVEL_ERROR,
+ critical = SPDLOG_LEVEL_CRITICAL,
+ off = SPDLOG_LEVEL_OFF,
+ n_levels
+};
+
+#define SPDLOG_LEVEL_NAME_TRACE spdlog::string_view_t("trace", 5)
+#define SPDLOG_LEVEL_NAME_DEBUG spdlog::string_view_t("debug", 5)
+#define SPDLOG_LEVEL_NAME_INFO spdlog::string_view_t("info", 4)
+#define SPDLOG_LEVEL_NAME_WARNING spdlog::string_view_t("warning", 7)
+#define SPDLOG_LEVEL_NAME_ERROR spdlog::string_view_t("error", 5)
+#define SPDLOG_LEVEL_NAME_CRITICAL spdlog::string_view_t("critical", 8)
+#define SPDLOG_LEVEL_NAME_OFF spdlog::string_view_t("off", 3)
+
+#if !defined(SPDLOG_LEVEL_NAMES)
+#define SPDLOG_LEVEL_NAMES \
+ { \
+ SPDLOG_LEVEL_NAME_TRACE, SPDLOG_LEVEL_NAME_DEBUG, SPDLOG_LEVEL_NAME_INFO, \
+ SPDLOG_LEVEL_NAME_WARNING, SPDLOG_LEVEL_NAME_ERROR, SPDLOG_LEVEL_NAME_CRITICAL, \
+ SPDLOG_LEVEL_NAME_OFF \
+ }
+#endif
+
+#if !defined(SPDLOG_SHORT_LEVEL_NAMES)
+
+#define SPDLOG_SHORT_LEVEL_NAMES \
+ { "T", "D", "I", "W", "E", "C", "O" }
+#endif
+
+SPDLOG_API const string_view_t &to_string_view(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
+SPDLOG_API const char *to_short_c_str(spdlog::level::level_enum l) SPDLOG_NOEXCEPT;
+SPDLOG_API spdlog::level::level_enum from_str(const std::string &name) SPDLOG_NOEXCEPT;
+
+} // namespace level
+
+//
+// Color mode used by sinks with color support.
+//
+enum class color_mode { always, automatic, never };
+
+//
+// Pattern time - specific time getting to use for pattern_formatter.
+// local time by default
+//
+enum class pattern_time_type {
+ local, // log localtime
+ utc // log utc
+};
+
+//
+// Log exception
+//
+class SPDLOG_API spdlog_ex : public std::exception {
+public:
+ explicit spdlog_ex(std::string msg);
+ spdlog_ex(const std::string &msg, int last_errno);
+ const char *what() const SPDLOG_NOEXCEPT override;
+
+private:
+ std::string msg_;
+};
+
+[[noreturn]] SPDLOG_API void throw_spdlog_ex(const std::string &msg, int last_errno);
+[[noreturn]] SPDLOG_API void throw_spdlog_ex(std::string msg);
+
+struct source_loc {
+ SPDLOG_CONSTEXPR source_loc() = default;
+ SPDLOG_CONSTEXPR source_loc(const char *filename_in, int line_in, const char *funcname_in)
+ : filename{filename_in},
+ line{line_in},
+ funcname{funcname_in} {}
+
+ SPDLOG_CONSTEXPR bool empty() const SPDLOG_NOEXCEPT { return line <= 0; }
+ const char *filename{nullptr};
+ int line{0};
+ const char *funcname{nullptr};
+};
+
+struct file_event_handlers {
+ file_event_handlers()
+ : before_open(nullptr),
+ after_open(nullptr),
+ before_close(nullptr),
+ after_close(nullptr) {}
+
+ std::function before_open;
+ std::function after_open;
+ std::function before_close;
+ std::function after_close;
+};
+
+namespace details {
+
+// to_string_view
+
+SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(const memory_buf_t &buf)
+ SPDLOG_NOEXCEPT {
+ return spdlog::string_view_t{buf.data(), buf.size()};
+}
+
+SPDLOG_CONSTEXPR_FUNC spdlog::string_view_t to_string_view(spdlog::string_view_t str)
+ SPDLOG_NOEXCEPT {
+ return str;
+}
+
+#if defined(SPDLOG_WCHAR_FILENAMES) || defined(SPDLOG_WCHAR_TO_UTF8_SUPPORT)
+SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(const wmemory_buf_t &buf)
+ SPDLOG_NOEXCEPT {
+ return spdlog::wstring_view_t{buf.data(), buf.size()};
+}
+
+SPDLOG_CONSTEXPR_FUNC spdlog::wstring_view_t to_string_view(spdlog::wstring_view_t str)
+ SPDLOG_NOEXCEPT {
+ return str;
+}
+#endif
+
+#if defined(SPDLOG_USE_STD_FORMAT) && __cpp_lib_format >= 202207L
+template
+SPDLOG_CONSTEXPR_FUNC std::basic_string_view to_string_view(
+ std::basic_format_string fmt) SPDLOG_NOEXCEPT {
+ return fmt.get();
+}
+#endif
+
+// make_unique support for pre c++14
+#if __cplusplus >= 201402L // C++14 and beyond
+using std::enable_if_t;
+using std::make_unique;
+#else
+template
+using enable_if_t = typename std::enable_if::type;
+
+template
+std::unique_ptr make_unique(Args &&...args) {
+ static_assert(!std::is_array::value, "arrays not supported");
+ return std::unique_ptr(new T(std::forward(args)...));
+}
+#endif
+
+// to avoid useless casts (see https://github.com/nlohmann/json/issues/2893#issuecomment-889152324)
+template ::value, int> = 0>
+constexpr T conditional_static_cast(U value) {
+ return static_cast(value);
+}
+
+template ::value, int> = 0>
+constexpr T conditional_static_cast(U value) {
+ return value;
+}
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "common-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h b/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h
new file mode 100644
index 00000000..baa06b6a
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/backtracer-inl.h
@@ -0,0 +1,63 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+namespace spdlog {
+namespace details {
+SPDLOG_INLINE backtracer::backtracer(const backtracer &other) {
+ std::lock_guard lock(other.mutex_);
+ enabled_ = other.enabled();
+ messages_ = other.messages_;
+}
+
+SPDLOG_INLINE backtracer::backtracer(backtracer &&other) SPDLOG_NOEXCEPT {
+ std::lock_guard lock(other.mutex_);
+ enabled_ = other.enabled();
+ messages_ = std::move(other.messages_);
+}
+
+SPDLOG_INLINE backtracer &backtracer::operator=(backtracer other) {
+ std::lock_guard lock(mutex_);
+ enabled_ = other.enabled();
+ messages_ = std::move(other.messages_);
+ return *this;
+}
+
+SPDLOG_INLINE void backtracer::enable(size_t size) {
+ std::lock_guard lock{mutex_};
+ enabled_.store(true, std::memory_order_relaxed);
+ messages_ = circular_q{size};
+}
+
+SPDLOG_INLINE void backtracer::disable() {
+ std::lock_guard lock{mutex_};
+ enabled_.store(false, std::memory_order_relaxed);
+}
+
+SPDLOG_INLINE bool backtracer::enabled() const { return enabled_.load(std::memory_order_relaxed); }
+
+SPDLOG_INLINE void backtracer::push_back(const log_msg &msg) {
+ std::lock_guard lock{mutex_};
+ messages_.push_back(log_msg_buffer{msg});
+}
+
+SPDLOG_INLINE bool backtracer::empty() const {
+ std::lock_guard lock{mutex_};
+ return messages_.empty();
+}
+
+// pop all items in the q and apply the given fun on each of them.
+SPDLOG_INLINE void backtracer::foreach_pop(std::function fun) {
+ std::lock_guard lock{mutex_};
+ while (!messages_.empty()) {
+ auto &front_msg = messages_.front();
+ fun(front_msg);
+ messages_.pop_front();
+ }
+}
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/backtracer.h b/csrc/vnpu_offload/include/spdlog/details/backtracer.h
new file mode 100644
index 00000000..f9eb4b43
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/backtracer.h
@@ -0,0 +1,45 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+
+// Store log messages in circular buffer.
+// Useful for storing debug data in case of error/warning happens.
+
+namespace spdlog {
+namespace details {
+class SPDLOG_API backtracer {
+ mutable std::mutex mutex_;
+ std::atomic enabled_{false};
+ circular_q messages_;
+
+public:
+ backtracer() = default;
+ backtracer(const backtracer &other);
+
+ backtracer(backtracer &&other) SPDLOG_NOEXCEPT;
+ backtracer &operator=(backtracer other);
+
+ void enable(size_t size);
+ void disable();
+ bool enabled() const;
+ void push_back(const log_msg &msg);
+ bool empty() const;
+
+ // pop all items in the q and apply the given fun on each of them.
+ void foreach_pop(std::function fun);
+};
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "backtracer-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/circular_q.h b/csrc/vnpu_offload/include/spdlog/details/circular_q.h
new file mode 100644
index 00000000..29e9d255
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/circular_q.h
@@ -0,0 +1,115 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+// circular q view of std::vector.
+#pragma once
+
+#include
+#include
+
+#include "spdlog/common.h"
+
+namespace spdlog {
+namespace details {
+template
+class circular_q {
+ size_t max_items_ = 0;
+ typename std::vector::size_type head_ = 0;
+ typename std::vector::size_type tail_ = 0;
+ size_t overrun_counter_ = 0;
+ std::vector v_;
+
+public:
+ using value_type = T;
+
+ // empty ctor - create a disabled queue with no elements allocated at all
+ circular_q() = default;
+
+ explicit circular_q(size_t max_items)
+ : max_items_(max_items + 1) // one item is reserved as marker for full q
+ ,
+ v_(max_items_) {}
+
+ circular_q(const circular_q &) = default;
+ circular_q &operator=(const circular_q &) = default;
+
+ // move cannot be default,
+ // since we need to reset head_, tail_, etc to zero in the moved object
+ circular_q(circular_q &&other) SPDLOG_NOEXCEPT { copy_moveable(std::move(other)); }
+
+ circular_q &operator=(circular_q &&other) SPDLOG_NOEXCEPT {
+ copy_moveable(std::move(other));
+ return *this;
+ }
+
+ // push back, overrun (oldest) item if no room left
+ void push_back(T &&item) {
+ if (max_items_ > 0) {
+ v_[tail_] = std::move(item);
+ tail_ = (tail_ + 1) % max_items_;
+
+ if (tail_ == head_) // overrun last item if full
+ {
+ head_ = (head_ + 1) % max_items_;
+ ++overrun_counter_;
+ }
+ }
+ }
+
+ // Return reference to the front item.
+ // If there are no elements in the container, the behavior is undefined.
+ const T &front() const { return v_[head_]; }
+
+ T &front() { return v_[head_]; }
+
+ // Return number of elements actually stored
+ size_t size() const {
+ if (tail_ >= head_) {
+ return tail_ - head_;
+ } else {
+ return max_items_ - (head_ - tail_);
+ }
+ }
+
+ // Return const reference to item by index.
+ // If index is out of range 0…size()-1, the behavior is undefined.
+ const T &at(size_t i) const {
+ assert(i < size());
+ return v_[(head_ + i) % max_items_];
+ }
+
+ // Pop item from front.
+ // If there are no elements in the container, the behavior is undefined.
+ void pop_front() { head_ = (head_ + 1) % max_items_; }
+
+ bool empty() const { return tail_ == head_; }
+
+ bool full() const {
+ // head is ahead of the tail by 1
+ if (max_items_ > 0) {
+ return ((tail_ + 1) % max_items_) == head_;
+ }
+ return false;
+ }
+
+ size_t overrun_counter() const { return overrun_counter_; }
+
+ void reset_overrun_counter() { overrun_counter_ = 0; }
+
+private:
+ // copy from other&& and reset it to disabled state
+ void copy_moveable(circular_q &&other) SPDLOG_NOEXCEPT {
+ max_items_ = other.max_items_;
+ head_ = other.head_;
+ tail_ = other.tail_;
+ overrun_counter_ = other.overrun_counter_;
+ v_ = std::move(other.v_);
+
+ // put &&other in disabled, but valid state
+ other.max_items_ = 0;
+ other.head_ = other.tail_ = 0;
+ other.overrun_counter_ = 0;
+ }
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/console_globals.h b/csrc/vnpu_offload/include/spdlog/details/console_globals.h
new file mode 100644
index 00000000..9c552106
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/console_globals.h
@@ -0,0 +1,28 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+struct console_mutex {
+ using mutex_t = std::mutex;
+ static mutex_t &mutex() {
+ static mutex_t s_mutex;
+ return s_mutex;
+ }
+};
+
+struct console_nullmutex {
+ using mutex_t = null_mutex;
+ static mutex_t &mutex() {
+ static mutex_t s_mutex;
+ return s_mutex;
+ }
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h b/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h
new file mode 100644
index 00000000..c7260ec3
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/file_helper-inl.h
@@ -0,0 +1,151 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE file_helper::file_helper(const file_event_handlers &event_handlers)
+ : event_handlers_(event_handlers) {}
+
+SPDLOG_INLINE file_helper::~file_helper() { close(); }
+
+SPDLOG_INLINE void file_helper::open(const filename_t &fname, bool truncate) {
+ close();
+ filename_ = fname;
+
+ auto *mode = SPDLOG_FILENAME_T("ab");
+ auto *trunc_mode = SPDLOG_FILENAME_T("wb");
+
+ if (event_handlers_.before_open) {
+ event_handlers_.before_open(filename_);
+ }
+ for (int tries = 0; tries < open_tries_; ++tries) {
+ // create containing folder if not exists already.
+ os::create_dir(os::dir_name(fname));
+ if (truncate) {
+ // Truncate by opening-and-closing a tmp file in "wb" mode, always
+ // opening the actual log-we-write-to in "ab" mode, since that
+ // interacts more politely with eternal processes that might
+ // rotate/truncate the file underneath us.
+ std::FILE *tmp;
+ if (os::fopen_s(&tmp, fname, trunc_mode)) {
+ continue;
+ }
+ std::fclose(tmp);
+ }
+ if (!os::fopen_s(&fd_, fname, mode)) {
+ if (event_handlers_.after_open) {
+ event_handlers_.after_open(filename_, fd_);
+ }
+ return;
+ }
+
+ details::os::sleep_for_millis(open_interval_);
+ }
+
+ throw_spdlog_ex("Failed opening file " + os::filename_to_str(filename_) + " for writing",
+ errno);
+}
+
+SPDLOG_INLINE void file_helper::reopen(bool truncate) {
+ if (filename_.empty()) {
+ throw_spdlog_ex("Failed re opening file - was not opened before");
+ }
+ this->open(filename_, truncate);
+}
+
+SPDLOG_INLINE void file_helper::flush() {
+ if (std::fflush(fd_) != 0) {
+ throw_spdlog_ex("Failed flush to file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE void file_helper::sync() {
+ if (!os::fsync(fd_)) {
+ throw_spdlog_ex("Failed to fsync file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE void file_helper::close() {
+ if (fd_ != nullptr) {
+ if (event_handlers_.before_close) {
+ event_handlers_.before_close(filename_, fd_);
+ }
+
+ std::fclose(fd_);
+ fd_ = nullptr;
+
+ if (event_handlers_.after_close) {
+ event_handlers_.after_close(filename_);
+ }
+ }
+}
+
+SPDLOG_INLINE void file_helper::write(const memory_buf_t &buf) {
+ if (fd_ == nullptr) return;
+ size_t msg_size = buf.size();
+ auto data = buf.data();
+
+ if (!details::os::fwrite_bytes(data, msg_size, fd_)) {
+ throw_spdlog_ex("Failed writing to file " + os::filename_to_str(filename_), errno);
+ }
+}
+
+SPDLOG_INLINE size_t file_helper::size() const {
+ if (fd_ == nullptr) {
+ throw_spdlog_ex("Cannot use size() on closed file " + os::filename_to_str(filename_));
+ }
+ return os::filesize(fd_);
+}
+
+SPDLOG_INLINE const filename_t &file_helper::filename() const { return filename_; }
+
+//
+// return file path and its extension:
+//
+// "mylog.txt" => ("mylog", ".txt")
+// "mylog" => ("mylog", "")
+// "mylog." => ("mylog.", "")
+// "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
+//
+// the starting dot in filenames is ignored (hidden files):
+//
+// ".mylog" => (".mylog". "")
+// "my_folder/.mylog" => ("my_folder/.mylog", "")
+// "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
+SPDLOG_INLINE std::tuple file_helper::split_by_extension(
+ const filename_t &fname) {
+ auto ext_index = fname.rfind('.');
+
+ // no valid extension found - return whole path and empty string as
+ // extension
+ if (ext_index == filename_t::npos || ext_index == 0 || ext_index == fname.size() - 1) {
+ return std::make_tuple(fname, filename_t());
+ }
+
+ // treat cases like "/etc/rc.d/somelogfile or "/abc/.hiddenfile"
+ auto folder_index = fname.find_last_of(details::os::folder_seps_filename);
+ if (folder_index != filename_t::npos && folder_index >= ext_index - 1) {
+ return std::make_tuple(fname, filename_t());
+ }
+
+ // finally - return a valid base and extension tuple
+ return std::make_tuple(fname.substr(0, ext_index), fname.substr(ext_index));
+}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/file_helper.h b/csrc/vnpu_offload/include/spdlog/details/file_helper.h
new file mode 100644
index 00000000..aba0be47
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/file_helper.h
@@ -0,0 +1,61 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+// Helper class for file sinks.
+// When failing to open a file, retry several times(5) with a delay interval(10 ms).
+// Throw spdlog_ex exception on errors.
+
+class SPDLOG_API file_helper {
+public:
+ file_helper() = default;
+ explicit file_helper(const file_event_handlers &event_handlers);
+
+ file_helper(const file_helper &) = delete;
+ file_helper &operator=(const file_helper &) = delete;
+ ~file_helper();
+
+ void open(const filename_t &fname, bool truncate = false);
+ void reopen(bool truncate);
+ void flush();
+ void sync();
+ void close();
+ void write(const memory_buf_t &buf);
+ size_t size() const;
+ const filename_t &filename() const;
+
+ //
+ // return file path and its extension:
+ //
+ // "mylog.txt" => ("mylog", ".txt")
+ // "mylog" => ("mylog", "")
+ // "mylog." => ("mylog.", "")
+ // "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt")
+ //
+ // the starting dot in filenames is ignored (hidden files):
+ //
+ // ".mylog" => (".mylog". "")
+ // "my_folder/.mylog" => ("my_folder/.mylog", "")
+ // "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt")
+ static std::tuple split_by_extension(const filename_t &fname);
+
+private:
+ const int open_tries_ = 5;
+ const unsigned int open_interval_ = 10;
+ std::FILE *fd_{nullptr};
+ filename_t filename_;
+ file_event_handlers event_handlers_;
+};
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "file_helper-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h b/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h
new file mode 100644
index 00000000..b629b894
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/fmt_helper.h
@@ -0,0 +1,141 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SPDLOG_USE_STD_FORMAT
+#include
+#include
+#endif
+
+// Some fmt helpers to efficiently format and pad ints and strings
+namespace spdlog {
+namespace details {
+namespace fmt_helper {
+
+inline void append_string_view(spdlog::string_view_t view, memory_buf_t &dest) {
+ auto *buf_ptr = view.data();
+ dest.append(buf_ptr, buf_ptr + view.size());
+}
+
+#ifdef SPDLOG_USE_STD_FORMAT
+template
+inline void append_int(T n, memory_buf_t &dest) {
+ // Buffer should be large enough to hold all digits (digits10 + 1) and a sign
+ SPDLOG_CONSTEXPR const auto BUF_SIZE = std::numeric_limits::digits10 + 2;
+ char buf[BUF_SIZE];
+
+ auto [ptr, ec] = std::to_chars(buf, buf + BUF_SIZE, n, 10);
+ if (ec == std::errc()) {
+ dest.append(buf, ptr);
+ } else {
+ throw_spdlog_ex("Failed to format int", static_cast(ec));
+ }
+}
+#else
+template
+inline void append_int(T n, memory_buf_t &dest) {
+ fmt::format_int i(n);
+ dest.append(i.data(), i.data() + i.size());
+}
+#endif
+
+template
+SPDLOG_CONSTEXPR_FUNC unsigned int count_digits_fallback(T n) {
+ // taken from fmt: https://github.com/fmtlib/fmt/blob/8.0.1/include/fmt/format.h#L899-L912
+ unsigned int count = 1;
+ for (;;) {
+ // Integer division is slow so do it for a group of four digits instead
+ // of for every digit. The idea comes from the talk by Alexandrescu
+ // "Three Optimization Tips for C++". See speed-test for a comparison.
+ if (n < 10) return count;
+ if (n < 100) return count + 1;
+ if (n < 1000) return count + 2;
+ if (n < 10000) return count + 3;
+ n /= 10000u;
+ count += 4;
+ }
+}
+
+template
+inline unsigned int count_digits(T n) {
+ using count_type =
+ typename std::conditional<(sizeof(T) > sizeof(uint32_t)), uint64_t, uint32_t>::type;
+#ifdef SPDLOG_USE_STD_FORMAT
+ return count_digits_fallback(static_cast(n));
+#else
+ return static_cast(fmt::
+// fmt 7.0.0 renamed the internal namespace to detail.
+// See: https://github.com/fmtlib/fmt/issues/1538
+#if FMT_VERSION < 70000
+ internal
+#else
+ detail
+#endif
+ ::count_digits(static_cast(n)));
+#endif
+}
+
+inline void pad2(int n, memory_buf_t &dest) {
+ if (n >= 0 && n < 100) // 0-99
+ {
+ dest.push_back(static_cast('0' + n / 10));
+ dest.push_back(static_cast('0' + n % 10));
+ } else // unlikely, but just in case, let fmt deal with it
+ {
+ fmt_lib::format_to(std::back_inserter(dest), SPDLOG_FMT_STRING("{:02}"), n);
+ }
+}
+
+template
+inline void pad_uint(T n, unsigned int width, memory_buf_t &dest) {
+ static_assert(std::is_unsigned::value, "pad_uint must get unsigned T");
+ for (auto digits = count_digits(n); digits < width; digits++) {
+ dest.push_back('0');
+ }
+ append_int(n, dest);
+}
+
+template
+inline void pad3(T n, memory_buf_t &dest) {
+ static_assert(std::is_unsigned::value, "pad3 must get unsigned T");
+ if (n < 1000) {
+ dest.push_back(static_cast(n / 100 + '0'));
+ n = n % 100;
+ dest.push_back(static_cast((n / 10) + '0'));
+ dest.push_back(static_cast((n % 10) + '0'));
+ } else {
+ append_int(n, dest);
+ }
+}
+
+template
+inline void pad6(T n, memory_buf_t &dest) {
+ pad_uint(n, 6, dest);
+}
+
+template
+inline void pad9(T n, memory_buf_t &dest) {
+ pad_uint(n, 9, dest);
+}
+
+// return fraction of a second of the given time_point.
+// e.g.
+// fraction(tp) -> will return the millis part of the second
+template
+inline ToDuration time_fraction(log_clock::time_point tp) {
+ using std::chrono::duration_cast;
+ using std::chrono::seconds;
+ auto duration = tp.time_since_epoch();
+ auto secs = duration_cast(duration);
+ return duration_cast(duration) - duration_cast(secs);
+}
+
+} // namespace fmt_helper
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h
new file mode 100644
index 00000000..3a23dcf1
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg-inl.h
@@ -0,0 +1,44 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE log_msg::log_msg(spdlog::log_clock::time_point log_time,
+ spdlog::source_loc loc,
+ string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : logger_name(a_logger_name),
+ level(lvl),
+ time(log_time)
+#ifndef SPDLOG_NO_THREAD_ID
+ ,
+ thread_id(os::thread_id())
+#endif
+ ,
+ source(loc),
+ payload(msg) {
+}
+
+SPDLOG_INLINE log_msg::log_msg(spdlog::source_loc loc,
+ string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : log_msg(os::now(), loc, a_logger_name, lvl, msg) {}
+
+SPDLOG_INLINE log_msg::log_msg(string_view_t a_logger_name,
+ spdlog::level::level_enum lvl,
+ spdlog::string_view_t msg)
+ : log_msg(os::now(), source_loc{}, a_logger_name, lvl, msg) {}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg.h b/csrc/vnpu_offload/include/spdlog/details/log_msg.h
new file mode 100644
index 00000000..64b4bf6f
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg.h
@@ -0,0 +1,40 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+
+namespace spdlog {
+namespace details {
+struct SPDLOG_API log_msg {
+ log_msg() = default;
+ log_msg(log_clock::time_point log_time,
+ source_loc loc,
+ string_view_t logger_name,
+ level::level_enum lvl,
+ string_view_t msg);
+ log_msg(source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg);
+ log_msg(string_view_t logger_name, level::level_enum lvl, string_view_t msg);
+ log_msg(const log_msg &other) = default;
+ log_msg &operator=(const log_msg &other) = default;
+
+ string_view_t logger_name;
+ level::level_enum level{level::off};
+ log_clock::time_point time;
+ size_t thread_id{0};
+
+ // wrapping the formatted text with color (updated by pattern_formatter).
+ mutable size_t color_range_start{0};
+ mutable size_t color_range_end{0};
+
+ source_loc source;
+ string_view_t payload;
+};
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "log_msg-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h
new file mode 100644
index 00000000..45c33835
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer-inl.h
@@ -0,0 +1,54 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+namespace spdlog {
+namespace details {
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg &orig_msg)
+ : log_msg{orig_msg} {
+ buffer.append(logger_name.begin(), logger_name.end());
+ buffer.append(payload.begin(), payload.end());
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg_buffer &other)
+ : log_msg{other} {
+ buffer.append(logger_name.begin(), logger_name.end());
+ buffer.append(payload.begin(), payload.end());
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer::log_msg_buffer(log_msg_buffer &&other) SPDLOG_NOEXCEPT
+ : log_msg{other},
+ buffer{std::move(other.buffer)} {
+ update_string_views();
+}
+
+SPDLOG_INLINE log_msg_buffer &log_msg_buffer::operator=(const log_msg_buffer &other) {
+ log_msg::operator=(other);
+ buffer.clear();
+ buffer.append(other.buffer.data(), other.buffer.data() + other.buffer.size());
+ update_string_views();
+ return *this;
+}
+
+SPDLOG_INLINE log_msg_buffer &log_msg_buffer::operator=(log_msg_buffer &&other) SPDLOG_NOEXCEPT {
+ log_msg::operator=(other);
+ buffer = std::move(other.buffer);
+ update_string_views();
+ return *this;
+}
+
+SPDLOG_INLINE void log_msg_buffer::update_string_views() {
+ logger_name = string_view_t{buffer.data(), logger_name.size()};
+ payload = string_view_t{buffer.data() + logger_name.size(), payload.size()};
+}
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h
new file mode 100644
index 00000000..c926cfaa
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/log_msg_buffer.h
@@ -0,0 +1,32 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+
+namespace spdlog {
+namespace details {
+
+// Extend log_msg with internal buffer to store its payload.
+// This is needed since log_msg holds string_views that points to stack data.
+
+class SPDLOG_API log_msg_buffer : public log_msg {
+ memory_buf_t buffer;
+ void update_string_views();
+
+public:
+ log_msg_buffer() = default;
+ explicit log_msg_buffer(const log_msg &orig_msg);
+ log_msg_buffer(const log_msg_buffer &other);
+ log_msg_buffer(log_msg_buffer &&other) SPDLOG_NOEXCEPT;
+ log_msg_buffer &operator=(const log_msg_buffer &other);
+ log_msg_buffer &operator=(log_msg_buffer &&other) SPDLOG_NOEXCEPT;
+};
+
+} // namespace details
+} // namespace spdlog
+
+#ifdef SPDLOG_HEADER_ONLY
+#include "log_msg_buffer-inl.h"
+#endif
diff --git a/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h b/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h
new file mode 100644
index 00000000..f153f6c2
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/mpmc_blocking_q.h
@@ -0,0 +1,177 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+// multi producer-multi consumer blocking queue.
+// enqueue(..) - will block until room found to put the new message.
+// enqueue_nowait(..) - enqueue immediately. overruns oldest message if no
+// room left.
+// dequeue_for(..) - will block until the queue is not empty or timeout have
+// passed.
+
+#include
+
+#include
+#include
+#include
+
+namespace spdlog {
+namespace details {
+
+template
+class mpmc_blocking_queue {
+public:
+ using item_type = T;
+ explicit mpmc_blocking_queue(size_t max_items)
+ : q_(max_items) {}
+
+#ifndef __MINGW32__
+ // try to enqueue and block if no room left
+ void enqueue(T &&item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ pop_cv_.wait(lock, [this] { return !this->q_.full(); });
+ q_.push_back(std::move(item));
+ }
+ push_cv_.notify_one();
+ }
+
+ // enqueue immediately. overrun oldest message in the queue if no room left.
+ void enqueue_nowait(T &&item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ q_.push_back(std::move(item));
+ }
+ push_cv_.notify_one();
+ }
+
+ void enqueue_if_have_room(T &&item) {
+ bool pushed = false;
+ {
+ std::unique_lock lock(queue_mutex_);
+ if (!q_.full()) {
+ q_.push_back(std::move(item));
+ pushed = true;
+ }
+ }
+
+ if (pushed) {
+ push_cv_.notify_one();
+ } else {
+ ++discard_counter_;
+ }
+ }
+
+ // dequeue with a timeout.
+ // Return true, if succeeded dequeue item, false otherwise
+ bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) {
+ return false;
+ }
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ }
+ pop_cv_.notify_one();
+ return true;
+ }
+
+ // blocking dequeue without a timeout.
+ void dequeue(T &popped_item) {
+ {
+ std::unique_lock lock(queue_mutex_);
+ push_cv_.wait(lock, [this] { return !this->q_.empty(); });
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ }
+ pop_cv_.notify_one();
+ }
+
+#else
+ // apparently mingw deadlocks if the mutex is released before cv.notify_one(),
+ // so release the mutex at the very end each function.
+
+ // try to enqueue and block if no room left
+ void enqueue(T &&item) {
+ std::unique_lock lock(queue_mutex_);
+ pop_cv_.wait(lock, [this] { return !this->q_.full(); });
+ q_.push_back(std::move(item));
+ push_cv_.notify_one();
+ }
+
+ // enqueue immediately. overrun oldest message in the queue if no room left.
+ void enqueue_nowait(T &&item) {
+ std::unique_lock lock(queue_mutex_);
+ q_.push_back(std::move(item));
+ push_cv_.notify_one();
+ }
+
+ void enqueue_if_have_room(T &&item) {
+ bool pushed = false;
+ std::unique_lock lock(queue_mutex_);
+ if (!q_.full()) {
+ q_.push_back(std::move(item));
+ pushed = true;
+ }
+
+ if (pushed) {
+ push_cv_.notify_one();
+ } else {
+ ++discard_counter_;
+ }
+ }
+
+ // dequeue with a timeout.
+ // Return true, if succeeded dequeue item, false otherwise
+ bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) {
+ std::unique_lock lock(queue_mutex_);
+ if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) {
+ return false;
+ }
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ pop_cv_.notify_one();
+ return true;
+ }
+
+ // blocking dequeue without a timeout.
+ void dequeue(T &popped_item) {
+ std::unique_lock lock(queue_mutex_);
+ push_cv_.wait(lock, [this] { return !this->q_.empty(); });
+ popped_item = std::move(q_.front());
+ q_.pop_front();
+ pop_cv_.notify_one();
+ }
+
+#endif
+
+ size_t overrun_counter() {
+ std::lock_guard lock(queue_mutex_);
+ return q_.overrun_counter();
+ }
+
+ size_t discard_counter() { return discard_counter_.load(std::memory_order_relaxed); }
+
+ size_t size() {
+ std::lock_guard lock(queue_mutex_);
+ return q_.size();
+ }
+
+ void reset_overrun_counter() {
+ std::lock_guard lock(queue_mutex_);
+ q_.reset_overrun_counter();
+ }
+
+ void reset_discard_counter() { discard_counter_.store(0, std::memory_order_relaxed); }
+
+private:
+ std::mutex queue_mutex_;
+ std::condition_variable push_cv_;
+ std::condition_variable pop_cv_;
+ spdlog::details::circular_q q_;
+ std::atomic discard_counter_{0};
+};
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/null_mutex.h b/csrc/vnpu_offload/include/spdlog/details/null_mutex.h
new file mode 100644
index 00000000..e3b32204
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/null_mutex.h
@@ -0,0 +1,35 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include
+#include
+// null, no cost dummy "mutex" and dummy "atomic" int
+
+namespace spdlog {
+namespace details {
+struct null_mutex {
+ void lock() const {}
+ void unlock() const {}
+};
+
+struct null_atomic_int {
+ int value;
+ null_atomic_int() = default;
+
+ explicit null_atomic_int(int new_value)
+ : value(new_value) {}
+
+ int load(std::memory_order = std::memory_order_relaxed) const { return value; }
+
+ void store(int new_value, std::memory_order = std::memory_order_relaxed) { value = new_value; }
+
+ int exchange(int new_value, std::memory_order = std::memory_order_relaxed) {
+ std::swap(new_value, value);
+ return new_value; // return value before the call
+ }
+};
+
+} // namespace details
+} // namespace spdlog
diff --git a/csrc/vnpu_offload/include/spdlog/details/os-inl.h b/csrc/vnpu_offload/include/spdlog/details/os-inl.h
new file mode 100644
index 00000000..2acded09
--- /dev/null
+++ b/csrc/vnpu_offload/include/spdlog/details/os-inl.h
@@ -0,0 +1,572 @@
+// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
+// Distributed under the MIT License (http://opensource.org/licenses/MIT)
+
+#pragma once
+
+#ifndef SPDLOG_HEADER_ONLY
+#include
+#endif
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include