diff --git a/.github/workflows/_e2e_singlecard.yml b/.github/workflows/_e2e_singlecard.yml
deleted file mode 100644
index 2facfa4..0000000
--- a/.github/workflows/_e2e_singlecard.yml
+++ /dev/null
@@ -1,141 +0,0 @@
-name: e2e-test
-
-on:
-  workflow_call:
-  pull_request:
-    branches: [main]
-    types: [opened, synchronize, reopened]
-  push:
-    branches: [main]
-
-concurrency:
-  group: e2e-singlecard
-  cancel-in-progress: false
-
-jobs:
-  e2e:
-    name: e2e-test-singlecard
-    runs-on:
-      - self-hosted
-      - Linux
-      - X64
-
-    steps:
-      - name: Checkout PR code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Verify PR workspace
-        run: |
-          echo "===== WORKSPACE ====="
-          pwd
-          ls -l
-          echo "===== GIT INFO ====="
-          git rev-parse HEAD
-          git log -1 --oneline
-          git status --porcelain
-
-      - name: Start docker
-        run: |
-          bash ci/scripts/docker/start_docker.sh
-
-      - name: Install enviroments
-        run: |
-          bash ci/scripts/env/install_env.sh
-
-      - name: Start vLLM server
-        run: |
-          bash ci/scripts/server/start_vllm.sh
-
-      - name: Wait for vLLM ready
-        run: |
-          bash ci/scripts/server/wait_vllm.sh
-
-      - name: Accuracy testing
-        run: |
-          bash ci/scripts/tests/run_accuracy.sh
-
-      - name: Performance testing
-        run: |
-          docker exec aiak-e2e-singlecard bash -lc '
-            source ci/scripts/common/env.sh
-            source ci/scripts/common/log.sh
-            #!/bin/bash
-            # ==========================================
-            # 1. Define test dimensions
-            #    (can be easily extended, e.g., add "2048x2048")
-            # ==========================================
-            DIMENSIONS=("1024x1024")
-
-            # ==========================================
-            # 2. Define concurrency generation logic (densification strategy)
-            # ============x==============================
-            # Use array concatenation to combine different density ranges
-            # Syntax: seq [start] [step] [end]
-            CONCURRENCIES=(1)
-
-            # ==========================================
-            # 3. Automatically assemble test cases
-            # ==========================================
-            TEST_COMBINATIONS=() # Initialize empty array
-
-            # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions
-            for bs in "${CONCURRENCIES[@]}"; do    # ← outer loop: concurrency
-                for dim in "${DIMENSIONS[@]}"; do  # ← inner loop: dimensions
-                    case_str="${bs}x${dim}"
-                    TEST_COMBINATIONS+=("$case_str")
-                done
-            done
-
-            # ==========================================
-            # 4. (Optional) Print generated cases for sanity check
-            # ==========================================
-            echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
-            echo "${TEST_COMBINATIONS[@]}" # Uncomment if you want to print all cases
-
-            # Progress counters
-            TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
-            CURRENT_TEST=0
-
-            # Iterate over all test combinations
-            for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
-                # Parse parameters from combination string
-                NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1)
-                INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2)
-                OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3)
-
-                # Update progress
-                CURRENT_TEST=$((CURRENT_TEST + 1))
-
-                echo "=========================================================="
-                echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
-                echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
-                echo "=========================================================="
-
-                #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
-
-                # Run benchmark
-                python3 -m vllm.entrypoints.cli.main bench serve \
-                    --host 127.0.0.1 \
-                    --port ${VLLM_PORT:-8356}\
-                    --backend vllm \
-                    --model ${SERVED_MODEL_NAME:-Qwen3-8B} \
-                    --dataset-name random \
-                    --num-prompts $NUM_PROMPTS \
-                    --random-input-len $INPUT_LEN \
-                    --random-output-len $OUTPUT_LEN \
-                    --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
-                    --ignore-eos
-            done
-          '
-      
-      - name: Set permissions
-        if: always()
-        run: |
-          bash ci/scripts/docker/set_permissions.sh
-
-      - name: Cleanup docker
-        if: always()
-        run: |
-          bash ci/scripts/docker/stop_docker.sh
diff --git a/.github/workflows/_pylint.yml b/.github/workflows/pylint-check.yml
similarity index 98%
rename from .github/workflows/_pylint.yml
rename to .github/workflows/pylint-check.yml
index b8fcfdd..634fdbb 100644
--- a/.github/workflows/_pylint.yml
+++ b/.github/workflows/pylint-check.yml
@@ -6,7 +6,7 @@ on:
     branches: [ main ]
 
 jobs:
-  lint:
+  pylint-check:
     runs-on: ubuntu-latest
 
     steps:
diff --git a/.github/workflows/run-e2e.yml b/.github/workflows/run-e2e.yml
index 91ffc66..56bb8cf 100644
--- a/.github/workflows/run-e2e.yml
+++ b/.github/workflows/run-e2e.yml
@@ -1,8 +1,141 @@
-name: run-e2e-test
+# name: e2e-test
 
-on:
-  workflow_dispatch:   
+# on:
+#   workflow_call:
+#   pull_request:
+#     branches: [main]
+#     types: [opened, synchronize, reopened]
+#   push:
+#     branches: [main]
 
-jobs:
-  call-e2e:
-    uses: ./.github/workflows/_e2e_singlecard.yml
+# concurrency:
+#   group: e2e-singlecard
+#   cancel-in-progress: false
+
+# jobs:
+#   e2e:
+#     name: e2e-test-singlecard
+#     runs-on:
+#       - self-hosted
+#       - Linux
+#       - X64
+
+#     steps:
+#       - name: Checkout PR code
+#         uses: actions/checkout@v4
+#         with:
+#           fetch-depth: 0
+
+#       - name: Verify PR workspace
+#         run: |
+#           echo "===== WORKSPACE ====="
+#           pwd
+#           ls -l
+#           echo "===== GIT INFO ====="
+#           git rev-parse HEAD
+#           git log -1 --oneline
+#           git status --porcelain
+
+#       - name: Start docker
+#         run: |
+#           bash ci/scripts/docker/start_docker.sh
+
+#       - name: Install environments
+#         run: |
+#           bash ci/scripts/env/install_env.sh
+
+#       - name: Start vLLM server
+#         run: |
+#           bash ci/scripts/server/start_vllm.sh
+
+#       - name: Wait for vLLM ready
+#         run: |
+#           bash ci/scripts/server/wait_vllm.sh
+
+#       - name: Accuracy testing
+#         run: |
+#           bash ci/scripts/tests/run_accuracy.sh
+
+#       - name: Performance testing
+#         run: |
+#           docker exec aiak-e2e-singlecard bash -lc '
+#             source ci/scripts/common/env.sh
+#             source ci/scripts/common/log.sh
+#             #!/bin/bash
+#             # ==========================================
+#             # 1. Define test dimensions
+#             #    (can be easily extended, e.g., add "2048x2048")
+#             # ==========================================
+#             DIMENSIONS=("1024x1024")
+
+#             # ==========================================
+#             # 2. Define concurrency generation logic (densification strategy)
+#             # ==========================================
+#             # Use array concatenation to combine different density ranges
+#             # Syntax: seq [start] [step] [end]
+#             CONCURRENCIES=(1)
+
+#             # ==========================================
+#             # 3. Automatically assemble test cases
+#             # ==========================================
+#             TEST_COMBINATIONS=() # Initialize empty array
+
+#             # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions
+#             for bs in "${CONCURRENCIES[@]}"; do    # ← outer loop: concurrency
+#                 for dim in "${DIMENSIONS[@]}"; do  # ← inner loop: dimensions
+#                     case_str="${bs}x${dim}"
+#                     TEST_COMBINATIONS+=("$case_str")
+#                 done
+#             done
+
+#             # ==========================================
+#             # 4. (Optional) Print generated cases for sanity check
+#             # ==========================================
+#             echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
+#             echo "${TEST_COMBINATIONS[@]}" # Print every generated case on one line
+
+#             # Progress counters
+#             TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
+#             CURRENT_TEST=0
+
+#             # Iterate over all test combinations
+#             for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
+#                 # Parse parameters from combination string
+#                 NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1)
+#                 INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2)
+#                 OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3)
+
+#                 # Update progress
+#                 CURRENT_TEST=$((CURRENT_TEST + 1))
+
+#                 echo "=========================================================="
+#                 echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
+#                 echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
+#                 echo "=========================================================="
+
+#                 #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
+
+#                 # Run benchmark
+#                 python3 -m vllm.entrypoints.cli.main bench serve \
+#                     --host 127.0.0.1 \
+#                     --port ${VLLM_PORT:-8356}\
+#                     --backend vllm \
+#                     --model ${SERVED_MODEL_NAME:-Qwen3-8B} \
+#                     --dataset-name random \
+#                     --num-prompts $NUM_PROMPTS \
+#                     --random-input-len $INPUT_LEN \
+#                     --random-output-len $OUTPUT_LEN \
+#                     --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
+#                     --ignore-eos
+#             done
+#           '
+      
+#       - name: Set permissions
+#         if: always()
+#         run: |
+#           bash ci/scripts/docker/set_permissions.sh
+
+#       - name: Cleanup docker
+#         if: always()
+#         run: |
+#           bash ci/scripts/docker/stop_docker.sh
diff --git a/.github/workflows/ut.yml b/.github/workflows/ut.yml
index 8ea5c23..147a23a 100644
--- a/.github/workflows/ut.yml
+++ b/.github/workflows/ut.yml
@@ -1,57 +1,53 @@
-name: Unit Test
+# name: Unit Test
 
-on:
-  pull_request:
-    branches:
-      - main
+# on:
+#   pull_request:
+#     branches:
+#       - main
 
-jobs:
-  test-kunlun:
-    runs-on: 
-      labels:
-        - self-hosted
-        - Linux
-        - X64
-        - test-1 # Actions Runner Label
+# jobs:
+#   test-kunlun:
+#     runs-on: 
+#       labels:
+#         - self-hosted
+#         - Linux
+#         - X64
+#         - test-1 # Actions Runner Label
         
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v4
+#     steps:
+#       - name: Checkout Code
+#         uses: actions/checkout@v4
 
-      - name: Install vLLM-Kunlun Dependencies
-        run: |
-          pip install -r requirements.txt
+#       - name: Install vLLM-Kunlun Dependencies
+#         run: |
+#           pip install -r requirements.txt
           
-          python setup.py build
-          python setup.py develop
+#           python setup.py build
+#           python setup.py develop
 
-          # Install the KL3-customized build of PyTorch
-          wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7
-          bash xpytorch-cp310-torch251-ubuntu2004-x64.run
+#           # Install the KL3-customized build of PyTorch
+#           wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7
+#           bash xpytorch-cp310-torch251-ubuntu2004-x64.run
 
-          # Install custom ops
-          pip install "https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd"
+#           # Install custom ops
+#           pip install "https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd"
 
-          # Install the KLX3 custom Triton build
-          pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl"
+#           # Install the KLX3 custom Triton build
+#           pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl"
 
-          # Install the AIAK custom ops library
-          pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl"
+#           # Install the AIAK custom ops library
+#           pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl"
 
-      - name: Install vLLM
-        run: |
-          pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/
+#       - name: Install vLLM
+#         run: |
+#           pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/
       
-      - name: Install Test Dependencies
-        run: |
-          pip install pytest
-
-      - name: Run Unit Test
-        run: |
-          echo "Running full suite..."
-          export XPU_VISIBLE_DEVICES=1
-          pytest \
-          -vs \
-          --cov=vllm_kunlun \
-          --cov-report=term-missing \
-          -p no:warnings tests/ut
\ No newline at end of file
+#       - name: Run Unit Test
+#         run: |
+#           echo "Running full suite..."
+#           export XPU_VISIBLE_DEVICES=1
+#           pytest \
+#           -vs \
+#           --cov=vllm_kunlun \
+#           --cov-report=term-missing \
+#           -p no:warnings tests/ut
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 4f6b9eb..d38b1e5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,8 +29,10 @@ uvloop==0.21.0
 prometheus-fastapi-instrumentator==7.1.0
 transformers==4.57.0
 
-# 基础构建依赖
+# Basic build, test, and tooling dependencies
 hatchling>=1.25
 build>=1.0.3
 pytest
+pytest-cov
 mock
+pre-commit
\ No newline at end of file