diff --git a/.github/workflows/_e2e_singlecard.yml b/.github/workflows/_e2e_singlecard.yml deleted file mode 100644 index 2facfa4..0000000 --- a/.github/workflows/_e2e_singlecard.yml +++ /dev/null @@ -1,141 +0,0 @@ -name: e2e-test - -on: - workflow_call: - pull_request: - branches: [main] - types: [opened, synchronize, reopened] - push: - branches: [main] - -concurrency: - group: e2e-singlecard - cancel-in-progress: false - -jobs: - e2e: - name: e2e-test-singlecard - runs-on: - - self-hosted - - Linux - - X64 - - steps: - - name: Checkout PR code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Verify PR workspace - run: | - echo "===== WORKSPACE =====" - pwd - ls -l - echo "===== GIT INFO =====" - git rev-parse HEAD - git log -1 --oneline - git status --porcelain - - - name: Start docker - run: | - bash ci/scripts/docker/start_docker.sh - - - name: Install enviroments - run: | - bash ci/scripts/env/install_env.sh - - - name: Start vLLM server - run: | - bash ci/scripts/server/start_vllm.sh - - - name: Wait for vLLM ready - run: | - bash ci/scripts/server/wait_vllm.sh - - - name: Accuracy testing - run: | - bash ci/scripts/tests/run_accuracy.sh - - - name: Performance testing - run: | - docker exec aiak-e2e-singlecard bash -lc ' - source ci/scripts/common/env.sh - source ci/scripts/common/log.sh - #!/bin/bash - # ========================================== - # 1. Define test dimensions - # (can be easily extended, e.g., add "2048x2048") - # ========================================== - DIMENSIONS=("1024x1024") - - # ========================================== - # 2. Define concurrency generation logic (densification strategy) - # ============x============================== - # Use array concatenation to combine different density ranges - # Syntax: seq [start] [step] [end] - CONCURRENCIES=(1) - - # ========================================== - # 3. 
Automatically assemble test cases - # ========================================== - TEST_COMBINATIONS=() # Initialize empty array - - # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions - for bs in "${CONCURRENCIES[@]}"; do # ← outer loop: concurrency - for dim in "${DIMENSIONS[@]}"; do # ← inner loop: dimensions - case_str="${bs}x${dim}" - TEST_COMBINATIONS+=("$case_str") - done - done - - # ========================================== - # 4. (Optional) Print generated cases for sanity check - # ========================================== - echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:" - echo "${TEST_COMBINATIONS[@]}" # Uncomment if you want to print all cases - - # Progress counters - TOTAL_TESTS=${#TEST_COMBINATIONS[@]} - CURRENT_TEST=0 - - # Iterate over all test combinations - for COMBINATION in "${TEST_COMBINATIONS[@]}"; do - # Parse parameters from combination string - NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1) - INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2) - OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3) - - # Update progress - CURRENT_TEST=$((CURRENT_TEST + 1)) - - echo "==========================================================" - echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS" - echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN" - echo "==========================================================" - - #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log" - - # Run benchmark - python3 -m vllm.entrypoints.cli.main bench serve \ - --host 127.0.0.1 \ - --port ${VLLM_PORT:-8356}\ - --backend vllm \ - --model ${SERVED_MODEL_NAME:-Qwen3-8B} \ - --dataset-name random \ - --num-prompts $NUM_PROMPTS \ - --random-input-len $INPUT_LEN \ - --random-output-len $OUTPUT_LEN \ - --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \ - --ignore-eos - done - ' - - - name: Set permissions - if: always() - run: | - bash 
ci/scripts/docker/set_permissions.sh - - - name: Cleanup docker - if: always() - run: | - bash ci/scripts/docker/stop_docker.sh diff --git a/.github/workflows/_pylint.yml b/.github/workflows/pylint-check.yml similarity index 98% rename from .github/workflows/_pylint.yml rename to .github/workflows/pylint-check.yml index b8fcfdd..634fdbb 100644 --- a/.github/workflows/_pylint.yml +++ b/.github/workflows/pylint-check.yml @@ -6,7 +6,7 @@ on: branches: [ main ] jobs: - lint: + pylint-check: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/run-e2e.yml b/.github/workflows/run-e2e.yml index 91ffc66..56bb8cf 100644 --- a/.github/workflows/run-e2e.yml +++ b/.github/workflows/run-e2e.yml @@ -1,8 +1,141 @@ -name: run-e2e-test +# name: e2e-test -on: - workflow_dispatch: +# on: +# workflow_call: +# pull_request: +# branches: [main] +# types: [opened, synchronize, reopened] +# push: +# branches: [main] -jobs: - call-e2e: - uses: ./.github/workflows/_e2e_singlecard.yml +# concurrency: +# group: e2e-singlecard +# cancel-in-progress: false + +# jobs: +# e2e: +# name: e2e-test-singlecard +# runs-on: +# - self-hosted +# - Linux +# - X64 + +# steps: +# - name: Checkout PR code +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 + +# - name: Verify PR workspace +# run: | +# echo "===== WORKSPACE =====" +# pwd +# ls -l +# echo "===== GIT INFO =====" +# git rev-parse HEAD +# git log -1 --oneline +# git status --porcelain + +# - name: Start docker +# run: | +# bash ci/scripts/docker/start_docker.sh + +# - name: Install enviroments +# run: | +# bash ci/scripts/env/install_env.sh + +# - name: Start vLLM server +# run: | +# bash ci/scripts/server/start_vllm.sh + +# - name: Wait for vLLM ready +# run: | +# bash ci/scripts/server/wait_vllm.sh + +# - name: Accuracy testing +# run: | +# bash ci/scripts/tests/run_accuracy.sh + +# - name: Performance testing +# run: | +# docker exec aiak-e2e-singlecard bash -lc ' +# source ci/scripts/common/env.sh +# source ci/scripts/common/log.sh 
+# #!/bin/bash +# # ========================================== +# # 1. Define test dimensions +# # (can be easily extended, e.g., add "2048x2048") +# # ========================================== +# DIMENSIONS=("1024x1024") + +# # ========================================== +# # 2. Define concurrency generation logic (densification strategy) +# # ============x============================== +# # Use array concatenation to combine different density ranges +# # Syntax: seq [start] [step] [end] +# CONCURRENCIES=(1) + +# # ========================================== +# # 3. Automatically assemble test cases +# # ========================================== +# TEST_COMBINATIONS=() # Initialize empty array + +# # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions +# for bs in "${CONCURRENCIES[@]}"; do # ← outer loop: concurrency +# for dim in "${DIMENSIONS[@]}"; do # ← inner loop: dimensions +# case_str="${bs}x${dim}" +# TEST_COMBINATIONS+=("$case_str") +# done +# done + +# # ========================================== +# # 4. 
(Optional) Print generated cases for sanity check +# # ========================================== +# echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:" +# echo "${TEST_COMBINATIONS[@]}" # Uncomment if you want to print all cases + +# # Progress counters +# TOTAL_TESTS=${#TEST_COMBINATIONS[@]} +# CURRENT_TEST=0 + +# # Iterate over all test combinations +# for COMBINATION in "${TEST_COMBINATIONS[@]}"; do +# # Parse parameters from combination string +# NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1) +# INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2) +# OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3) + +# # Update progress +# CURRENT_TEST=$((CURRENT_TEST + 1)) + +# echo "==========================================================" +# echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS" +# echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN" +# echo "==========================================================" + +# #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log" + +# # Run benchmark +# python3 -m vllm.entrypoints.cli.main bench serve \ +# --host 127.0.0.1 \ +# --port ${VLLM_PORT:-8356}\ +# --backend vllm \ +# --model ${SERVED_MODEL_NAME:-Qwen3-8B} \ +# --dataset-name random \ +# --num-prompts $NUM_PROMPTS \ +# --random-input-len $INPUT_LEN \ +# --random-output-len $OUTPUT_LEN \ +# --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \ +# --ignore-eos +# done +# ' + +# - name: Set permissions +# if: always() +# run: | +# bash ci/scripts/docker/set_permissions.sh + +# - name: Cleanup docker +# if: always() +# run: | +# bash ci/scripts/docker/stop_docker.sh diff --git a/.github/workflows/ut.yml b/.github/workflows/ut.yml index 8ea5c23..147a23a 100644 --- a/.github/workflows/ut.yml +++ b/.github/workflows/ut.yml @@ -1,57 +1,53 @@ -name: Unit Test +# name: Unit Test -on: - pull_request: - branches: - - main +# on: +# pull_request: +# branches: +# - main -jobs: - test-kunlun: - runs-on: - 
labels: - - self-hosted - - Linux - - X64 - - test-1 # Actions Runner Label +# jobs: +# test-kunlun: +# runs-on: +# labels: +# - self-hosted +# - Linux +# - X64 +# - test-1 # Actions Runner Label - steps: - - name: Checkout Code - uses: actions/checkout@v4 +# steps: +# - name: Checkout Code +# uses: actions/checkout@v4 - - name: Install vLLM-Kunlun Dependencies - run: | - pip install -r requirements.txt +# - name: Install vLLM-Kunlun Dependencies +# run: | +# pip install -r requirements.txt - python setup.py build - python setup.py develop +# python setup.py build +# python setup.py develop - # Install the KL3-customized build of PyTorch - wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7 - bash xpytorch-cp310-torch251-ubuntu2004-x64.run +# # Install the KL3-customized build of PyTorch +# wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7 +# bash xpytorch-cp310-torch251-ubuntu2004-x64.run - # Install custom ops - pip install "https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd" +# # Install custom ops +# pip install 
"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd" - # Install the KLX3 custom Triton build - pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl" +# # Install the KLX3 custom Triton build +# pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl" - # Install the AIAK custom ops library - pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl" +# # Install the AIAK custom ops library +# pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl" - - name: Install vLLM - run: | - pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/ +# - name: Install vLLM +# run: | +# pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/ - - name: Install Test Dependencies - run: | - pip install pytest - - - name: Run Unit Test - run: | - echo "Running full suite..." - export XPU_VISIBLE_DEVICES=1 - pytest \ - -vs \ - --cov=vllm_kunlun \ - --cov-report=term-missing \ - -p no:warnings tests/ut \ No newline at end of file +# - name: Run Unit Test +# run: | +# echo "Running full suite..." 
+# export XPU_VISIBLE_DEVICES=1 +# pytest \ +# -vs \ +# --cov=vllm_kunlun \ +# --cov-report=term-missing \ +# -p no:warnings tests/ut \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4f6b9eb..d38b1e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,8 +29,10 @@ uvloop==0.21.0 prometheus-fastapi-instrumentator==7.1.0 transformers==4.57.0 -# 基础构建依赖 +# basic hatchling>=1.25 build>=1.0.3 pytest +pytest-cov mock +pre-commit \ No newline at end of file