update ci workflow (#159)
Signed-off-by: tanjunchen <tanjunchen20@gmail.com>
This commit is contained in:
141
.github/workflows/_e2e_singlecard.yml
vendored
141
.github/workflows/_e2e_singlecard.yml
vendored
@@ -1,141 +0,0 @@
|
|||||||
name: e2e-test
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_call:
|
|
||||||
pull_request:
|
|
||||||
branches: [main]
|
|
||||||
types: [opened, synchronize, reopened]
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: e2e-singlecard
|
|
||||||
cancel-in-progress: false
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
e2e:
|
|
||||||
name: e2e-test-singlecard
|
|
||||||
runs-on:
|
|
||||||
- self-hosted
|
|
||||||
- Linux
|
|
||||||
- X64
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout PR code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Verify PR workspace
|
|
||||||
run: |
|
|
||||||
echo "===== WORKSPACE ====="
|
|
||||||
pwd
|
|
||||||
ls -l
|
|
||||||
echo "===== GIT INFO ====="
|
|
||||||
git rev-parse HEAD
|
|
||||||
git log -1 --oneline
|
|
||||||
git status --porcelain
|
|
||||||
|
|
||||||
- name: Start docker
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/docker/start_docker.sh
|
|
||||||
|
|
||||||
- name: Install enviroments
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/env/install_env.sh
|
|
||||||
|
|
||||||
- name: Start vLLM server
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/server/start_vllm.sh
|
|
||||||
|
|
||||||
- name: Wait for vLLM ready
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/server/wait_vllm.sh
|
|
||||||
|
|
||||||
- name: Accuracy testing
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/tests/run_accuracy.sh
|
|
||||||
|
|
||||||
- name: Performance testing
|
|
||||||
run: |
|
|
||||||
docker exec aiak-e2e-singlecard bash -lc '
|
|
||||||
source ci/scripts/common/env.sh
|
|
||||||
source ci/scripts/common/log.sh
|
|
||||||
#!/bin/bash
|
|
||||||
# ==========================================
|
|
||||||
# 1. Define test dimensions
|
|
||||||
# (can be easily extended, e.g., add "2048x2048")
|
|
||||||
# ==========================================
|
|
||||||
DIMENSIONS=("1024x1024")
|
|
||||||
|
|
||||||
# ==========================================
|
|
||||||
# 2. Define concurrency generation logic (densification strategy)
|
|
||||||
# ==========================================
|
|
||||||
# Use array concatenation to combine different density ranges
|
|
||||||
# Syntax: seq [start] [step] [end]
|
|
||||||
CONCURRENCIES=(1)
|
|
||||||
|
|
||||||
# ==========================================
|
|
||||||
# 3. Automatically assemble test cases
|
|
||||||
# ==========================================
|
|
||||||
TEST_COMBINATIONS=() # Initialize empty array
|
|
||||||
|
|
||||||
# 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions
|
|
||||||
for bs in "${CONCURRENCIES[@]}"; do # ← outer loop: concurrency
|
|
||||||
for dim in "${DIMENSIONS[@]}"; do # ← inner loop: dimensions
|
|
||||||
case_str="${bs}x${dim}"
|
|
||||||
TEST_COMBINATIONS+=("$case_str")
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
# ==========================================
|
|
||||||
# 4. (Optional) Print generated cases for sanity check
|
|
||||||
# ==========================================
|
|
||||||
echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
|
|
||||||
echo "${TEST_COMBINATIONS[@]}" # Print all generated cases on a single line
|
|
||||||
|
|
||||||
# Progress counters
|
|
||||||
TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
|
|
||||||
CURRENT_TEST=0
|
|
||||||
|
|
||||||
# Iterate over all test combinations
|
|
||||||
for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
|
|
||||||
# Parse parameters from combination string
|
|
||||||
NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1)
|
|
||||||
INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2)
|
|
||||||
OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3)
|
|
||||||
|
|
||||||
# Update progress
|
|
||||||
CURRENT_TEST=$((CURRENT_TEST + 1))
|
|
||||||
|
|
||||||
echo "=========================================================="
|
|
||||||
echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
|
|
||||||
echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
|
|
||||||
echo "=========================================================="
|
|
||||||
|
|
||||||
#OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
|
|
||||||
|
|
||||||
# Run benchmark
|
|
||||||
python3 -m vllm.entrypoints.cli.main bench serve \
|
|
||||||
--host 127.0.0.1 \
|
|
||||||
--port ${VLLM_PORT:-8356}\
|
|
||||||
--backend vllm \
|
|
||||||
--model ${SERVED_MODEL_NAME:-Qwen3-8B} \
|
|
||||||
--dataset-name random \
|
|
||||||
--num-prompts $NUM_PROMPTS \
|
|
||||||
--random-input-len $INPUT_LEN \
|
|
||||||
--random-output-len $OUTPUT_LEN \
|
|
||||||
--tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
|
|
||||||
--ignore-eos
|
|
||||||
done
|
|
||||||
'
|
|
||||||
|
|
||||||
- name: Set permissions
|
|
||||||
if: always()
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/docker/set_permissions.sh
|
|
||||||
|
|
||||||
- name: Cleanup docker
|
|
||||||
if: always()
|
|
||||||
run: |
|
|
||||||
bash ci/scripts/docker/stop_docker.sh
|
|
||||||
@@ -6,7 +6,7 @@ on:
|
|||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
lint:
|
pylint-check:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
145
.github/workflows/run-e2e.yml
vendored
145
.github/workflows/run-e2e.yml
vendored
@@ -1,8 +1,141 @@
|
|||||||
name: run-e2e-test
|
# name: e2e-test
|
||||||
|
|
||||||
on:
|
# on:
|
||||||
workflow_dispatch:
|
# workflow_call:
|
||||||
|
# pull_request:
|
||||||
|
# branches: [main]
|
||||||
|
# types: [opened, synchronize, reopened]
|
||||||
|
# push:
|
||||||
|
# branches: [main]
|
||||||
|
|
||||||
jobs:
|
# concurrency:
|
||||||
call-e2e:
|
# group: e2e-singlecard
|
||||||
uses: ./.github/workflows/_e2e_singlecard.yml
|
# cancel-in-progress: false
|
||||||
|
|
||||||
|
# jobs:
|
||||||
|
# e2e:
|
||||||
|
# name: e2e-test-singlecard
|
||||||
|
# runs-on:
|
||||||
|
# - self-hosted
|
||||||
|
# - Linux
|
||||||
|
# - X64
|
||||||
|
|
||||||
|
# steps:
|
||||||
|
# - name: Checkout PR code
|
||||||
|
# uses: actions/checkout@v4
|
||||||
|
# with:
|
||||||
|
# fetch-depth: 0
|
||||||
|
|
||||||
|
# - name: Verify PR workspace
|
||||||
|
# run: |
|
||||||
|
# echo "===== WORKSPACE ====="
|
||||||
|
# pwd
|
||||||
|
# ls -l
|
||||||
|
# echo "===== GIT INFO ====="
|
||||||
|
# git rev-parse HEAD
|
||||||
|
# git log -1 --oneline
|
||||||
|
# git status --porcelain
|
||||||
|
|
||||||
|
# - name: Start docker
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/docker/start_docker.sh
|
||||||
|
|
||||||
|
# - name: Install enviroments
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/env/install_env.sh
|
||||||
|
|
||||||
|
# - name: Start vLLM server
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/server/start_vllm.sh
|
||||||
|
|
||||||
|
# - name: Wait for vLLM ready
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/server/wait_vllm.sh
|
||||||
|
|
||||||
|
# - name: Accuracy testing
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/tests/run_accuracy.sh
|
||||||
|
|
||||||
|
# - name: Performance testing
|
||||||
|
# run: |
|
||||||
|
# docker exec aiak-e2e-singlecard bash -lc '
|
||||||
|
# source ci/scripts/common/env.sh
|
||||||
|
# source ci/scripts/common/log.sh
|
||||||
|
# #!/bin/bash
|
||||||
|
# # ==========================================
|
||||||
|
# # 1. Define test dimensions
|
||||||
|
# # (can be easily extended, e.g., add "2048x2048")
|
||||||
|
# # ==========================================
|
||||||
|
# DIMENSIONS=("1024x1024")
|
||||||
|
|
||||||
|
# # ==========================================
|
||||||
|
# # 2. Define concurrency generation logic (densification strategy)
|
||||||
|
# # ==========================================
|
||||||
|
# # Use array concatenation to combine different density ranges
|
||||||
|
# # Syntax: seq [start] [step] [end]
|
||||||
|
# CONCURRENCIES=(1)
|
||||||
|
|
||||||
|
# # ==========================================
|
||||||
|
# # 3. Automatically assemble test cases
|
||||||
|
# # ==========================================
|
||||||
|
# TEST_COMBINATIONS=() # Initialize empty array
|
||||||
|
|
||||||
|
# # 🔄 Modified: outer loop over batch size (concurrency), inner loop over dimensions
|
||||||
|
# for bs in "${CONCURRENCIES[@]}"; do # ← outer loop: concurrency
|
||||||
|
# for dim in "${DIMENSIONS[@]}"; do # ← inner loop: dimensions
|
||||||
|
# case_str="${bs}x${dim}"
|
||||||
|
# TEST_COMBINATIONS+=("$case_str")
|
||||||
|
# done
|
||||||
|
# done
|
||||||
|
|
||||||
|
# # ==========================================
|
||||||
|
# # 4. (Optional) Print generated cases for sanity check
|
||||||
|
# # ==========================================
|
||||||
|
# echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
|
||||||
|
# echo "${TEST_COMBINATIONS[@]}" # Print all generated cases on a single line
|
||||||
|
|
||||||
|
# # Progress counters
|
||||||
|
# TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
|
||||||
|
# CURRENT_TEST=0
|
||||||
|
|
||||||
|
# # Iterate over all test combinations
|
||||||
|
# for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
|
||||||
|
# # Parse parameters from combination string
|
||||||
|
# NUM_PROMPTS=$(echo $COMBINATION | cut -d'x' -f1)
|
||||||
|
# INPUT_LEN=$(echo $COMBINATION | cut -d'x' -f2)
|
||||||
|
# OUTPUT_LEN=$(echo $COMBINATION | cut -d'x' -f3)
|
||||||
|
|
||||||
|
# # Update progress
|
||||||
|
# CURRENT_TEST=$((CURRENT_TEST + 1))
|
||||||
|
|
||||||
|
# echo "=========================================================="
|
||||||
|
# echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
|
||||||
|
# echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
|
||||||
|
# echo "=========================================================="
|
||||||
|
|
||||||
|
# #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
|
||||||
|
|
||||||
|
# # Run benchmark
|
||||||
|
# python3 -m vllm.entrypoints.cli.main bench serve \
|
||||||
|
# --host 127.0.0.1 \
|
||||||
|
# --port ${VLLM_PORT:-8356}\
|
||||||
|
# --backend vllm \
|
||||||
|
# --model ${SERVED_MODEL_NAME:-Qwen3-8B} \
|
||||||
|
# --dataset-name random \
|
||||||
|
# --num-prompts $NUM_PROMPTS \
|
||||||
|
# --random-input-len $INPUT_LEN \
|
||||||
|
# --random-output-len $OUTPUT_LEN \
|
||||||
|
# --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
|
||||||
|
# --ignore-eos
|
||||||
|
# done
|
||||||
|
# '
|
||||||
|
|
||||||
|
# - name: Set permissions
|
||||||
|
# if: always()
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/docker/set_permissions.sh
|
||||||
|
|
||||||
|
# - name: Cleanup docker
|
||||||
|
# if: always()
|
||||||
|
# run: |
|
||||||
|
# bash ci/scripts/docker/stop_docker.sh
|
||||||
|
|||||||
88
.github/workflows/ut.yml
vendored
88
.github/workflows/ut.yml
vendored
@@ -1,57 +1,53 @@
|
|||||||
name: Unit Test
|
# name: Unit Test
|
||||||
|
|
||||||
on:
|
# on:
|
||||||
pull_request:
|
# pull_request:
|
||||||
branches:
|
# branches:
|
||||||
- main
|
# - main
|
||||||
|
|
||||||
jobs:
|
# jobs:
|
||||||
test-kunlun:
|
# test-kunlun:
|
||||||
runs-on:
|
# runs-on:
|
||||||
labels:
|
# labels:
|
||||||
- self-hosted
|
# - self-hosted
|
||||||
- Linux
|
# - Linux
|
||||||
- X64
|
# - X64
|
||||||
- test-1 # Actions Runner Label
|
# - test-1 # Actions Runner Label
|
||||||
|
|
||||||
steps:
|
# steps:
|
||||||
- name: Checkout Code
|
# - name: Checkout Code
|
||||||
uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Install vLLM-Kunlun Dependencies
|
# - name: Install vLLM-Kunlun Dependencies
|
||||||
run: |
|
# run: |
|
||||||
pip install -r requirements.txt
|
# pip install -r requirements.txt
|
||||||
|
|
||||||
python setup.py build
|
# python setup.py build
|
||||||
python setup.py develop
|
# python setup.py develop
|
||||||
|
|
||||||
# Install the KL3-customized build of PyTorch
|
# # Install the KL3-customized build of PyTorch
|
||||||
wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7
|
# wget -O xpytorch-cp310-torch251-ubuntu2004-x64.run https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7
|
||||||
bash xpytorch-cp310-torch251-ubuntu2004-x64.run
|
# bash xpytorch-cp310-torch251-ubuntu2004-x64.run
|
||||||
|
|
||||||
# Install custom ops
|
# # Install custom ops
|
||||||
pip install "https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd"
|
# pip install "https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd"
|
||||||
|
|
||||||
# Install the KLX3 custom Triton build
|
# # Install the KLX3 custom Triton build
|
||||||
pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl"
|
# pip install "https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl"
|
||||||
|
|
||||||
# Install the AIAK custom ops library
|
# # Install the AIAK custom ops library
|
||||||
pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl"
|
# pip install "https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl"
|
||||||
|
|
||||||
- name: Install vLLM
|
# - name: Install vLLM
|
||||||
run: |
|
# run: |
|
||||||
pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/
|
# pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/
|
||||||
|
|
||||||
- name: Install Test Dependencies
|
# - name: Run Unit Test
|
||||||
run: |
|
# run: |
|
||||||
pip install pytest
|
# echo "Running full suite..."
|
||||||
|
# export XPU_VISIBLE_DEVICES=1
|
||||||
- name: Run Unit Test
|
# pytest \
|
||||||
run: |
|
# -vs \
|
||||||
echo "Running full suite..."
|
# --cov=vllm_kunlun \
|
||||||
export XPU_VISIBLE_DEVICES=1
|
# --cov-report=term-missing \
|
||||||
pytest \
|
# -p no:warnings tests/ut
|
||||||
-vs \
|
|
||||||
--cov=vllm_kunlun \
|
|
||||||
--cov-report=term-missing \
|
|
||||||
-p no:warnings tests/ut
|
|
||||||
@@ -29,8 +29,10 @@ uvloop==0.21.0
|
|||||||
prometheus-fastapi-instrumentator==7.1.0
|
prometheus-fastapi-instrumentator==7.1.0
|
||||||
transformers==4.57.0
|
transformers==4.57.0
|
||||||
|
|
||||||
# 基础构建依赖
|
# Basic build and test dependencies
|
||||||
hatchling>=1.25
|
hatchling>=1.25
|
||||||
build>=1.0.3
|
build>=1.0.3
|
||||||
pytest
|
pytest
|
||||||
|
pytest-cov
|
||||||
mock
|
mock
|
||||||
|
pre-commit
|
||||||
Reference in New Issue
Block a user