# Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
# Co-authored-by: Waël Boukhobza <wawa_wael@live.fr>
# (file-viewer listing metadata: 602 lines, 24 KiB, YAML)
name: PR Test (PD Router)

# Triggers: pushes and pull requests against main that touch the PD
# disaggregation code, the CI server bootstrap script, or the router,
# plus manual runs via workflow_dispatch.
on:
  push:
    branches:
      - main
    paths:
      - 'python/sglang/srt/disaggregation/**'
      - 'scripts/ci/ci_start_disaggregation_servers.sh'
      - 'sgl-router/**'
  pull_request:
    branches:
      - main
    paths:
      - 'python/sglang/srt/disaggregation/**'
      - 'scripts/ci/ci_start_disaggregation_servers.sh'
      - 'sgl-router/**'
  workflow_dispatch:

# One run per ref; a newer run on the same ref cancels the one in progress.
concurrency:
  group: test-disaggregation-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  pull-requests: write
  issues: write

# Mean latency/throughput gates. Defined once at workflow level so the
# benchmark job (which enforces them) and the summary job (which reports
# them) always use the same numbers.
env:
  TTFT_MEAN_MAX_S: "4.7"               # max mean time-to-first-token, seconds
  E2E_LATENCY_MEAN_MAX_S: "35.0"       # max mean end-to-end latency, seconds
  INPUT_THROUGHPUT_MEAN_MIN: "12000"   # min mean input throughput, tokens/s
  OUTPUT_THROUGHPUT_MEAN_MIN: "68"     # min mean output throughput, tokens/s

jobs:
  # Starts the prefill/decode servers, then for each routing policy launches
  # the router, smoke-tests the chat API, and runs a gated genai-bench run.
  test-disaggregation:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: [h200]
    timeout-minutes: 45

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 10

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Setup Rust
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      # NOTE(review): every pip call below passes --no-cache-dir, so this
      # cache is restored but never populated or read by pip - confirm
      # whether the cache step or the flag is the intended behavior.
      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('python/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Validate environment
        run: |
          echo "=== System Validation ==="
          # Check for the tool before its first use so a missing driver
          # produces a clear message instead of a bare "command not found".
          if ! command -v nvidia-smi >/dev/null 2>&1; then
            echo "Error: nvidia-smi not found; cannot validate GPUs."
            exit 1
          fi
          nvidia-smi
          gpu_count=$(nvidia-smi -L | wc -l)
          echo "GPU count: $gpu_count"
          if [ "$gpu_count" -lt 8 ]; then
            echo "Error: This test requires at least 8 GPUs"
            exit 1
          fi

          echo "=== GPU Process Check ==="
          # Fail fast if any GPU compute processes are active.
          # Try to query compute apps first (preferred and concise)
          gpu_procs=$(nvidia-smi --query-compute-apps=pid,process_name,gpu_uuid --format=csv,noheader 2>/dev/null | sed '/^$/d' || true)

          # Fallback to detailed PIDS report if the query returns nothing but there might still be processes
          if [ -z "$gpu_procs" ]; then
            gpu_procs=$(nvidia-smi -q -d PIDS 2>/dev/null | awk '/Processes/{flag=1;next}/^$/{flag=0}flag' | sed '/^\s*Processes:/d' | sed '/^\s*$/d' || true)
          fi

          if [ -n "$gpu_procs" ]; then
            echo "Error: Found active GPU processes using the device(s):"
            echo "$gpu_procs"
            exit 1
          else
            echo "No active GPU compute processes detected."
          fi

          echo "=== RDMA Validation ==="
          if ! command -v ibv_devices >/dev/null 2>&1; then
            echo "Error: InfiniBand tools not found"
            exit 1
          fi

          # Check for active IB devices; one PORT_ACTIVE device is enough
          found_active_device=false
          for device in mlx5_{0..11}; do
            if ibv_devinfo "$device" >/dev/null 2>&1; then
              state=$(ibv_devinfo "$device" | grep "state:" | head -1 | awk '{print $2}')
              if [[ "$state" == "PORT_ACTIVE" ]]; then
                echo "✓ Found active device: $device"
                found_active_device=true
                break
              fi
            fi
          done

          if [ "$found_active_device" = false ]; then
            echo "Error: No active IB devices found"
            echo "Available devices:"
            ibv_devices || true
            exit 1
          fi

          echo "=== Model Validation ==="
          if [ ! -d "/raid/models/meta-llama/Llama-3.1-8B-Instruct" ]; then
            echo "Error: Model not found"
            ls -la /raid/models/ || echo "No models directory"
            exit 1
          fi
          echo "✓ Model found"

      - name: Install SGLang dependencies
        run: |
          echo "Installing SGLang with all extras..."
          python3 -m pip --no-cache-dir install --upgrade pip
          python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
          python3 -m pip --no-cache-dir install -e "python[all]" --break-system-packages
          python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.5
          python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.2
          python3 -m pip --no-cache-dir install sgl-kernel==0.3.9.post2

      - name: Build and install sgl-router
        run: |
          source "$HOME/.cargo/env"
          echo "Building sgl-router..."
          cd sgl-router
          cargo build && python3 -m build && pip install --force-reinstall dist/*.whl

      - name: Start disaggregation servers
        id: start_servers
        run: |
          echo "Starting disaggregation servers..."
          READY_FILE=".disagg_ready"
          rm -f "$READY_FILE"
          # The bootstrap script is handed the ready-file path via
          # DISAGG_READY_FILE; this step polls for that file below.
          DISAGG_READY_FILE="$READY_FILE" bash scripts/ci/ci_start_disaggregation_servers.sh &
          SERVER_PID=$!
          # Exposed so the cleanup step can kill the bootstrap script.
          echo "server_pid=$SERVER_PID" >> "$GITHUB_OUTPUT"

          # Wait until script signals readiness (8/8 healthy) or timeout
          TIMEOUT=300
          ELAPSED=0
          while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
            if [ -f "$READY_FILE" ]; then
              echo "✓ All disaggregation servers are healthy (signal detected)"
              break
            fi
            if ! ps -p "$SERVER_PID" > /dev/null; then
              echo "Error: server bootstrap script exited prematurely"
              exit 1
            fi
            sleep 5
            ELAPSED=$((ELAPSED + 5))
          done
          if [ "$ELAPSED" -ge "$TIMEOUT" ]; then
            echo "❌ Timeout waiting for disaggregation servers to be healthy"
            exit 1
          fi

          echo "✓ Servers started (PID: $SERVER_PID)"

      - name: Test all policies sequentially
        timeout-minutes: 30
        run: |
          POLICIES=("random" "round_robin" "cache_aware" "power_of_two")
          BASE_URL="http://127.0.0.9:8000"
          ROUTER_PID=""

          # Stop the router of the current policy: SIGTERM first, SIGKILL if
          # it is still alive after 5 seconds. No-op when none is running.
          stop_router() {
            [ -n "$ROUTER_PID" ] || return 0
            kill "$ROUTER_PID" 2>/dev/null || true
            for _ in {1..5}; do
              if ! ps -p "$ROUTER_PID" > /dev/null 2>&1; then
                echo "Router stopped gracefully"
                break
              fi
              sleep 1
            done
            if ps -p "$ROUTER_PID" > /dev/null 2>&1; then
              echo "Force killing router..."
              kill -9 "$ROUTER_PID" 2>/dev/null || true
            fi
            ROUTER_PID=""
          }
          # The step runs under `bash -e`, so any failing command (for
          # example genai-bench) aborts the script; the trap guarantees the
          # router never outlives this step on the self-hosted runner.
          trap stop_router EXIT

          # True when $1 is a plain or exponent-notation decimal number.
          is_number() {
            local re='^-?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
            [[ "$1" =~ $re ]]
          }

          # True when $1 is numerically greater than $2.
          num_gt() {
            awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
          }

          # check_metric LABEL VALUE max|min LIMIT
          # Prints a failure line and returns 1 when VALUE is missing,
          # non-numeric, or on the wrong side of LIMIT. A missing value must
          # fail: it means the benchmark produced no usable measurement.
          check_metric() {
            local label="$1" value="$2" kind="$3" limit="$4"
            if ! is_number "$value"; then
              echo "❌ $label validation failed: missing or non-numeric value '$value'"
              return 1
            fi
            if [ "$kind" = "max" ] && num_gt "$value" "$limit"; then
              echo "❌ $label validation failed: $value > $limit"
              return 1
            fi
            if [ "$kind" = "min" ] && num_gt "$limit" "$value"; then
              echo "❌ $label validation failed: $value < $limit"
              return 1
            fi
            return 0
          }

          # Free commonly used ports for router and metrics
          echo "Freeing ports 29000 (metrics) and 8000 (API), if in use..."
          fuser -k -n tcp 29000 2>/dev/null || true
          fuser -k -n tcp 8000 2>/dev/null || true
          sleep 1

          for policy in "${POLICIES[@]}"; do
            echo ""
            echo "=================================================="
            echo "Testing policy: $policy"
            echo "=================================================="

            # Free ports before starting router
            fuser -k -n tcp 29000 2>/dev/null || true
            fuser -k -n tcp 8000 2>/dev/null || true

            # Start router with the current policy
            echo "Starting router with policy: $policy..."
            RUST_BACKTRACE=1 python3 -m sglang_router.launch_router \
              --pd-disaggregation \
              --policy "$policy" \
              --prefill http://127.0.0.1:30001 9001 \
              --prefill http://127.0.0.2:30002 9002 \
              --prefill http://127.0.0.3:30003 9003 \
              --prefill http://127.0.0.4:30004 9004 \
              --decode http://127.0.0.5:30005 \
              --decode http://127.0.0.6:30006 \
              --decode http://127.0.0.7:30007 \
              --decode http://127.0.0.8:30008 \
              --host 127.0.0.9 \
              --port 8000 &
            ROUTER_PID=$!

            # Wait for router to become healthy
            echo "Waiting for router to become healthy..."
            TIMEOUT=60
            ELAPSED=0
            while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
              if curl --connect-timeout 5 --silent "$BASE_URL" > /dev/null 2>&1; then
                echo "✓ Router is reachable"
                break
              fi
              if ! ps -p "$ROUTER_PID" > /dev/null; then
                echo "Error: Router process died"
                exit 1
              fi
              sleep 5
              ELAPSED=$((ELAPSED + 5))
            done

            if [ "$ELAPSED" -ge "$TIMEOUT" ]; then
              echo "Error: Router health check timeout"
              exit 1
            fi

            # Test API functionality.
            # `|| true` keeps a curl failure from aborting under `bash -e`,
            # so the jq check below reports it with a readable message.
            echo "Testing API completions for $policy..."
            response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
              -H "Content-Type: application/json" \
              -H "Authorization: Bearer test-token" \
              -d '{
                "model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct",
                "messages": [
                  {"role": "user", "content": "Write a Python function to calculate fibonacci numbers recursively"}
                ],
                "stream": false,
                "max_tokens": 100
              }' || true)

            if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
              echo "✓ API test passed for $policy"
            else
              echo "✗ API test failed for $policy: $response"
              exit 1
            fi

            # Test streaming (same `|| true` reasoning; also covers the
            # exit status 124 that `timeout` returns when it fires)
            echo "Testing streaming API for $policy..."
            stream_response=$(timeout 30 curl -s -X POST "$BASE_URL/v1/chat/completions" \
              -H "Content-Type: application/json" \
              -H "Authorization: Bearer test-token" \
              -d '{
                "model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct",
                "messages": [
                  {"role": "user", "content": "Count from 1 to 5"}
                ],
                "stream": true,
                "max_tokens": 50
              }' || true)

            if echo "$stream_response" | grep -q "data:"; then
              echo "✓ Streaming API test passed for $policy"
            else
              echo "✗ Streaming API test failed for $policy"
              exit 1
            fi

            # Run genai-bench benchmark
            echo "Running genai-bench for $policy..."
            genai-bench benchmark \
              --api-backend openai \
              --api-base "$BASE_URL" \
              --api-key "dummy-token" \
              --api-model-name "/raid/models/meta-llama/Llama-3.1-8B-Instruct" \
              --model-tokenizer /raid/models/meta-llama/Llama-3.1-8B-Instruct \
              --task text-to-text \
              --num-concurrency 64 \
              --traffic-scenario "D(8000,2000)" \
              --max-requests-per-run 640 \
              --max-time-per-run 2 \
              --experiment-folder-name "benchmark_${policy}" \
              --experiment-base-dir "."

            # Find the actual experiment folder
            actual_folder=$(find . -maxdepth 1 -name "benchmark_${policy}" -type d | head -1)

            if [ -z "$actual_folder" ]; then
              echo "✗ Benchmark failed for $policy: Experiment folder not found"
              exit 1
            fi

            echo "Genai-bench results saved in: $actual_folder"

            # Extract mean values and validate performance thresholds
            echo "📊 Extracting performance metrics for $policy..."

            # Result JSON files, excluding experiment metadata. Filtering
            # inside find (not `| grep -v`) keeps an empty result from
            # returning status 1 and aborting before the message below.
            mapfile -t json_files < <(find "$actual_folder" -name "*.json" ! -path "*experiment_metadata*")

            if [ "${#json_files[@]}" -eq 0 ]; then
              echo "✗ Benchmark failed for $policy: No JSON results found"
              exit 1
            fi

            for json_file in "${json_files[@]}"; do
              echo "Processing: $(basename "$json_file")"

              # Extract mean values; a jq failure leaves the value empty,
              # which check_metric reports as a validation failure.
              ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file" || true)
              e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file" || true)
              input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file" || true)
              output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file" || true)

              echo "  TTFT mean: ${ttft_mean}s"
              echo "  E2E Latency mean: ${e2e_latency_mean}s"
              echo "  Input Throughput mean: ${input_throughput_mean} tokens/s"
              echo "  Output Throughput mean: ${output_throughput_mean} tokens/s"

              # Validate against the workflow-level thresholds; run every
              # check so all violations are printed, not just the first.
              validation_passed=true
              check_metric "TTFT" "$ttft_mean" max "$TTFT_MEAN_MAX_S" || validation_passed=false
              check_metric "E2E Latency" "$e2e_latency_mean" max "$E2E_LATENCY_MEAN_MAX_S" || validation_passed=false
              check_metric "Input Throughput" "$input_throughput_mean" min "$INPUT_THROUGHPUT_MEAN_MIN" || validation_passed=false
              check_metric "Output Throughput" "$output_throughput_mean" min "$OUTPUT_THROUGHPUT_MEAN_MIN" || validation_passed=false

              if [ "$validation_passed" = true ]; then
                echo "✅ Performance validation passed for $policy"
              else
                echo "❌ Performance validation failed for $policy"
                exit 1
              fi
            done

            echo "✓ Genai-bench completed successfully for $policy"
            echo "📊 Detailed metrics and plots available in: $actual_folder"

            # Stop router before testing next policy
            echo "Stopping router for $policy..."
            stop_router

            # Short delay to ensure port is released
            sleep 2

            echo "✓ Completed testing for $policy"
          done

          echo ""
          echo "✅ All policies tested successfully!"

      - name: Upload benchmark results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: benchmark_**/

      - name: Cleanup servers
        if: always()
        run: |
          if [ -n "${{ steps.start_servers.outputs.server_pid }}" ]; then
            pkill -P ${{ steps.start_servers.outputs.server_pid }} || true
            kill ${{ steps.start_servers.outputs.server_pid }} || true
          fi
          pkill -f "sglang.launch_server" || true
          # Safety net in case a router survived the test step.
          pkill -f "sglang_router.launch_router" || true
          sleep 5
          # pgrep never counts itself; it prints 0 and exits 1 on no match,
          # hence `|| true` rather than echoing a second "0".
          remaining=$(pgrep -c -f "sglang.launch_server" || true)
          echo "Cleanup completed. Remaining processes: $remaining"

  # Renders the uploaded genai-bench results as a job summary table.
  summarize-benchmarks:
    needs: test-disaggregation
    runs-on: ubuntu-latest
    if: success()

    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10

      - name: Create benchmark summary
        run: |
          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d
          echo "=========================================="

          POLICIES=(random round_robin cache_aware power_of_two)

          # True when $1 is a plain or exponent-notation decimal number.
          is_number() {
            local re='^-?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
            [[ "$1" =~ $re ]]
          }

          # True when $1 is numerically greater than $2.
          num_gt() {
            awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
          }

          # metric_ok VALUE max|min LIMIT: true when VALUE is numeric and on
          # the allowed side of LIMIT. A missing value is never "ok".
          metric_ok() {
            is_number "$1" || return 1
            if [ "$2" = "max" ]; then
              ! num_gt "$1" "$3"
            else
              ! num_gt "$3" "$1"
            fi
          }

          # Print the result folder for policy $1, or nothing if absent.
          find_result_folder() {
            local folder
            # Handle zip extraction structure: exact name first...
            folder=$(find . -maxdepth 2 -name "benchmark_${1}" -type d | head -1)
            if [ -z "$folder" ]; then
              # ...then alternative patterns for other extraction layouts
              folder=$(find . -maxdepth 3 -path "*benchmark_${1}*" -type d | head -1)
            fi
            echo "$folder"
          }

          # Print the mean of metric $2 from JSON file $1, or "N/A".
          read_mean() {
            jq -r --arg m "$2" '.aggregated_metrics.stats[$m].mean // "N/A"' "$1" 2>/dev/null || echo "N/A"
          }

          # Format value $1 with printf format $2; "N/A" when not numeric.
          format_metric() {
            if is_number "$1"; then
              printf "$2" "$1" 2>/dev/null || echo "$1"
            else
              echo "N/A"
            fi
          }

          {
            echo "## PD Router Genai-Bench Results Summary"
            echo ""
            echo "🚀 **Benchmarked with genai-bench for comprehensive LLM serving performance evaluation**"
            echo ""
            echo "| Policy | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |"
            echo "|--------|--------|----------|-----------------|--------------------------|---------------------------|"
          } >> "$GITHUB_STEP_SUMMARY"

          # One pass per policy: append the table row immediately and
          # collect the validation bullet for the section written afterwards.
          validation_summary=""
          for policy in "${POLICIES[@]}"; do
            result_folder=$(find_result_folder "$policy")
            echo "DEBUG: Policy ${policy} -> Found folder: ${result_folder:-NOT FOUND}"

            if [ -z "$result_folder" ] || [ ! -d "$result_folder" ]; then
              echo "| ${policy} | ❌ Failed | N/A | N/A | N/A | N/A |" >> "$GITHUB_STEP_SUMMARY"
              validation_summary="${validation_summary}- **${policy}**: ❌ Failed\n"
              continue
            fi

            # Find JSON file with metrics
            json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
            if [ -z "$json_file" ] || [ ! -f "$json_file" ]; then
              echo "| ${policy} | ❌ No Data | N/A | N/A | N/A | N/A |" >> "$GITHUB_STEP_SUMMARY"
              validation_summary="${validation_summary}- **${policy}**: ❌ No data\n"
              continue
            fi

            # Extract performance metrics
            ttft_mean=$(read_mean "$json_file" ttft)
            e2e_latency_mean=$(read_mean "$json_file" e2e_latency)
            input_throughput_mean=$(read_mean "$json_file" input_throughput)
            output_throughput_mean=$(read_mean "$json_file" output_throughput)

            # Format numbers for display
            ttft_display=$(format_metric "$ttft_mean" "%.2f")
            e2e_display=$(format_metric "$e2e_latency_mean" "%.2f")
            input_display=$(format_metric "$input_throughput_mean" "%.0f")
            output_display=$(format_metric "$output_throughput_mean" "%.0f")

            echo "| ${policy} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> "$GITHUB_STEP_SUMMARY"

            # Same thresholds the benchmark job enforces (workflow-level env)
            validation_status="✅"
            metric_ok "$ttft_mean" max "$TTFT_MEAN_MAX_S" || validation_status="❌"
            metric_ok "$e2e_latency_mean" max "$E2E_LATENCY_MEAN_MAX_S" || validation_status="❌"
            metric_ok "$input_throughput_mean" min "$INPUT_THROUGHPUT_MEAN_MIN" || validation_status="❌"
            metric_ok "$output_throughput_mean" min "$OUTPUT_THROUGHPUT_MEAN_MIN" || validation_status="❌"
            validation_summary="${validation_summary}- **${policy}**: $validation_status\n"
          done

          {
            # Add performance validation summary
            echo ""
            echo "## 📊 Performance Validation"
            echo ""
            echo "**Thresholds:** TTFT ≤ ${TTFT_MEAN_MAX_S}s | E2E Latency ≤ ${E2E_LATENCY_MEAN_MAX_S}s | Input Throughput ≥ ${INPUT_THROUGHPUT_MEAN_MIN} tok/s | Output Throughput ≥ ${OUTPUT_THROUGHPUT_MEAN_MIN} tok/s"
            echo ""
            echo -e "$validation_summary"
            echo ""
            echo "## 📊 Genai-Bench Features Used"
            echo "- **Token-level Performance**: TTFT, TPOT, End-to-End latency"
            echo "- **Throughput Analysis**: Input/Output/Total token throughput"
            echo "- **Statistical Analysis**: Percentiles, mean, std dev for all metrics"
            echo "- **Visual Reports**: Automated plots and Excel summaries"
            echo "- **SGLang Backend**: Native integration with SGLang serving"
            echo ""
            echo "✅ All policies tested successfully with genai-bench!"
          } >> "$GITHUB_STEP_SUMMARY"