[CI/Build] Add CI end-to-end (E2E) tests (#139)

* [CI/Build] Add CI end-to-end (E2E) tests
Signed-off-by: Chenchao Hu <huchenchao@example.com>
Author: 1916hcc
Date: 2026-01-28 19:30:55 +08:00 (committed by GitHub)
parent c37ee19e3d
commit 7c2966a98c
12 changed files with 573 additions and 0 deletions

.github/workflows/_e2e_singlecard.yml

@@ -0,0 +1,141 @@
name: e2e-test

on:
  workflow_call:
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]
  push:
    branches: [main]

concurrency:
  group: e2e-singlecard
  cancel-in-progress: false

jobs:
  e2e:
    name: e2e-test-singlecard
    runs-on:
      - self-hosted
      - Linux
      - X64
    steps:
      - name: Checkout PR code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Verify PR workspace
        run: |
          echo "===== WORKSPACE ====="
          pwd
          ls -l
          echo "===== GIT INFO ====="
          git rev-parse HEAD
          git log -1 --oneline
          git status --porcelain
      - name: Start docker
        run: |
          bash ci/scripts/docker/start_docker.sh
      - name: Install environment
        run: |
          bash ci/scripts/env/install_env.sh
      - name: Start vLLM server
        run: |
          bash ci/scripts/server/start_vllm.sh
      - name: Wait for vLLM ready
        run: |
          bash ci/scripts/server/wait_vllm.sh
      - name: Accuracy testing
        run: |
          bash ci/scripts/tests/run_accuracy.sh
      - name: Performance testing
        run: |
          docker exec aiak-e2e-singlecard bash -lc '
            source ci/scripts/common/env.sh
            source ci/scripts/common/log.sh
            # ==========================================
            # 1. Define test dimensions (input_len x output_len);
            #    extend by adding entries, e.g. "2048x2048"
            # ==========================================
            DIMENSIONS=("1024x1024")
            # ==========================================
            # 2. Define concurrency levels; extend with seq
            #    (seq [start] [step] [end]) for denser ranges
            # ==========================================
            CONCURRENCIES=(1)
            # ==========================================
            # 3. Assemble test cases: outer loop over concurrency
            #    (batch size), inner loop over dimensions
            # ==========================================
            TEST_COMBINATIONS=()
            for bs in "${CONCURRENCIES[@]}"; do
              for dim in "${DIMENSIONS[@]}"; do
                TEST_COMBINATIONS+=("${bs}x${dim}")
              done
            done
            # ==========================================
            # 4. Print generated cases for a sanity check
            # ==========================================
            echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
            echo "${TEST_COMBINATIONS[@]}"
            # Progress counters
            TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
            CURRENT_TEST=0
            # Iterate over all test combinations
            for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
              # Parse "concurrency x input_len x output_len"
              NUM_PROMPTS=$(echo "$COMBINATION" | cut -dx -f1)
              INPUT_LEN=$(echo "$COMBINATION" | cut -dx -f2)
              OUTPUT_LEN=$(echo "$COMBINATION" | cut -dx -f3)
              # Update progress
              CURRENT_TEST=$((CURRENT_TEST + 1))
              echo "=========================================================="
              echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
              echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
              echo "=========================================================="
              #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
              # Run benchmark
              python3 -m vllm.entrypoints.cli.main bench serve \
                --host 127.0.0.1 \
                --port ${VLLM_PORT:-8356} \
                --backend vllm \
                --model ${SERVED_MODEL_NAME:-Qwen3-8B} \
                --dataset-name random \
                --num-prompts $NUM_PROMPTS \
                --random-input-len $INPUT_LEN \
                --random-output-len $OUTPUT_LEN \
                --tokenizer ${MODEL_PATH:-/ssd3/models/Qwen3-8B} \
                --ignore-eos
            done
          '
      - name: Set permissions
        if: always()
        run: |
          bash ci/scripts/docker/set_permissions.sh
      - name: Cleanup docker
        if: always()
        run: |
          bash ci/scripts/docker/stop_docker.sh
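Note: the comments in the performance step above hint at extending the matrix with seq. A sketch of what a denser sweep could look like; the exact ranges and the second shape are illustrative assumptions, not part of this commit:

    # Hypothetical extension: denser concurrency sweep plus a second shape
    CONCURRENCIES=($(seq 1 1 8) $(seq 16 8 64))   # 1..8, then 16,24,...,64
    DIMENSIONS=("1024x1024" "2048x2048")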

.github/workflows/run-e2e.yml

@@ -0,0 +1,8 @@
name: run-e2e-test

on:
  workflow_dispatch:

jobs:
  call-e2e:
    uses: ./.github/workflows/_e2e_singlecard.yml
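Note: since run-e2e-test is workflow_dispatch-only, it can be started from the Actions tab or, assuming the GitHub CLI is available on the caller's machine, with:

    gh workflow run run-e2e-test --ref main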

ci/scripts/common/env.sh

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# static configuration
export DOCKER_NAME="${DOCKER_NAME:-aiak-e2e-singlecard}"
export IMAGE_NAME="${IMAGE_NAME:-iregistry.baidu-int.com/xmlir/xmlir_ubuntu_2004_x86_64:v0.32}"
export CONDA_ENV="${CONDA_ENV:-python310_torch25_cuda}"
export VLLM_HOST="${VLLM_HOST:-0.0.0.0}"
export VLLM_PORT="${VLLM_PORT:-8356}"
export VLLM_API_BASE="http://127.0.0.1:${VLLM_PORT}"
export MODEL_PATH="${MODEL_PATH:-/ssd3/models/Qwen3-8B}"
export SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-Qwen3-8B}"
export XPU_VISIBLE_DEVICES="${XPU_VISIBLE_DEVICES:-5}"
# Proxy Configuration
export PROXY_URL="${PROXY_URL:-http://agent.baidu.com:8891}"
export NO_PROXY_LIST="${NO_PROXY_LIST:-localhost,127.0.0.1,::1}"
export WORKSPACE_MOUNT="${WORKSPACE_MOUNT:-/home/E2E/workspace:/workspace}"
# Log Path
export VLLM_LOG="${VLLM_LOG:-/workspace/vllm.log}"
export ACC_LOG="${ACC_LOG:-/workspace/evalscope_accuracy_report.log}"
export PERF_LOG="${PERF_LOG:-/workspace/benchmark_performance_report.log}"
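Note: every variable here uses the ${VAR:-default} pattern, so a run can be repointed without editing the file. For example (the model path and port are illustrative):

    MODEL_PATH=/models/Qwen3-32B SERVED_MODEL_NAME=Qwen3-32B VLLM_PORT=9000 \
      bash ci/scripts/server/start_vllm.sh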

ci/scripts/common/log.sh

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
log() {
echo "[CI][$(date '+%Y-%m-%d %H:%M:%S')] $*"
}
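Note: a minimal usage sketch of the log helper (the timestamp shown is an example):

    source ci/scripts/common/log.sh
    log "Starting step"   # prints: [CI][2026-01-28 19:30:55] Starting step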

ci/scripts/docker/set_permissions.sh

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
docker exec "${DOCKER_NAME}" bash -lc "
set -e
conda activate ${CONDA_ENV}
chmod -R 777 /workspace
"

ci/scripts/docker/start_docker.sh

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Starting docker container: ${DOCKER_NAME}"
if docker ps -a --format '{{.Names}}' | grep -q "^${DOCKER_NAME}$"; then
  log "Container exists, removing first..."
  docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true
  docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true
fi
HOST_CUDA_LIB_PATH=""
for path in "/usr/local/cuda/lib64" /usr/local/cuda-*/lib64; do
  if [ -d "$path" ]; then
    HOST_CUDA_LIB_PATH="$path"
    break
  fi
done
if [ -n "${HOST_CUDA_LIB_PATH}" ]; then
  log "Detected host CUDA lib path: ${HOST_CUDA_LIB_PATH}"
else
  log "Host CUDA lib path not found, will use container CUDA"
fi
# NVIDIA device mapping
DEVICE_ARGS=""
if [ -e "/dev/nvidia0" ]; then
  DEVICE_ARGS="--device /dev/nvidia0:/dev/nvidia0"
  for i in $(seq 1 16); do
    if [ -e "/dev/nvidia${i}" ]; then
      DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia${i}:/dev/nvidia${i}"
    fi
  done
  if [ -e "/dev/nvidia-uvm" ]; then
    DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-uvm:/dev/nvidia-uvm"
  fi
  if [ -e "/dev/nvidia-modeset" ]; then
    DEVICE_ARGS="${DEVICE_ARGS} --device /dev/nvidia-modeset:/dev/nvidia-modeset"
  fi
else
  log "WARNING: /dev/nvidia0 not found, GPU may not be available"
fi
# Mount nvidia-smi
NVIDIA_BIN=""
if [ -f "/usr/bin/nvidia-smi" ]; then
  NVIDIA_BIN="-v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi"
  log "Added nvidia-smi mount"
else
  log "WARNING: nvidia-smi not found on host"
fi
# Mount critical NVIDIA libs
NVIDIA_LIBS=""
if [ -d "/usr/lib64" ]; then
  for lib in libnvidia-ml.so libnvidia-ml.so.1; do
    if [ -f "/usr/lib64/${lib}" ]; then
      NVIDIA_LIBS="${NVIDIA_LIBS} -v /usr/lib64/${lib}:/usr/lib64/${lib}"
    fi
  done
fi
# Ensure libcuda symlink on the host
ln -sf /usr/lib64/libcuda.so.1 /usr/lib64/libcuda.so || true
log "docker run ${IMAGE_NAME}"
docker run \
-h "$(hostname)" \
--privileged \
--net=host \
--user=root \
--name="${DOCKER_NAME}" \
-v /home:/home \
-v "${WORKSPACE_MOUNT}" \
-v /ssd2:/ssd2 \
-v /ssd1:/ssd1 \
-v /ssd3:/ssd3 \
-v /dev/shm:/dev/shm \
-v /usr/lib64/libcuda.so.1:/usr/lib64/libcuda.so.1 \
-v /usr/lib64/libcuda.so:/usr/lib64/libcuda.so \
-v /usr/lib64/libnvidia-ptxjitcompiler.so.1:/usr/lib64/libnvidia-ptxjitcompiler.so.1 \
-v /var/run/docker.sock:/var/run/docker.sock \
-w /workspace \
${DEVICE_ARGS} \
${NVIDIA_BIN} \
${NVIDIA_LIBS} \
--shm-size=16G \
-e NVIDIA_VISIBLE_DEVICES=all \
-e NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-itd "${IMAGE_NAME}"
log "Container started. Inject conda activate into bashrc"
docker exec "${DOCKER_NAME}" bash -lc "
echo 'conda activate ${CONDA_ENV}' >> ~/.bashrc
conda env list || true
"

ci/scripts/docker/stop_docker.sh

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Stopping docker container: ${DOCKER_NAME}"
docker stop "${DOCKER_NAME}" >/dev/null 2>&1 || true
docker rm "${DOCKER_NAME}" >/dev/null 2>&1 || true
log "Cleanup done"

ci/scripts/env/install_env.sh

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
########################################
# Common setup
########################################
log "Using container ${DOCKER_NAME}, conda env ${CONDA_ENV}"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
conda activate ${CONDA_ENV}
########################################
# Proxy setup
########################################
export http_proxy=${PROXY_URL}
export https_proxy=${PROXY_URL}
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
########################################
# 1. Install evalscope
########################################
echo '===== Installing evalscope ====='
pip install evalscope
########################################
# 2. Install vLLM-Kunlun (PR code)
########################################
echo '===== Installing vLLM-Kunlun (PR code) ====='
cd /workspace
git config --global --add safe.directory \"${GITHUB_WORKSPACE}\"
cd \"${GITHUB_WORKSPACE}\"
echo '===== USING PR CODE ====='
git rev-parse HEAD
git log -1 --oneline
# Disable proxy for local build
unset http_proxy
unset https_proxy
cd vLLM-Kunlun
pip install -r requirements.txt
python setup.py build
python setup.py install
# Patch torch dynamo eval_frame
cp vllm_kunlun/patches/eval_frame.py \
/root/miniconda/envs/${CONDA_ENV}/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py
########################################
# Kunlun runtime dependencies
########################################
echo '===== Installing Kunlun runtime dependencies ====='
wget -O xpytorch.run \
\"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xpytorch-cp310-torch251-ubuntu2004-x64.run?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-02T05%3A01%3A27Z%2F-1%2Fhost%2Ff3cf499234f82303891aed2bcb0628918e379a21e841a3fac6bd94afef491ff7\"
bash xpytorch.run
pip install \
\"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd\"
pip install \
\"https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl\"
pip install \
\"https://cce-ai-models.bj.bcebos.com/XSpeedGate-whl/release_merge/20251219_152418/xspeedgate_ops-0.0.0-cp310-cp310-linux_x86_64.whl\"
########################################
# Setup Kunlun env
########################################
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
########################################
# 3. Install upstream vLLM 0.11.0
########################################
echo '===== Installing vLLM==0.11.0 ====='
pip uninstall -y vllm || true
env | grep -i proxy || true
pip install vllm==0.11.0 \
--no-build-isolation \
--no-deps \
--index-url https://pip.baidu-int.com/simple/
python -c 'import vllm; print(\"vllm version:\", vllm.__version__)'
echo '===== All installations completed successfully ====='
"

ci/scripts/server/start_vllm.sh

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Starting vLLM server in container ${DOCKER_NAME}"
docker exec -d "${DOCKER_NAME}" bash -lc "
set -e
chmod +x \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
rm -f ${VLLM_LOG}
export XPU_VISIBLE_DEVICES=${XPU_VISIBLE_DEVICES}
python -u -m vllm.entrypoints.openai.api_server \
--host ${VLLM_HOST} \
--port ${VLLM_PORT} \
--model ${MODEL_PATH} \
--gpu-memory-utilization 0.9 \
--trust-remote-code \
--max-model-len 32768 \
--tensor-parallel-size 1 \
--dtype float16 \
--max-num-seqs 128 \
--max-num-batched-tokens 32768 \
--block-size 128 \
--no-enable-prefix-caching \
--no-enable-chunked-prefill \
--distributed-executor-backend mp \
--served-model-name ${SERVED_MODEL_NAME} \
--compilation-config '{\"splitting_ops\": [\"vllm.unified_attention\",\"vllm.unified_attention_with_output\",\"vllm.unified_attention_with_output_kunlun\",\"vllm.mamba_mixer2\",\"vllm.mamba_mixer\",\"vllm.short_conv\",\"vllm.linear_attention\",\"vllm.plamo2_mamba_mixer\",\"vllm.gdn_attention\",\"vllm.sparse_attn_indexer\"]}' \
2>&1 | tee ${VLLM_LOG}
"
log "vLLM start command issued (running in background)"

ci/scripts/server/wait_vllm.sh

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Waiting for vLLM to be ready: ${VLLM_API_BASE}/v1/models"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
for i in {1..90}; do
if curl -sf ${VLLM_API_BASE}/v1/models >/dev/null; then
echo 'vLLM is ready'
tail -n 500 ${VLLM_LOG} || true
exit 0
fi
sleep 5
done
echo 'vLLM start failed'
echo '==== last 500 lines of vllm.log ===='
tail -n 500 ${VLLM_LOG} || true
exit 1
"

ci/scripts/tests/run_accuracy.sh

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Running accuracy test via evalscope"
docker exec "${DOCKER_NAME}" bash -lc "
set -e
rm -f ${ACC_LOG}
export http_proxy=${PROXY_URL}
export https_proxy=${PROXY_URL}
export NO_PROXY=${NO_PROXY_LIST}
export no_proxy=${NO_PROXY_LIST}
evalscope eval \
--model ${SERVED_MODEL_NAME} \
--api-url http://localhost:${VLLM_PORT}/v1 \
--datasets gsm8k arc \
--limit 10 2>&1 | tee ${ACC_LOG}
"

ci/scripts/tests/run_performance.sh

@@ -0,0 +1,80 @@
#!/usr/bin/env bash
set -euo pipefail
source ci/scripts/common/env.sh
source ci/scripts/common/log.sh
log "Running performance test via bench"
# NOTE: single-quote the payload so variables and arrays expand
# inside the container rather than on the host.
docker exec "${DOCKER_NAME}" bash -lc '
  set -e
  source /root/miniconda/etc/profile.d/conda.sh
  source ci/scripts/common/env.sh
  source ci/scripts/common/log.sh
  conda activate "${CONDA_ENV}"
  # ==========================================
  # 1. Define test dimensions (input_len x output_len);
  #    extend by adding entries, e.g. "2048x2048"
  # ==========================================
  DIMENSIONS=("1024x1024")
  # ==========================================
  # 2. Define concurrency levels; extend with seq
  #    (seq [start] [step] [end]) for denser ranges
  # ==========================================
  CONCURRENCIES=(1)
  # ==========================================
  # 3. Assemble test cases: outer loop over concurrency
  #    (batch size), inner loop over dimensions
  # ==========================================
  TEST_COMBINATIONS=()
  for bs in "${CONCURRENCIES[@]}"; do
    for dim in "${DIMENSIONS[@]}"; do
      TEST_COMBINATIONS+=("${bs}x${dim}")
    done
  done
  # ==========================================
  # 4. Print generated cases for a sanity check
  # ==========================================
  echo "Generated ${#TEST_COMBINATIONS[@]} test cases in total:"
  echo "${TEST_COMBINATIONS[@]}"
  # Progress counters
  TOTAL_TESTS=${#TEST_COMBINATIONS[@]}
  CURRENT_TEST=0
  # Iterate over all test combinations
  for COMBINATION in "${TEST_COMBINATIONS[@]}"; do
    # Parse "concurrency x input_len x output_len"
    NUM_PROMPTS=$(echo "$COMBINATION" | cut -dx -f1)
    INPUT_LEN=$(echo "$COMBINATION" | cut -dx -f2)
    OUTPUT_LEN=$(echo "$COMBINATION" | cut -dx -f3)
    # Update progress
    CURRENT_TEST=$((CURRENT_TEST + 1))
    echo "=========================================================="
    echo "Test progress: $CURRENT_TEST / $TOTAL_TESTS"
    echo "Current configuration: concurrency=$NUM_PROMPTS, input_len=$INPUT_LEN, output_len=$OUTPUT_LEN"
    echo "=========================================================="
    #OUTPUT_FILE="$RESULT_DIR/p800_${NUM_PROMPTS}_${INPUT_LEN}_${OUTPUT_LEN}.log"
    # Run benchmark
    python3 -m vllm.entrypoints.cli.main bench serve \
      --host 127.0.0.1 \
      --port ${VLLM_PORT} \
      --backend vllm \
      --model ${SERVED_MODEL_NAME} \
      --dataset-name random \
      --num-prompts $NUM_PROMPTS \
      --random-input-len $INPUT_LEN \
      --random-output-len $OUTPUT_LEN \
      --tokenizer ${MODEL_PATH} \
      --ignore-eos
  done
'