# ==========================================
# ACTUAL TEST CASES
# ==========================================
# Each entry under `test_cases` describes one serving scenario: the model,
# the server/proxy command-line arguments, and the benchmark runs to execute.
test_cases:
  # Qwen2.5-VL-7B-Instruct served in "epd" mode: two model servers plus a
  # proxy, wired together through the port variables declared in `envs`.
  - name: "Qwen2.5-VL-7B-Instruct-epd"
    model: "Qwen/Qwen2.5-VL-7B-Instruct"
    service_mode: "epd"
    # All three ports carry the literal placeholder "DEFAULT_PORT".
    # NOTE(review): presumably the harness replaces this placeholder with
    # real port numbers before launch - confirm against the loader.
    envs:
      ENCODE_PORT: "DEFAULT_PORT"
      PD_PORT: "DEFAULT_PORT"
      PROXY_PORT: "DEFAULT_PORT"
    # One argument list per server process (a list of lists).
    epd_server_cmds:
      # Server 1: listens on $ENCODE_PORT and is the "ec_producer" side of
      # the EC transfer. It is given a 0.01 GPU memory fraction, a single
      # sequence slot, and has prefix caching turned off.
      - - "--port"
        - "$ENCODE_PORT"
        - "--model"
        - "Qwen/Qwen2.5-VL-7B-Instruct"
        - "--gpu-memory-utilization"
        - "0.01"
        - "--tensor-parallel-size"
        - "1"
        - "--enforce-eager"
        - "--no-enable-prefix-caching"
        - "--max-model-len"
        - "10000"
        - "--max-num-batched-tokens"
        - "10000"
        - "--max-num-seqs"
        - "1"
        - "--ec-transfer-config"
        # JSON passed as one argument; the storage path and connector name
        # must match the consumer's config below.
        - '{"ec_connector_extra_config":{"shared_storage_path":"/dev/shm/epd/storage"},"ec_connector":"ECExampleConnector","ec_role": "ec_producer"}'
      # Server 2: listens on $PD_PORT and is the "ec_consumer" side, reading
      # from the same shared storage path. It is given a 0.95 GPU memory
      # fraction and up to 128 sequences.
      - - "--port"
        - "$PD_PORT"
        - "--model"
        - "Qwen/Qwen2.5-VL-7B-Instruct"
        - "--gpu-memory-utilization"
        - "0.95"
        - "--tensor-parallel-size"
        - "1"
        - "--enforce-eager"
        - "--max-model-len"
        - "10000"
        - "--max-num-batched-tokens"
        - "10000"
        - "--max-num-seqs"
        - "128"
        - "--ec-transfer-config"
        - '{"ec_connector_extra_config":{"shared_storage_path":"/dev/shm/epd/storage"},"ec_connector":"ECExampleConnector","ec_role": "ec_consumer"}'
    # Proxy arguments: binds 127.0.0.1:$PROXY_PORT, sends encode traffic to
    # the $ENCODE_PORT server and decode traffic to the $PD_PORT server.
    # The prefill URL list is the literal string "disable".
    epd_proxy_args:
      - "--host"
      - "127.0.0.1"
      - "--port"
      - "$PROXY_PORT"
      - "--encode-servers-urls"
      - "http://localhost:$ENCODE_PORT"
      - "--decode-servers-urls"
      - "http://localhost:$PD_PORT"
      - "--prefill-servers-urls"
      - "disable"
    # NOTE(review): the original line layout was lost. `test_content` has no
    # value of its own (it parses as null) and `benchmarks` is laid out here
    # as its sibling. Confirm against the harness schema whether `benchmarks`
    # should instead be nested under `test_content`.
    test_content:
    # Named benchmark runs. `warm_up` and `perf` use the 1080p TextVQA
    # performance dataset; `acc` uses the TextVQA lite accuracy dataset.
    # NOTE(review): the exact meaning of `baseline` / `threshold` (ratio vs.
    # absolute tolerance) is defined by the harness - verify before tuning.
    benchmarks:
      # Short performance run: 50 prompts, 20 output tokens, batch size 32.
      warm_up:
        case_type: performance
        dataset_path: vllm-ascend/textvqa-perf-1080p
        request_conf: vllm_api_stream_chat
        dataset_conf: textvqa/textvqa_gen_base64
        num_prompts: 50
        max_out_len: 20
        batch_size: 32
        request_rate: 0
        baseline: 1
        threshold: 0.97
      # Accuracy run checked against a baseline of 82.05 with threshold 5.
      acc:
        case_type: accuracy
        dataset_path: vllm-ascend/textvqa-lite
        request_conf: vllm_api_stream_chat
        dataset_conf: textvqa/textvqa_gen_base64
        max_out_len: 2048
        batch_size: 128
        baseline: 82.05
        threshold: 5
      # Full performance run: 512 prompts, 256 output tokens, batch size 128.
      perf:
        case_type: performance
        dataset_path: vllm-ascend/textvqa-perf-1080p
        request_conf: vllm_api_stream_chat
        dataset_conf: textvqa/textvqa_gen_base64
        num_prompts: 512
        max_out_len: 256
        batch_size: 128
        request_rate: 0
        baseline: 1
        threshold: 0.97