# Files
# enginex-mlu370-vllm/vllm-v0.6.2/tests/mlu_cases_list.sh
# 2026-02-04 17:22:39 +08:00
#
# 442 lines
# 13 KiB
# Bash
#!/bin/bash
LINK_MODELS() {
  # Populate the working directory with vendor-style folders mirroring the
  # HuggingFace hub layout (e.g. "meta-llama/Llama-2-7b-hf") and symlink the
  # locally stored checkpoints into them, so the tests can resolve model ids
  # without network access.
  mkdir -p meta-llama openai-community Qwen NousResearch mistralai THUDM baichuan-inc llava-hf
  ln -s /data/AE/llm/models/Llama-2-7b meta-llama/
  ln -s /data/AE/llm/models/Llama-2-7b-hf meta-llama/
  # Extra lowercase alias: some tests reference "llama-2-7b-hf".
  ln -s /data/AE/llm/models/Llama-2-7b-hf meta-llama/llama-2-7b-hf
  ln -s /data/AE/llm/models/Llama-2-7b-chat-hf meta-llama/Llama-2-7b-chat-hf
  ln -s /data/AE/llm/models/Llama-2-13b-chat-hf meta-llama/Llama-2-13b-chat-hf
  ln -s /data/AE/llm/models/Meta-Llama-3-8B meta-llama/
  ln -s /data/AE/llm/models/Meta-Llama-3-8B-Instruct meta-llama/
  ln -s /data/vllm/models/LLM-Research/Llama-3.2-1B-Instruct meta-llama/
  ln -s /data/AE/llm/models/Meta-Llama-3-8B-Instruct NousResearch/
  ln -s /data/AE/llm/models/Qwen1.5-7B Qwen/
  ln -s /data/AE/llm/models/Qwen2-7B-Instruct Qwen/
  ln -s /data/AE/llm/models/Mistral-7B-v0.1 mistralai/
  ln -s /data/vllm/models/LLM-Research/Mixtral-8x7B-Instruct-v0.1 mistralai/
  ln -s /data/AE/llm/models/chatglm3-6b THUDM/
  ln -s /data/vllm/models/LLM-Research/Baichuan-7B baichuan-inc/
  ln -s /data/vllm/vLLM_ut_hf_models/gpt2 openai-community/
  ln -s /data/AE/llm/models/llava-1.5-7b-hf llava-hf/
  # Record the names of the prepackaged test models ($() instead of legacy
  # backticks) and expose them all at the top level of the working directory.
  LOCAL_MODEL=$(ls /data/vllm/vLLM_ut_hf_models/)
  ln -s /data/vllm/vLLM_ut_hf_models/* .
  # create huggingface cache dir if not exists
  mkdir -p ~/.cache/huggingface/datasets
  ln -s /data/vllm/vLLM_ut_hf_models/gsm8k/ ~/.cache/huggingface/datasets/gsm8k
}
UNLINK_MODELS() {
  # Tear down everything LINK_MODELS created in the working directory plus the
  # HuggingFace dataset cache, so a fresh LINK_MODELS run starts clean.
  LOCAL_MODEL=$(ls /data/vllm/vLLM_ut_hf_models/)
  # Word-splitting of ${LOCAL_MODEL} is intentional: it holds one entry per
  # linked model directory (names contain no whitespace).
  # shellcheck disable=SC2086
  rm -rf meta-llama openai-community Qwen NousResearch mistralai THUDM baichuan-inc llava-hf ${LOCAL_MODEL}
  rm -rf ~/.cache/huggingface/datasets
}
### async_engine ###
# Pytest targets for the async engine suite; consumed by run_ut via ONLINE_CASES.
ASYNC_ENGINE_CASES=(
async_engine/test_api_server.py
async_engine/test_async_llm_engine.py
async_engine/test_openapi_server.py
async_engine/test_request_tracker.py
)
### basic_correctness ###
# Skip cases
# - test_chunked_prefill.py::test_models_with_fp8_kv_cache
# - test_basic_correctness::test_model_with_failure
# before test: export VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1
# after test: unset VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT
BASIC_CORRECTNESS_CASES=(
basic_correctness/test_basic_correctness.py::test_vllm_gc_ed
basic_correctness/test_basic_correctness.py::test_models
basic_correctness/test_basic_correctness.py::test_models_distributed
basic_correctness/test_chunked_prefill.py::test_models_distributed
basic_correctness/test_chunked_prefill.py::test_models
basic_correctness/test_chunked_prefill.py::test_with_prefix_caching
basic_correctness/test_cpu_offload.py
basic_correctness/test_preemption.py::test_chunked_prefill_recompute
basic_correctness/test_preemption.py::test_preemption
basic_correctness/test_preemption.py::test_preemption_infeasible
)
### benchmark ###
# before test: export VLLM_LATENCY_DEBUG=1
# after test: unset VLLM_LATENCY_DEBUG
BENCHMARK_CASES=(
benchmark/test_benchmark_latency.py
)
### compile ###
# FIXME: Pytorch 2.4 not support torch.compile, skip vllm compile cases.
# Add this back when upgrade pytorch to 2.5.
# COMPILE_CASES=(
# compile/test_full_graph.py
# compile/test_wrapper.py
# )
### core ###
CORE_CASES=(
core/test_chunked_prefill_scheduler.py
core/test_num_computed_tokens_update.py
core/test_scheduler_encoder_decoder.py
core/test_scheduler.py
core/test_serialization.py
core/block/test_block_manager.py
core/block/test_block_table.py
core/block/test_common.py
core/block/test_cpu_gpu_block_allocator.py
core/block/test_naive_block.py
core/block/test_prefix_caching_block.py
core/block/e2e/test_correctness.py::test_block_manager_with_preemption
core/block/e2e/test_correctness.py::test_lookahead_greedy_equality_with_preemption
core/block/e2e/test_correctness.py::test_chunked_prefill_block_manager
core/block/e2e/test_correctness.py::test_block_manager_prefix_caching_enabled_with_preemption
core/block/e2e/test_correctness.py::test_auto_prefix_caching_with_preemption
# NOTE(review): "evition" looks like a typo but matches the upstream test name
# in the vllm repo -- verify against the tests tree before "fixing" it here.
core/block/e2e/test_correctness.py::test_auto_prefix_caching_after_evition_start
core/block/e2e/test_correctness_sliding_window.py
)
### distributed ###
# Skip cases
# - test_custom_all_reduce.py
# - test_distributed_oot.py
# - test_multi_node_assignment.py
# - test_same_node.py
DISTRIBUTED_CASES=(
distributed/test_pipeline_parallel.py
distributed/test_pipeline_partition.py
distributed/test_pp_cudagraph.py
distributed/test_shm_broadcast.py
distributed/test_utils.py
)
# These cases must run against the packaged models, not the local symlinks:
# before test: UNLINK_MODELS
# after test: LINK_MODELS
DISTRIBUTED_NEED_PACK_CASES=(
distributed/test_comm_ops.py
)
### engine ###
ENGINE_CASES=(
engine/test_arg_utils.py
engine/test_computed_prefix_blocks.py
engine/test_custom_executor.py
engine/test_detokenization.py
engine/test_multiproc_workers.py
engine/test_short_mm_context.py
engine/test_skip_tokenizer_init.py
engine/test_stop_reason.py
engine/test_stop_strings.py
engine/output_processor/test_multi_step.py
engine/output_processor/test_stop_checker.py
)
### entrypoints ###
# Skip cases
# - entrypoints/llm/test_encode.py
# - entrypoints/llm/test_generate_multiple_loras.py
# - entrypoints/openai/test_accuracy.py
# - entrypoints/openai/test_audio.py
# - entrypoints/openai/test_chat.py
# - entrypoints/openai/test_completion.py
# - entrypoints/openai/test_embedding.py
# - entrypoints/openai/test_encoder_decoder.py
# - entrypoints/openai/test_metrics.py
# - entrypoints/openai/test_models.py
# - entrypoints/openai/test_oot_registration.py
# - entrypoints/openai/test_return_tokens_as_ids.py
# - entrypoints/openai/test_shutdown.py
# - entrypoints/openai/test_tokenization.py
# - entrypoints/openai/test_vision.py
# - entrypoints/openai/test_run_batch.py::test_embeddings
ENTRYPOINTS_CASES=(
entrypoints/test_chat_utils.py
entrypoints/llm/test_chat.py
entrypoints/llm/test_generate.py
entrypoints/llm/test_guided_generate.py
entrypoints/llm/test_lazy_outlines.py
entrypoints/llm/test_prompt_validation.py
entrypoints/offline_mode/test_offline_mode.py
entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
entrypoints/openai/test_basic.py
entrypoints/openai/test_chat_template.py
entrypoints/openai/test_chunked_prompt.py
entrypoints/openai/test_cli_args.py
entrypoints/openai/test_prompt_validation.py
entrypoints/openai/test_run_batch.py::test_empty_file
entrypoints/openai/test_run_batch.py::test_completions
entrypoints/openai/test_run_batch.py::test_completions_invalid_input
entrypoints/openai/test_serving_chat.py
entrypoints/openai/test_serving_engine.py
)
### kernels ###
# Skip cases
# - other op/layer test
KERNELS_CASES=(
kernels/bt_torch_ops
kernels/test_advance_step.py
kernels/test_feed_forward.py
)
### lora ###
# triton kernel tests are in TRITON_CASE
# - lora/test_llama.py::test_llama_lora_warmup
# - lora/test_tokenizer_group.py
# NOTE: The following tests requires 4 gpus, which can not run
# in ci environment. We should check these tests after we do
# some modifications for lora.
# lora/test_long_context.py::test_batched_rope_kernel
# lora/test_long_context.py::test_self_consistency
# lora/test_long_context.py::test_quality
# lora/test_long_context.py::test_max_len
LORA_CASES=(
lora/test_layers.py
lora/test_lora_checkpoints.py
lora/test_lora_huggingface.py
lora/test_lora_manager.py
lora/test_utils.py
lora/test_worker.py
lora/test_baichuan.py
lora/test_chatglm3.py
lora/test_llama.py::test_llama_lora[1]
lora/test_llama.py::test_llama_lora[2]
lora/test_long_context.py::test_rotary_emb_replaced
)
### metrics ###
METRICS_CASES=(
metrics/test_metrics.py
)
### model_executor ###
# Skip cases
# - weight_utils.py::test_download_weights_from_hf
MODEL_EXECUTOR_CASES=(
model_executor/test_enabled_custom_ops.py
model_executor/test_guided_processors.py
model_executor/weight_utils.py::test_hf_transfer_auto_activation
)
### models ###
# Skip cases
# - test_oot_registration.py
# - part of cases in decoder_only
# - all cases in embedding
# - all cases in encoder_only
MODELS_CASES=(
models/test_registry.py
models/decoder_only/language/test_big_models.py
models/decoder_only/language/test_models.py
)
### mq_llm_engine ###
# Whole directory is passed to pytest (no per-file selection needed).
MQ_LLM_ENGINE=(
mq_llm_engine
)
### multi_step ###
MULTI_STEP_CASES=(
multi_step/test_correctness_async_llm.py
multi_step/test_correctness_llm.py
)
### multimodal ###
MULTIMODAL_CASES=(
multimodal/test_inputs.py
multimodal/test_mapper.py
multimodal/test_processor_kwargs.py
)
### prefix_caching ###
# Skip cases
# - prefix_caching/test_disable_sliding_window.py
PREFIX_CACHING_CASES=(
prefix_caching/test_prefix_caching.py
)
### prompt_adapter ###
# Skip all cases
### quantization ###
# Skip all cases
### sampler ###
# Whole directory is passed to pytest.
SAMPLER_CASES=(
samplers
)
### spec_decode ###
# Skip cases
# spec_decode/test_multi_step_worker.py
# spec_decode/test_scorer.py
# spec_decode/e2e/test_eagle_correctness.py
# spec_decode/e2e/test_integration.py
# spec_decode/e2e/test_integration_dist_tp2.py
# spec_decode/e2e/test_integration_dist_tp4.py
# spec_decode/e2e/test_logprobs.py
# spec_decode/e2e/test_medusa_correctness.py
# spec_decode/e2e/test_mlp_correctness.py
# spec_decode/e2e/test_multistep_correctness.py
# spec_decode/e2e/test_ngram_correctness.py
# spec_decode/e2e/test_seed.py
SPEC_DECODE_CASES=(
spec_decode/e2e/test_compatibility.py
spec_decode/test_batch_expansion.py
spec_decode/test_dynamic_spec_decode.py
spec_decode/test_metrics.py
spec_decode/test_ngram_worker.py
spec_decode/test_spec_decode_worker.py
spec_decode/test_utils.py
)
### tensorizer_loader
TENSORIZER_LOADER_CASES=(
tensorizer_loader
)
### tokenization ###
# Skip cases
# - test_get_eos.py
# - test_tokenizer.py
TOKENIZATION_CASES=(
tokenization/test_cached_tokenizer.py
tokenization/test_detokenize.py
tokenization/test_tokenizer_group.py
)
### tool_use ###
# Skip all cases
TOOL_USE_CASES=(
tool_use/test_chat_completion_request_validations.py
)
### tpu ###
# Skip all cases
### tracing ###
# Skip all cases
### weight_loading ###
WEIGHT_LOADING_CASES=(
weight_loading/test_weight_loading.py
)
### worker ###
WORKER_CASES=(
worker/test_encoder_decoder_model_runner.py
worker/test_model_input.py
worker/test_model_runner.py
worker/test_swap.py
)
### . ###
# Skip cases
# - test_embedded_commit.py
# - test_scalartype.py
GLOBAL_CASES=(
test_cache_block_hashing.py
test_config.py
test_inputs.py
test_logger.py
test_logits_processor.py
test_regression.py
test_sampling_params.py
# NOTE(review): test_scalartype.py is listed in the "Skip cases" comment above
# yet included here -- confirm which of the two is stale and reconcile.
test_scalartype.py
test_sequence.py
test_sharded_state_loader.py::test_filter_subtensors
test_utils.py
)
# Case groupings consumed by run_ut: one online-serving bucket plus three
# offline shards (split to balance CI runtime). Expansions are quoted
# ("${ARR[@]}") so each element stays a single word even if a future case id
# ever contains whitespace (SC2068); empty/unset arrays still contribute
# zero elements.
ONLINE_CASES=(
"${ASYNC_ENGINE_CASES[@]}"
"${ENTRYPOINTS_CASES[@]}"
)
OFFLINE_CASES0=(
# "${COMPILE_CASES[@]}"
"${CORE_CASES[@]}"
"${DISTRIBUTED_CASES[@]}"
"${ENGINE_CASES[@]}"
"${KERNELS_CASES[@]}"
)
OFFLINE_CASES1=(
"${LORA_CASES[@]}"
"${METRICS_CASES[@]}"
"${MODEL_EXECUTOR_CASES[@]}"
"${MODELS_CASES[@]}"
"${MQ_LLM_ENGINE[@]}"
"${MULTI_STEP_CASES[@]}"
"${MULTIMODAL_CASES[@]}"
"${PREFIX_CACHING_CASES[@]}"
)
OFFLINE_CASES2=(
"${SAMPLER_CASES[@]}"
"${SPEC_DECODE_CASES[@]}"
"${TENSORIZER_LOADER_CASES[@]}"
"${TOKENIZATION_CASES[@]}"
"${TOOL_USE_CASES[@]}"
"${WEIGHT_LOADING_CASES[@]}"
"${WORKER_CASES[@]}"
"${GLOBAL_CASES[@]}"
)
# examples/cambricon_custom_func cases
CAMBRICON_CUSTOM_FUNC_CASES=(
expert_parallel/test_expert_parallel.py
context_parallel/test_context_parallel.py
context_parallel/test_context_parallel_kv8.py
)
# Runner configuration: base pytest invocation and ANSI colors for the
# PASS/FAIL banners printed by run_ut.
pytest_cmd="pytest -s -v"
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m'
# Refresh test-only python dependencies before running any suite.
# NOTE(review): failures here are ignored -- presumably acceptable in CI where
# the packages are pre-cached; confirm if a hard failure is wanted instead.
pip uninstall datasets -y
pip install datasets
pip install modelscope
pip install pytest_asyncio
run_ut() {
  # Run a list of pytest targets one by one, aborting the whole script on the
  # first failure.
  # Globals:   pytest_cmd, CI_WORK_DIR, RED, GREEN, NC (all read)
  # Arguments: $1 - seconds to sleep before each case
  #            $2... - pytest targets (file, directory, or node id)
  # Outputs:   progress banners on stdout; one junit xml per case (overwritten)
  # Returns:   0 if every case passes; otherwise exits with pytest's status
  local interval=$1
  shift
  local case_list=("$@")
  local case_len=${#case_list[@]}
  local i ut ret_val
  echo "Total ${case_len} cases"
  for ((i = 0; i < case_len; i++)); do
    ut=${case_list[i]}
    echo "###############################"
    echo "Run ${i}/${case_len}, ${ut} ..."
    echo "###############################"
    sleep "${interval}"
    # ${pytest_cmd} and ${ut} are deliberately unquoted: pytest_cmd carries
    # its own flags and must word-split. The eval from the original version
    # is dropped -- no further shell interpretation of case names is needed,
    # and eval would mis-execute ids containing metacharacters.
    # shellcheck disable=SC2086
    ${pytest_cmd} ${ut} --junit-xml "${CI_WORK_DIR}/ut_test.xml"
    ret_val=$?
    if [ "${ret_val}" -ne 0 ]; then
      echo "###############################"
      echo -e "${RED}FAILED: ${ut} ... ${NC}"
      echo "###############################"
      exit "${ret_val}"
    else
      echo "###############################"
      echo -e "${GREEN}PASS: ${ut} ... ${NC}"
      echo "###############################"
    fi
  done
}