diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index f09c524e..85f19eeb 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -327,6 +327,10 @@ class NPUPlatform(Platform): f"{vllm_config.parallel_config.tensor_parallel_size}" ) if len(sp_aclgraph_sizes) != len(original_sizes): + # If the user-specified max_num_seqs is not a multiple of tp_size, + # we need to set max_cudagraph_capture_size to the largest valid size, + # so that we avoid an initialization error in the vLLM server. + compilation_config.max_cudagraph_capture_size = sp_aclgraph_sizes[-1] compilation_config.cudagraph_capture_sizes = sp_aclgraph_sizes update_cudagraph_capture_sizes(vllm_config, sp_aclgraph_sizes)