[Bugfix] Optimized exception throwing when stream captures exception (#3322)

### What this PR does / why we need it?
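Refined the error reporting when ACL graph capture raises an exception.
The "insufficient available streams" guidance is now logged only when the
exception message contains `retCode=0x7020023`, which this change treats as
the stream capture failure code. In all cases the original exception is
re-raised; exceptions with any other cause simply no longer get this hint,
which could previously mislead users into thinking every capture failure
was caused by running out of streams.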

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
This commit is contained in:
lilinsiman
2025-10-10 17:09:28 +08:00
committed by GitHub
parent 1756efa5fd
commit 90e00deaa9

View File

@@ -20,6 +20,7 @@
import copy import copy
import gc import gc
import itertools import itertools
import re
import time import time
from collections import defaultdict from collections import defaultdict
from collections.abc import Iterator from collections.abc import Iterator
@@ -3393,15 +3394,23 @@ class NPUModelRunner(LoRAModelRunnerMixin):
aclgraph_runtime_mode=aclgraph_runtime_mode, aclgraph_runtime_mode=aclgraph_runtime_mode,
uniform_decode=False) uniform_decode=False)
except Exception as e: except Exception as e:
logger.error( error_msg = str(e)
f"ACLgraph sizes capture fail: {type(e).__name__}:\n" error_code = '0x7020023'
"ACLgraph has insufficient available streams to capture the configured number of sizes. " pattern = r'retCode=([^,\s\.]+)'
"Please verify both the availability of adequate streams and the appropriateness of the configured size count.\n\n" match = re.search(pattern, error_msg)
"Recommended solutions:\n" if match:
"1. Manually configure the compilation_config parameter " retCode = match.group(1)
"with a reduced set of sizes: '{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'.\n" # Determine whether the error message is caused by stream capture failure.
"2. Utilize ACLgraph's full graph mode as an alternative to the piece-wise approach.\n\n" if match and retCode == error_code:
f"{str(e)}") logger.error(
f"ACLgraph sizes capture fail: {type(e).__name__}:\n"
"ACLgraph has insufficient available streams to capture the configured number of sizes. "
"Please verify both the availability of adequate streams and the appropriateness of the configured size count.\n\n"
"Recommended solutions:\n"
"1. Manually configure the compilation_config parameter "
"with a reduced set of sizes: '{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'.\n"
"2. Utilize ACLgraph's full graph mode as an alternative to the piece-wise approach.\n\n"
f"{str(e)}")
raise raise
if aclgraph_mode.decode_mode() == CUDAGraphMode.FULL and \ if aclgraph_mode.decode_mode() == CUDAGraphMode.FULL and \