[Bugfix] Optimized exception throwing when stream captures exception (#3322)

### What this PR does / why we need it?
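Improved the error reporting for exceptions raised during ACL graph stream
capture: the "insufficient available streams" hint is now logged only when the
exception message carries the stream-capture return code (retCode=0x7020023),
so unrelated failures no longer produce a misleading message.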

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
This commit is contained in:
lilinsiman
2025-10-10 17:09:28 +08:00
committed by GitHub
parent 1756efa5fd
commit 90e00deaa9

View File

@@ -20,6 +20,7 @@
import copy import copy
import gc import gc
import itertools import itertools
import re
import time import time
from collections import defaultdict from collections import defaultdict
from collections.abc import Iterator from collections.abc import Iterator
@@ -3393,6 +3394,14 @@ class NPUModelRunner(LoRAModelRunnerMixin):
aclgraph_runtime_mode=aclgraph_runtime_mode, aclgraph_runtime_mode=aclgraph_runtime_mode,
uniform_decode=False) uniform_decode=False)
except Exception as e: except Exception as e:
error_msg = str(e)
error_code = '0x7020023'
pattern = r'retCode=([^,\s\.]+)'
match = re.search(pattern, error_msg)
if match:
retCode = match.group(1)
# Determine whether the error message is caused by stream capture failure.
if match and retCode == error_code:
logger.error( logger.error(
f"ACLgraph sizes capture fail: {type(e).__name__}:\n" f"ACLgraph sizes capture fail: {type(e).__name__}:\n"
"ACLgraph has insufficient available streams to capture the configured number of sizes. " "ACLgraph has insufficient available streams to capture the configured number of sizes. "