[Bugfix] Improve the error reported when stream capture raises an exception (#3322)
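### What this PR does / why we need it? Improves the error reported when an exception is raised during ACL graph stream capture: when the failure is identified as a stream capture failure, a clear message is logged explaining that ACLgraph has insufficient available streams to capture the configured number of sizes, so the original error is no longer misleading. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: lilinsiman <lilinsiman@gmail.com>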
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
import copy
|
||||
import gc
|
||||
import itertools
|
||||
import re
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from collections.abc import Iterator
|
||||
@@ -3393,6 +3394,14 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
aclgraph_runtime_mode=aclgraph_runtime_mode,
|
||||
uniform_decode=False)
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
error_code = '0x7020023'
|
||||
pattern = r'retCode=([^,\s\.]+)'
|
||||
match = re.search(pattern, error_msg)
|
||||
if match:
|
||||
retCode = match.group(1)
|
||||
# Determine whether the error message is caused by stream capture failure.
|
||||
if match and retCode == error_code:
|
||||
logger.error(
|
||||
f"ACLgraph sizes capture fail: {type(e).__name__}:\n"
|
||||
"ACLgraph has insufficient available streams to capture the configured number of sizes. "
|
||||
|
||||
Reference in New Issue
Block a user