[Bugfix] Optimized exception throwing when stream captures exception (#3322)
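### What this PR does / why we need it? Refines the error reporting when ACL graph capture raises an exception. The "insufficient available streams" guidance is now logged only when the exception message contains `retCode=0x7020023`, i.e. when the failure is actually caused by stream capture; other capture errors are re-raised without that message, which could otherwise be misleading. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: lilinsiman <lilinsiman@gmail.com>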
This commit is contained in:
@@ -20,6 +20,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import gc
|
import gc
|
||||||
import itertools
|
import itertools
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
@@ -3393,15 +3394,23 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
aclgraph_runtime_mode=aclgraph_runtime_mode,
|
aclgraph_runtime_mode=aclgraph_runtime_mode,
|
||||||
uniform_decode=False)
|
uniform_decode=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
error_msg = str(e)
|
||||||
f"ACLgraph sizes capture fail: {type(e).__name__}:\n"
|
error_code = '0x7020023'
|
||||||
"ACLgraph has insufficient available streams to capture the configured number of sizes. "
|
pattern = r'retCode=([^,\s\.]+)'
|
||||||
"Please verify both the availability of adequate streams and the appropriateness of the configured size count.\n\n"
|
match = re.search(pattern, error_msg)
|
||||||
"Recommended solutions:\n"
|
if match:
|
||||||
"1. Manually configure the compilation_config parameter "
|
retCode = match.group(1)
|
||||||
"with a reduced set of sizes: '{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'.\n"
|
# Determine whether the error message is caused by stream capture failure.
|
||||||
"2. Utilize ACLgraph's full graph mode as an alternative to the piece-wise approach.\n\n"
|
if match and retCode == error_code:
|
||||||
f"{str(e)}")
|
logger.error(
|
||||||
|
f"ACLgraph sizes capture fail: {type(e).__name__}:\n"
|
||||||
|
"ACLgraph has insufficient available streams to capture the configured number of sizes. "
|
||||||
|
"Please verify both the availability of adequate streams and the appropriateness of the configured size count.\n\n"
|
||||||
|
"Recommended solutions:\n"
|
||||||
|
"1. Manually configure the compilation_config parameter "
|
||||||
|
"with a reduced set of sizes: '{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'.\n"
|
||||||
|
"2. Utilize ACLgraph's full graph mode as an alternative to the piece-wise approach.\n\n"
|
||||||
|
f"{str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if aclgraph_mode.decode_mode() == CUDAGraphMode.FULL and \
|
if aclgraph_mode.decode_mode() == CUDAGraphMode.FULL and \
|
||||||
|
|||||||
Reference in New Issue
Block a user