From e68464a1d6ed2f99af74ad90459bc42e944ffd98 Mon Sep 17 00:00:00 2001
From: Canlin Guo <canlinguosdu@gmail.com>
Date: Mon, 23 Mar 2026 09:26:24 +0800
Subject: [PATCH] [Bugfix] Fix slow hasattr in ACLGraphWrapper.__getattr__
 (#7442)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What this PR does / why we need it?

Follow https://github.com/vllm-project/vllm/pull/37425,
https://github.com/vllm-project/vllm-omni/pull/1982

Copied from them:

Notice that `hasattr(self.model, "flush_pending_metadata")` cost 6ms per
decode step when profiling Qwen3 Omni.

The original `CUDAGraphWrapper.__getattr__` raises:
```python
  raise AttributeError(f"... cudagraph wrapper: {self.runnable}")
  ```
When hasattr() is called for a non-existent attribute, Python internally
calls __getattr__ which constructs this AttributeError. The
{self.runnable} triggers `__repr__()` on the underlying model (e.g.,
`Qwen3OmniMoeForConditionalGeneration`), which recursivelytraverses the
entire nn.Module tree to generate an 18,000+ character string. This
takes ~6-7ms per call.
Since `hasattr(self.model, "flush_pending_metadata") ` is called every
decode step in the Talker forward path, this adds ~6ms overhead per
step, severely impacting audio inter-chunk latency (ICL).

```Python
hasattr(self.model, "flush_pending_metadata")
  → getattr(self.model, "flush_pending_metadata")
    → not found in CUDAGraphWrapper.__dict__
    → not found in the CUDAGraphWrapper class hierarchy
    → triggers CUDAGraphWrapper.__getattr__("flush_pending_metadata")
      → hasattr(self.runnable, "flush_pending_metadata")  # runnable also doesn't have it
      → executes raise AttributeError(f"... {self.runnable}")
        → Python needs to construct the exception object
        → the f-string triggers self.runnable.__repr__()
        → Qwen3OmniMoeForConditionalGeneration.__repr__()
          → recursively traverses the entire nn.Module tree
          → generates a 18,000+ character string
          → takes ~6 ms
        → AttributeError object is created
    → hasattr catches the AttributeError and returns False
    → the 18,000-character string is immediately discarded (no one ever sees it)
```

### Does this PR introduce _any_ user-facing change?

NO.

### How was this patch tested?

See https://github.com/vllm-project/vllm-omni/pull/1982


- vLLM version: v0.17.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/4497431df654e46fb1fb5e64bf8611e762ae5d87

---------

Signed-off-by: gcanlin <canlinguosdu@gmail.com>
---
 tests/ut/compilation/test_acl_graph.py | 2 +-
 vllm_ascend/compilation/acl_graph.py   | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/ut/compilation/test_acl_graph.py b/tests/ut/compilation/test_acl_graph.py
index afee5ef5..7440fc41 100644
--- a/tests/ut/compilation/test_acl_graph.py
+++ b/tests/ut/compilation/test_acl_graph.py
@@ -723,7 +723,7 @@ class TestACLGraphWrapper(TestBase):
         with self.assertRaises(AttributeError) as context:
             _ = wrapper.non_existent_attr
 
-        self.assertIn("Attribute non_existent_attr not exists",
+        self.assertIn("Attribute non_existent_attr not found",
                       str(context.exception))
 
     def test_unwrap_method(self):
diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py
index e22d1b5a..1ceb3434 100644
--- a/vllm_ascend/compilation/acl_graph.py
+++ b/vllm_ascend/compilation/acl_graph.py
@@ -74,6 +74,7 @@ class ACLGraphWrapper:
 
         self.first_run_finished = False
         self.is_debugging_mode = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        self._runnable_str = str(runnable) if self.is_debugging_mode else None
 
         # assert runtime_mode is not NONE(no aclgraph), otherwise, we don't
         # need to initialize a ACLGraphWrapper.
@@ -91,7 +92,11 @@ class ACLGraphWrapper:
         # allow accessing the attributes of the runnable.
         if hasattr(self.runnable, key):
             return getattr(self.runnable, key)
-        raise AttributeError(f"Attribute {key} not exists in the runnable of aclgraph wrapper: {self.runnable}")
+        if self.is_debugging_mode:
+            raise AttributeError(
+                f"Attribute {key} not exists in the runnable of aclgraph wrapper: {self._runnable_str}"
+            )
+        raise AttributeError(f"Attribute {key} not found. Set VLLM_LOGGING_LEVEL=DEBUG for more details.")
 
     def unwrap(self) -> Callable:
         # in case we need to access the original runnable.