[main] flashcomm_v1 optim in Qwen Dense Models (#2802)

### What this PR does / why we need it? Adds the Flashcomm_v1 optimization for Qwen dense models. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed with the newly added and existing tests. - vLLM version: v0.10.1.1 - vLLM main: 5e537f45b4 Co-authored-by: 1024daniel <xxltju324@gmail.com>
2025-09-08 22:52:24 +08:00
parent 4df8df5b94
commit 1bbb20ea13
11 changed files with 362 additions and 20 deletions
--- a/tests/ut/test_utils.py
+++ b/tests/ut/test_utils.py
@@ -303,13 +303,13 @@ class TestUtils(TestBase):
        # ascend custom op is not registered
        utils.register_ascend_customop()
        # should call register_oot three
-        self.assertEqual(mock_customop.register_oot.call_count, 12)
+        self.assertEqual(mock_customop.register_oot.call_count, 13)
        self.assertTrue(utils._ASCEND_CUSTOMOP_IS_REIGISTERED)

        # ascend custom op is already registered
        utils.register_ascend_customop()
        # should not register_oot again, thus only called three in this ut
-        self.assertEqual(mock_customop.register_oot.call_count, 12)
+        self.assertEqual(mock_customop.register_oot.call_count, 13)


 class TestProfileExecuteDuration(TestBase):