[Feat]support sequence parallelism by pass for VL models (#5632)

2026-02-27 08:27:41 +08:00
parent ed175d6d92
commit 5def28dcd3
22 changed files with 460 additions and 101 deletions
--- a/vllm_ascend/compilation/compiler_interface.py
+++ b/vllm_ascend/compilation/compiler_interface.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import copy
 import functools
 from collections.abc import Callable
 from typing import Any
@@ -129,6 +130,10 @@ class AscendCompiler(CompilerInterface):
        compile_range: Range,
        key: str | None = None,
    ) -> tuple[Callable | None, Any | None]:
+        # Inductor can modify the graph in place, so we work on a deep copy.
+        # See https://github.com/pytorch/pytorch/issues/138980
+        graph = copy.deepcopy(graph)
+
        npugraph_ex_config = get_ascend_config().npugraph_ex_config
        if npugraph_ex_config.enable:
            assert hasattr(self, "vllm_config")