diff --git a/tests/ops/test_fused_moe.py b/tests/ops/test_fused_moe.py
index 7b21307..78c0d88 100644
--- a/tests/ops/test_fused_moe.py
+++ b/tests/ops/test_fused_moe.py
@@ -19,6 +19,9 @@
 Run `pytest tests/ops/test_fused_moe.py`.
 """
+# The fused moe ops test will hit the infer_schema error, so we need to add
+# the patch here to make the test pass.
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
 import pytest
 import torch
diff --git a/vllm_ascend/__init__.py b/vllm_ascend/__init__.py
index 7588e70..c8f3331 100644
--- a/vllm_ascend/__init__.py
+++ b/vllm_ascend/__init__.py
@@ -23,5 +23,9 @@ def register():


def register_model():
+    # Fix the pytorch schema check error; remove this import after pytorch
+    # is upgraded to 2.7.0.
+    import vllm_ascend.patch.worker.patch_common.patch_utils # noqa: F401
+
    from .models import register_model
    register_model()
diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py
index 22b61f2..499e236 100644
--- a/vllm_ascend/quantization/quant_config.py
+++ b/vllm_ascend/quantization/quant_config.py
@@ -15,6 +15,10 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
+# In the quantization case, this file is imported before the worker patches
+# are applied, so we need to import patch_utils here first to make sure the patch is applied.
+import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa

from types import MappingProxyType
from typing import Any, Callable, Dict, List, Mapping, Optional