[CI] Patch torch.library.infer_schema for fused moe ops to fix CI (#854)
make sure the pytorch infer_schema check is patched before any case that uses fused moe ops: 1. model registration 2. quantization loading 3. fused moe UT Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -19,6 +19,9 @@
|
||||
|
||||
Run `pytest tests/ops/test_fused_moe.py`.
|
||||
"""
|
||||
# The fused moe ops test will hit the infer_schema error, so we need to add
# the patch here to make the test pass.
|
||||
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
@@ -23,5 +23,9 @@ def register():
|
||||
|
||||
|
||||
def register_model():
    """Register vllm-ascend's custom models with vLLM.

    The torch.library.infer_schema patch is imported first so that fused
    moe custom ops pass PyTorch's schema check during registration.
    """
    # Work around the pytorch schema-check error; this import can be
    # removed once pytorch is upgraded to 2.7.0.
    import vllm_ascend.patch.worker.patch_common.patch_utils  # noqa: F401

    # Delegate the actual registration to the models sub-module.
    from .models import register_model as _do_register
    _do_register()
|
||||
|
||||
@@ -15,6 +15,10 @@
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
#
|
||||
# In the quantization case, this file is loaded before the worker patch has
# been applied, so we need to import patch_utils here first to make sure the patch is applied.
|
||||
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
|
||||
|
||||
from types import MappingProxyType
|
||||
from typing import Any, Callable, Dict, List, Mapping, Optional
|
||||
|
||||
|
||||
Reference in New Issue
Block a user