Fix CI (#9012)
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/quantization/modelopt.py
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
import torch
|
||||
from torch.nn.parameter import Parameter
|
||||
@@ -42,11 +41,7 @@ if is_cuda():
|
||||
|
||||
try:
|
||||
from flashinfer import mm_fp4 as fp4_gemm
|
||||
from flashinfer import (
|
||||
reorder_rows_for_gated_act_gemm,
|
||||
shuffle_matrix_a,
|
||||
shuffle_matrix_sf_a,
|
||||
)
|
||||
from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_sf_a
|
||||
|
||||
enable_flashinfer_fp4_gemm = True
|
||||
except ImportError:
|
||||
|
||||
Reference in New Issue
Block a user