Misc fix for min_p_sampling, --cuda-graph-bs (#2761)
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
# Adapted from https://raw.githubusercontent.com/vllm-project/vllm/v0.5.5/vllm/model_executor/layers/quantization/__init__.py
|
||||
|
||||
from typing import Callable, Dict, Optional, Type
|
||||
from typing import Dict, Type
|
||||
|
||||
import torch
|
||||
from vllm.model_executor.layers.quantization.aqlm import AQLMConfig
|
||||
from vllm.model_executor.layers.quantization.awq import AWQConfig
|
||||
from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig
|
||||
|
||||
Reference in New Issue
Block a user