Misc fix for min_p_sampling, --cuda-graph-bs (#2761)

This commit is contained in:
Lianmin Zheng
2025-01-07 02:52:53 -08:00
committed by GitHub
parent 6d08ce2aa9
commit bdc1acf6cd
17 changed files with 135 additions and 63 deletions

View File

@@ -1,8 +1,7 @@
# Adapted from https://raw.githubusercontent.com/vllm-project/vllm/v0.5.5/vllm/model_executor/layers/quantization/__init__.py
-from typing import Callable, Dict, Optional, Type
+from typing import Dict, Type
import torch
from vllm.model_executor.layers.quantization.aqlm import AQLMConfig
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig