diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 572a8e8d7..05bdef739 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -2,7 +2,6 @@ from __future__ import annotations import logging -import os from typing import TYPE_CHECKING, Any, Dict, List, Optional import torch diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index cd7e8b682..3cbaf3ed2 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -26,7 +26,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple, Union import torch import torch.nn.functional as F from torch import nn -from tqdm import tqdm, trange from transformers import PretrainedConfig from sglang.srt import single_batch_overlap @@ -83,7 +82,7 @@ from sglang.srt.layers.moe import ( from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, get_moe_impl_class from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.moe.topk import TopK, TopKOutputFormat -from sglang.srt.layers.quantization import Fp8Config, deep_gemm_wrapper +from sglang.srt.layers.quantization import deep_gemm_wrapper from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.quantization.fp8_kernel import ( is_fp8_fnuz,