diff --git a/python/sglang/srt/models/gpt2.py b/python/sglang/srt/models/gpt2.py index 280ff152a..d457603b0 100644 --- a/python/sglang/srt/models/gpt2.py +++ b/python/sglang/srt/models/gpt2.py @@ -17,7 +17,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Inference-only GPT-2 model compatible with HuggingFace weights.""" -from typing import Iterable, List, Optional, Tuple +from typing import Iterable, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/olmo.py b/python/sglang/srt/models/olmo.py index e8fe9a7a0..45a3f3ff4 100644 --- a/python/sglang/srt/models/olmo.py +++ b/python/sglang/srt/models/olmo.py @@ -15,7 +15,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/c7f2cf2b7f67bce5842fedfdba508440fe257375/vllm/model_executor/models/olmo.py#L1 """Inference-only OLMo model compatible with HuggingFace weights.""" -from typing import Iterable, List, Optional, Tuple +from typing import Iterable, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/olmoe.py b/python/sglang/srt/models/olmoe.py index 9abe9ff25..91722d966 100644 --- a/python/sglang/srt/models/olmoe.py +++ b/python/sglang/srt/models/olmoe.py @@ -17,26 +17,21 @@ """Inference-only OLMoE model compatible with HuggingFace weights.""" -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, Optional, Tuple import torch -import torch.nn.functional as F from torch import nn from transformers import PretrainedConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from sglang.srt.distributed import ( - get_tensor_model_parallel_world_size, - tensor_model_parallel_all_reduce, -) -from sglang.srt.layers.activation import SiluAndMul +from sglang.srt.distributed import get_tensor_model_parallel_world_size from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ( QKVParallelLinear, ReplicatedLinear, RowParallelLinear, ) -from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput +from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.moe.fused_moe_triton import FusedMoE from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.radix_attention import RadixAttention