diff --git a/README.md b/README.md index 8381dd87e..ed1cf94eb 100644 --- a/README.md +++ b/README.md @@ -324,7 +324,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port - Mixtral - LLaVA - `python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000` -- Qwen +- Qwen / Qwen 2 - AWQ quantization ## Benchmark And Performance diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py index b4ee11d5b..fdf6276c1 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama2.py @@ -319,4 +319,5 @@ class LlamaForCausalLM(nn.Module): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) + EntryClass = LlamaForCausalLM diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index efc362f59..d6213812e 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -331,4 +331,5 @@ def monkey_path_clip_vision_embed_forward(): clip_vision_embed_forward, ) + EntryClass = LlavaLlamaForCausalLM diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index 2f376983c..739097330 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -377,4 +377,5 @@ class MixtralForCausalLM(nn.Module): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) + EntryClass = MixtralForCausalLM diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index acd9af464..c651ea908 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -259,4 +259,5 @@ class QWenLMHeadModel(nn.Module): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) + EntryClass = QWenLMHeadModel diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index 34045ccc5..e4dabfe30 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -315,4 +315,5 @@ class Qwen2ForCausalLM(nn.Module): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) + EntryClass = Qwen2ForCausalLM