Update model_loader deps and qqq quantization deps (#2220) (#2318)

Co-authored-by: HandH1998 <1335248067@qq.com>
2024-12-02 23:22:13 +08:00
parent 33deca81b5
commit 85e1a6f3aa
58 changed files with 2363 additions and 366 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -50,6 +50,7 @@ class ServerArgs:
    served_model_name: Optional[str] = None
    chat_template: Optional[str] = None
    is_embedding: bool = False
+    revision: Optional[str] = None

    # Port
    host: str = "127.0.0.1"
@@ -341,6 +342,14 @@ class ServerArgs:
            action="store_true",
            help="Whether to use a CausalLM as an embedding model.",
        )
+        parser.add_argument(
+            "--revision",
+            type=str,
+            default=None,
+            help="The specific model version to use. It can be a branch "
+            "name, a tag name, or a commit id. If unspecified, will use "
+            "the default version.",
+        )

        # Memory and scheduling
        parser.add_argument(