Update model_loader deps and qqq quantization deps (#2220) (#2318)

Co-authored-by: HandH1998 <1335248067@qq.com>
This commit is contained in:
Yineng Zhang
2024-12-02 23:22:13 +08:00
committed by GitHub
parent 33deca81b5
commit 85e1a6f3aa
58 changed files with 2363 additions and 366 deletions

View File

@@ -50,6 +50,7 @@ class ServerArgs:
served_model_name: Optional[str] = None
chat_template: Optional[str] = None
is_embedding: bool = False
revision: Optional[str] = None
# Port
host: str = "127.0.0.1"
@@ -341,6 +342,14 @@ class ServerArgs:
action="store_true",
help="Whether to use a CausalLM as an embedding model.",
)
parser.add_argument(
"--revision",
type=str,
default=None,
help="The specific model version to use. It can be a branch "
"name, a tag name, or a commit id. If unspecified, will use "
"the default version.",
)
# Memory and scheduling
parser.add_argument(