Update Readme (#660)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
Ying Sheng
2024-07-19 09:54:01 -07:00
committed by GitHub
parent dc4e4a6acc
commit 51fda1439f
25 changed files with 200 additions and 185 deletions

View File

@@ -166,6 +166,15 @@ class ServerArgs:
"--quantization",
type=str,
default=ServerArgs.quantization,
+ choices=[
+ "awq",
+ "fp8",
+ "gptq",
+ "marlin",
+ "gptq_marlin",
+ "squeezellm",
+ "bitsandbytes",
+ ],
help="The quantization method.",
)
parser.add_argument(
@@ -243,13 +252,13 @@ class ServerArgs:
parser.add_argument(
"--show-time-cost",
action="store_true",
- help="Show time cost of custom marks",
+ help="Show time cost of custom marks.",
)
parser.add_argument(
"--api-key",
type=str,
default=ServerArgs.api_key,
- help="Set API key of the server",
+ help="Set API key of the server.",
)
# Data parallelism
@@ -285,17 +294,17 @@ class ServerArgs:
parser.add_argument(
"--disable-flashinfer",
action="store_true",
- help="Disable flashinfer inference kernels",
+ help="Disable flashinfer inference kernels.",
)
parser.add_argument(
"--disable-radix-cache",
action="store_true",
- help="Disable RadixAttention",
+ help="Disable RadixAttention for prefix caching.",
)
parser.add_argument(
"--disable-regex-jump-forward",
action="store_true",
- help="Disable regex jump-forward",
+ help="Disable regex jump-forward.",
)
parser.add_argument(
"--disable-cuda-graph",