Fix flashinfer version check and HTTP log-level fallback
This commit is contained in:
@@ -152,7 +152,9 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
||||
if server_args.disable_disk_cache:
|
||||
disable_cache()
|
||||
if not server_args.disable_flashinfer:
|
||||
assert_pkg_version("flashinfer", "0.0.8")
|
||||
assert_pkg_version("flashinfer", "0.0.8", "Please uninstall the old version and "
|
||||
"reinstall the latest version by following the instructions "
|
||||
"at https://docs.flashinfer.ai/installation.html.")
|
||||
if server_args.chat_template:
|
||||
# TODO: replace this with huggingface transformers template
|
||||
load_chat_template_for_openai_api(server_args.chat_template)
|
||||
@@ -293,7 +295,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
||||
app,
|
||||
host=server_args.host,
|
||||
port=server_args.port,
|
||||
log_level=server_args.log_level_http or server_args.log_level_http,
|
||||
log_level=server_args.log_level_http or server_args.log_level,
|
||||
timeout_keep_alive=5,
|
||||
loop="uvloop",
|
||||
)
|
||||
|
||||
@@ -50,11 +50,11 @@ class ServerArgs:
|
||||
load_balance_method: str = "round_robin"
|
||||
|
||||
# Optimization/debug options
|
||||
disable_flashinfer: bool = True
|
||||
attention_reduce_in_fp32: bool = False
|
||||
disable_flashinfer: bool = False
|
||||
disable_radix_cache: bool = False
|
||||
disable_regex_jump_forward: bool = False
|
||||
disable_disk_cache: bool = False
|
||||
attention_reduce_in_fp32: bool = False
|
||||
|
||||
# Distributed args
|
||||
nccl_init_addr: Optional[str] = None
|
||||
|
||||
@@ -426,17 +426,19 @@ def suppress_other_loggers():
|
||||
logging.getLogger("vllm.utils").setLevel(logging.WARN)
|
||||
|
||||
|
||||
def assert_pkg_version(pkg: str, min_version: str):
|
||||
def assert_pkg_version(pkg: str, min_version: str, message: str):
|
||||
try:
|
||||
installed_version = version(pkg)
|
||||
if pkg_version.parse(installed_version) < pkg_version.parse(min_version):
|
||||
raise Exception(
|
||||
f"{pkg} is installed with version {installed_version} which "
|
||||
f"is less than the minimum required version {min_version}"
|
||||
f"{pkg} is installed with version {installed_version}, which "
|
||||
f"is less than the minimum required version {min_version}. " +
|
||||
message
|
||||
)
|
||||
except PackageNotFoundError:
|
||||
raise Exception(
|
||||
f"{pkg} with minimum required version {min_version} is not installed"
|
||||
f"{pkg} with minimum required version {min_version} is not installed. " +
|
||||
message
|
||||
)
|
||||
|
||||
|
||||
@@ -588,4 +590,4 @@ def receive_addrs(model_port_args, server_args):
|
||||
print(f"Node 0 received from rank {src_rank}: {tensor.tolist()}")
|
||||
|
||||
dist.barrier()
|
||||
dist.destroy_process_group()
|
||||
dist.destroy_process_group()
|
||||
|
||||
Reference in New Issue
Block a user