Files
sglang/python/sglang/launch_server_llavavid.py
Yineng Zhang 8bee20f80b Update vllm to 0.6.3 (#1711) (#1720)
Co-authored-by: Ke Bao <ISPObaoke@163.com>
2024-10-19 20:45:41 -07:00

26 lines
1007 B
Python

"""Launch the inference server for Llava-video model."""
import json
import sys
from sglang.srt.server import launch_server, prepare_server_args
def build_model_override_args(model_path: str, num_frames: int = 16) -> dict:
    """Build the model-config overrides passed to the Llava-video server.

    Args:
        model_path: Path or name of the model being served. It is matched
            case-insensitively against the substring "34b".
        num_frames: Value stored under the "num_frames" override. When it is
            exactly 32, linear RoPE scaling (factor 2.0) and doubled length
            limits (4096 * 2) are added as well.

    Returns:
        A new dict of overrides. Keys are inserted in a fixed order so the
        JSON serialization of the result is stable.
    """
    overrides = {
        "mm_spatial_pool_stride": 2,
        "architectures": ["LlavaVidForCausalLM"],
        "num_frames": num_frames,
        "model_type": "llavavid",
    }

    if num_frames == 32:
        # The three length-related settings share one value; name it once
        # instead of repeating the arithmetic.
        extended_length = 4096 * 2
        overrides["rope_scaling"] = {"factor": 2.0, "rope_type": "linear"}
        overrides["max_sequence_length"] = extended_length
        overrides["tokenizer_model_max_length"] = extended_length
        overrides["model_max_length"] = extended_length

    if "34b" in model_path.lower():
        # Model paths containing "34b" get a different image token index.
        overrides["image_token_index"] = 64002

    return overrides


if __name__ == "__main__":
    # Parse the CLI arguments, attach the Llava-video overrides as a JSON
    # string, then start the server with the resulting arguments.
    server_args = prepare_server_args(sys.argv[1:])
    server_args.json_model_override_args = json.dumps(
        build_model_override_args(server_args.model_path)
    )
    launch_server(server_args)