2024-09-15 12:46:04 -07:00
|
|
|
import argparse
|
|
|
|
|
import os
|
|
|
|
|
|
2024-10-06 10:33:44 -07:00
|
|
|
# Number of LoRA adapter entries registered on the server command line;
# they are named lora0 .. lora{NUM_LORAS - 1}.
NUM_LORAS = 8

# Model locations used when building the server command:
#   "base" - model identifier passed to --model
#   "lora" - adapter path that every registered LoRA name points at
LORA_PATH = dict(
    base="mistralai/Mistral-7B-Instruct-v0.3",
    lora="/home/ying/test_lora",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def launch_server(args):
    """Build, print and run the sglang server launch command.

    The command always targets the base model from ``LORA_PATH``. Unless
    ``args.base_only`` is set, ``NUM_LORAS`` adapters named ``lora0``,
    ``lora1``, ... are registered via ``--lora-paths``, all pointing at the
    same adapter path. Every token is shell-quoted before the command line
    is assembled, so paths containing spaces or shell metacharacters reach
    the server intact.

    Args:
        args: Parsed command-line namespace providing ``base_only``,
            ``max_loras_per_batch`` and ``max_running_requests``.

    Returns:
        The status value returned by ``os.system`` for the command.
    """
    # Imported locally so the function only relies on the file's existing
    # top-level ``os`` import.
    import shlex

    base_path = LORA_PATH["base"]
    lora_path = LORA_PATH["lora"]

    parts = ["python3", "-m", "sglang.launch_server", "--model", base_path]
    if not args.base_only:
        # Register the same adapter under NUM_LORAS distinct names.
        parts.append("--lora-paths")
        parts.extend(f"lora{i}={lora_path}" for i in range(NUM_LORAS))

    # These flags are applied in both modes (base-only and LoRA).
    parts += [
        "--disable-radix",
        "--disable-cuda-graph",
        "--max-loras-per-batch",
        str(args.max_loras_per_batch),
        "--max-running-requests",
        str(args.max_running_requests),
    ]

    # shlex.quote leaves plain tokens untouched, so the printed command is
    # unchanged for ordinary values and stays copy-pasteable otherwise.
    cmd = " ".join(shlex.quote(part) for part in parts)
    print(cmd)
    return os.system(cmd)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line interface: one switch selecting base-model-only mode and
    # two integer limits, each defaulting to 8.
    cli = argparse.ArgumentParser()
    cli.add_argument("--base-only", action="store_true")
    for int_flag in ("--max-loras-per-batch", "--max-running-requests"):
        cli.add_argument(int_flag, type=int, default=8)

    launch_server(cli.parse_args())
|