diff --git a/python/pyproject.toml b/python/pyproject.toml index ea563eb4d..59c3fe012 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ [project.optional-dependencies] srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn", - "zmq", "vllm>=0.4.2", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"] + "zmq", "vllm==0.4.2", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"] openai = ["openai>=1.0", "numpy", "tiktoken"] anthropic = ["anthropic>=0.20.0", "numpy"] all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]"] diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py index fa1190b04..40be207b6 100644 --- a/python/sglang/srt/managers/router/model_runner.py +++ b/python/sglang/srt/managers/router/model_runner.py @@ -235,6 +235,7 @@ class ModelRunner: } # Init torch distributed + logger.info(f"[rank={self.tp_rank}] Set cuda device.") torch.cuda.set_device(self.tp_rank) logger.info(f"[rank={self.tp_rank}] Init torch begin. Avail mem={get_available_gpu_memory(self.tp_rank):.2f} GB") torch.distributed.init_process_group(