Move output processing logic from scheduler.py into a separate file (#4354)
This commit is contained in:
@@ -20,14 +20,13 @@ import random
|
||||
import tempfile
|
||||
from typing import List, Optional
|
||||
|
||||
import torch
|
||||
|
||||
from sglang.srt.hf_transformers_utils import check_gguf_file
|
||||
from sglang.srt.reasoning_parser import ReasoningParser
|
||||
from sglang.srt.utils import (
|
||||
get_amdgpu_memory_capacity,
|
||||
get_hpu_memory_capacity,
|
||||
get_nvgpu_memory_capacity,
|
||||
is_cuda,
|
||||
is_flashinfer_available,
|
||||
is_hip,
|
||||
is_port_available,
|
||||
@@ -71,6 +70,7 @@ class ServerArgs:
|
||||
schedule_policy: str = "fcfs"
|
||||
schedule_conservativeness: float = 1.0
|
||||
cpu_offload_gb: int = 0
|
||||
page_size: int = 1
|
||||
|
||||
# Other runtime options
|
||||
tp_size: int = 1
|
||||
@@ -190,10 +190,10 @@ class ServerArgs:
|
||||
if self.random_seed is None:
|
||||
self.random_seed = random.randint(0, 1 << 30)
|
||||
|
||||
if is_hip():
|
||||
gpu_mem = get_amdgpu_memory_capacity()
|
||||
elif torch.cuda.is_available():
|
||||
if is_cuda():
|
||||
gpu_mem = get_nvgpu_memory_capacity()
|
||||
elif is_hip():
|
||||
gpu_mem = get_amdgpu_memory_capacity()
|
||||
elif self.device == "hpu":
|
||||
gpu_mem = get_hpu_memory_capacity()
|
||||
else:
|
||||
@@ -258,7 +258,7 @@ class ServerArgs:
|
||||
f"EP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
|
||||
)
|
||||
|
||||
# Others
|
||||
# Data parallelism attention
|
||||
if self.enable_dp_attention:
|
||||
self.dp_size = self.tp_size
|
||||
assert self.tp_size % self.dp_size == 0
|
||||
@@ -507,6 +507,12 @@ class ServerArgs:
|
||||
default=ServerArgs.cpu_offload_gb,
|
||||
help="How many GBs of RAM to reserve for CPU offloading.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--page-size",
|
||||
type=int,
|
||||
default=ServerArgs.page_size,
|
||||
help="The number of tokens in a page.",
|
||||
)
|
||||
|
||||
# Other runtime options
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user