[minor] Sync style changes (#9376)

This commit is contained in:
Lianmin Zheng
2025-08-19 21:35:01 -07:00
committed by GitHub
parent 3680d6f88b
commit f20b6a3f2b
6 changed files with 16 additions and 10 deletions

View File

@@ -85,8 +85,11 @@ srt_hip = [
"wave-lang==1.0.1",
]
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
srt_cpu = ["sglang[runtime_common]", "einops"]
# https://docs.sglang.ai/platforms/cpu_server.html
srt_cpu = ["sglang[runtime_common]"]
# https://docs.sglang.ai/platforms/ascend_npu.html
srt_npu = ["sglang[runtime_common]"]
# xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"]
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]"]
openai = ["openai==1.99.1", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]

View File

@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests
from sglang.profiler import run_profile
from sglang.srt.entrypoints.http_server import launch_server
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import kill_process_tree
from sglang.srt.utils import is_blackwell, kill_process_tree
from sglang.test.test_utils import is_in_ci, write_github_step_summary
@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
acc_length,
trace_link,
) in result:
hourly_cost = 2 * server_args.tp_size # $2/hour for one H100
if is_blackwell():
hourly_cost_per_gpu = 4 # $4/hour for one B200
else:
hourly_cost_per_gpu = 2 # $2/hour for one H100
hourly_cost = hourly_cost_per_gpu * server_args.tp_size
input_util = 0.7
accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
line = (

View File

@@ -9,6 +9,7 @@ import argparse
import json
import os
import time
import urllib.parse
from argparse import ArgumentParser
from pathlib import Path
from typing import List, Optional