[minor] Sync style changes (#9376)
This commit is contained in:
2
.github/workflows/pr-test-h20.yml
vendored
2
.github/workflows/pr-test-h20.yml
vendored
@@ -59,7 +59,7 @@ jobs:
|
|||||||
cd test/srt
|
cd test/srt
|
||||||
python3 run_suite.py --suite per-commit-8-gpu-h20
|
python3 run_suite.py --suite per-commit-8-gpu-h20
|
||||||
|
|
||||||
pr-test-finish:
|
pr-test-h20-finish:
|
||||||
needs: [
|
needs: [
|
||||||
check-changes,
|
check-changes,
|
||||||
per-commit-8-gpu-h20,
|
per-commit-8-gpu-h20,
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: codespell
|
- id: codespell
|
||||||
additional_dependencies: ['tomli']
|
additional_dependencies: ['tomli']
|
||||||
args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi']
|
args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
|
||||||
exclude: |
|
exclude: |
|
||||||
(?x)^(
|
(?x)^(
|
||||||
test/srt/test_reasoning_parser\.py|
|
test/srt/test_reasoning_parser\.py|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# SGLang on Ascend NPUs
|
# Ascend NPUs
|
||||||
|
|
||||||
You can install SGLang using any of the methods below. Please go through `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any issues or have any problems.
|
You can install SGLang using any of the methods below. Please go through `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any issues or have any problems.
|
||||||
|
|
||||||
|
|||||||
@@ -85,8 +85,11 @@ srt_hip = [
|
|||||||
"wave-lang==1.0.1",
|
"wave-lang==1.0.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
|
# https://docs.sglang.ai/platforms/cpu_server.html
|
||||||
srt_cpu = ["sglang[runtime_common]", "einops"]
|
srt_cpu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
|
# https://docs.sglang.ai/platforms/ascend_npu.html
|
||||||
|
srt_npu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
# xpu is not enabled in public vllm and torch whl,
|
# xpu is not enabled in public vllm and torch whl,
|
||||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||||
@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"]
|
|||||||
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
||||||
srt_hpu = ["sglang[runtime_common]"]
|
srt_hpu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
|
|
||||||
srt_npu = ["sglang[runtime_common]"]
|
|
||||||
|
|
||||||
openai = ["openai==1.99.1", "tiktoken"]
|
openai = ["openai==1.99.1", "tiktoken"]
|
||||||
anthropic = ["anthropic>=0.20.0"]
|
anthropic = ["anthropic>=0.20.0"]
|
||||||
litellm = ["litellm>=1.0.0"]
|
litellm = ["litellm>=1.0.0"]
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests
|
|||||||
from sglang.profiler import run_profile
|
from sglang.profiler import run_profile
|
||||||
from sglang.srt.entrypoints.http_server import launch_server
|
from sglang.srt.entrypoints.http_server import launch_server
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import is_blackwell, kill_process_tree
|
||||||
from sglang.test.test_utils import is_in_ci, write_github_step_summary
|
from sglang.test.test_utils import is_in_ci, write_github_step_summary
|
||||||
|
|
||||||
|
|
||||||
@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
|
|||||||
acc_length,
|
acc_length,
|
||||||
trace_link,
|
trace_link,
|
||||||
) in result:
|
) in result:
|
||||||
hourly_cost = 2 * server_args.tp_size # $2/hour for one H100
|
if is_blackwell():
|
||||||
|
hourly_cost_per_gpu = 4 # $4/hour for one B200
|
||||||
|
else:
|
||||||
|
hourly_cost_per_gpu = 2 # $2/hour for one H100
|
||||||
|
|
||||||
|
hourly_cost = hourly_cost_per_gpu * server_args.tp_size
|
||||||
input_util = 0.7
|
input_util = 0.7
|
||||||
accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
|
accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
|
||||||
line = (
|
line = (
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import urllib.parse
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|||||||
Reference in New Issue
Block a user