diff --git a/.github/workflows/pr-test-h20.yml b/.github/workflows/pr-test-h20.yml
index e283ea42f..1955dc2d8 100644
--- a/.github/workflows/pr-test-h20.yml
+++ b/.github/workflows/pr-test-h20.yml
@@ -59,7 +59,7 @@ jobs:
           cd test/srt
           python3 run_suite.py --suite per-commit-8-gpu-h20
 
-  pr-test-finish:
+  pr-test-h20-finish:
     needs: [
       check-changes,
       per-commit-8-gpu-h20,
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8f7455904..a295f2eb4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
     hooks:
       - id: codespell
         additional_dependencies: ['tomli']
-        args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi']
+        args: ['--toml', 'python/pyproject.toml', '-L', 'cann,thi,makro,wil,rouge']
         exclude: |
           (?x)^(
             test/srt/test_reasoning_parser\.py|
diff --git a/docs/platforms/ascend_npu.md b/docs/platforms/ascend_npu.md
index 53fc009fb..a21a95b60 100644
--- a/docs/platforms/ascend_npu.md
+++ b/docs/platforms/ascend_npu.md
@@ -1,4 +1,4 @@
-# SGLang on Ascend NPUs
+# Ascend NPUs
 
 You can install SGLang using any of the methods below. Please go through `System Settings` section to ensure the clusters are roaring at max performance. Feel free to leave an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you encounter any issues or have any problems.
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 4e619d3e3..2543e7c1a 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -85,8 +85,11 @@ srt_hip = [
     "wave-lang==1.0.1",
 ]
 
-# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
-srt_cpu = ["sglang[runtime_common]", "einops"]
+# CPU: see https://docs.sglang.ai/platforms/cpu_server.html
+srt_cpu = ["sglang[runtime_common]"]
+
+# Ascend NPU: see https://docs.sglang.ai/platforms/ascend_npu.html
+srt_npu = ["sglang[runtime_common]"]
 
 # xpu is not enabled in public vllm and torch whl,
 # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
@@ -96,9 +99,6 @@ srt_xpu = ["sglang[runtime_common]"]
 # https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
 srt_hpu = ["sglang[runtime_common]"]
 
-# https://vllm-ascend.readthedocs.io/en/latest/installation.html
-srt_npu = ["sglang[runtime_common]"]
-
 openai = ["openai==1.99.1", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]
diff --git a/python/sglang/bench_one_batch_server.py b/python/sglang/bench_one_batch_server.py
index d925ae8ce..8ab952559 100644
--- a/python/sglang/bench_one_batch_server.py
+++ b/python/sglang/bench_one_batch_server.py
@@ -26,7 +26,7 @@ from sglang.bench_serving import get_tokenizer, sample_random_requests
 from sglang.profiler import run_profile
 from sglang.srt.entrypoints.http_server import launch_server
 from sglang.srt.server_args import ServerArgs
-from sglang.srt.utils import kill_process_tree
+from sglang.srt.utils import is_blackwell, kill_process_tree
 from sglang.test.test_utils import is_in_ci, write_github_step_summary
 
 
@@ -363,7 +363,12 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
         acc_length,
         trace_link,
     ) in result:
-        hourly_cost = 2 * server_args.tp_size  # $2/hour for one H100
+        if is_blackwell():
+            hourly_cost_per_gpu = 4  # $4/hour for one B200
+        else:
+            hourly_cost_per_gpu = 2  # $2/hour for one H100
+
+        hourly_cost = hourly_cost_per_gpu * server_args.tp_size
         input_util = 0.7
         accept_length = round(acc_length, 2) if acc_length is not None else "n/a"
         line = (
diff --git a/python/sglang/profiler.py b/python/sglang/profiler.py
index d872ca320..3503ae7fc 100644
--- a/python/sglang/profiler.py
+++ b/python/sglang/profiler.py
@@ -9,6 +9,7 @@ import argparse
 import json
 import os
 import time
+import urllib.parse
 from argparse import ArgumentParser
 from pathlib import Path
 from typing import List, Optional