Logging and minor fixes for two-batch overlap and EPLB (#6595)

This commit is contained in:
fzyzcjy
2025-05-26 13:36:40 +08:00
committed by GitHub
parent a191a0e47c
commit 93e53f6e0b
3 changed files with 19 additions and 3 deletions

View File

@@ -152,8 +152,10 @@ class ExpertLocationMetadata:
return ExpertLocationMetadata._init_raw( return ExpertLocationMetadata._init_raw(
ep_size=common["ep_size"], ep_size=common["ep_size"],
physical_to_logical_map=physical_to_logical_map, physical_to_logical_map=physical_to_logical_map.to(server_args.device),
logical_to_all_physical_map=logical_to_all_physical_map, logical_to_all_physical_map=logical_to_all_physical_map.to(
server_args.device
),
) )
@staticmethod @staticmethod

View File

@@ -1,4 +1,5 @@
import dataclasses import dataclasses
import logging
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
import torch import torch
@@ -11,11 +12,15 @@ from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.srt.operations import execute_operations, execute_overlapped_operations from sglang.srt.operations import execute_operations, execute_overlapped_operations
from sglang.srt.operations_strategy import OperationsStrategy from sglang.srt.operations_strategy import OperationsStrategy
from sglang.srt.utils import BumpAllocator, DeepEPMode from sglang.srt.utils import BumpAllocator, DeepEPMode, get_bool_env_var
if TYPE_CHECKING: if TYPE_CHECKING:
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
_tbo_debug = get_bool_env_var("SGLANG_TBO_DEBUG")
logger = logging.getLogger(__name__)
# -------------------------------- Compute Basic Info --------------------------------------- # -------------------------------- Compute Basic Info ---------------------------------------
@@ -178,6 +183,14 @@ class TboForwardBatchPreparer:
extend_seq_lens=batch.extend_seq_lens_cpu, extend_seq_lens=batch.extend_seq_lens_cpu,
) )
if _tbo_debug:
logger.info(
f"TboForwardBatchPreparer.prepare "
f"tbo_split_seq_index={batch.tbo_split_seq_index} "
f"tbo_split_token_index={tbo_split_token_index} "
f"extend_seq_lens={batch.extend_seq_lens_cpu}"
)
assert isinstance(batch.attn_backend, TboAttnBackend) assert isinstance(batch.attn_backend, TboAttnBackend)
attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children

View File

@@ -44,6 +44,7 @@ from functools import lru_cache
from importlib.metadata import PackageNotFoundError, version from importlib.metadata import PackageNotFoundError, version
from importlib.util import find_spec from importlib.util import find_spec
from io import BytesIO from io import BytesIO
from json import JSONDecodeError
from multiprocessing.reduction import ForkingPickler from multiprocessing.reduction import ForkingPickler
from pathlib import Path from pathlib import Path
from typing import ( from typing import (