Logging and minor fixes to two batch overlap and EPLB (#6595)
This commit is contained in:
@@ -152,8 +152,10 @@ class ExpertLocationMetadata:
|
||||
|
||||
return ExpertLocationMetadata._init_raw(
|
||||
ep_size=common["ep_size"],
|
||||
-physical_to_logical_map=physical_to_logical_map,
|
||||
-logical_to_all_physical_map=logical_to_all_physical_map,
|
||||
+physical_to_logical_map=physical_to_logical_map.to(server_args.device),
|
||||
+logical_to_all_physical_map=logical_to_all_physical_map.to(
|
||||
+server_args.device
|
||||
+),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import dataclasses
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
|
||||
|
||||
import torch
|
||||
@@ -11,11 +12,15 @@ from sglang.srt.managers.schedule_batch import global_server_args_dict
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
|
||||
from sglang.srt.operations import execute_operations, execute_overlapped_operations
|
||||
from sglang.srt.operations_strategy import OperationsStrategy
|
||||
-from sglang.srt.utils import BumpAllocator, DeepEPMode
|
||||
+from sglang.srt.utils import BumpAllocator, DeepEPMode, get_bool_env_var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
||||
|
||||
_tbo_debug = get_bool_env_var("SGLANG_TBO_DEBUG")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------- Compute Basic Info ---------------------------------------
|
||||
|
||||
@@ -178,6 +183,14 @@ class TboForwardBatchPreparer:
|
||||
extend_seq_lens=batch.extend_seq_lens_cpu,
|
||||
)
|
||||
|
||||
if _tbo_debug:
|
||||
logger.info(
|
||||
f"TboForwardBatchPreparer.prepare "
|
||||
f"tbo_split_seq_index={batch.tbo_split_seq_index} "
|
||||
f"tbo_split_token_index={tbo_split_token_index} "
|
||||
f"extend_seq_lens={batch.extend_seq_lens_cpu}"
|
||||
)
|
||||
|
||||
assert isinstance(batch.attn_backend, TboAttnBackend)
|
||||
attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ from functools import lru_cache
|
||||
from importlib.metadata import PackageNotFoundError, version
|
||||
from importlib.util import find_spec
|
||||
from io import BytesIO
|
||||
from json import JSONDecodeError
|
||||
from multiprocessing.reduction import ForkingPickler
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
|
||||
Reference in New Issue
Block a user