Logging and minor fixes to two batch overlap and EPLB (#6595)
This commit is contained in:
@@ -152,8 +152,10 @@ class ExpertLocationMetadata:
|
|||||||
|
|
||||||
return ExpertLocationMetadata._init_raw(
|
return ExpertLocationMetadata._init_raw(
|
||||||
ep_size=common["ep_size"],
|
ep_size=common["ep_size"],
|
||||||
physical_to_logical_map=physical_to_logical_map,
|
physical_to_logical_map=physical_to_logical_map.to(server_args.device),
|
||||||
logical_to_all_physical_map=logical_to_all_physical_map,
|
logical_to_all_physical_map=logical_to_all_physical_map.to(
|
||||||
|
server_args.device
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
|
import logging
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
|
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@@ -11,11 +12,15 @@ from sglang.srt.managers.schedule_batch import global_server_args_dict
|
|||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
|
||||||
from sglang.srt.operations import execute_operations, execute_overlapped_operations
|
from sglang.srt.operations import execute_operations, execute_overlapped_operations
|
||||||
from sglang.srt.operations_strategy import OperationsStrategy
|
from sglang.srt.operations_strategy import OperationsStrategy
|
||||||
from sglang.srt.utils import BumpAllocator, DeepEPMode
|
from sglang.srt.utils import BumpAllocator, DeepEPMode, get_bool_env_var
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
||||||
|
|
||||||
|
_tbo_debug = get_bool_env_var("SGLANG_TBO_DEBUG")
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------- Compute Basic Info ---------------------------------------
|
# -------------------------------- Compute Basic Info ---------------------------------------
|
||||||
|
|
||||||
@@ -178,6 +183,14 @@ class TboForwardBatchPreparer:
|
|||||||
extend_seq_lens=batch.extend_seq_lens_cpu,
|
extend_seq_lens=batch.extend_seq_lens_cpu,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if _tbo_debug:
|
||||||
|
logger.info(
|
||||||
|
f"TboForwardBatchPreparer.prepare "
|
||||||
|
f"tbo_split_seq_index={batch.tbo_split_seq_index} "
|
||||||
|
f"tbo_split_token_index={tbo_split_token_index} "
|
||||||
|
f"extend_seq_lens={batch.extend_seq_lens_cpu}"
|
||||||
|
)
|
||||||
|
|
||||||
assert isinstance(batch.attn_backend, TboAttnBackend)
|
assert isinstance(batch.attn_backend, TboAttnBackend)
|
||||||
attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children
|
attn_backend_child_a, attn_backend_child_b = batch.attn_backend.children
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ from functools import lru_cache
|
|||||||
from importlib.metadata import PackageNotFoundError, version
|
from importlib.metadata import PackageNotFoundError, version
|
||||||
from importlib.util import find_spec
|
from importlib.util import find_spec
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from json import JSONDecodeError
|
||||||
from multiprocessing.reduction import ForkingPickler
|
from multiprocessing.reduction import ForkingPickler
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import (
|
from typing import (
|
||||||
|
|||||||
Reference in New Issue
Block a user