Improve profiler and integrate profiler in bench_one_batch_server (#6787)
This commit is contained in:
@@ -39,10 +39,7 @@ from sglang.srt.model_executor.forward_batch_info import (
|
||||
PPProxyTensors,
|
||||
)
|
||||
from sglang.srt.patch_torch import monkey_patch_torch_compile
|
||||
from sglang.srt.two_batch_overlap import (
|
||||
TboCudaGraphRunnerPlugin,
|
||||
TboForwardBatchPreparer,
|
||||
)
|
||||
from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
|
||||
from sglang.srt.utils import (
|
||||
get_available_gpu_memory,
|
||||
get_device_memory_capacity,
|
||||
|
||||
@@ -77,11 +77,7 @@ from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
|
||||
from sglang.srt.model_executor.expert_location_updater import ExpertLocationUpdater
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
|
||||
from sglang.srt.model_loader import get_model
|
||||
from sglang.srt.model_loader.loader import (
|
||||
DefaultModelLoader,
|
||||
device_loading_context,
|
||||
get_model_loader,
|
||||
)
|
||||
from sglang.srt.model_loader.loader import DefaultModelLoader, get_model_loader
|
||||
from sglang.srt.model_loader.utils import set_default_torch_dtype
|
||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||
from sglang.srt.patch_torch import monkey_patch_torch_reductions
|
||||
|
||||
Reference in New Issue
Block a user