This reverts commit d886b81971. It breaks the pd function. - vLLM version: v0.13.0 - vLLM main: bde38c11df Signed-off-by: zhaomingyu <zhaomingyu13@h-partners.com>
This commit is contained in:
@@ -130,27 +130,6 @@ class EagleProposer(VllmEagleProposer):
|
||||
|
||||
self.use_sparse = hasattr(vllm_config.model_config.hf_text_config,
|
||||
"index_topk")
|
||||
# NOTE:
|
||||
# `draft_tensor_parallel_size` does not take effect for Eagle:
|
||||
# the draft model uses the same TP size as the target model in practice.
|
||||
# So we apply this patch to set the draft model's tp to 1 separately.
|
||||
# Due to verification of `_verify_and_get_draft_tp` in vllm,
|
||||
# the value of `draft_tensor_parallel_size` here will be either 1
|
||||
# or the same as target model.
|
||||
# TODO(zhaomingyu13): If we want to adapt to the case where draft model tp
|
||||
# is not 1 and differs from target model, this part should be rewritten.
|
||||
if (vllm_config.parallel_config.tensor_parallel_size
|
||||
!= self.speculative_config.draft_tensor_parallel_size):
|
||||
tp_group = init_model_parallel_group(
|
||||
[[get_world_group().rank]],
|
||||
get_world_group().rank,
|
||||
torch.distributed.get_backend(get_world_group().device_group),
|
||||
use_message_queue_broadcaster=True,
|
||||
group_name="tp",
|
||||
)
|
||||
self.tp_group_context = patch_tensor_parallel_group(tp_group)
|
||||
else:
|
||||
self.tp_group_context = nullcontext()
|
||||
|
||||
# TODO: Remove it when the bug of fx-graph is solved
|
||||
self.maybe_eager_context: ContextManager[Any] = nullcontext()
|
||||
|
||||
Reference in New Issue
Block a user