This reverts commit d886b81971. It breaks the pd function. - vLLM version: v0.13.0 - vLLM main: bde38c11df - Signed-off-by: zhaomingyu <zhaomingyu13@h-partners.com>
This commit is contained in:
@@ -165,10 +165,6 @@ def graph_capture(device: torch.device):
|
||||
yield graph_capture_context
|
||||
|
||||
|
||||
def get_tp_context(drafter):
|
||||
return getattr(drafter, "tp_group_context", nullcontext())
|
||||
|
||||
|
||||
class ExecuteModelState(NamedTuple):
|
||||
"""Ephemeral cached state transferred between execute_model() and
|
||||
sample_tokens(), after execute_model() returns None."""
|
||||
@@ -2326,8 +2322,7 @@ class NPUModelRunner(GPUModelRunner):
|
||||
model_register(self.model, self.model_config)
|
||||
if self.drafter:
|
||||
logger.info("Loading drafter model...")
|
||||
with get_tp_context(self.drafter):
|
||||
self.drafter.load_model(self.model)
|
||||
self.drafter.load_model(self.model)
|
||||
if self.use_aux_hidden_state_outputs:
|
||||
self.model.set_aux_hidden_state_layers(
|
||||
self.model.get_eagle3_aux_hidden_state_layers())
|
||||
@@ -2703,15 +2698,11 @@ class NPUModelRunner(GPUModelRunner):
|
||||
kernel_block_sizes = []
|
||||
for kv_cache_group_id, kv_cache_group in enumerate(
|
||||
kv_cache_config.kv_cache_groups):
|
||||
kv_cache_spec = kv_cache_group.kv_cache_spec
|
||||
if isinstance(kv_cache_spec, UniformTypeKVCacheSpecs):
|
||||
# All layers in the UniformTypeKVCacheSpecs have the same type,
|
||||
# Pick an arbitrary one to dispatch.
|
||||
kv_cache_spec = next(
|
||||
iter(kv_cache_spec.kv_cache_specs.values()))
|
||||
if isinstance(kv_cache_spec, EncoderOnlyAttentionSpec):
|
||||
|
||||
if isinstance(kv_cache_group.kv_cache_spec,
|
||||
EncoderOnlyAttentionSpec):
|
||||
continue
|
||||
elif isinstance(kv_cache_spec, AttentionSpec):
|
||||
elif isinstance(kv_cache_group.kv_cache_spec, AttentionSpec):
|
||||
# This is an attention backend that supports virtual
|
||||
# block splitting. Get the supported block sizes from
|
||||
# the backend.
|
||||
|
||||
Reference in New Issue
Block a user