Share target model embed and head weights for nextn (#4033)

This commit is contained in:
Ke Bao
2025-03-04 05:30:04 +08:00
committed by GitHub
parent 146ac8df07
commit 9fafa62db7
7 changed files with 47 additions and 45 deletions

View File

@@ -280,7 +280,8 @@ class ForwardBatch:
).to(device, non_blocking=True)
if (
model_runner.server_args.attention_backend != "torch_native"
- and model_runner.server_args.speculative_algorithm != "NEXTN"
+ # TODO: Fix triton kernel illegal memory access for EAGLE
+ and model_runner.server_args.speculative_algorithm != "EAGLE"
):
ret.extend_num_tokens = batch.extend_num_tokens
positions, ret.extend_start_loc = compute_position_triton(