Support penalty in overlap mode; return logprob with chunked prefill; improve benchmark scripts (#3988)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: dhou-xai <dhou@x.ai>
Co-authored-by: Hanming Lu <hanming_lu@berkeley.edu>
This commit is contained in:
@@ -49,7 +49,7 @@ class TestHiddenState(unittest.TestCase):
 with torch.inference_mode():
     hf_out = model(
         torch.tensor(
-            [input_id + output["token_ids"][:-1]], device=model.device
+            [input_id + output["output_ids"][:-1]], device=model.device
         ),
         output_hidden_states=True,
     )
Reference in New Issue
Block a user