Add return hidden state in the native API (#3897)
Co-authored-by: Beichen-Ma <mabeichen12@gmail.com> Co-authored-by: Chayenne <zhaochen20@outlook.com>
This commit is contained in:
@@ -2,7 +2,9 @@
|
||||
Usage:
|
||||
python hidden_states.py
|
||||
|
||||
Note that we are actively working on moving return_hidden_states to the sampling_params.
|
||||
Note that each time you change the `return_hidden_states` parameter,
|
||||
the CUDA graph will be recaptured, which might lead to a performance hit.
|
||||
So avoid alternating between requests that return hidden states and requests that return only completions.
|
||||
"""
|
||||
|
||||
import sglang as sgl
|
||||
@@ -18,10 +20,14 @@ def main():
|
||||
# Create an LLM.
|
||||
llm = sgl.Engine(
|
||||
model_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
||||
return_hidden_states=True,
|
||||
)
|
||||
|
||||
sampling_params = {"temperature": 0.8, "top_p": 0.95, "max_new_tokens": 10}
|
||||
sampling_params = {
|
||||
"temperature": 0.8,
|
||||
"top_p": 0.95,
|
||||
"max_new_tokens": 10,
|
||||
"return_hidden_states": True,
|
||||
}
|
||||
|
||||
outputs = llm.generate(prompts, sampling_params=sampling_params)
|
||||
for prompt, output in zip(prompts, outputs):
|
||||
|
||||
Reference in New Issue
Block a user