Docs fix about EAGLE and streaming output (#3166)

Co-authored-by: Chayenne <zhaochenyang@ucla.edu>
Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: Jhin <jhinpan@umich.edu>
This commit is contained in:
Jhin
2025-01-27 20:10:45 -06:00
committed by GitHub
parent 08104b56de
commit 7b9b4f4426
6 changed files with 91 additions and 29 deletions

View File

@@ -373,3 +373,45 @@ class TypeBasedDispatcher:
if isinstance(obj, ty):
return fn(obj)
raise ValueError(f"Invalid object: {obj}")
def trim_overlap(existing_text, new_chunk):
    """
    Return 'new_chunk' without the part that repeats the end of 'existing_text'.

    The overlap is the longest prefix of 'new_chunk' that is also a suffix of
    'existing_text'. When no such prefix exists (or either string is empty),
    'new_chunk' is returned unchanged.
    """
    # An overlap can never be longer than the shorter of the two strings.
    longest_candidate = min(len(existing_text), len(new_chunk))

    # Try candidate lengths from longest to shortest; the first hit is the
    # largest overlap. Fall back to 0 when nothing matches.
    overlap_len = next(
        (
            size
            for size in range(longest_candidate, 0, -1)
            if existing_text.endswith(new_chunk[:size])
        ),
        0,
    )
    return new_chunk[overlap_len:]
def stream_and_merge(llm, prompt, sampling_params):
    """
    Stream a generation from 'llm' and return the merged text.

    Each streamed chunk's "text" field is passed through trim_overlap against
    the text accumulated so far, so any leading portion that repeats the end
    of the accumulated text is dropped before the chunk is appended.
    """
    merged = ""
    stream = llm.generate(prompt, sampling_params, stream=True)
    for piece in stream:
        # Only the part of this chunk that is not already at the tail of
        # 'merged' gets appended.
        merged = merged + trim_overlap(merged, piece["text"])
    return merged
async def async_stream_and_merge(llm, prompt, sampling_params):
    """
    Asynchronously stream a generation from 'llm', yielding de-overlapped text.

    For every streamed chunk, the "text" field is trimmed with trim_overlap
    against everything yielded so far, and the remaining portion is yielded
    immediately (this may be an empty string when the chunk is fully covered
    by the text already seen).
    """
    merged = ""
    # async_generate is awaited first to obtain the async iterator of chunks.
    stream = await llm.async_generate(prompt, sampling_params, stream=True)
    async for piece in stream:
        fresh_part = trim_overlap(merged, piece["text"])
        merged = merged + fresh_part
        yield fresh_part  # only the non-overlapping portion