Improve the control of streaming and improve the first token latency in streaming (#117)

This commit is contained in:
Lianmin Zheng
2024-01-29 17:05:42 -08:00
committed by GitHub
parent cd6872334e
commit 6f560c761b
12 changed files with 46 additions and 23 deletions

View File

@@ -18,9 +18,9 @@ from sglang.srt.hf_transformers_utils import (
)
from sglang.srt.managers.io_struct import (
BatchStrOut,
FlushCacheReq,
GenerateReqInput,
TokenizedGenerateReqInput,
FlushCacheReq,
)
from sglang.srt.mm_utils import expand2square, process_anyres_image
from sglang.srt.sampling_params import SamplingParams