[Minor] Improve code style (#2419)
This commit is contained in:
@@ -29,7 +29,6 @@ from sglang.srt.managers.io_struct import (
|
||||
BatchStrOut,
|
||||
BatchTokenIDOut,
|
||||
)
|
||||
- from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.utils import configure_logger, get_zmq_socket
|
||||
from sglang.utils import find_printable_text, get_exception_traceback
|
||||
|
||||
@@ -1198,6 +1198,7 @@ class Scheduler:
|
||||
decode_ids_list = []
|
||||
read_offsets = []
|
||||
output_ids = []
|
||||
|
||||
skip_special_tokens = []
|
||||
spaces_between_special_tokens = []
|
||||
no_stop_trim = []
|
||||
|
||||
@@ -623,23 +623,23 @@ class TokenizerManager:
|
||||
i,
|
||||
)
|
||||
|
||||
+ if not isinstance(recv_obj, BatchEmbeddingOut):
|
||||
+ meta_info.update(
|
||||
+ {
|
||||
+ "completion_tokens": recv_obj.completion_tokens[i],
|
||||
+ "cached_tokens": recv_obj.cached_tokens[i],
|
||||
+ }
|
||||
+ )
|
||||
|
||||
if isinstance(recv_obj, BatchStrOut):
|
||||
out_dict = {
|
||||
"text": recv_obj.output_strs[i],
|
||||
- "meta_info": {
|
||||
- **meta_info,
|
||||
- "completion_tokens": recv_obj.completion_tokens[i],
|
||||
- "cached_tokens": recv_obj.cached_tokens[i],
|
||||
- },
|
||||
+ "meta_info": meta_info,
|
||||
}
|
||||
elif isinstance(recv_obj, BatchTokenIDOut):
|
||||
out_dict = {
|
||||
"token_ids": recv_obj.output_ids[i],
|
||||
- "meta_info": {
|
||||
- **meta_info,
|
||||
- "completion_tokens": recv_obj.completion_tokens[i],
|
||||
- "cached_tokens": recv_obj.cached_tokens[i],
|
||||
- },
|
||||
+ "meta_info": meta_info,
|
||||
}
|
||||
else:
|
||||
assert isinstance(recv_obj, BatchEmbeddingOut)
|
||||
|
||||
@@ -114,7 +114,7 @@ class ModelRunner:
|
||||
server_args.chunked_prefill_size = -1
|
||||
self.mem_fraction_static *= 0.95
|
||||
logger.info(
|
||||
- f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static} "
|
||||
+ f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static:.3f} "
|
||||
f"and turn off chunked prefill "
|
||||
f"because this is a multimodal model."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user