[Minor] Improve code style (#2419)
This commit is contained in:
@@ -29,7 +29,6 @@ from sglang.srt.managers.io_struct import (
|
|||||||
BatchStrOut,
|
BatchStrOut,
|
||||||
BatchTokenIDOut,
|
BatchTokenIDOut,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN
|
|
||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
from sglang.srt.utils import configure_logger, get_zmq_socket
|
from sglang.srt.utils import configure_logger, get_zmq_socket
|
||||||
from sglang.utils import find_printable_text, get_exception_traceback
|
from sglang.utils import find_printable_text, get_exception_traceback
|
||||||
|
|||||||
@@ -1198,6 +1198,7 @@ class Scheduler:
|
|||||||
decode_ids_list = []
|
decode_ids_list = []
|
||||||
read_offsets = []
|
read_offsets = []
|
||||||
output_ids = []
|
output_ids = []
|
||||||
|
|
||||||
skip_special_tokens = []
|
skip_special_tokens = []
|
||||||
spaces_between_special_tokens = []
|
spaces_between_special_tokens = []
|
||||||
no_stop_trim = []
|
no_stop_trim = []
|
||||||
|
|||||||
@@ -623,23 +623,23 @@ class TokenizerManager:
|
|||||||
i,
|
i,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not isinstance(recv_obj, BatchEmbeddingOut):
|
||||||
|
meta_info.update(
|
||||||
|
{
|
||||||
|
"completion_tokens": recv_obj.completion_tokens[i],
|
||||||
|
"cached_tokens": recv_obj.cached_tokens[i],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if isinstance(recv_obj, BatchStrOut):
|
if isinstance(recv_obj, BatchStrOut):
|
||||||
out_dict = {
|
out_dict = {
|
||||||
"text": recv_obj.output_strs[i],
|
"text": recv_obj.output_strs[i],
|
||||||
"meta_info": {
|
"meta_info": meta_info,
|
||||||
**meta_info,
|
|
||||||
"completion_tokens": recv_obj.completion_tokens[i],
|
|
||||||
"cached_tokens": recv_obj.cached_tokens[i],
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
elif isinstance(recv_obj, BatchTokenIDOut):
|
elif isinstance(recv_obj, BatchTokenIDOut):
|
||||||
out_dict = {
|
out_dict = {
|
||||||
"token_ids": recv_obj.output_ids[i],
|
"token_ids": recv_obj.output_ids[i],
|
||||||
"meta_info": {
|
"meta_info": meta_info,
|
||||||
**meta_info,
|
|
||||||
"completion_tokens": recv_obj.completion_tokens[i],
|
|
||||||
"cached_tokens": recv_obj.cached_tokens[i],
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
assert isinstance(recv_obj, BatchEmbeddingOut)
|
assert isinstance(recv_obj, BatchEmbeddingOut)
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ class ModelRunner:
|
|||||||
server_args.chunked_prefill_size = -1
|
server_args.chunked_prefill_size = -1
|
||||||
self.mem_fraction_static *= 0.95
|
self.mem_fraction_static *= 0.95
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static} "
|
f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static:.3f} "
|
||||||
f"and turn off chunked prefill "
|
f"and turn off chunked prefill "
|
||||||
f"because this is a multimodal model."
|
f"because this is a multimodal model."
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user