diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
index f95c30786..668cd3390 100644
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -98,7 +98,9 @@ class LogitsProcessor(nn.Module):
             all_logits = tensor_model_parallel_all_gather(all_logits)
         all_logits = all_logits[:, : self.config.vocab_size]
 
-        all_logprobs = torch.log(torch.softmax(all_logits.float(), dim=-1) + 1e-6)
+        all_logprobs = all_logits.float()
+        all_logits = None
+        all_logprobs[:] = torch.nn.functional.log_softmax(all_logprobs, dim=-1)
 
         prefill_top_logprobs, decode_top_logprobs = self._get_top_logprobs(
             all_logprobs, input_metadata
diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py
index f283635c3..55bd9e80c 100644
--- a/python/sglang/srt/managers/router/model_rpc.py
+++ b/python/sglang/srt/managers/router/model_rpc.py
@@ -589,7 +589,7 @@ class ModelRpcServer:
                     + len(req.output_ids)
                     - req.prompt_tokens,
                     "completion_tokens_wo_jump_forward": req.completion_tokens_wo_jump_forward,
-                    "finish_reason": req.finish_reason,
+                    "finish_reason": str(req.finish_reason),
                     "hit_stop_str": req.hit_stop_str,
                 }
                 if req.return_logprob: