Improve the coverage of the openai api server test (#878)

2024-08-01 16:01:30 -07:00
parent 70c78cfb03
commit 60340a3643
3 changed files with 151 additions and 20 deletions
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -209,7 +209,7 @@ class LogitsProcessor(nn.Module):
                all_logits = all_logits[:, : self.config.vocab_size].float()

                all_logprobs = all_logits
-                del all_logits
+                del all_logits, hidden_states
                all_logprobs[:] = torch.nn.functional.log_softmax(all_logprobs, dim=-1)

                # Get the logprob of top-k tokens
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -72,8 +72,8 @@ from sglang.srt.utils import (
    allocate_init_ports,
    assert_pkg_version,
    enable_show_time_cost,
-    maybe_set_triton_cache_manager,
    kill_child_process,
+    maybe_set_triton_cache_manager,
    set_ulimit,
 )
 from sglang.utils import get_exception_traceback