Fix the perf regression due to additional_stop_token_ids (#1773)

2024-10-23 16:45:21 -07:00
parent 05b3bf5e8e
commit 8f8f96a621
5 changed files with 20 additions and 16 deletions
--- a/python/sglang/srt/layers/sampler.py
+++ b/python/sglang/srt/layers/sampler.py
@@ -42,11 +42,11 @@ class Sampler(nn.Module):
        logits = logits.contiguous()

        if self.use_nan_detectioin and torch.any(torch.isnan(logits)):
-            exit(1) if crash_on_warning else None
            logger.warning("Detected errors during sampling! NaN in the logits.")
            logits = torch.where(
                torch.isnan(logits), torch.full_like(logits, -1e5), logits
            )
+            exit(1) if crash_on_warning else None

        if sampling_info.is_all_greedy:
            # Use torch.argmax if all requests use greedy sampling