diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 0926cfbe9..21f9a2111 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,16 +1,15 @@ - + ## Motivation - + -## Modification +## Modifications - + ## Checklist -- [ ] Before submitting a PR for review, make sure it has passed verification in your local development environment **at least**. -- [ ] Ensure pre-commit `pre-commit run --all-files` or other linting tools are used to fix potential lint issues. -- [ ] Confirm that modifications are covered by complete unit tests. If not, please add more unit tests for correctness. -- [ ] Modify documentation as needed, such as docstrings or example tutorials. +- [ ] Format your code according to the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/en/contributor_guide.md). +- [ ] Add unit tests as outlined in the [Contributor Guide](https://github.com/sgl-project/sglang/blob/main/docs/en/contributor_guide.md). +- [ ] Update documentation as needed, including docstrings or example tutorials. \ No newline at end of file diff --git a/README.md b/README.md index 9ac4fbb30..10b4f95ff 100644 --- a/README.md +++ b/README.md @@ -81,14 +81,17 @@ docker run --gpus all \ ### Method 4: Using docker compose +
> This method is recommended if you plan to serve it as a service. > A better approach is to use the [k8s-sglang-service.yaml](./docker/k8s-sglang-service.yaml). 1. Copy the [compose.yml](./docker/compose.yaml) to your local machine 2. Execute the command `docker compose up -d` in your terminal. +
### Method 5: Run on Kubernetes or Clouds with SkyPilot +
To deploy on Kubernetes or 12+ clouds, you can use [SkyPilot](https://github.com/skypilot-org/skypilot). 1. Install SkyPilot and set up Kubernetes cluster or cloud access: see [SkyPilot's documentation](https://skypilot.readthedocs.io/en/latest/getting-started/installation.html). @@ -114,8 +117,6 @@ run: | --port 30000 ``` -
- ```bash # Deploy on any cloud or Kubernetes cluster. Use --cloud to select a specific cloud provider. HF_TOKEN= sky launch -c sglang --env HF_TOKEN sglang.yaml @@ -124,7 +125,7 @@ HF_TOKEN= sky launch -c sglang --env HF_TOKEN sglang.yaml sky status --endpoint 30000 sglang ``` 3. To further scale up your deployment with autoscaling and failure recovery, check out the [SkyServe + SGLang guide](https://github.com/skypilot-org/skypilot/tree/master/llm/sglang#serving-llama-2-with-sglang-for-more-traffic-using-skyserve). - + ### Common Notes diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index fb198fd73..b3576b47b 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -147,13 +147,12 @@ def get_tokenizer( and kwargs.get("use_fast", True) and tokenizer_name != _FAST_LLAMA_TOKENIZER ): - pass - # warnings.warn( - # "For some LLaMA V1 models, initializing the fast tokenizer may " - # "take a long time. To reduce the initialization time, consider " - # f"using '{_FAST_LLAMA_TOKENIZER}' instead of the original " - # "tokenizer." - # ) + warnings.warn( + "For some LLaMA V1 models, initializing the fast tokenizer may " + "take a long time. To reduce the initialization time, consider " + f"using '{_FAST_LLAMA_TOKENIZER}' instead of the original " + "tokenizer." + ) try: tokenizer = AutoTokenizer.from_pretrained( tokenizer_name, diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index 42c291bb1..14374e580 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -270,7 +270,7 @@ class Req: if all_ids[prompt_tokens - 1] != self.origin_input_ids_unpadded[-1]: # TODO(lsyin): fix token fusion - warnings.warn( + logging.warning( "Token fusion between input and output, try to avoid this by removing the space at the end of the input." 
) return False @@ -791,7 +791,7 @@ class ScheduleBatch: ) if not torch.all(success): - warnings.warn("Sampling failed, fallback to top_k=1 strategy") + logging.warning("Sampling failed, fallback to top_k=1 strategy") probs = probs.masked_fill(torch.isnan(probs), 0.0) argmax_ids = torch.argmax(probs, dim=-1) batch_next_token_ids = torch.where( diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index b6cfa68bd..b8a4576f7 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -774,7 +774,7 @@ class ModelTpServer: torch.cuda.empty_cache() logger.info("Cache flushed successfully!") else: - warnings.warn( + logger.warning( f"Cache not flushed because there are pending requests. " f"#queue-req: {len(self.waiting_queue)}, " f"#running-req: {0 if self.running_batch is None else len(self.running_batch.reqs)}" diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index b74a19e60..2406addc8 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -237,7 +237,7 @@ class ModelRunner: self.max_total_num_tokens = self.profile_max_num_token(total_gpu_memory) if max_total_tokens is not None: if max_total_tokens > self.max_total_num_tokens: - warnings.warn( + logging.warning( f"max_total_tokens={max_total_tokens} is larger than the profiled value " f"{self.max_total_num_tokens}. " f"Use the profiled value instead." diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 12b40d6c4..582457ae0 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -17,10 +17,10 @@ limitations under the License. 
import asyncio import json +import logging import os import time import uuid -import warnings from http import HTTPStatus from typing import Dict, List, Optional @@ -65,6 +65,8 @@ from sglang.srt.openai_api.protocol import ( UsageInfo, ) +logger = logging.getLogger(__name__) + chat_template_name = None @@ -408,7 +410,7 @@ def v1_generate_request(all_requests: List[CompletionRequest]): "Parallel sampling is not supported for completions from files" ) if request.echo and request.logprobs: - warnings.warn( + logger.warning( "Echo is not compatible with logprobs. " "To compute logprobs of input prompt, please use SGLang /request API." )