diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index 80816c28e..9625ff44e 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -56,6 +56,9 @@ class GenerateReqInput:
     # LoRA related
     lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
 
+    # Whether it is a single request or a batch request
+    is_single: bool = True
+
     def post_init(self):
         if (self.text is None and self.input_ids is None) or (
             self.text is not None and self.input_ids is not None
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 2621ccd4f..2bc7ff04b 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -150,9 +150,13 @@ class TokenizerManager:
         while self.model_update_lock.locked():
             await asyncio.sleep(0.001)
 
+        if isinstance(obj, EmbeddingReqInput) and self.is_generation:
+            raise ValueError(
+                "This model does not appear to be an embedding model by default. Please add `--is-embedding` when launching the server or try another model."
+            )
+
         obj.post_init()
         is_single = obj.is_single
-
         if is_single:
             async for response in self._handle_single_request(obj, request):
                 yield response
diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 644cb2b8a..9afae99f9 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -542,8 +542,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
         kill_child_process(pid, including_parent=False)
         return
 
-    print(f"{res.json()=}")
-
     logger.info("The server is fired up and ready to roll!")
     if pipe_finish_writer is not None:
         pipe_finish_writer.send("ready")
diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py
index ad887e819..4d05eab8d 100755
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -40,20 +40,23 @@ class ModelCase:
     prefill_tolerance: float = 5e-2
     decode_tolerance: float = 5e-2
     rouge_l_tolerance: float = 1
+    skip_long_prompt: bool = False
 
 
 # Popular models that run on the CI
 CI_MODELS = [
     ModelCase("meta-llama/Llama-3.1-8B-Instruct"),
-    ModelCase("google/gemma-2-2b"),
+    ModelCase(
+        "google/gemma-2-2b", skip_long_prompt=True
+    ),  # There is a bug with the new transformers library. This can only run with transformers==4.44
 ]
 
 # All other models that do not run on the CI
 ALL_OTHER_MODELS = [
     ModelCase("Qwen/Qwen2-1.5B"),
     ModelCase("Qwen/Qwen2.5-14B-Instruct"),
-    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct"),
-    ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2),
+    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
+    ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
 ]
 
 TORCH_DTYPES = [torch.float16]
@@ -136,8 +139,15 @@ class TestGenerationModels(unittest.TestCase):
     def test_ci_models(self):
         for model_case in CI_MODELS:
             for torch_dtype in TORCH_DTYPES:
+
+                # Skip long prompts for models that do not have a long context
+                prompts = DEFAULT_PROMPTS
+                if model_case.skip_long_prompt:
+                    prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
+
+                # Assert the logits and output strs are close
                 self.assert_close_logits_and_output_strs(
-                    DEFAULT_PROMPTS, model_case, torch_dtype
+                    prompts, model_case, torch_dtype
                 )
 
     def test_others(self):
@@ -152,13 +162,9 @@ class TestGenerationModels(unittest.TestCase):
             ):
                 continue
 
-            # Skip long prompts for models that does not have a long context
+            # Skip long prompts for models that do not have a long context
             prompts = DEFAULT_PROMPTS
-            if model_case.model_path in [
-                "HuggingFaceTB/SmolLM-135M-Instruct",
-                "allenai/OLMo-1B-0724-hf",
-                "google/gemma-2-2b",  # There is a bug with new transformers library. This can only run with transformers==4.44
-            ]:
+            if model_case.skip_long_prompt:
                 prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
 
             # Assert the logits and output strs are close