[VLM RLHF] Take Image input for verl vlm rollout (#4915)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: GeLee <leege233@gmail.com>
2025-04-01 20:03:17 -07:00
parent 12047f5e94
commit 9eb49e878b
4 changed files with 18 additions and 12 deletions
--- a/python/sglang/srt/entrypoints/engine.py
+++ b/python/sglang/srt/entrypoints/engine.py
@@ -151,10 +151,6 @@ class Engine:
        The arguments of this function is the same as `sglang/srt/managers/io_struct.py::GenerateReqInput`.
        Please refer to `GenerateReqInput` for the documentation.
        """
-        modalities_list = []
-        if image_data is not None:
-            modalities_list.append("image")
-
        obj = GenerateReqInput(
            text=prompt,
            input_ids=input_ids,
@@ -165,7 +161,6 @@ class Engine:
            top_logprobs_num=top_logprobs_num,
            token_ids_logprob=token_ids_logprob,
            lora_path=lora_path,
-            modalities=modalities_list,
            custom_logit_processor=custom_logit_processor,
            return_hidden_states=return_hidden_states,
            stream=stream,