Let reward model take text inputs instead of message lists (#1907)

Co-authored-by: Kyle Corbitt <kyle@corbt.com>
2024-11-03 13:27:12 -08:00
parent 793b79dbe9
commit 2ce32db6fb
12 changed files with 43 additions and 58 deletions
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -273,6 +273,7 @@ class SRTRunner:
            disable_cuda_graph=disable_cuda_graph,
            disable_radix_cache=disable_radix_cache,
        )
+        self.tokenizer = get_tokenizer(model_path)

    def forward(
        self,
@@ -366,7 +367,7 @@ class SRTRunner:
                return ModelOutput(embed_logits=logits)
            else:
                scores = [x["embedding"][0] for x in response]
-                return ModelOutput(scores=logits)
+                return ModelOutput(scores=scores)

    def __enter__(self):
        return self