Let reward model take text inputs instead of message lists (#1907)

Co-authored-by: Kyle Corbitt <kyle@corbt.com>
2024-11-03 13:27:12 -08:00
parent 793b79dbe9
commit 2ce32db6fb
12 changed files with 43 additions and 58 deletions
--- a/python/sglang/srt/models/llama_reward.py
+++ b/python/sglang/srt/models/llama_reward.py
@@ -52,7 +52,12 @@ class LlamaForSequenceClassification(nn.Module):
        positions: torch.Tensor,
        forward_batch: ForwardBatch,
        input_embeds: torch.Tensor = None,
+        get_embedding: bool = True,
    ) -> EmbeddingPoolerOutput:
+        assert (
+            get_embedding
+        ), "LlamaForSequenceClassification is only used for embedding"
+
        hidden_states = self.model(input_ids, positions, forward_batch, input_embeds)
        scores = self.score(hidden_states)