Rename InputMetadata -> ForwardBatch (#1543)

2024-09-30 02:41:11 -07:00
parent 3f0fe08d37
commit 36d5acfca5
44 changed files with 435 additions and 433 deletions
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -40,7 +40,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.pooler import Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.model_executor.forward_batch_info import InputMetadata
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch

 Qwen2Config = None

@@ -149,12 +149,12 @@ class Qwen2Attention(nn.Module):
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
-        input_metadata: InputMetadata,
+        forward_batch: ForwardBatch,
    ) -> torch.Tensor:
        qkv, _ = self.qkv_proj(hidden_states)
        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
        q, k = self.rotary_emb(positions, q, k)
-        attn_output = self.attn(q, k, v, input_metadata)
+        attn_output = self.attn(q, k, v, forward_batch)
        output, _ = self.o_proj(attn_output)
        return output

@@ -196,7 +196,7 @@ class Qwen2DecoderLayer(nn.Module):
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
-        input_metadata: InputMetadata,
+        forward_batch: ForwardBatch,
        residual: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Self Attention
@@ -208,7 +208,7 @@ class Qwen2DecoderLayer(nn.Module):
        hidden_states = self.self_attn(
            positions=positions,
            hidden_states=hidden_states,
-            input_metadata=input_metadata,
+            forward_batch=forward_batch,
        )

        # Fully Connected
@@ -243,7 +243,7 @@ class Qwen2Model(nn.Module):
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
-        input_metadata: InputMetadata,
+        forward_batch: ForwardBatch,
        input_embeds: torch.Tensor = None,
    ) -> torch.Tensor:
        if input_embeds is None:
@@ -256,7 +256,7 @@ class Qwen2Model(nn.Module):
            hidden_states, residual = layer(
                positions,
                hidden_states,
-                input_metadata,
+                forward_batch,
                residual,
            )
        hidden_states, _ = self.norm(hidden_states, residual)
@@ -283,17 +283,17 @@ class Qwen2ForCausalLM(nn.Module):
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
-        input_metadata: InputMetadata,
+        forward_batch: ForwardBatch,
        input_embeds: torch.Tensor = None,
        get_embedding: bool = False,
    ) -> torch.Tensor:
-        hidden_states = self.model(input_ids, positions, input_metadata, input_embeds)
+        hidden_states = self.model(input_ids, positions, forward_batch, input_embeds)
        if not get_embedding:
            return self.logits_processor(
-                input_ids, hidden_states, self.lm_head.weight, input_metadata
+                input_ids, hidden_states, self.lm_head.weight, forward_batch
            )
        else:
-            return self.pooler(hidden_states, input_metadata)
+            return self.pooler(hidden_states, forward_batch)

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
        stacked_params_mapping = [