Update model_loader deps and qqq quantization deps (#2220) (#2318)

commit 85e1a6f3aa (parent 33deca81b5)
Author: Yineng Zhang
Date:   2024-12-02 23:22:13 +08:00
Co-authored-by: HandH1998 <1335248067@qq.com>
Committed by: GitHub
58 changed files with 2363 additions and 366 deletions


@@ -21,6 +21,7 @@ from transformers import LlamaConfig
 from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
+from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.models.llama import LlamaForCausalLM, LlamaModel
@@ -29,7 +30,6 @@ class LlamaForSequenceClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
@@ -84,9 +84,8 @@ class LlamaForSequenceClassificationWithNormal_Weights(LlamaForSequenceClassification):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config=None,
     ) -> None:
-        super().__init__(config, quant_config, cache_config)
+        super().__init__(config, quant_config)
         self.weights = self.Weights(config.hidden_size, self.num_labels)

     @torch.no_grad()
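
Taken together, the hunks above make two changes: the classification models drop the unused `cache_config` constructor parameter, and `default_weight_loader` is now imported from sglang's own `sglang.srt.model_loader.weight_utils` instead of an external model-loader dependency. Below is a minimal sketch of what call sites look like after the change; the module path `sglang.srt.models.llama_classification` and the `load_weights` body are illustrative assumptions in the style used across sglang models, not code taken from this commit:

```python
# Hypothetical sketch of the post-change API, not code from this commit.
from typing import Iterable, Tuple

import torch
from transformers import LlamaConfig

from sglang.srt.model_loader.weight_utils import default_weight_loader
# Assumed module path for the class shown in the diff above.
from sglang.srt.models.llama_classification import LlamaForSequenceClassification

# cache_config is no longer accepted; pass only config and quant_config.
model = LlamaForSequenceClassification(LlamaConfig(), quant_config=None)

def load_weights(model: torch.nn.Module,
                 weights: Iterable[Tuple[str, torch.Tensor]]) -> None:
    """Illustrative weight-loading loop using the relocated helper."""
    params_dict = dict(model.named_parameters())
    for name, loaded_weight in weights:
        param = params_dict[name]
        # A parameter may carry its own weight_loader attribute; otherwise
        # fall back to default_weight_loader, which copies loaded_weight
        # into param in place.
        weight_loader = getattr(param, "weight_loader", default_weight_loader)
        weight_loader(param, loaded_weight)
```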