[feature] Ascend quantization support (#7791)

Co-authored-by: ichernob <ichernobnn@gmail.com>
Co-authored-by: liupeng <liupeng374@huawei.com>
2025-07-10 19:17:37 +03:00
parent 4a0d19198b
commit 766392c6bd
13 changed files with 889 additions and 34 deletions
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -538,6 +538,8 @@ class Qwen2ForCausalLM(nn.Module):
                # Skip loading extra bias for GPTQ models.
                if name.endswith(".bias") and name not in params_dict:
                    continue
+                if name not in params_dict:
+                    continue
                param = params_dict[name]
                weight_loader = param.weight_loader
                weight_loader(param, loaded_weight, shard_id)