def update_tensor_inplace(old: torch.Tensor, new: torch.Tensor) -> None:
    """Overwrite the contents of ``old`` with the values held by ``new``.

    The copy is performed in place through ``Tensor.copy_``, so ``old``
    remains the same tensor object and only its elements change. ``new``
    is only read from.

    Args:
        old: Destination tensor that receives the values.
        new: Source tensor supplying the values.

    Returns:
        None. The result is observable only through ``old``.
    """
    # NOTE(review): presumably used so tensors already registered elsewhere
    # keep their identity while taking on new values; confirm with callers.
    old.copy_(new)