Update vocab embedding deps and add TP switch (#1856)

This commit is contained in:
Ke Bao
2024-11-01 11:13:07 +08:00
committed by GitHub
parent 61cf00e112
commit 16eb33ffe2
31 changed files with 602 additions and 101 deletions

View File

@@ -1,7 +1,8 @@
# Adapted from https://raw.githubusercontent.com/vllm-project/vllm/v0.5.5/vllm/model_executor/layers/quantization/base_config.py
import inspect
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Type
import torch
from torch import nn
@@ -120,3 +121,17 @@ class QuantizationConfig(ABC):
For now, this is only used by AWQ.
"""
raise NotImplementedError
def method_has_implemented_embedding(
        method_class: Type[QuantizeMethodBase]) -> bool:
    """Return ``True`` if *method_class* overrides ``embedding``.

    Not every quantization method implements ``embedding``; callers use this
    check before routing embedding lookups through a quant method.  The test
    is "has the attribute been changed from the base implementation":
    ``inspect.getattr_static`` fetches the raw attribute without triggering
    descriptors, so the base and subclass functions can be compared by
    identity.
    """
    base_impl = inspect.getattr_static(QuantizeMethodBase, "embedding", None)
    override = inspect.getattr_static(method_class, "embedding", None)
    # An absent attribute, or one identical to the base stub, means the
    # method has no real embedding support.
    if override is None:
        return False
    return override is not base_impl