Support loading of larger models with on-the-fly quantization (#3061)
This commit is contained in:
@@ -20,6 +20,7 @@ class LoadFormat(str, enum.Enum):
|
||||
GGUF = "gguf"
|
||||
BITSANDBYTES = "bitsandbytes"
|
||||
MISTRAL = "mistral"
|
||||
LAYERED = "layered"
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user