Support compressed tensors fp8w8a8 (#4743)
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
import argparse
|
||||
import dataclasses
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
from typing import List, Optional
|
||||
@@ -341,6 +342,10 @@ class ServerArgs:
|
||||
self.disable_overlap_schedule = True
|
||||
logger.warning("Overlap scheduler is disabled for decode server")
|
||||
|
||||
os.environ["SGLANG_ENABLE_TORCH_COMPILE"] = (
|
||||
"1" if self.enable_torch_compile else "0"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def add_cli_args(parser: argparse.ArgumentParser):
|
||||
# Model and port args
|
||||
|
||||
Reference in New Issue
Block a user