Support compressed tensors fp8w8a8 (#4743)

This commit is contained in:
Xiaoyu Zhang
2025-03-27 04:21:25 +08:00
committed by GitHub
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions

View File

@@ -16,6 +16,7 @@
import argparse
import dataclasses
import logging
import os
import random
import tempfile
from typing import List, Optional
@@ -341,6 +342,10 @@ class ServerArgs:
self.disable_overlap_schedule = True
logger.warning("Overlap scheduler is disabled for decode server")
os.environ["SGLANG_ENABLE_TORCH_COMPILE"] = (
"1" if self.enable_torch_compile else "0"
)
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):
# Model and port args