[1/2] Support deterministic inference with flashinfer attention backend (#10645)
Co-authored-by: hebiao064 <hebiaobuaa@gmail.com>
Co-authored-by: Qiaolin-Yu <liin1211@outlook.com>
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
"""Fused operators for normalization layers."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
@@ -80,6 +81,8 @@ class RMSNorm(CustomOp):
|
||||
)
|
||||
if _use_aiter:
|
||||
self._forward_method = self.forward_aiter
|
||||
if os.environ["SGLANG_ENABLE_DETERMINISTIC_INFERENCE"] == "1":
|
||||
self._forward_method = self.forward_native
|
||||
|
||||
def forward_cuda(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user