Add warnings and remove dependency for deterministic inference (#10724)
Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
@@ -981,29 +981,36 @@ class ServerArgs:
|
||||
|
||||
def _handle_deterministic_inference(self):
    """Validate and adjust settings when deterministic inference is enabled.

    Does nothing unless ``self.enable_deterministic_inference`` is truthy.
    Otherwise it forces the sampling backend to ``"pytorch"``, validates the
    attention backend and the TP size, disables the radix cache for every
    attention backend other than ``"fa3"``, and logs a warning for each
    adjustment it makes.

    Raises:
        ValueError: If ``self.attention_backend`` is not in
            ``DETERMINISTIC_ATTENTION_BACKEND_CHOICES``, or if
            ``self.tp_size`` is greater than 1.
    """
    if not self.enable_deterministic_inference:
        return

    # Check sampling backend
    self.sampling_backend = "pytorch"
    logger.warning(
        "Sampling backend is set to pytorch for deterministic inference."
    )

    # Check attention backend
    if self.attention_backend not in DETERMINISTIC_ATTENTION_BACKEND_CHOICES:
        raise ValueError(
            f"Currently only {DETERMINISTIC_ATTENTION_BACKEND_CHOICES} attention backends are supported for deterministic inference."
        )

    # Currently, only FA3 supports radix cache. Support for other backends is in progress.
    # This runs once, after the backend has been validated, so the flag is
    # never touched (and no warning is logged) for an unsupported backend.
    if self.attention_backend != "fa3":
        self.disable_radix_cache = True
        logger.warning(
            f"Currently radix cache is not compatible with {self.attention_backend} attention backend for deterministic inference. It will be supported in the future."
        )

    # Check TP size
    if self.tp_size > 1:
        raise ValueError(
            "Currently only TP size 1 is supported for deterministic inference."
        )

    # Warnings on MoE models
    logger.warning(
        "Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
    )
|
||||
|
||||
def _handle_other_validations(self):
    """Placeholder hook for additional argument validations; currently a no-op."""
    pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user