Add an option to disable penalizer (#1651)

This commit is contained in:
Lianmin Zheng
2024-10-12 17:53:23 -07:00
committed by GitHub
parent 69aa937aa5
commit 9da5a60b18
5 changed files with 111 additions and 90 deletions

View File

@@ -119,6 +119,7 @@ class ModelRunner:
"triton_attention_reduce_in_fp32": server_args.triton_attention_reduce_in_fp32,
"disable_mla": server_args.disable_mla,
"torchao_config": server_args.torchao_config,
"disable_penalizer": server_args.disable_penalizer,
}
)