Support FA3 as Attention backend by using --attention-backend fa3 (#4680)
Co-authored-by: qsong <qsong@linkedin.com> Co-authored-by: qingquansong <ustcsqq@gmail.com>
This commit is contained in:
@@ -501,6 +501,7 @@ def get_dataset(args, tokenizer):
|
||||
question_len=args.gsp_question_len,
|
||||
output_len=args.gsp_output_len,
|
||||
tokenizer=tokenizer,
|
||||
args=args,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown dataset: {args.dataset_name}")
|
||||
@@ -788,6 +789,7 @@ def sample_generated_shared_prefix_requests(
|
||||
question_len: int,
|
||||
output_len: int,
|
||||
tokenizer: PreTrainedTokenizerBase,
|
||||
args: argparse.Namespace,
|
||||
) -> List[Tuple[str, int, int]]:
|
||||
"""Generate benchmark requests with shared system prompts using random tokens and caching."""
|
||||
cache_path = get_gen_prefix_cache_path(args, tokenizer)
|
||||
|
||||
Reference in New Issue
Block a user