Misc fix for min_p_sampling, --cuda-graph-bs (#2761)

This commit is contained in:
Lianmin Zheng
2025-01-07 02:52:53 -08:00
committed by GitHub
parent 6d08ce2aa9
commit bdc1acf6cd
17 changed files with 135 additions and 63 deletions

View File

@@ -228,6 +228,7 @@ class BenchmarkWorker:
hidden_size,
topk,
dtype_str,
False,
)
else:
config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))]