Add integration with GemLite weight-only quantization (#2528)

This commit is contained in:
Jerry Zhang
2024-12-20 08:25:25 -08:00
committed by GitHub
parent d95a5f5bf5
commit feb2b768ba
4 changed files with 61 additions and 1 deletion

View File

@@ -322,6 +322,18 @@ def throughput_test(
)
time.sleep(0.5)
try:
import os
import pwd
from gemlite.core import GemLiteLinearTriton
GemLiteLinearTriton.cache_config(
f"/tmp/{pwd.getpwuid(os.getuid()).pw_gecos}_gemlite.json"
)
except ImportError:
pass
logging.info("\nBenchmark...")
result = throughput_test_once(
backend_name=bench_args.backend,