"""Check a model's attention head size against a fixed supported set.

Script path: K100-vLLM-Patched-v2.0/detect_head_size.py

Reads ``config.json`` from the directory named by the ``MODEL_DIR``
environment variable (default ``/model``) and reports the result through
the process exit status:

* ``0`` -- ``config.json`` is missing, the head size cannot be determined
  from it, or the head size is in ``SUPPORTED``.
* ``2`` -- the head size is not in ``SUPPORTED``; the offending value is
  printed on stdout before exiting.

Unreadable or malformed ``config.json`` files are not handled here; the
resulting exception propagates to the interpreter.
"""
import json
import os
import sys

# Directory expected to contain the model's config.json.
MODEL_DIR = os.environ.get("MODEL_DIR", "/model")

# Head sizes accepted by this check.
SUPPORTED = frozenset({32, 64, 96, 128, 160, 192, 224, 256})


def detect_head_size(cfg):
    """Return the head size described by a parsed model config, or None.

    An explicit ``head_dim`` entry wins.  Otherwise the value is derived as
    ``hidden_size // num_attention_heads`` when both entries are present and
    non-zero.  ``None`` means the config does not provide enough information.
    """
    head_size = cfg.get("head_dim")
    if head_size is not None:
        return head_size

    hidden_size = cfg.get("hidden_size")
    num_heads = cfg.get("num_attention_heads")
    # Truthiness check also guards against a zero divisor.
    if hidden_size and num_heads:
        return hidden_size // num_heads
    return None


def main(model_dir=MODEL_DIR):
    """Run the check for *model_dir* and return the process exit status.

    Returns ``2`` (after printing the head size on stdout) when the head
    size is known and not in ``SUPPORTED``; returns ``0`` in every other
    case, including a missing ``config.json``.
    """
    cfg_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(cfg_path):
        return 0

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(cfg_path, encoding="utf-8") as f:
        cfg = json.load(f)

    head_size = detect_head_size(cfg)
    if head_size is None or head_size in SUPPORTED:
        return 0

    # stdout carries only the unsupported value.
    print(head_size)
    return 2


if __name__ == "__main__":
    sys.exit(main())