[Feature] Initial support for multi-LoRA serving (#1307)
This commit is contained in:
52
test/srt/models/compare.py
Normal file
52
test/srt/models/compare.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
used for debug using tensor comparison
|
||||
dump {name: tensor} into "log_hf.jsonl" and "log_srt.jsonl"
|
||||
use the same name for two tensors that supposed to be close
|
||||
recommend name like: "layer 2 after mlp"
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
import torch
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
assert sys.argv[1] == "base"
|
||||
hf_log = "base_log_hf.jsonl"
|
||||
srt_log = "base_log_srt.jsonl"
|
||||
else:
|
||||
hf_log = "log_hf.jsonl"
|
||||
srt_log = "log_srt.jsonl"
|
||||
|
||||
|
||||
def load_data(filepath):
|
||||
tensors = {}
|
||||
with open(filepath, "r") as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
data = json.loads(line)
|
||||
for k, v in data.items():
|
||||
tensors[k] = torch.tensor(v)
|
||||
return tensors
|
||||
|
||||
|
||||
hf_tensors = load_data(hf_log)
|
||||
srt_tensors = load_data(srt_log)
|
||||
|
||||
|
||||
def get_diff(t1, t2):
|
||||
t1 = t1.reshape(t2.shape)
|
||||
max_diff = torch.max(abs(t1.reshape(t2.shape) - t2))
|
||||
l2_dis = torch.dist(t1, t2, p=2)
|
||||
return l2_dis, max_diff
|
||||
|
||||
|
||||
for k, _ in srt_tensors.items():
|
||||
l2_dis, max_diff = get_diff(hf_tensors[k], srt_tensors[k])
|
||||
print(f"{k} {l2_dis=} {max_diff=}")
|
||||
if k == "layer 1 attn":
|
||||
print(hf_tensors[k])
|
||||
print(srt_tensors[k])
|
||||
if k == "layer 0 prefill k":
|
||||
print(srt_tensors[k].shape)
|
||||
print(hf_tensors[k].shape)
|
||||
Reference in New Issue
Block a user