62 lines
48 KiB
Plaintext
62 lines
48 KiB
Plaintext
|
|
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
|
||
|
|
warn("The installed version of bitsandbytes was compiled without GPU support. "
|
||
|
|
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32
|
||
|
|
model path is /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-2B
|
||
|
|
11/22 18:39:59 - OpenCompass - WARNING - No previous results to reuse!
|
||
|
|
11/22 18:39:59 - OpenCompass - INFO - Reusing experiements from 20241122_183959
|
||
|
|
11/22 18:39:59 - OpenCompass - INFO - Current exp folder: /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-2B/20241122_183959
|
||
|
|
11/22 18:40:02 - OpenCompass - INFO - Partitioned into 256 tasks.
|
||
|
|
[ ] 0/256, elapsed: 0s, ETA:
[ ] 1/256, 0.0 task/s, elapsed: 271s, ETA: 69201s
[ ] 2/256, 0.0 task/s, elapsed: 305s, ETA: 38691s
[ ] 3/256, 0.0 task/s, elapsed: 306s, ETA: 25818s
[ ] 4/256, 0.0 task/s, elapsed: 308s, ETA: 19376s
[ ] 5/256, 0.0 task/s, elapsed: 311s, ETA: 15632s
[ ] 6/256, 0.0 task/s, elapsed: 313s, ETA: 13030s
[ ] 7/256, 0.0 task/s, elapsed: 316s, ETA: 11255s
[ ] 8/256, 0.0 task/s, elapsed: 316s, ETA: 9810s
[> ] 9/256, 0.0 task/s, elapsed: 318s, ETA: 8716s
[> ] 10/256, 0.0 task/s, elapsed: 318s, ETA: 7813s
[> ] 11/256, 0.0 task/s, elapsed: 318s, ETA: 7087s
[> ] 12/256, 0.0 task/s, elapsed: 321s, ETA: 6536s
[> ] 13/256, 0.0 task/s, elapsed: 321s, ETA: 6009s
[> ] 14/256, 0.0 task/s, elapsed: 327s, ETA: 5649s
[> ] 15/256, 0.0 task/s, elapsed: 330s, ETA: 5296s
[> ] 16/256, 0.0 task/s, elapsed: 331s, ETA: 4972s
[> ] 17/256, 0.1 task/s, elapsed: 332s, ETA: 4668s
[>> ] 18/256, 0.1 task/s, elapsed: 335s, ETA: 4424s
[>> ] 19/256, 0.1 task/s, elapsed: 338s, ETA: 4214s
[>> ] 20/256, 0.1 task/s, elapsed: 338s, ETA: 3991s
[>> ] 21/256, 0.1 task/s, elapsed: 340s, ETA: 3806s
[>> ] 22/256, 0.1 task/s, elapsed: 343s, ETA: 3651s
[>> ] 23/256, 0.1 task/s, elapsed: 346s, ETA: 3509s
[>> ] 24/256, 0.1 task/s, elapsed: 347s, ETA: 3357s
[>> ] 25/256, 0.1 task/s, elapsed: 348s, ETA: 3211s
[>>> ] 26/256, 0.1 task/s, elapsed: 348s, ETA: 3080s
[>>> ] 27/256, 0.1 task/s, elapsed: 349s, ETA: 2963s
[>>> ] 28/256, 0.1 task/s, elapsed: 351s, ETA: 2855s
[>>> ] 29/256, 0.1 task/s, elapsed: 351s, ETA: 2746s
[>>> ] 30/256, 0.1 task/s, elapsed: 353s, ETA: 2656s
[>>> ] 31/256, 0.1 task/s, elapsed: 353s, ETA: 2560s
[>>> ] 32/256, 0.1 task/s, elapsed: 353s, ETA: 2472s
[>>> ] 33/256, 0.1 task/s, elapsed: 354s, ETA: 2391s
[>>> ] 34/256, 0.1 task/s, elapsed: 355s, ETA: 2316s
[>>>> ] 35/256, 0.1 task/s, elapsed: 355s, ETA: 2243s
[>>>> ] 36/256, 0.1 task/s, elapsed: 356s, ETA: 2173s
[>>>> ] 37/256, 0.1 task/s, elapsed: 357s, ETA: 2113s
[>>>> ] 38/256, 0.1 task/s, elapsed: 358s, ETA: 2053s
[>>>> ] 39/256, 0.1 task/s, elapsed: 359s, ETA: 1998s
[>>>> ] 40/256, 0.1 task/s, elapsed: 359s, ETA: 1939s
[>>>> ] 41/256, 0.1 task/s, elapsed: 362s, ETA: 1900s
[>>>> ] 42/256, 0.1 task/s, elapsed: 364s, ETA: 1857s
[>>>>> ] 43/256, 0.1 task/s, elapsed: 365s, ETA: 1806s
[>>>>> ] 44/256, 0.1 task/s, elapsed: 370s, ETA: 1783s
[>>>>> ] 45/256, 0.1 task/s, elapsed: 370s, ETA: 1736s
[>>>>> ] 46/256, 0.1 task/s, elapsed: 371s, ETA: 1692s
[>>>>> ] 47/256, 0.1 task/s, elapsed: 372s, ETA: 1652s
[>>>>> ] 48/256, 0.1 task/s, elapsed: 372s, ETA: 1613s
[>>>>> ] 49/256, 0.1 task/s, elapsed: 372s, ETA: 1573s
[>>>>> ] 50/256, 0.1 task/s, elapsed: 373s, ETA: 1536s
[>>>>>
|
||
|
|
11/22 18:51:40 - OpenCompass - INFO - Partitioned into 287 tasks.
|
||
|
|
[ ] 0/287, elapsed: 0s, ETA:
[ ] 1/287, 0.0 task/s, elapsed: 45s, ETA: 13007s
[ ] 2/287, 0.0 task/s, elapsed: 45s, ETA: 6481s
[ ] 3/287, 0.1 task/s, elapsed: 46s, ETA: 4315s
[ ] 4/287, 0.1 task/s, elapsed: 46s, ETA: 3228s
[ ] 5/287, 0.1 task/s, elapsed: 46s, ETA: 2574s
[ ] 6/287, 0.1 task/s, elapsed: 46s, ETA: 2150s
[ ] 7/287, 0.1 task/s, elapsed: 47s, ETA: 1887s
[ ] 8/287, 0.2 task/s, elapsed: 47s, ETA: 1647s
[> ] 9/287, 0.2 task/s, elapsed: 48s, ETA: 1494s
[> ] 10/287, 0.2 task/s, elapsed: 49s, ETA: 1347s
[> ] 11/287, 0.2 task/s, elapsed: 49s, ETA: 1221s
[> ] 12/287, 0.2 task/s, elapsed: 49s, ETA: 1126s
[> ] 13/287, 0.3 task/s, elapsed: 49s, ETA: 1040s
[> ] 14/287, 0.3 task/s, elapsed: 49s, ETA: 964s
[> ] 15/287, 0.3 task/s, elapsed: 49s, ETA: 896s
[> ] 16/287, 0.3 task/s, elapsed: 49s, ETA: 837s
[> ] 17/287, 0.3 task/s, elapsed: 49s, ETA: 786s
[> ] 18/287, 0.4 task/s, elapsed: 50s, ETA: 743s
[>> ] 19/287, 0.4 task/s, elapsed: 50s, ETA: 702s
[>> ] 20/287, 0.4 task/s, elapsed: 50s, ETA: 664s
[>> ] 21/287, 0.4 task/s, elapsed: 50s, ETA: 631s
[>> ] 22/287, 0.4 task/s, elapsed: 50s, ETA: 601s
[>> ] 23/287, 0.5 task/s, elapsed: 50s, ETA: 573s
[>> ] 24/287, 0.4 task/s, elapsed: 62s, ETA: 680s
[>> ] 25/287, 0.4 task/s, elapsed: 62s, ETA: 651s
[>> ] 26/287, 0.4 task/s, elapsed: 62s, ETA: 624s
[>> ] 27/287, 0.4 task/s, elapsed: 64s, ETA: 619s
[>>> ] 28/287, 0.4 task/s, elapsed: 65s, ETA: 600s
[>>> ] 29/287, 0.4 task/s, elapsed: 66s, ETA: 586s
[>>> ] 30/287, 0.4 task/s, elapsed: 70s, ETA: 601s
[>>> ] 31/287, 0.4 task/s, elapsed: 70s, ETA: 581s
[>>> ] 32/287, 0.4 task/s, elapsed: 71s, ETA: 567s
[>>> ] 33/287, 0.5 task/s, elapsed: 71s, ETA: 548s
[>>> ] 34/287, 0.5 task/s, elapsed: 71s, ETA: 531s
[>>> ] 35/287, 0.5 task/s, elapsed: 71s, ETA: 515s
[>>> ] 36/287, 0.5 task/s, elapsed: 71s, ETA: 498s
[>>> ] 37/287, 0.5 task/s, elapsed: 73s, ETA: 492s
[>>>> ] 38/287, 0.5 task/s, elapsed: 73s, ETA: 478s
[>>>> ] 39/287, 0.5 task/s, elapsed: 73s, ETA: 466s
[>>>> ] 40/287, 0.5 task/s, elapsed: 73s, ETA: 453s
[>>>> ] 41/287, 0.6 task/s, elapsed: 73s, ETA: 440s
[>>>> ] 42/287, 0.6 task/s, elapsed: 73s, ETA: 428s
[>>>> ] 43/287, 0.6 task/s, elapsed: 73s, ETA: 416s
[>>>> ] 44/287, 0.6 task/s, elapsed: 73s, ETA: 405s
[>>>> ] 45/287, 0.6 task/s, elapsed: 73s, ETA: 395s
[>>>> ] 46/287, 0.6 task/s, elapsed: 73s, ETA: 384s
[>>>>> ] 47/287, 0.6 task/s, elapsed: 73s, ETA: 375s
[>>>>> ] 48/287, 0.7 task/s, elapsed: 73s, ETA: 365s
[>>>>> ] 49/287, 0.7 task/s, elapsed: 73s, ETA: 357s
[>>>>> ] 50/287, 0.7 task/s, elapsed: 73s, ETA: 348s
[>>>>>
|
||
|
|
dataset version metric mode internvl-chat-20b
|
||
|
|
---------------------------- --------- ---------------------------- ------ -------------------
|
||
|
|
mmlu - naive_average gen 52.60
|
||
|
|
mmlu_pro - - - -
|
||
|
|
cmmlu - naive_average gen 57.00
|
||
|
|
ceval - naive_average gen 56.16
|
||
|
|
agieval - - - -
|
||
|
|
GaokaoBench - weighted_average gen 52.58
|
||
|
|
GPQA_extended - - - -
|
||
|
|
GPQA_main - - - -
|
||
|
|
GPQA_diamond - - - -
|
||
|
|
ARC-c - - - -
|
||
|
|
truthfulqa - - - -
|
||
|
|
triviaqa 2121ce score gen 31.23
|
||
|
|
triviaqa_wiki_1shot - - - -
|
||
|
|
nq 3dcea1 score gen 11.75
|
||
|
|
C3 8c358f accuracy gen 78.03
|
||
|
|
race-high 9a54b6 accuracy gen 77.39
|
||
|
|
flores_100 - - - -
|
||
|
|
winogrande b36770 accuracy gen 59.12
|
||
|
|
hellaswag e42710 accuracy gen 68.19
|
||
|
|
bbh - naive_average gen 40.85
|
||
|
|
gsm8k 1d7fe4 accuracy gen 55.12
|
||
|
|
math 393424 accuracy gen 33.54
|
||
|
|
TheoremQA 6f0af8 score gen 12.00
|
||
|
|
MathBench - - - -
|
||
|
|
openai_humaneval 8e312c humaneval_pass@1 gen 52.44
|
||
|
|
humaneval_plus - - - -
|
||
|
|
humanevalx - - - -
|
||
|
|
sanitized_mbpp a447ff score gen 50.58
|
||
|
|
mbpp_plus - - - -
|
||
|
|
mbpp_cn 6fb572 score gen 34.20
|
||
|
|
leval - - - -
|
||
|
|
leval_closed - - - -
|
||
|
|
leval_open - - - -
|
||
|
|
longbench - - - -
|
||
|
|
longbench_single-document-qa - - - -
|
||
|
|
longbench_multi-document-qa - - - -
|
||
|
|
longbench_summarization - - - -
|
||
|
|
longbench_few-shot-learning - - - -
|
||
|
|
longbench_synthetic-tasks - - - -
|
||
|
|
longbench_code-completion - - - -
|
||
|
|
teval - - - -
|
||
|
|
teval_zh - - - -
|
||
|
|
IFEval 3321a3 Prompt-level-strict-accuracy gen 31.05
|
||
|
|
IFEval 3321a3 Inst-level-strict-accuracy gen 43.17
|
||
|
|
IFEval 3321a3 Prompt-level-loose-accuracy gen 32.72
|
||
|
|
IFEval 3321a3 Inst-level-loose-accuracy gen 45.44
|
||
|
|
11/22 18:56:09 - OpenCompass - INFO - write summary to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-2B/20241122_183959/summary/summary_20241122_183959.txt
|
||
|
|
11/22 18:56:09 - OpenCompass - INFO - write csv to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-2B/20241122_183959/summary/summary_20241122_183959.csv
|