| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize Python |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
46.2 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize JavaScript |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
39.2 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize Java |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
38.2 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize Go |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
30.4 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize C++ |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
35.6 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize Rust |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
23.4 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalSynthesize Average |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
35.5 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix Python |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
30.4 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix JavaScript |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
28.4 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix Java |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
30.6 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix Go |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
30.2 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix C++ |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
26.1 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix Rust |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
16.5 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalFix Average |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
27.0 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain Python |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
35.1 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain JavaScript |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
24.5 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain Java |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
27.3 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain Go |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
21.1 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain C++ |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
24.1 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain Rust |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
14.8 |
false |
|
|
|
| task |
dataset |
metrics |
|
|
| type |
name |
| bigcode/humanevalpack |
HumanEvalExplain Average |
|
| name |
type |
value |
verified |
| pass@1 |
pass@1 |
24.5 |
false |
|
|
|