Files
enginex-bi_150-vllm/vllm/kernels/helion/configs/silu_mul_fp8.json
2026-04-09 11:23:47 +08:00

27727 lines
459 KiB
JSON

{
"nvidia_h200": {
"intermediate_2048_numtokens_256": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_256": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"default": {
"block_sizes": [
1,
512
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_256": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_256": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_256": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_7688_numtokens_256": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_256": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_1": {
"block_sizes": [
1,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_1": {
"block_sizes": [
1,
1
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_2": {
"block_sizes": [
2,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_2": {
"block_sizes": [
1,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_2": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_2": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_2": {
"block_sizes": [
1,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_2": {
"block_sizes": [
1,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_4": {
"block_sizes": [
1,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_4": {
"block_sizes": [
1,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_4": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_4": {
"block_sizes": [
1,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_4": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_4": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_8": {
"block_sizes": [
8,
256
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_8": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_8": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_8": {
"block_sizes": [
4,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_8": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_8": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_16": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_16": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_16": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_24": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_24": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_24": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_24": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_24": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_24": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_32": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_32": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_32": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_32": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_32": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_32": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_40": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_40": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_40": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_48": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_48": {
"block_sizes": [
8,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_48": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_48": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_48": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_48": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_56": {
"block_sizes": [
2,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_56": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_56": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_56": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_56": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_56": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_64": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_64": {
"block_sizes": [
4,
64
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_64": {
"block_sizes": [
2,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_64": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_64": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_64": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_72": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_72": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_72": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_72": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_72": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_72": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_80": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_80": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_80": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_80": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_80": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_80": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_88": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_88": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_88": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_88": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"first",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_88": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_88": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_96": {
"block_sizes": [
128,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_96": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_96": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 3,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_96": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_96": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_96": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_104": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_104": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_104": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_104": {
"block_sizes": [
8,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_104": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_104": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_112": {
"block_sizes": [
32,
1024
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_112": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_112": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_112": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_112": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_112": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_120": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_120": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_120": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_120": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_120": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_120": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_128": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_128": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_128": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_128": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_128": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_128": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 3,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_136": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 3,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_136": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_136": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_136": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_136": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 3,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_136": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_144": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_144": {
"block_sizes": [
256,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_144": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_144": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_144": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_144": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_152": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_152": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_152": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_152": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_152": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_152": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_160": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_160": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_160": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_160": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_160": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_160": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_168": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_168": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_168": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_168": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_168": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_168": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_176": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_176": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_176": {
"block_sizes": [
4,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_176": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_176": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_176": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_184": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_184": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_184": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_184": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_184": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_184": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_192": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_192": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_192": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_192": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_192": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_192": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_200": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_200": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_200": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_208": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_208": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_208": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_208": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_208": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_208": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_216": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_216": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_216": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_216": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_216": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_216": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"last"
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_224": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_224": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_224": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_224": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_224": {
"block_sizes": [
256,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_224": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_232": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_232": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_232": {
"block_sizes": [
16,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_232": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_232": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_232": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_240": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_240": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_240": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_240": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_240": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_240": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_248": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_248": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_248": {
"block_sizes": [
256,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_248": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_248": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_248": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_272": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_272": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_272": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_288": {
"block_sizes": [
4,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_288": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_288": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_288": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_288": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_288": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_304": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_304": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_304": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_304": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_304": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_304": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_320": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_320": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_320": {
"block_sizes": [
512,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_320": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_320": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_320": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_336": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_336": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_336": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_336": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_336": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_336": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_352": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_352": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_352": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_352": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_352": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_352": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_368": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_368": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_368": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_368": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_368": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_368": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_384": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_384": {
"block_sizes": [
8,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_384": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_400": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_400": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_400": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_400": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_400": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_400": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_416": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_416": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_416": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_416": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_416": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_416": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_432": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_432": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_432": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_432": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_432": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_432": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_448": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_448": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_448": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_448": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_448": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_448": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_464": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_464": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_464": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_464": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_464": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_464": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_480": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_480": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
"first"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_480": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_480": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_480": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_480": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_496": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_496": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_496": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_496": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_496": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_496": {
"block_sizes": [
256,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_512": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_512": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_512": {
"block_sizes": [
128,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_512": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_512": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_512": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat",
"range_warp_specializes": []
}
},
"nvidia_h100": {
"intermediate_2048_numtokens_256": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_256": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"default": {
"block_sizes": [
1,
512
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_256": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_256": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_256": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_7688_numtokens_256": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_256": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_1": {
"block_sizes": [
1,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_1": {
"block_sizes": [
1,
1
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_1": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_2": {
"block_sizes": [
2,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_2": {
"block_sizes": [
1,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_2": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_2": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_2": {
"block_sizes": [
1,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_2": {
"block_sizes": [
1,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_4": {
"block_sizes": [
1,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_4": {
"block_sizes": [
1,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_4": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_4": {
"block_sizes": [
1,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_4": {
"block_sizes": [
1,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_4": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_8": {
"block_sizes": [
8,
256
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_8": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_8": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_8": {
"block_sizes": [
4,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_8": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_8": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_16": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_16": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_16": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_16": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_24": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_24": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_24": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_24": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_24": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_24": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_32": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_32": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_32": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_32": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_32": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_32": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_40": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_40": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_40": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_40": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_48": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_48": {
"block_sizes": [
8,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_48": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_48": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_48": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_48": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_56": {
"block_sizes": [
2,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_56": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_56": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_56": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_56": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_56": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_64": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_64": {
"block_sizes": [
4,
64
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_64": {
"block_sizes": [
2,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_64": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_64": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_64": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_72": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_72": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_72": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_72": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_72": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_72": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_80": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_80": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_80": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_80": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_80": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_80": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_88": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_88": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_88": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_88": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"first",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_88": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_88": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_96": {
"block_sizes": [
128,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_96": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_96": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 3,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_96": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_96": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_96": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_104": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_104": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_104": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_104": {
"block_sizes": [
8,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_104": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_104": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_112": {
"block_sizes": [
32,
1024
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_112": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_112": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_112": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_112": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_112": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_120": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_120": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_120": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_120": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_120": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_120": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_128": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_128": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_128": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_128": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_128": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_128": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 3,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_136": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 3,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_136": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_136": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_136": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_136": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 3,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_136": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_144": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_144": {
"block_sizes": [
256,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_144": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_144": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_144": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_144": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_152": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_152": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_152": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_152": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_152": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_152": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_160": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_160": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_160": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_160": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_160": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_160": {
"block_sizes": [
128,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_168": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_168": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_168": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_168": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_168": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_168": {
"block_sizes": [
32,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_176": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_176": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_176": {
"block_sizes": [
4,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_176": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_176": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_176": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_184": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_184": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_184": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_184": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_184": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_184": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_192": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_192": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_192": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_192": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_192": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_192": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_200": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_200": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_200": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_200": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_208": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_208": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_208": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_208": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_208": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"last",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_208": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_216": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_216": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_216": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_216": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_216": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_216": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"last"
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_224": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_224": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_224": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_224": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_224": {
"block_sizes": [
256,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_224": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_232": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_232": {
"block_sizes": [
64,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_232": {
"block_sizes": [
16,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_232": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_232": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_232": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_240": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_240": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_240": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_240": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_240": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_240": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_248": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_248": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_248": {
"block_sizes": [
256,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_248": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_248": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_248": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_272": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
1,
0
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_272": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_272": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_272": {
"block_sizes": [
64,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_288": {
"block_sizes": [
4,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_288": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_288": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_288": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_288": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_288": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_304": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_304": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_304": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_304": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_304": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_304": {
"block_sizes": [
64,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_320": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_320": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_320": {
"block_sizes": [
512,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_320": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_320": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_320": {
"block_sizes": [
128,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_336": {
"block_sizes": [
2,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_336": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_336": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_336": {
"block_sizes": [
64,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_336": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_336": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_352": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_352": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_352": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_352": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_352": {
"block_sizes": [
8,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_352": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_368": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_368": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_368": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_368": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_368": {
"block_sizes": [
32,
4
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_368": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_384": {
"block_sizes": [
64,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_384": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_384": {
"block_sizes": [
8,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_384": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_400": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_400": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_400": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_400": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_400": {
"block_sizes": [
256,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_400": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_416": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_416": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_416": {
"block_sizes": [
64,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_416": {
"block_sizes": [
128,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_416": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_416": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_432": {
"block_sizes": [
16,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_432": {
"block_sizes": [
32,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_432": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 4,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_432": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_432": {
"block_sizes": [
16,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_432": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_448": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_448": {
"block_sizes": [
8,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_448": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_448": {
"block_sizes": [
32,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_448": {
"block_sizes": [
16,
256
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_448": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"last",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_464": {
"block_sizes": [
32,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_464": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_464": {
"block_sizes": [
16,
64
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_464": {
"block_sizes": [
8,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_464": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"first"
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_464": {
"block_sizes": [
128,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_480": {
"block_sizes": [
4,
16
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 2,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_480": {
"block_sizes": [
4,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
"first"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_480": {
"block_sizes": [
8,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_480": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_480": {
"block_sizes": [
64,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
"last"
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_480": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"last",
"",
""
],
"num_warps": 8,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_496": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_496": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"first",
""
],
"num_warps": 8,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_496": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
4
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 2,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"tensor_descriptor",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_496": {
"block_sizes": [
32,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"tensor_descriptor"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_496": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 4,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_496": {
"block_sizes": [
256,
8
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2048_numtokens_512": {
"block_sizes": [
32,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
"last"
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_2880_numtokens_512": {
"block_sizes": [
16,
32
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 1,
"indexing": [
"pointer",
"tensor_descriptor",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_4096_numtokens_512": {
"block_sizes": [
128,
512
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 16,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_8192_numtokens_512": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
false
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 32,
"num_stages": 1,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_11008_numtokens_512": {
"block_sizes": [
32,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
1
],
"range_unroll_factors": [
0
],
"range_warp_specializes": [],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"",
"",
""
],
"num_warps": 1,
"num_stages": 1,
"indexing": [
"tensor_descriptor",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat"
},
"intermediate_14336_numtokens_512": {
"block_sizes": [
16,
128
],
"loop_orders": [
[
0,
1
]
],
"flatten_loops": [
true
],
"l2_groupings": [
2
],
"range_unroll_factors": [
0
],
"range_num_stages": [
0
],
"range_multi_buffers": [
null
],
"range_flattens": [
null
],
"load_eviction_policies": [
"first",
"",
""
],
"num_warps": 1,
"num_stages": 2,
"indexing": [
"pointer",
"pointer",
"pointer",
"pointer"
],
"pid_type": "flat",
"range_warp_specializes": []
}
}
}