{ "nvidia_h200": { "intermediate_2048_numtokens_256": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_256": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "default": { "block_sizes": [ 1, 512 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "tensor_descriptor", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_256": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_256": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_256": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_7688_numtokens_256": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_256": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_1": { "block_sizes": [ 1, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_1": { "block_sizes": [ 1, 1 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_2": { "block_sizes": [ 2, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_2": { "block_sizes": [ 1, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_2": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_2": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_2": { "block_sizes": [ 1, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_2": { "block_sizes": [ 1, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_4": { "block_sizes": [ 1, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_4": { "block_sizes": [ 1, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_4": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_4": { "block_sizes": [ 1, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_4": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_4": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_8": { "block_sizes": [ 8, 256 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_8": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_8": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_8": { "block_sizes": [ 4, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_8": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_8": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_16": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_16": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_16": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_24": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_24": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_24": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_24": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_24": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_24": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_32": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_32": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_32": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_32": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_32": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_32": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_40": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_40": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_40": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_48": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_48": { "block_sizes": [ 8, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_48": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_48": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_48": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_48": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_56": { "block_sizes": [ 2, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_56": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_56": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_56": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_56": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_56": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_64": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_64": { "block_sizes": [ 4, 64 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_64": { "block_sizes": [ 2, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_64": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_64": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_64": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_72": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_72": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_72": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_72": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_72": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_72": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_80": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_80": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_80": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_80": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_80": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_80": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_88": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_88": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_88": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_88": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "first", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_88": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_88": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_96": { "block_sizes": [ 128, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_96": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_96": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 3, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_96": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_96": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_96": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_104": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_104": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_104": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_104": { "block_sizes": [ 8, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_104": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_104": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_112": { "block_sizes": [ 32, 1024 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_112": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_112": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_112": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_112": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_112": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_120": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_120": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_120": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_120": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_120": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_120": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_128": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_128": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_128": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_128": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_128": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_128": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 3, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_136": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 3, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_136": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_136": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_136": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_136": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 3, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_136": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_144": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_144": { "block_sizes": [ 256, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_144": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_144": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_144": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_144": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_152": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_152": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_152": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_152": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_152": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_152": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_160": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_160": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_160": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_160": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_160": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_160": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_168": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_168": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_168": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_168": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_168": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_168": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_176": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_176": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_176": { "block_sizes": [ 4, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_176": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_176": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_176": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_184": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_184": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_184": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_184": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_184": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_184": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_192": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_192": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_192": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_192": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_192": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_192": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_200": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_200": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_200": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_208": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_208": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_208": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_208": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_208": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_208": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_216": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_216": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_216": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_216": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_216": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_216": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "last" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_224": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_224": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_224": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_224": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_224": { "block_sizes": [ 256, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_224": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_232": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_232": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_232": { "block_sizes": [ 16, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_232": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_232": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_232": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_240": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_240": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_240": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_240": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_240": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_240": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_248": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_248": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_248": { "block_sizes": [ 256, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_248": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_248": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_248": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_272": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_272": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_272": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_288": { "block_sizes": [ 4, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_288": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_288": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_288": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_288": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_288": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_304": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_304": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_304": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_304": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_304": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_304": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_320": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_320": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_320": { "block_sizes": [ 512, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_320": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_320": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_320": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_336": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_336": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_336": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_336": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_336": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_336": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_352": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_352": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_352": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_352": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_352": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_352": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_368": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_368": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_368": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_368": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_368": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_368": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_384": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_384": { "block_sizes": [ 8, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_384": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_400": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_400": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_400": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_400": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_400": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_400": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_416": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_416": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_416": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_416": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_416": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_416": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_432": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_432": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_432": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_432": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_432": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_432": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_448": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_448": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_448": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_448": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_448": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_448": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_464": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_464": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_464": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_464": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_464": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_464": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_480": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_480": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "first" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_480": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_480": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_480": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_480": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_496": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_496": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_496": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_496": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_496": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_496": { "block_sizes": [ 256, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_512": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_512": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_512": { "block_sizes": [ 128, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_512": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_512": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_512": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat", "range_warp_specializes": [] } }, "nvidia_h100": { "intermediate_2048_numtokens_256": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_256": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "default": { "block_sizes": [ 1, 512 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "tensor_descriptor", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_256": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_256": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_256": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_7688_numtokens_256": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_256": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_1": { "block_sizes": [ 1, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_1": { "block_sizes": [ 1, 1 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_1": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_2": { "block_sizes": [ 2, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_2": { "block_sizes": [ 1, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_2": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_2": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_2": { "block_sizes": [ 1, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_2": { "block_sizes": [ 1, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_4": { "block_sizes": [ 1, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_4": { "block_sizes": [ 1, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_4": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_4": { "block_sizes": [ 1, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_4": { "block_sizes": [ 1, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_4": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_8": { "block_sizes": [ 8, 256 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_8": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_8": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_8": { "block_sizes": [ 4, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_8": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_8": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_16": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_16": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_16": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_16": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_24": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_24": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_24": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_24": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_24": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_24": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_32": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_32": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_32": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_32": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_32": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_32": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_40": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_40": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_40": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_40": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_48": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_48": { "block_sizes": [ 8, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_48": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_48": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_48": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_14336_numtokens_48": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_56": { "block_sizes": [ 2, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_56": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_56": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_56": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_56": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_56": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_64": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_64": { "block_sizes": [ 4, 64 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_64": { "block_sizes": [ 2, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_64": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_64": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_64": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_72": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_72": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_72": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_72": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_72": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_72": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_80": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_80": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_80": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_80": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_80": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_80": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_88": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_88": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_88": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_88": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "first", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_88": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_88": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_96": { "block_sizes": [ 128, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_96": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_96": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 3, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_96": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_96": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_96": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_104": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_104": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_104": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_104": { "block_sizes": [ 8, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_104": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_104": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_112": { "block_sizes": [ 32, 1024 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_112": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_112": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_112": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_112": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_112": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_120": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_120": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_120": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_120": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_120": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_120": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_128": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_128": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_128": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_128": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_128": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_128": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 3, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_136": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 3, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_136": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_136": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_136": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_136": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 3, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_136": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_144": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_144": { "block_sizes": [ 256, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_144": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_144": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_144": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_144": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_152": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_152": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_152": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_152": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_152": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_152": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_160": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_160": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_160": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_160": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_160": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_160": { "block_sizes": [ 128, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_168": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_168": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_168": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_168": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_168": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_168": { "block_sizes": [ 32, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_176": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_176": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_176": { "block_sizes": [ 4, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_176": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_176": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_176": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_184": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_184": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_184": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_184": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_184": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_184": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_192": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_192": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_192": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_192": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_192": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_192": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_200": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_200": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_200": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_200": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_208": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_208": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_208": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_208": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_208": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "last", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_208": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_216": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_216": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_216": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_216": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_216": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_216": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "last" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_224": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_224": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_224": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_224": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_224": { "block_sizes": [ 256, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_224": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_232": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_232": { "block_sizes": [ 64, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_232": { "block_sizes": [ 16, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_232": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_232": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_232": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_240": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_240": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_240": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_240": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_240": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_240": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_248": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_248": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_4096_numtokens_248": { "block_sizes": [ 256, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_248": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_248": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_248": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_272": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 1, 0 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_272": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_272": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_272": { "block_sizes": [ 64, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_288": { "block_sizes": [ 4, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_288": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_288": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_288": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_288": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_288": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_304": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_304": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_304": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_304": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_304": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_304": { "block_sizes": [ 64, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_320": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_320": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_320": { "block_sizes": [ 512, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_320": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_320": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "tensor_descriptor", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_320": { "block_sizes": [ 128, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_336": { "block_sizes": [ 2, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_336": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_336": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_336": { "block_sizes": [ 64, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_336": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_336": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_352": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_352": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_352": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_352": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_352": { "block_sizes": [ 8, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_352": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_368": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_368": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_368": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_368": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_368": { "block_sizes": [ 32, 4 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_368": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_384": { "block_sizes": [ 64, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_384": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_384": { "block_sizes": [ 8, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_384": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_400": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2880_numtokens_400": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_400": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_400": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_400": { "block_sizes": [ 256, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_400": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_2048_numtokens_416": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_416": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_416": { "block_sizes": [ 64, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_8192_numtokens_416": { "block_sizes": [ 128, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_416": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_416": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_432": { "block_sizes": [ 16, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_432": { "block_sizes": [ 32, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_432": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 4, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_432": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_432": { "block_sizes": [ 16, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_432": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_448": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_448": { "block_sizes": [ 8, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_448": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_448": { "block_sizes": [ 32, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_448": { "block_sizes": [ 16, 256 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_448": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "last", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_464": { "block_sizes": [ 32, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_464": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_464": { "block_sizes": [ 16, 64 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_464": { "block_sizes": [ 8, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_464": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "first" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_464": { "block_sizes": [ 128, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_480": { "block_sizes": [ 4, 16 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 2, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_480": { "block_sizes": [ 4, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "first" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_480": { "block_sizes": [ 8, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_480": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_480": { "block_sizes": [ 64, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "last" ], "num_warps": 1, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_480": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "last", "", "" ], "num_warps": 8, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_496": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_496": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "first", "" ], "num_warps": 8, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_496": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 4 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 2, "num_stages": 1, "indexing": [ "pointer", "pointer", "tensor_descriptor", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_496": { "block_sizes": [ 32, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "tensor_descriptor" ], "pid_type": "flat" }, "intermediate_11008_numtokens_496": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 4, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_496": { "block_sizes": [ 256, 8 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2048_numtokens_512": { "block_sizes": [ 32, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "last" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_2880_numtokens_512": { "block_sizes": [ 16, 32 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 1, "indexing": [ "pointer", "tensor_descriptor", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_4096_numtokens_512": { "block_sizes": [ 128, 512 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 16, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_8192_numtokens_512": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ false ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 32, "num_stages": 1, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_11008_numtokens_512": { "block_sizes": [ 32, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 1 ], "range_unroll_factors": [ 0 ], "range_warp_specializes": [], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "", "", "" ], "num_warps": 1, "num_stages": 1, "indexing": [ "tensor_descriptor", "pointer", "pointer", "pointer" ], "pid_type": "flat" }, "intermediate_14336_numtokens_512": { "block_sizes": [ 16, 128 ], "loop_orders": [ [ 0, 1 ] ], "flatten_loops": [ true ], "l2_groupings": [ 2 ], "range_unroll_factors": [ 0 ], "range_num_stages": [ 0 ], "range_multi_buffers": [ null ], "range_flattens": [ null ], "load_eviction_policies": [ "first", "", "" ], "num_warps": 1, "num_stages": 2, "indexing": [ "pointer", "pointer", "pointer", "pointer" ], "pid_type": "flat", "range_warp_specializes": [] } } }