Last active
May 14, 2025 16:47
-
-
Save davidberard98/5e77aa6e0206b20acee4a21535fa3ba3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#loc = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0) | |
module { | |
tt.func public @triton_tem_fused_zeros_7(%arg0: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg1: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg2: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg3: !tt.ptr<f32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg4: !tt.ptr<f32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg5: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg6: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg7: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg8: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg9: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg10: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg11: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg12: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg13: !tt.ptr<i32> 
{tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg14: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg15: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg16: !tt.ptr<i64> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0), %arg17: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":17:0)) attributes {noinline = false} { | |
%cst = arith.constant dense<1024> : tensor<64x1xi32> loc(#loc1) | |
%cst_0 = arith.constant dense<1024> : tensor<1x64xi32> loc(#loc1) | |
%cst_1 = arith.constant dense<0.000000e+00> : tensor<64xf32> loc(#loc1) | |
%cst_2 = arith.constant dense<0xFF800000> : tensor<64xf32> loc(#loc1) | |
%c2_i32 = arith.constant 2 : i32 loc(#loc1) | |
%c0_i32 = arith.constant 0 : i32 loc(#loc1) | |
%c64_i32 = arith.constant 64 : i32 loc(#loc1) | |
%cst_3 = arith.constant dense<1.44269502> : tensor<128x64xf32> loc(#loc1) | |
%cst_4 = arith.constant dense<0xFF800000> : tensor<128x64xf32> loc(#loc1) | |
%cst_5 = arith.constant dense<1.200000e-01> : tensor<128x64xf32> loc(#loc1) | |
%cst_6 = arith.constant dense<0.000000e+00> : tensor<128x64xf32> loc(#loc1) | |
%cst_7 = arith.constant dense<0.000000e+00> : tensor<128x128xf32> loc(#loc1) | |
%cst_8 = arith.constant dense<65536> : tensor<128x1xi32> loc(#loc1) | |
%cst_9 = arith.constant dense<1.200000e-01> : tensor<128x128xf32> loc(#loc1) | |
%cst_10 = arith.constant dense<1024> : tensor<128x1xi32> loc(#loc1) | |
%cst_11 = arith.constant dense<0.000000e+00> : tensor<128xf32> loc(#loc1) | |
%cst_12 = arith.constant dense<0xFF800000> : tensor<128xf32> loc(#loc1) | |
%c512_i32 = arith.constant 512 : i32 loc(#loc1) | |
%c67108864_i32 = arith.constant 67108864 : i32 loc(#loc1) | |
%c128_i32 = arith.constant 128 : i32 loc(#loc1) | |
%c1024_i32 = arith.constant 1024 : i32 loc(#loc1) | |
%c1_i32 = arith.constant 1 : i32 loc(#loc1) | |
%c8_i32 = arith.constant 8 : i32 loc(#loc1) | |
%c65536_i32 = arith.constant 65536 : i32 loc(#loc1) | |
%0 = tt.get_program_id x : i32 loc(#loc2) | |
%1 = tt.get_program_id z : i32 loc(#loc3) | |
%2 = arith.divsi %1, %c8_i32 : i32 loc(#loc4) | |
%3 = arith.remsi %1, %c8_i32 : i32 loc(#loc5) | |
%4 = arith.muli %3, %c128_i32 : i32 loc(#loc6) | |
%5 = arith.extsi %4 : i32 to i64 loc(#loc7) | |
%6 = arith.muli %2, %c67108864_i32 : i32 loc(#loc8) | |
%7 = arith.addi %4, %6 : i32 loc(#loc9) | |
%8 = arith.extsi %7 : i32 to i64 loc(#loc10) | |
%9 = tt.addptr %arg1, %5 : !tt.ptr<bf16>, i64 loc(#loc11) | |
%10 = tt.addptr %arg2, %5 : !tt.ptr<bf16>, i64 loc(#loc12) | |
%11 = tt.addptr %arg7, %8 : !tt.ptr<bf16>, i64 loc(#loc13) | |
%12 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> loc(#loc14) | |
%13 = arith.cmpi sge, %0, %c512_i32 : i32 loc(#loc15) | |
scf.if %13 { | |
%14 = arith.subi %0, %c512_i32 : i32 loc(#loc17) | |
%15 = arith.divsi %14, %c512_i32 : i32 loc(#loc18) | |
%16 = arith.addi %15, %3 : i32 loc(#loc19) | |
%17 = arith.remsi %14, %c512_i32 : i32 loc(#loc20) | |
%18 = arith.muli %17, %c512_i32 : i32 loc(#loc21) | |
%19 = arith.muli %16, %c128_i32 : i32 loc(#loc22) | |
%20 = arith.addi %19, %6 : i32 loc(#loc23) | |
%21 = arith.extsi %20 : i32 to i64 loc(#loc24) | |
%22 = arith.muli %2, %c8_i32 : i32 loc(#loc25) | |
%23 = arith.addi %22, %16 : i32 loc(#loc26) | |
%24 = arith.muli %23, %c65536_i32 : i32 loc(#loc27) | |
%25 = arith.extsi %24 : i32 to i64 loc(#loc28) | |
%26 = tt.addptr %arg0, %21 : !tt.ptr<bf16>, i64 loc(#loc29) | |
%27 = tt.addptr %arg5, %21 : !tt.ptr<bf16>, i64 loc(#loc30) | |
%28 = tt.addptr %arg6, %21 : !tt.ptr<bf16>, i64 loc(#loc31) | |
%29 = tt.addptr %arg3, %25 : !tt.ptr<f32>, i64 loc(#loc32) | |
%30 = tt.addptr %arg4, %25 : !tt.ptr<f32>, i64 loc(#loc33) | |
%31 = arith.muli %17, %c128_i32 : i32 loc(#loc34) | |
%32 = tt.splat %31 : i32 -> tensor<128xi32> loc(#loc35) | |
%33 = arith.addi %32, %12 : tensor<128xi32> loc(#loc35) | |
%34 = tt.expand_dims %33 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc219) | |
%35 = arith.muli %34, %cst_10 : tensor<128x1xi32> loc(#loc220) | |
%36 = tt.splat %26 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc221) | |
%37 = tt.addptr %36, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc221) | |
%38 = tt.expand_dims %12 {axis = 0 : i32} : tensor<128xi32> -> tensor<1x128xi32> loc(#loc222) | |
%39 = tt.broadcast %37 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc223) | |
%40 = tt.broadcast %38 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc223) | |
%41 = tt.addptr %39, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc223) | |
%42 = tt.load %41 : tensor<128x128x!tt.ptr<bf16>> loc(#loc224) | |
%43 = tt.splat %27 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc225) | |
%44 = tt.addptr %43, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc225) | |
%45 = tt.broadcast %44 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc226) | |
%46 = tt.addptr %45, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc226) | |
%47 = tt.load %46 : tensor<128x128x!tt.ptr<bf16>> loc(#loc227) | |
%48 = tt.splat %30 : !tt.ptr<f32> -> tensor<128x!tt.ptr<f32>> loc(#loc44) | |
%49 = tt.addptr %48, %33 : tensor<128x!tt.ptr<f32>>, tensor<128xi32> loc(#loc44) | |
%50 = tt.load %49 : tensor<128x!tt.ptr<f32>> loc(#loc45) | |
%51 = tt.splat %29 : !tt.ptr<f32> -> tensor<128x!tt.ptr<f32>> loc(#loc46) | |
%52 = tt.addptr %51, %33 : tensor<128x!tt.ptr<f32>>, tensor<128xi32> loc(#loc46) | |
%53 = tt.load %52 : tensor<128x!tt.ptr<f32>> loc(#loc47) | |
%54 = arith.cmpf oeq, %53, %cst_12 : tensor<128xf32> loc(#loc48) | |
%55 = arith.select %54, %cst_11, %53 : tensor<128xi1>, tensor<128xf32> loc(#loc49) | |
%56 = tt.expand_dims %55 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc50) | |
%57 = tt.addptr %arg9, %18 : !tt.ptr<i32>, i32 loc(#loc51) | |
%58 = tt.load %57 : !tt.ptr<i32> loc(#loc52) | |
%59 = arith.muli %58, %c128_i32 : i32 loc(#loc53) | |
%60 = tt.addptr %arg8, %17 : !tt.ptr<i32>, i32 loc(#loc54) | |
%61 = tt.load %60 : !tt.ptr<i32> loc(#loc55) | |
%62 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32> loc(#loc56) | |
%63 = tt.splat %59 : i32 -> tensor<64xi32> loc(#loc57) | |
%64 = arith.addi %63, %62 : tensor<64xi32> loc(#loc57) | |
%65 = tt.expand_dims %64 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc228) | |
%66 = arith.muli %65, %cst_0 : tensor<1x64xi32> loc(#loc229) | |
%67 = tt.splat %9 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc230) | |
%68 = tt.addptr %67, %66 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc230) | |
%69 = tt.expand_dims %12 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc231) | |
%70 = tt.broadcast %68 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc232) | |
%71 = tt.broadcast %69 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc232) | |
%72 = tt.addptr %70, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc232) | |
%73 = tt.splat %10 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc233) | |
%74 = tt.addptr %73, %66 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc233) | |
%75 = tt.broadcast %74 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc234) | |
%76 = tt.addptr %75, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc234) | |
%77 = arith.muli %61, %c2_i32 : i32 loc(#loc235) | |
%78 = arith.minsi %77, %c1024_i32 : i32 loc(#loc236) | |
%79 = tt.broadcast %34 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc391) | |
%80 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<128x1x!tt.ptr<i64>> loc(#loc392) | |
%81 = tt.addptr %80, %34 : tensor<128x1x!tt.ptr<i64>>, tensor<128x1xi32> loc(#loc392) | |
%82 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<1x64x!tt.ptr<i64>> loc(#loc393) | |
%83 = tt.broadcast %56 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc394) | |
%84 = tt.expand_dims %50 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc395) | |
%85 = tt.broadcast %84 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc396) | |
%86:4 = scf.for %arg18 = %c0_i32 to %78 step %c1_i32 iter_args(%arg19 = %cst_7, %arg20 = %72, %arg21 = %76, %arg22 = %64) -> (tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%114 = tt.load %arg20 : tensor<128x64x!tt.ptr<bf16>> loc(#loc551) | |
%115 = tt.dot %42, %114, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc398) | |
%116 = arith.mulf %115, %cst_5 : tensor<128x64xf32> loc(#loc399) | |
%117 = tt.expand_dims %arg22 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc400) | |
%118 = tt.broadcast %117 : tensor<1x64xi32> -> tensor<128x64xi32> loc(#loc391) | |
%119 = arith.cmpi sge, %79, %118 : tensor<128x64xi32> loc(#loc391) | |
%120 = tt.load %81 : tensor<128x1x!tt.ptr<i64>> loc(#loc401) | |
%121 = tt.addptr %82, %117 : tensor<1x64x!tt.ptr<i64>>, tensor<1x64xi32> loc(#loc393) | |
%122 = tt.load %121 : tensor<1x64x!tt.ptr<i64>> loc(#loc402) | |
%123 = tt.broadcast %120 : tensor<128x1xi64> -> tensor<128x64xi64> loc(#loc403) | |
%124 = tt.broadcast %122 : tensor<1x64xi64> -> tensor<128x64xi64> loc(#loc403) | |
%125 = arith.cmpi eq, %123, %124 : tensor<128x64xi64> loc(#loc403) | |
%126 = arith.andi %119, %125 : tensor<128x64xi1> loc(#loc404) | |
%127 = arith.select %126, %116, %cst_4 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc405) | |
%128 = arith.mulf %127, %cst_3 : tensor<128x64xf32> loc(#loc406) | |
%129 = arith.subf %128, %83 : tensor<128x64xf32> loc(#loc394) | |
%130 = math.exp2 %129 : tensor<128x64xf32> loc(#loc407) | |
%131 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc552) | |
%132 = tt.dot %47, %131, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc409) | |
%133 = arith.subf %132, %85 : tensor<128x64xf32> loc(#loc396) | |
%134 = arith.mulf %130, %133 : tensor<128x64xf32> loc(#loc410) | |
%135 = arith.select %126, %134, %cst_6 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc411) | |
%136 = arith.truncf %135 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc412) | |
%137 = tt.trans %114 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc413) | |
%138 = tt.dot %136, %137, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc414) | |
%139 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc415) | |
%140 = tt.addptr %57, %139 : !tt.ptr<i32>, i32 loc(#loc416) | |
%141 = tt.load %140 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc417) | |
%142 = arith.addi %139, %c1_i32 : i32 loc(#loc418) | |
%143 = arith.cmpi slt, %142, %61 : i32 loc(#loc419) | |
%144 = tt.addptr %140, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc420) | |
%145 = tt.load %144, %143 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc421) | |
%146 = arith.addi %arg18, %c1_i32 : i32 loc(#loc422) | |
%147 = arith.remsi %146, %c2_i32 : i32 loc(#loc423) | |
%148 = arith.cmpi eq, %147, %c0_i32 : i32 loc(#loc424) | |
%149 = arith.subi %145, %141 : i32 loc(#loc425) | |
%150 = arith.muli %149, %c128_i32 : i32 loc(#loc426) | |
%151 = arith.subi %150, %c64_i32 : i32 loc(#loc427) | |
%152 = arith.extui %148 : i1 to i32 loc(#loc428) | |
%153 = arith.muli %151, %152 : i32 loc(#loc428) | |
%154 = arith.subi %c1_i32, %152 : i32 loc(#loc429) | |
%155 = arith.muli %154, %c64_i32 : i32 loc(#loc430) | |
%156 = arith.addi %153, %155 : i32 loc(#loc431) | |
%157 = arith.muli %156, %c1024_i32 : i32 loc(#loc279) | |
%158 = tt.splat %157 : i32 -> tensor<128x64xi32> loc(#loc280) | |
%159 = tt.addptr %arg20, %158 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc280) | |
%160 = tt.addptr %arg21, %158 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc281) | |
%161 = tt.splat %156 : i32 -> tensor<64xi32> loc(#loc282) | |
%162 = arith.addi %arg22, %161 : tensor<64xi32> loc(#loc282) | |
scf.yield %138, %159, %160, %162 : tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc283) | |
} loc(#loc243) | |
%87 = tt.addptr %arg13, %18 : !tt.ptr<i32>, i32 loc(#loc117) | |
%88 = tt.load %87 : !tt.ptr<i32> loc(#loc118) | |
%89 = arith.muli %88, %c128_i32 : i32 loc(#loc119) | |
%90 = tt.addptr %arg12, %17 : !tt.ptr<i32>, i32 loc(#loc120) | |
%91 = tt.load %90 : !tt.ptr<i32> loc(#loc121) | |
%92 = tt.splat %89 : i32 -> tensor<64xi32> loc(#loc122) | |
%93 = arith.addi %92, %62 : tensor<64xi32> loc(#loc122) | |
%94 = tt.expand_dims %93 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc284) | |
%95 = arith.muli %94, %cst_0 : tensor<1x64xi32> loc(#loc285) | |
%96 = tt.addptr %67, %95 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc286) | |
%97 = tt.broadcast %96 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc287) | |
%98 = tt.addptr %97, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc287) | |
%99 = tt.addptr %73, %95 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc288) | |
%100 = tt.broadcast %99 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc289) | |
%101 = tt.addptr %100, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc289) | |
%102 = arith.muli %91, %c2_i32 : i32 loc(#loc290) | |
%103 = arith.minsi %102, %c1024_i32 : i32 loc(#loc291) | |
%104 = tt.broadcast %56 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc432) | |
%105 = tt.expand_dims %50 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc433) | |
%106 = tt.broadcast %105 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc434) | |
%107:4 = scf.for %arg18 = %c0_i32 to %103 step %c1_i32 iter_args(%arg19 = %86#0, %arg20 = %98, %arg21 = %101, %arg22 = %93) -> (tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%114 = tt.load %arg20 : tensor<128x64x!tt.ptr<bf16>> loc(#loc553) | |
%115 = tt.dot %42, %114, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc435) | |
%116 = arith.mulf %115, %cst_5 : tensor<128x64xf32> loc(#loc436) | |
%117 = arith.mulf %116, %cst_3 : tensor<128x64xf32> loc(#loc437) | |
%118 = arith.subf %117, %104 : tensor<128x64xf32> loc(#loc432) | |
%119 = math.exp2 %118 : tensor<128x64xf32> loc(#loc438) | |
%120 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc554) | |
%121 = tt.dot %47, %120, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc439) | |
%122 = arith.subf %121, %106 : tensor<128x64xf32> loc(#loc434) | |
%123 = arith.mulf %119, %122 : tensor<128x64xf32> loc(#loc440) | |
%124 = arith.truncf %123 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc441) | |
%125 = tt.trans %114 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc442) | |
%126 = tt.dot %124, %125, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc443) | |
%127 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc444) | |
%128 = tt.addptr %87, %127 : !tt.ptr<i32>, i32 loc(#loc445) | |
%129 = tt.load %128 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc446) | |
%130 = arith.addi %127, %c1_i32 : i32 loc(#loc447) | |
%131 = arith.cmpi slt, %130, %91 : i32 loc(#loc448) | |
%132 = tt.addptr %128, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc449) | |
%133 = tt.load %132, %131 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc450) | |
%134 = arith.addi %arg18, %c1_i32 : i32 loc(#loc451) | |
%135 = arith.remsi %134, %c2_i32 : i32 loc(#loc452) | |
%136 = arith.cmpi eq, %135, %c0_i32 : i32 loc(#loc453) | |
%137 = arith.subi %133, %129 : i32 loc(#loc454) | |
%138 = arith.muli %137, %c128_i32 : i32 loc(#loc455) | |
%139 = arith.subi %138, %c64_i32 : i32 loc(#loc456) | |
%140 = arith.extui %136 : i1 to i32 loc(#loc457) | |
%141 = arith.muli %139, %140 : i32 loc(#loc457) | |
%142 = arith.subi %c1_i32, %140 : i32 loc(#loc458) | |
%143 = arith.muli %142, %c64_i32 : i32 loc(#loc459) | |
%144 = arith.addi %141, %143 : i32 loc(#loc460) | |
%145 = arith.muli %144, %c1024_i32 : i32 loc(#loc293) | |
%146 = tt.splat %145 : i32 -> tensor<128x64xi32> loc(#loc294) | |
%147 = tt.addptr %arg20, %146 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc294) | |
%148 = tt.addptr %arg21, %146 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc295) | |
%149 = tt.splat %144 : i32 -> tensor<64xi32> loc(#loc296) | |
%150 = arith.addi %arg22, %149 : tensor<64xi32> loc(#loc296) | |
scf.yield %126, %147, %148, %150 : tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc297) | |
} loc(#loc292) | |
%108 = tt.splat %28 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc124) | |
%109 = tt.addptr %108, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc124) | |
%110 = tt.broadcast %109 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc125) | |
%111 = tt.addptr %110, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc125) | |
%112 = arith.mulf %107#0, %cst_9 : tensor<128x128xf32> loc(#loc126) | |
%113 = arith.truncf %112 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc127) | |
tt.store %111, %113 : tensor<128x128x!tt.ptr<bf16>> loc(#loc127) | |
} else { | |
%14 = arith.muli %0, %c128_i32 : i32 loc(#loc128) | |
%15 = tt.splat %14 : i32 -> tensor<128xi32> loc(#loc129) | |
%16 = arith.addi %15, %12 : tensor<128xi32> loc(#loc129) | |
%17 = tt.expand_dims %16 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc298) | |
%18 = arith.muli %17, %cst_10 : tensor<128x1xi32> loc(#loc299) | |
%19 = tt.splat %9 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc300) | |
%20 = tt.addptr %19, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc300) | |
%21 = tt.expand_dims %12 {axis = 0 : i32} : tensor<128xi32> -> tensor<1x128xi32> loc(#loc301) | |
%22 = tt.broadcast %20 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc302) | |
%23 = tt.broadcast %21 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc302) | |
%24 = tt.addptr %22, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc302) | |
%25 = tt.load %24 : tensor<128x128x!tt.ptr<bf16>> loc(#loc303) | |
%26 = tt.splat %10 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc304) | |
%27 = tt.addptr %26, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc304) | |
%28 = tt.broadcast %27 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc305) | |
%29 = tt.addptr %28, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc305) | |
%30 = tt.load %29 : tensor<128x128x!tt.ptr<bf16>> loc(#loc306) | |
%31 = arith.muli %2, %c8_i32 : i32 loc(#loc132) | |
%32 = arith.addi %31, %3 : i32 loc(#loc133) | |
%33 = arith.muli %32, %c65536_i32 : i32 loc(#loc134) | |
%34 = arith.extsi %33 : i32 to i64 loc(#loc135) | |
%35 = tt.addptr %arg0, %8 : !tt.ptr<bf16>, i64 loc(#loc136) | |
%36 = tt.addptr %arg5, %8 : !tt.ptr<bf16>, i64 loc(#loc137) | |
%37 = tt.addptr %arg3, %34 : !tt.ptr<f32>, i64 loc(#loc138) | |
%38 = tt.addptr %arg4, %34 : !tt.ptr<f32>, i64 loc(#loc139) | |
%39 = arith.muli %0, %c512_i32 : i32 loc(#loc140) | |
%40 = tt.addptr %arg11, %39 : !tt.ptr<i32>, i32 loc(#loc141) | |
%41 = tt.load %40 : !tt.ptr<i32> loc(#loc142) | |
%42 = arith.muli %41, %c128_i32 : i32 loc(#loc143) | |
%43 = tt.addptr %arg10, %0 : !tt.ptr<i32>, i32 loc(#loc144) | |
%44 = tt.load %43 : !tt.ptr<i32> loc(#loc145) | |
%45 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32> loc(#loc146) | |
%46 = tt.splat %42 : i32 -> tensor<64xi32> loc(#loc147) | |
%47 = arith.addi %46, %45 : tensor<64xi32> loc(#loc147) | |
%48 = tt.expand_dims %47 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc307) | |
%49 = arith.muli %48, %cst_0 : tensor<1x64xi32> loc(#loc308) | |
%50 = tt.splat %35 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc309) | |
%51 = tt.addptr %50, %49 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc309) | |
%52 = tt.expand_dims %12 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc310) | |
%53 = tt.broadcast %51 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc311) | |
%54 = tt.broadcast %52 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc311) | |
%55 = tt.addptr %53, %54 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc311) | |
%56 = tt.expand_dims %47 {axis = 1 : i32} : tensor<64xi32> -> tensor<64x1xi32> loc(#loc312) | |
%57 = arith.muli %56, %cst : tensor<64x1xi32> loc(#loc313) | |
%58 = tt.splat %36 : !tt.ptr<bf16> -> tensor<64x1x!tt.ptr<bf16>> loc(#loc314) | |
%59 = tt.addptr %58, %57 : tensor<64x1x!tt.ptr<bf16>>, tensor<64x1xi32> loc(#loc314) | |
%60 = tt.broadcast %59 : tensor<64x1x!tt.ptr<bf16>> -> tensor<64x128x!tt.ptr<bf16>> loc(#loc315) | |
%61 = tt.broadcast %21 : tensor<1x128xi32> -> tensor<64x128xi32> loc(#loc315) | |
%62 = tt.addptr %60, %61 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc315) | |
%63 = arith.muli %44, %c2_i32 : i32 loc(#loc316) | |
%64 = arith.minsi %63, %c1024_i32 : i32 loc(#loc317) | |
%65 = tt.splat %37 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc461) | |
%66 = tt.broadcast %17 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc462) | |
%67 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<1x64x!tt.ptr<i64>> loc(#loc463) | |
%68 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<128x1x!tt.ptr<i64>> loc(#loc464) | |
%69 = tt.addptr %68, %17 : tensor<128x1x!tt.ptr<i64>>, tensor<128x1xi32> loc(#loc464) | |
%70 = tt.splat %38 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc465) | |
%71:5 = scf.for %arg18 = %c0_i32 to %64 step %c1_i32 iter_args(%arg19 = %cst_7, %arg20 = %cst_7, %arg21 = %55, %arg22 = %62, %arg23 = %47) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%110 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc555) | |
%111 = tt.addptr %65, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc461) | |
%112 = tt.load %111 : tensor<64x!tt.ptr<f32>> loc(#loc467) | |
%113 = arith.cmpf oeq, %112, %cst_2 : tensor<64xf32> loc(#loc468) | |
%114 = arith.select %113, %cst_1, %112 : tensor<64xi1>, tensor<64xf32> loc(#loc469) | |
%115 = tt.dot %25, %110, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc470) | |
%116 = arith.mulf %115, %cst_5 : tensor<128x64xf32> loc(#loc471) | |
%117 = tt.expand_dims %arg23 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc472) | |
%118 = tt.broadcast %117 : tensor<1x64xi32> -> tensor<128x64xi32> loc(#loc462) | |
%119 = arith.cmpi sge, %118, %66 : tensor<128x64xi32> loc(#loc462) | |
%120 = tt.addptr %67, %117 : tensor<1x64x!tt.ptr<i64>>, tensor<1x64xi32> loc(#loc463) | |
%121 = tt.load %120 : tensor<1x64x!tt.ptr<i64>> loc(#loc473) | |
%122 = tt.load %69 : tensor<128x1x!tt.ptr<i64>> loc(#loc474) | |
%123 = tt.broadcast %121 : tensor<1x64xi64> -> tensor<128x64xi64> loc(#loc475) | |
%124 = tt.broadcast %122 : tensor<128x1xi64> -> tensor<128x64xi64> loc(#loc475) | |
%125 = arith.cmpi eq, %123, %124 : tensor<128x64xi64> loc(#loc475) | |
%126 = arith.andi %119, %125 : tensor<128x64xi1> loc(#loc476) | |
%127 = arith.select %126, %116, %cst_4 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc477) | |
%128 = arith.mulf %127, %cst_3 : tensor<128x64xf32> loc(#loc478) | |
%129 = tt.expand_dims %114 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc479) | |
%130 = tt.broadcast %129 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc480) | |
%131 = arith.subf %128, %130 : tensor<128x64xf32> loc(#loc480) | |
%132 = math.exp2 %131 : tensor<128x64xf32> loc(#loc481) | |
%133 = tt.load %arg22 : tensor<64x128x!tt.ptr<bf16>> loc(#loc556) | |
%134 = arith.truncf %132 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc483) | |
%135 = tt.dot %134, %133, %arg20 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc484) | |
%136 = tt.addptr %70, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc465) | |
%137 = tt.load %136 : tensor<64x!tt.ptr<f32>> loc(#loc485) | |
%138 = tt.trans %133 {order = array<i32: 1, 0>} : tensor<64x128xbf16> -> tensor<128x64xbf16> loc(#loc486) | |
%139 = tt.dot %30, %138, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc487) | |
%140 = tt.expand_dims %137 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc488) | |
%141 = tt.broadcast %140 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc489) | |
%142 = arith.subf %139, %141 : tensor<128x64xf32> loc(#loc489) | |
%143 = arith.mulf %132, %142 : tensor<128x64xf32> loc(#loc490) | |
%144 = arith.select %126, %143, %cst_6 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc491) | |
%145 = arith.truncf %144 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc492) | |
%146 = tt.trans %110 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc493) | |
%147 = tt.dot %145, %146, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc494) | |
%148 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc495) | |
%149 = tt.addptr %40, %148 : !tt.ptr<i32>, i32 loc(#loc496) | |
%150 = tt.load %149 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc497) | |
%151 = arith.addi %148, %c1_i32 : i32 loc(#loc498) | |
%152 = arith.cmpi slt, %151, %44 : i32 loc(#loc499) | |
%153 = tt.addptr %149, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc500) | |
%154 = tt.load %153, %152 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc501) | |
%155 = arith.addi %arg18, %c1_i32 : i32 loc(#loc502) | |
%156 = arith.remsi %155, %c2_i32 : i32 loc(#loc503) | |
%157 = arith.cmpi eq, %156, %c0_i32 : i32 loc(#loc504) | |
%158 = arith.subi %154, %150 : i32 loc(#loc505) | |
%159 = arith.muli %158, %c128_i32 : i32 loc(#loc506) | |
%160 = arith.subi %159, %c64_i32 : i32 loc(#loc507) | |
%161 = arith.extui %157 : i1 to i32 loc(#loc508) | |
%162 = arith.muli %160, %161 : i32 loc(#loc508) | |
%163 = arith.subi %c1_i32, %161 : i32 loc(#loc509) | |
%164 = arith.muli %163, %c64_i32 : i32 loc(#loc510) | |
%165 = arith.addi %162, %164 : i32 loc(#loc511) | |
%166 = arith.muli %165, %c1024_i32 : i32 loc(#loc370) | |
%167 = tt.splat %166 : i32 -> tensor<128x64xi32> loc(#loc371) | |
%168 = tt.addptr %arg21, %167 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc371) | |
%169 = tt.splat %166 : i32 -> tensor<64x128xi32> loc(#loc372) | |
%170 = tt.addptr %arg22, %169 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc372) | |
%171 = tt.splat %165 : i32 -> tensor<64xi32> loc(#loc373) | |
%172 = arith.addi %arg23, %171 : tensor<64xi32> loc(#loc373) | |
scf.yield %147, %135, %168, %170, %172 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc374) | |
} loc(#loc323) | |
%72 = tt.addptr %arg15, %39 : !tt.ptr<i32>, i32 loc(#loc202) | |
%73 = tt.load %72 : !tt.ptr<i32> loc(#loc203) | |
%74 = arith.muli %73, %c128_i32 : i32 loc(#loc204) | |
%75 = tt.addptr %arg14, %0 : !tt.ptr<i32>, i32 loc(#loc205) | |
%76 = tt.load %75 : !tt.ptr<i32> loc(#loc206) | |
%77 = tt.splat %74 : i32 -> tensor<64xi32> loc(#loc207) | |
%78 = arith.addi %77, %45 : tensor<64xi32> loc(#loc207) | |
%79 = tt.expand_dims %78 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc375) | |
%80 = arith.muli %79, %cst_0 : tensor<1x64xi32> loc(#loc376) | |
%81 = tt.addptr %50, %80 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc377) | |
%82 = tt.broadcast %81 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc378) | |
%83 = tt.addptr %82, %54 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc378) | |
%84 = tt.expand_dims %78 {axis = 1 : i32} : tensor<64xi32> -> tensor<64x1xi32> loc(#loc379) | |
%85 = arith.muli %84, %cst : tensor<64x1xi32> loc(#loc380) | |
%86 = tt.addptr %58, %85 : tensor<64x1x!tt.ptr<bf16>>, tensor<64x1xi32> loc(#loc381) | |
%87 = tt.broadcast %86 : tensor<64x1x!tt.ptr<bf16>> -> tensor<64x128x!tt.ptr<bf16>> loc(#loc382) | |
%88 = tt.addptr %87, %61 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc382) | |
%89 = arith.muli %76, %c2_i32 : i32 loc(#loc383) | |
%90 = arith.minsi %89, %c1024_i32 : i32 loc(#loc384) | |
%91 = tt.splat %37 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc512) | |
%92 = tt.splat %38 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc513) | |
// Main accumulation loop. Runs %arg18 = 0 .. %90 (exclusive) with step 1 and carries:
//   %arg19, %arg20 : two 128x128 f32 accumulators (initialised from %71#0 and %71#1)
//   %arg21         : 128x64 bf16 pointer tile (initialised from %83)
//   %arg22         : 64x128 bf16 pointer tile (initialised from %88)
//   %arg23         : 64 i32 indices (initialised from %78)
// Each iteration loads one tile through each pointer tile, performs four tt.dot products
// (two of which accumulate into %arg20 / %arg19), then advances the pointers and indices.
// NOTE(review): the structure resembles the inner loop of an attention backward pass
// (exp2-based probability recomputation followed by gradient accumulation); confirm against
// the generating Python source before relying on that reading.
%93:5 = scf.for %arg18 = %c0_i32 to %90 step %c1_i32 iter_args(%arg19 = %71#0, %arg20 = %71#1, %arg21 = %83, %arg22 = %88, %arg23 = %78) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
// Load the 128x64 bf16 tile addressed by %arg21 (no mask).
%110 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc557) | |
// Load 64 f32 values at %91 + %arg23; lanes equal to %cst_2 are replaced by %cst_1.
// NOTE(review): %cst_2 / %cst_1 are defined outside this view (presumably a sentinel and its replacement); confirm.
%111 = tt.addptr %91, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc512) | |
%112 = tt.load %111 : tensor<64x!tt.ptr<f32>> loc(#loc514) | |
%113 = arith.cmpf oeq, %112, %cst_2 : tensor<64xf32> loc(#loc515) | |
%114 = arith.select %113, %cst_1, %112 : tensor<64xi1>, tensor<64xf32> loc(#loc516) | |
// 128x64 f32 product of %25 (128x128 bf16) and the loaded tile, starting from accumulator %cst_6,
// then scaled by %cst_5 and by %cst_3.
// NOTE(review): %cst_6 is presumably an all-zero tensor; confirm against its definition.
%115 = tt.dot %25, %110, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc517) | |
%116 = arith.mulf %115, %cst_5 : tensor<128x64xf32> loc(#loc518) | |
%117 = arith.mulf %116, %cst_3 : tensor<128x64xf32> loc(#loc519) | |
// Subtract the per-column values %114 (broadcast over the 128 rows) and apply base-2 exponential.
%118 = tt.expand_dims %114 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc520) | |
%119 = tt.broadcast %118 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc521) | |
%120 = arith.subf %117, %119 : tensor<128x64xf32> loc(#loc521) | |
%121 = math.exp2 %120 : tensor<128x64xf32> loc(#loc522) | |
// Load the 64x128 bf16 tile addressed by %arg22 (no mask).
%122 = tt.load %arg22 : tensor<64x128x!tt.ptr<bf16>> loc(#loc558) | |
// First accumulation: %arg20 += bf16(%121) x %122. The result %124 is yielded as the next %arg20.
%123 = arith.truncf %121 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc523) | |
%124 = tt.dot %123, %122, %arg20 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc524) | |
// Load 64 f32 values at %92 + %arg23 (same indices as the %91 load above).
%125 = tt.addptr %92, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc513) | |
%126 = tt.load %125 : tensor<64x!tt.ptr<f32>> loc(#loc525) | |
// 128x64 f32 product of %30 (128x128 bf16) and the transpose of %122, starting from accumulator %cst_6.
%127 = tt.trans %122 {order = array<i32: 1, 0>} : tensor<64x128xbf16> -> tensor<128x64xbf16> loc(#loc526) | |
%128 = tt.dot %30, %127, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc527) | |
// Subtract the per-column values %126 (broadcast over rows), multiply elementwise by %121, truncate to bf16.
%129 = tt.expand_dims %126 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc528) | |
%130 = tt.broadcast %129 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc529) | |
%131 = arith.subf %128, %130 : tensor<128x64xf32> loc(#loc529) | |
%132 = arith.mulf %121, %131 : tensor<128x64xf32> loc(#loc530) | |
%133 = arith.truncf %132 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc531) | |
// Second accumulation: %arg19 += %133 x transpose(%110). The result %135 is yielded as the next %arg19.
%134 = tt.trans %110 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc532) | |
%135 = tt.dot %133, %134, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc533) | |
// Compute the index step for the next iteration from the i32 table at %72.
// %136 = %arg18 / 2 selects the current table entry; %138 is that entry.
%136 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc534) | |
%137 = tt.addptr %72, %136 : !tt.ptr<i32>, i32 loc(#loc535) | |
%138 = tt.load %137 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc536) | |
// The following entry (%142) is loaded only when %136 + 1 < %76.
// NOTE(review): this masked load has no `other` operand, so %142 is unspecified when the mask is
// false. It only influences the step when (%arg18 + 1) is even; that can coincide with a false mask
// on the last iteration when the trip count equals %76 * 2. The pointer/index results of that
// iteration are not read by the code visible after the loop, but confirm nothing else uses them.
%139 = arith.addi %136, %c1_i32 : i32 loc(#loc537) | |
%140 = arith.cmpi slt, %139, %76 : i32 loc(#loc538) | |
%141 = tt.addptr %137, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc539) | |
%142 = tt.load %141, %140 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc540) | |
// %145 is true when (%arg18 + 1) is a multiple of 2.
%143 = arith.addi %arg18, %c1_i32 : i32 loc(#loc541) | |
%144 = arith.remsi %143, %c2_i32 : i32 loc(#loc542) | |
%145 = arith.cmpi eq, %144, %c0_i32 : i32 loc(#loc543) | |
// Branch-free select of the step %153:
//   %145 true  -> (%142 - %138) * 128 - 64
//   %145 false -> 64
%146 = arith.subi %142, %138 : i32 loc(#loc544) | |
%147 = arith.muli %146, %c128_i32 : i32 loc(#loc545) | |
%148 = arith.subi %147, %c64_i32 : i32 loc(#loc546) | |
%149 = arith.extui %145 : i1 to i32 loc(#loc547) | |
%150 = arith.muli %148, %149 : i32 loc(#loc547) | |
%151 = arith.subi %c1_i32, %149 : i32 loc(#loc548) | |
%152 = arith.muli %151, %c64_i32 : i32 loc(#loc549) | |
%153 = arith.addi %150, %152 : i32 loc(#loc550) | |
// Advance both pointer tiles by step * 1024 and the index vector by step.
%154 = arith.muli %153, %c1024_i32 : i32 loc(#loc386) | |
%155 = tt.splat %154 : i32 -> tensor<128x64xi32> loc(#loc387) | |
%156 = tt.addptr %arg21, %155 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc387) | |
%157 = tt.splat %154 : i32 -> tensor<64x128xi32> loc(#loc388) | |
%158 = tt.addptr %arg22, %157 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc388) | |
%159 = tt.splat %153 : i32 -> tensor<64xi32> loc(#loc389) | |
%160 = arith.addi %arg23, %159 : tensor<64xi32> loc(#loc389) | |
// Yield order matches iter_args: (%arg19, %arg20, %arg21, %arg22, %arg23).
scf.yield %135, %124, %156, %158, %160 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc390) | |
} loc(#loc385) | |
// Write the two 128x128 f32 accumulators produced by the loop (%93#1 and %93#0) to memory as bf16.
// %11, %17, %18, %21, %23, %4, %cst_8 and %cst_9 are defined earlier in the function.
// Destination 1: base pointer %11 plus the 128x1 offsets %18, broadcast to 128x128, plus offsets %23.
%94 = tt.splat %11 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc209) | |
%95 = tt.addptr %94, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc209) | |
%96 = tt.broadcast %95 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc210) | |
%97 = tt.addptr %96, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc210) | |
// %93#1 is truncated to bf16 and stored without a mask.
%98 = arith.truncf %93#1 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc211) | |
tt.store %97, %98 : tensor<128x128x!tt.ptr<bf16>> loc(#loc211) | |
// %93#0 is scaled by %cst_9 before being stored.
%99 = arith.mulf %93#0, %cst_9 : tensor<128x128xf32> loc(#loc212) | |
// Row mask for the second store: %17 < %cst_8 (signed compare).
// NOTE(review): %cst_8 is presumably a row-count bound; confirm against its definition.
%100 = arith.cmpi slt, %17, %cst_8 : tensor<128x1xi32> loc(#loc213) | |
// Destination 2 element offsets: (%21 + %4) broadcast over rows, plus %18 broadcast over columns.
%101 = tt.splat %4 : i32 -> tensor<1x128xi32> loc(#loc214) | |
%102 = arith.addi %21, %101 : tensor<1x128xi32> loc(#loc214) | |
%103 = tt.broadcast %102 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc215) | |
%104 = tt.broadcast %18 : tensor<128x1xi32> -> tensor<128x128xi32> loc(#loc215) | |
%105 = arith.addi %103, %104 : tensor<128x128xi32> loc(#loc215) | |
// Pointers into the buffer passed as %arg17.
%106 = tt.splat %arg17 : !tt.ptr<bf16> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc216) | |
%107 = tt.addptr %106, %105 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc216) | |
// Broadcast the row mask to 128x128, truncate to bf16, and store only where the mask is true.
%108 = tt.broadcast %100 : tensor<128x1xi1> -> tensor<128x128xi1> loc(#loc217) | |
%109 = arith.truncf %99 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc217) | |
tt.store %107, %109, %108 : tensor<128x128x!tt.ptr<bf16>> loc(#loc217) | |
} loc(#loc16) | |
tt.return loc(#loc218) | |
} loc(#loc) | |
} loc(#loc) | |
// Debug-location alias table. The `loc(#locN)` suffixes on the operations above refer to these aliases.
// #loc1 is the unknown location; #loc2 through #loc218 each name a line:column position in the
// generated Python source file whose path appears in every entry.
#loc1 = loc(unknown) | |
#loc2 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":108:24) | |
#loc3 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":112:27) | |
#loc4 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":113:23) | |
#loc5 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":114:23) | |
#loc6 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":122:25) | |
#loc7 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":122:59) | |
#loc8 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":126:50) | |
#loc9 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":126:37) | |
#loc10 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":126:61) | |
#loc11 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":129:9) | |
#loc12 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":130:9) | |
#loc13 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":131:10) | |
#loc14 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":134:26) | |
#loc15 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":137:14) | |
#loc16 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":137:7) | |
#loc17 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":138:24) | |
#loc18 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":142:29) | |
#loc19 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":142:44) | |
#loc20 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":143:35) | |
#loc21 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":153:83) | |
#loc22 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":156:30) | |
#loc23 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":156:40) | |
#loc24 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":156:63) | |
#loc25 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":159:30) | |
#loc26 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":159:35) | |
#loc27 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":159:46) | |
#loc28 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":159:56) | |
#loc29 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":161:17) | |
#loc30 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":162:19) | |
#loc31 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":165:19) | |
#loc32 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":166:21) | |
#loc33 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":167:25) | |
#loc34 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":172:36) | |
#loc35 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":173:29) | |
#loc36 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":810:27) | |
#loc37 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":176:107) | |
#loc38 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":810:38) | |
#loc39 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":810:20) | |
#loc40 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":810:56) | |
#loc41 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":810:49) | |
#loc42 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":820:23) | |
#loc43 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":177:111) | |
#loc44 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":183:34) | |
#loc45 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":183:25) | |
#loc46 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":184:33) | |
#loc47 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":184:26) | |
#loc48 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":188:30) | |
#loc49 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":188:50) | |
#loc50 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":189:18) | |
#loc51 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":193:30) | |
#loc52 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":194:27) | |
#loc53 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":194:41) | |
#loc54 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":195:53) | |
#loc55 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":195:39) | |
#loc56 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":197:42) | |
#loc57 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":197:29) | |
#loc58 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":385:26) | |
#loc59 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":205:12) | |
#loc60 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":385:37) | |
#loc61 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":385:18) | |
#loc62 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":385:56) | |
#loc63 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":385:49) | |
#loc64 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":386:18) | |
#loc65 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":386:49) | |
#loc66 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":390:43) | |
#loc67 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":390:63) | |
#loc68 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":506:23) | |
#loc69 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":433:16) | |
#loc70 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":507:34) | |
#loc71 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":508:34) | |
#loc72 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":521:39) | |
#loc73 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":527:22) | |
#loc74 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":527:19) | |
#loc75 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":425:32) | |
#loc76 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":484:105) | |
#loc77 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":485:19) | |
#loc78 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":487:14) | |
#loc79 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":490:36) | |
#loc80 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":507:23) | |
#loc81 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":508:23) | |
#loc82 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":509:23) | |
#loc83 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":510:22) | |
#loc84 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":517:69) | |
#loc85 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":520:27) | |
#loc86 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":521:21) | |
#loc87 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":524:104) | |
#loc88 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":526:20) | |
#loc89 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":527:14) | |
#loc90 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":546:43) | |
#loc91 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":548:15) | |
#loc92 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":550:30) | |
#loc93 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":550:21) | |
#loc94 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":784:33) | |
#loc95 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":439:68) | |
#loc96 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":785:38) | |
#loc97 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":785:24) | |
#loc98 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":786:109) | |
#loc99 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":786:113) | |
#loc100 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":786:55) | |
#loc101 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":786:25) | |
#loc102 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":787:30) | |
#loc103 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":787:35) | |
#loc104 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":787:60) | |
#loc105 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":788:34) | |
#loc106 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":788:48) | |
#loc107 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":788:63) | |
#loc108 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":789:29) | |
#loc109 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":789:47) | |
#loc110 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":789:61) | |
#loc111 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":789:42) | |
#loc112 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":442:32) | |
#loc113 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":442:23) | |
#loc114 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":443:23) | |
#loc115 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":445:23) | |
#loc116 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":445:12) | |
#loc117 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":212:39) | |
#loc118 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":213:31) | |
#loc119 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":213:45) | |
#loc120 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":214:62) | |
#loc121 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":214:43) | |
#loc122 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":216:33) | |
#loc123 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":224:16) | |
#loc124 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":229:24) | |
#loc125 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":229:56) | |
#loc126 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":230:14) | |
#loc127 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":232:30) | |
#loc128 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":250:25) | |
#loc129 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":251:29) | |
#loc130 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":254:107) | |
#loc131 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":255:107) | |
#loc132 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":267:34) | |
#loc133 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":267:39) | |
#loc134 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":267:50) | |
#loc135 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":267:60) | |
#loc136 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":269:21) | |
#loc137 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":270:23) | |
#loc138 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":273:25) | |
#loc139 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":274:29) | |
#loc140 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":280:81) | |
#loc141 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":284:32) | |
#loc142 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":285:30) | |
#loc143 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":285:43) | |
#loc144 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":286:55) | |
#loc145 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":286:42) | |
#loc146 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":288:45) | |
#loc147 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":288:32) | |
#loc148 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":596:26) | |
#loc149 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":296:16) | |
#loc150 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":596:37) | |
#loc151 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":596:18) | |
#loc152 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":596:56) | |
#loc153 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":596:49) | |
#loc154 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":597:27) | |
#loc155 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":597:38) | |
#loc156 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":597:19) | |
#loc157 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":597:51) | |
#loc158 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":600:42) | |
#loc159 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":600:61) | |
#loc160 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":696:28) | |
#loc161 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":643:16) | |
#loc162 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":721:25) | |
#loc163 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":722:35) | |
#loc164 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":723:35) | |
#loc165 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":741:29) | |
#loc166 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":635:32) | |
#loc167 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":693:105) | |
#loc168 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":696:22) | |
#loc169 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":699:26) | |
#loc170 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":699:46) | |
#loc171 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":700:20) | |
#loc172 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":702:15) | |
#loc173 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":704:36) | |
#loc174 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":722:24) | |
#loc175 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":723:24) | |
#loc176 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":724:25) | |
#loc177 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":725:24) | |
#loc178 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":731:69) | |
#loc179 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":734:27) | |
#loc180 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":735:44) | |
#loc181 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":735:40) | |
#loc182 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":735:22) | |
#loc183 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":736:99) | |
#loc184 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":739:24) | |
#loc185 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":739:43) | |
#loc186 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":741:21) | |
#loc187 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":745:29) | |
#loc188 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":745:20) | |
#loc189 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":746:25) | |
#loc190 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":746:22) | |
#loc191 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":746:16) | |
#loc192 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":770:45) | |
#loc193 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":772:24) | |
#loc194 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":772:52) | |
#loc195 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":772:43) | |
#loc196 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":648:66) | |
#loc197 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":651:32) | |
#loc198 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":651:23) | |
#loc199 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":652:23) | |
#loc200 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":654:23) | |
#loc201 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":654:12) | |
#loc202 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":304:41) | |
#loc203 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":305:34) | |
#loc204 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":305:47) | |
#loc205 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":306:64) | |
#loc206 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":306:46) | |
#loc207 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":308:36) | |
#loc208 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":316:20) | |
#loc209 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":321:23) | |
#loc210 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":321:55) | |
#loc211 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":328:30) | |
#loc212 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":332:14) | |
#loc213 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":335:29) | |
#loc214 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":342:55) | |
#loc215 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":342:69) | |
#loc216 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":342:29) | |
#loc217 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":342:99) | |
#loc218 = loc("/tmp/torchinductor_dberard/gy/cgy5zvgvr3m2yfnc7jim2n7k35hawsgtqmr2ap6pyu3d4c6muk2u.py":137:4) | |
// Call-site location aliases start here. Each entry has the form `callsite(#locA at #locB)`,
// pairing a callee location #locA with the caller location #locB; both refer to file-location
// aliases defined earlier in this table.
#loc219 = loc(callsite(#loc36 at #loc37)) | |
#loc220 = loc(callsite(#loc38 at #loc37)) | |
#loc221 = loc(callsite(#loc39 at #loc37)) | |
#loc222 = loc(callsite(#loc40 at #loc37)) | |
#loc223 = loc(callsite(#loc41 at #loc37)) | |
#loc224 = loc(callsite(#loc42 at #loc37)) | |
#loc225 = loc(callsite(#loc39 at #loc43)) | |
#loc226 = loc(callsite(#loc41 at #loc43)) | |
#loc227 = loc(callsite(#loc42 at #loc43)) | |
#loc228 = loc(callsite(#loc58 at #loc59)) | |
#loc229 = loc(callsite(#loc60 at #loc59)) | |
#loc230 = loc(callsite(#loc61 at #loc59)) | |
#loc231 = loc(callsite(#loc62 at #loc59)) | |
#loc232 = loc(callsite(#loc63 at #loc59)) | |
#loc233 = loc(callsite(#loc64 at #loc59)) | |
#loc234 = loc(callsite(#loc65 at #loc59)) | |
#loc235 = loc(callsite(#loc66 at #loc59)) | |
#loc236 = loc(callsite(#loc67 at #loc59)) | |
#loc237 = loc(callsite(#loc68 at #loc69)) | |
#loc238 = loc(callsite(#loc70 at #loc69)) | |
#loc239 = loc(callsite(#loc71 at #loc69)) | |
#loc240 = loc(callsite(#loc72 at #loc69)) | |
#loc241 = loc(callsite(#loc73 at #loc69)) | |
#loc242 = loc(callsite(#loc74 at #loc69)) | |
#loc243 = loc(callsite(#loc75 at #loc59)) | |
#loc244 = loc(callsite(#loc42 at #loc76)) | |
#loc245 = loc(callsite(#loc77 at #loc69)) | |
#loc246 = loc(callsite(#loc78 at #loc69)) | |
#loc247 = loc(callsite(#loc79 at #loc69)) | |
#loc248 = loc(callsite(#loc80 at #loc69)) | |
#loc249 = loc(callsite(#loc81 at #loc69)) | |
#loc250 = loc(callsite(#loc82 at #loc69)) | |
#loc251 = loc(callsite(#loc83 at #loc69)) | |
#loc252 = loc(callsite(#loc84 at #loc69)) | |
#loc253 = loc(callsite(#loc85 at #loc69)) | |
#loc254 = loc(callsite(#loc86 at #loc69)) | |
#loc255 = loc(callsite(#loc42 at #loc87)) | |
#loc256 = loc(callsite(#loc88 at #loc69)) | |
#loc257 = loc(callsite(#loc89 at #loc69)) | |
#loc258 = loc(callsite(#loc90 at #loc69)) | |
#loc259 = loc(callsite(#loc91 at #loc69)) | |
#loc260 = loc(callsite(#loc92 at #loc69)) | |
#loc261 = loc(callsite(#loc93 at #loc69)) | |
#loc262 = loc(callsite(#loc94 at #loc95)) | |
#loc263 = loc(callsite(#loc96 at #loc95)) | |
#loc264 = loc(callsite(#loc97 at #loc95)) | |
#loc265 = loc(callsite(#loc98 at #loc95)) | |
#loc266 = loc(callsite(#loc99 at #loc95)) | |
#loc267 = loc(callsite(#loc100 at #loc95)) | |
#loc268 = loc(callsite(#loc101 at #loc95)) | |
#loc269 = loc(callsite(#loc102 at #loc95)) | |
#loc270 = loc(callsite(#loc103 at #loc95)) | |
#loc271 = loc(callsite(#loc104 at #loc95)) | |
#loc272 = loc(callsite(#loc105 at #loc95)) | |
#loc273 = loc(callsite(#loc106 at #loc95)) | |
#loc274 = loc(callsite(#loc107 at #loc95)) | |
#loc275 = loc(callsite(#loc108 at #loc95)) | |
#loc276 = loc(callsite(#loc109 at #loc95)) | |
#loc277 = loc(callsite(#loc110 at #loc95)) | |
#loc278 = loc(callsite(#loc111 at #loc95)) | |
#loc279 = loc(callsite(#loc112 at #loc59)) | |
#loc280 = loc(callsite(#loc113 at #loc59)) | |
#loc281 = loc(callsite(#loc114 at #loc59)) | |
#loc282 = loc(callsite(#loc115 at #loc59)) | |
#loc283 = loc(callsite(#loc116 at #loc59)) | |
#loc284 = loc(callsite(#loc58 at #loc123)) | |
#loc285 = loc(callsite(#loc60 at #loc123)) | |
#loc286 = loc(callsite(#loc61 at #loc123)) | |
#loc287 = loc(callsite(#loc63 at #loc123)) | |
#loc288 = loc(callsite(#loc64 at #loc123)) | |
#loc289 = loc(callsite(#loc65 at #loc123)) | |
#loc290 = loc(callsite(#loc66 at #loc123)) | |
#loc291 = loc(callsite(#loc67 at #loc123)) | |
#loc292 = loc(callsite(#loc75 at #loc123)) | |
#loc293 = loc(callsite(#loc112 at #loc123)) | |
#loc294 = loc(callsite(#loc113 at #loc123)) | |
#loc295 = loc(callsite(#loc114 at #loc123)) | |
#loc296 = loc(callsite(#loc115 at #loc123)) | |
#loc297 = loc(callsite(#loc116 at #loc123)) | |
#loc298 = loc(callsite(#loc36 at #loc130)) | |
#loc299 = loc(callsite(#loc38 at #loc130)) | |
#loc300 = loc(callsite(#loc39 at #loc130)) | |
#loc301 = loc(callsite(#loc40 at #loc130)) | |
#loc302 = loc(callsite(#loc41 at #loc130)) | |
#loc303 = loc(callsite(#loc42 at #loc130)) | |
#loc304 = loc(callsite(#loc39 at #loc131)) | |
#loc305 = loc(callsite(#loc41 at #loc131)) | |
#loc306 = loc(callsite(#loc42 at #loc131)) | |
#loc307 = loc(callsite(#loc148 at #loc149)) | |
#loc308 = loc(callsite(#loc150 at #loc149)) | |
#loc309 = loc(callsite(#loc151 at #loc149)) | |
#loc310 = loc(callsite(#loc152 at #loc149)) | |
#loc311 = loc(callsite(#loc153 at #loc149)) | |
#loc312 = loc(callsite(#loc154 at #loc149)) | |
#loc313 = loc(callsite(#loc155 at #loc149)) | |
#loc314 = loc(callsite(#loc156 at #loc149)) | |
#loc315 = loc(callsite(#loc157 at #loc149)) | |
#loc316 = loc(callsite(#loc158 at #loc149)) | |
#loc317 = loc(callsite(#loc159 at #loc149)) | |
#loc318 = loc(callsite(#loc160 at #loc161)) | |
#loc319 = loc(callsite(#loc162 at #loc161)) | |
#loc320 = loc(callsite(#loc163 at #loc161)) | |
#loc321 = loc(callsite(#loc164 at #loc161)) | |
#loc322 = loc(callsite(#loc165 at #loc161)) | |
#loc323 = loc(callsite(#loc166 at #loc149)) | |
#loc324 = loc(callsite(#loc42 at #loc167)) | |
#loc325 = loc(callsite(#loc168 at #loc161)) | |
#loc326 = loc(callsite(#loc169 at #loc161)) | |
#loc327 = loc(callsite(#loc170 at #loc161)) | |
#loc328 = loc(callsite(#loc171 at #loc161)) | |
#loc329 = loc(callsite(#loc172 at #loc161)) | |
#loc330 = loc(callsite(#loc173 at #loc161)) | |
#loc331 = loc(callsite(#loc174 at #loc161)) | |
#loc332 = loc(callsite(#loc175 at #loc161)) | |
#loc333 = loc(callsite(#loc176 at #loc161)) | |
#loc334 = loc(callsite(#loc177 at #loc161)) | |
#loc335 = loc(callsite(#loc178 at #loc161)) | |
#loc336 = loc(callsite(#loc179 at #loc161)) | |
#loc337 = loc(callsite(#loc180 at #loc161)) | |
#loc338 = loc(callsite(#loc181 at #loc161)) | |
#loc339 = loc(callsite(#loc182 at #loc161)) | |
#loc340 = loc(callsite(#loc42 at #loc183)) | |
#loc341 = loc(callsite(#loc184 at #loc161)) | |
#loc342 = loc(callsite(#loc185 at #loc161)) | |
#loc343 = loc(callsite(#loc186 at #loc161)) | |
#loc344 = loc(callsite(#loc187 at #loc161)) | |
#loc345 = loc(callsite(#loc188 at #loc161)) | |
#loc346 = loc(callsite(#loc189 at #loc161)) | |
#loc347 = loc(callsite(#loc190 at #loc161)) | |
#loc348 = loc(callsite(#loc191 at #loc161)) | |
#loc349 = loc(callsite(#loc192 at #loc161)) | |
#loc350 = loc(callsite(#loc193 at #loc161)) | |
#loc351 = loc(callsite(#loc194 at #loc161)) | |
#loc352 = loc(callsite(#loc195 at #loc161)) | |
#loc353 = loc(callsite(#loc94 at #loc196)) | |
#loc354 = loc(callsite(#loc96 at #loc196)) | |
#loc355 = loc(callsite(#loc97 at #loc196)) | |
#loc356 = loc(callsite(#loc98 at #loc196)) | |
#loc357 = loc(callsite(#loc99 at #loc196)) | |
#loc358 = loc(callsite(#loc100 at #loc196)) | |
#loc359 = loc(callsite(#loc101 at #loc196)) | |
#loc360 = loc(callsite(#loc102 at #loc196)) | |
#loc361 = loc(callsite(#loc103 at #loc196)) | |
#loc362 = loc(callsite(#loc104 at #loc196)) | |
#loc363 = loc(callsite(#loc105 at #loc196)) | |
#loc364 = loc(callsite(#loc106 at #loc196)) | |
#loc365 = loc(callsite(#loc107 at #loc196)) | |
#loc366 = loc(callsite(#loc108 at #loc196)) | |
#loc367 = loc(callsite(#loc109 at #loc196)) | |
#loc368 = loc(callsite(#loc110 at #loc196)) | |
#loc369 = loc(callsite(#loc111 at #loc196)) | |
#loc370 = loc(callsite(#loc197 at #loc149)) | |
#loc371 = loc(callsite(#loc198 at #loc149)) | |
#loc372 = loc(callsite(#loc199 at #loc149)) | |
#loc373 = loc(callsite(#loc200 at #loc149)) | |
#loc374 = loc(callsite(#loc201 at #loc149)) | |
#loc375 = loc(callsite(#loc148 at #loc208)) | |
#loc376 = loc(callsite(#loc150 at #loc208)) | |
#loc377 = loc(callsite(#loc151 at #loc208)) | |
#loc378 = loc(callsite(#loc153 at #loc208)) | |
#loc379 = loc(callsite(#loc154 at #loc208)) | |
#loc380 = loc(callsite(#loc155 at #loc208)) | |
#loc381 = loc(callsite(#loc156 at #loc208)) | |
#loc382 = loc(callsite(#loc157 at #loc208)) | |
#loc383 = loc(callsite(#loc158 at #loc208)) | |
#loc384 = loc(callsite(#loc159 at #loc208)) | |
#loc385 = loc(callsite(#loc166 at #loc208)) | |
#loc386 = loc(callsite(#loc197 at #loc208)) | |
#loc387 = loc(callsite(#loc198 at #loc208)) | |
#loc388 = loc(callsite(#loc199 at #loc208)) | |
#loc389 = loc(callsite(#loc200 at #loc208)) | |
#loc390 = loc(callsite(#loc201 at #loc208)) | |
#loc391 = loc(callsite(#loc237 at #loc59)) | |
#loc392 = loc(callsite(#loc238 at #loc59)) | |
#loc393 = loc(callsite(#loc239 at #loc59)) | |
#loc394 = loc(callsite(#loc240 at #loc59)) | |
#loc395 = loc(callsite(#loc241 at #loc59)) | |
#loc396 = loc(callsite(#loc242 at #loc59)) | |
#loc397 = loc(callsite(#loc244 at #loc69)) | |
#loc398 = loc(callsite(#loc245 at #loc59)) | |
#loc399 = loc(callsite(#loc246 at #loc59)) | |
#loc400 = loc(callsite(#loc247 at #loc59)) | |
#loc401 = loc(callsite(#loc248 at #loc59)) | |
#loc402 = loc(callsite(#loc249 at #loc59)) | |
#loc403 = loc(callsite(#loc250 at #loc59)) | |
#loc404 = loc(callsite(#loc251 at #loc59)) | |
#loc405 = loc(callsite(#loc252 at #loc59)) | |
#loc406 = loc(callsite(#loc253 at #loc59)) | |
#loc407 = loc(callsite(#loc254 at #loc59)) | |
#loc408 = loc(callsite(#loc255 at #loc69)) | |
#loc409 = loc(callsite(#loc256 at #loc59)) | |
#loc410 = loc(callsite(#loc257 at #loc59)) | |
#loc411 = loc(callsite(#loc258 at #loc59)) | |
#loc412 = loc(callsite(#loc259 at #loc59)) | |
#loc413 = loc(callsite(#loc260 at #loc59)) | |
#loc414 = loc(callsite(#loc261 at #loc59)) | |
#loc415 = loc(callsite(#loc262 at #loc59)) | |
#loc416 = loc(callsite(#loc263 at #loc59)) | |
#loc417 = loc(callsite(#loc264 at #loc59)) | |
#loc418 = loc(callsite(#loc265 at #loc59)) | |
#loc419 = loc(callsite(#loc266 at #loc59)) | |
#loc420 = loc(callsite(#loc267 at #loc59)) | |
#loc421 = loc(callsite(#loc268 at #loc59)) | |
#loc422 = loc(callsite(#loc269 at #loc59)) | |
#loc423 = loc(callsite(#loc270 at #loc59)) | |
#loc424 = loc(callsite(#loc271 at #loc59)) | |
#loc425 = loc(callsite(#loc272 at #loc59)) | |
#loc426 = loc(callsite(#loc273 at #loc59)) | |
#loc427 = loc(callsite(#loc274 at #loc59)) | |
#loc428 = loc(callsite(#loc275 at #loc59)) | |
#loc429 = loc(callsite(#loc276 at #loc59)) | |
#loc430 = loc(callsite(#loc277 at #loc59)) | |
#loc431 = loc(callsite(#loc278 at #loc59)) | |
#loc432 = loc(callsite(#loc240 at #loc123)) | |
#loc433 = loc(callsite(#loc241 at #loc123)) | |
#loc434 = loc(callsite(#loc242 at #loc123)) | |
#loc435 = loc(callsite(#loc245 at #loc123)) | |
#loc436 = loc(callsite(#loc246 at #loc123)) | |
#loc437 = loc(callsite(#loc253 at #loc123)) | |
#loc438 = loc(callsite(#loc254 at #loc123)) | |
#loc439 = loc(callsite(#loc256 at #loc123)) | |
#loc440 = loc(callsite(#loc257 at #loc123)) | |
#loc441 = loc(callsite(#loc259 at #loc123)) | |
#loc442 = loc(callsite(#loc260 at #loc123)) | |
#loc443 = loc(callsite(#loc261 at #loc123)) | |
#loc444 = loc(callsite(#loc262 at #loc123)) | |
#loc445 = loc(callsite(#loc263 at #loc123)) | |
#loc446 = loc(callsite(#loc264 at #loc123)) | |
#loc447 = loc(callsite(#loc265 at #loc123)) | |
#loc448 = loc(callsite(#loc266 at #loc123)) | |
#loc449 = loc(callsite(#loc267 at #loc123)) | |
#loc450 = loc(callsite(#loc268 at #loc123)) | |
#loc451 = loc(callsite(#loc269 at #loc123)) | |
#loc452 = loc(callsite(#loc270 at #loc123)) | |
#loc453 = loc(callsite(#loc271 at #loc123)) | |
#loc454 = loc(callsite(#loc272 at #loc123)) | |
#loc455 = loc(callsite(#loc273 at #loc123)) | |
#loc456 = loc(callsite(#loc274 at #loc123)) | |
#loc457 = loc(callsite(#loc275 at #loc123)) | |
#loc458 = loc(callsite(#loc276 at #loc123)) | |
#loc459 = loc(callsite(#loc277 at #loc123)) | |
#loc460 = loc(callsite(#loc278 at #loc123)) | |
#loc461 = loc(callsite(#loc318 at #loc149)) | |
#loc462 = loc(callsite(#loc319 at #loc149)) | |
#loc463 = loc(callsite(#loc320 at #loc149)) | |
#loc464 = loc(callsite(#loc321 at #loc149)) | |
#loc465 = loc(callsite(#loc322 at #loc149)) | |
#loc466 = loc(callsite(#loc324 at #loc161)) | |
#loc467 = loc(callsite(#loc325 at #loc149)) | |
#loc468 = loc(callsite(#loc326 at #loc149)) | |
#loc469 = loc(callsite(#loc327 at #loc149)) | |
#loc470 = loc(callsite(#loc328 at #loc149)) | |
#loc471 = loc(callsite(#loc329 at #loc149)) | |
#loc472 = loc(callsite(#loc330 at #loc149)) | |
#loc473 = loc(callsite(#loc331 at #loc149)) | |
#loc474 = loc(callsite(#loc332 at #loc149)) | |
#loc475 = loc(callsite(#loc333 at #loc149)) | |
#loc476 = loc(callsite(#loc334 at #loc149)) | |
#loc477 = loc(callsite(#loc335 at #loc149)) | |
#loc478 = loc(callsite(#loc336 at #loc149)) | |
#loc479 = loc(callsite(#loc337 at #loc149)) | |
#loc480 = loc(callsite(#loc338 at #loc149)) | |
#loc481 = loc(callsite(#loc339 at #loc149)) | |
#loc482 = loc(callsite(#loc340 at #loc161)) | |
#loc483 = loc(callsite(#loc341 at #loc149)) | |
#loc484 = loc(callsite(#loc342 at #loc149)) | |
#loc485 = loc(callsite(#loc343 at #loc149)) | |
#loc486 = loc(callsite(#loc344 at #loc149)) | |
#loc487 = loc(callsite(#loc345 at #loc149)) | |
#loc488 = loc(callsite(#loc346 at #loc149)) | |
#loc489 = loc(callsite(#loc347 at #loc149)) | |
#loc490 = loc(callsite(#loc348 at #loc149)) | |
#loc491 = loc(callsite(#loc349 at #loc149)) | |
#loc492 = loc(callsite(#loc350 at #loc149)) | |
#loc493 = loc(callsite(#loc351 at #loc149)) | |
#loc494 = loc(callsite(#loc352 at #loc149)) | |
#loc495 = loc(callsite(#loc353 at #loc149)) | |
#loc496 = loc(callsite(#loc354 at #loc149)) | |
#loc497 = loc(callsite(#loc355 at #loc149)) | |
#loc498 = loc(callsite(#loc356 at #loc149)) | |
#loc499 = loc(callsite(#loc357 at #loc149)) | |
#loc500 = loc(callsite(#loc358 at #loc149)) | |
#loc501 = loc(callsite(#loc359 at #loc149)) | |
#loc502 = loc(callsite(#loc360 at #loc149)) | |
#loc503 = loc(callsite(#loc361 at #loc149)) | |
#loc504 = loc(callsite(#loc362 at #loc149)) | |
#loc505 = loc(callsite(#loc363 at #loc149)) | |
#loc506 = loc(callsite(#loc364 at #loc149)) | |
#loc507 = loc(callsite(#loc365 at #loc149)) | |
#loc508 = loc(callsite(#loc366 at #loc149)) | |
#loc509 = loc(callsite(#loc367 at #loc149)) | |
#loc510 = loc(callsite(#loc368 at #loc149)) | |
#loc511 = loc(callsite(#loc369 at #loc149)) | |
#loc512 = loc(callsite(#loc318 at #loc208)) | |
#loc513 = loc(callsite(#loc322 at #loc208)) | |
#loc514 = loc(callsite(#loc325 at #loc208)) | |
#loc515 = loc(callsite(#loc326 at #loc208)) | |
#loc516 = loc(callsite(#loc327 at #loc208)) | |
#loc517 = loc(callsite(#loc328 at #loc208)) | |
#loc518 = loc(callsite(#loc329 at #loc208)) | |
#loc519 = loc(callsite(#loc336 at #loc208)) | |
#loc520 = loc(callsite(#loc337 at #loc208)) | |
#loc521 = loc(callsite(#loc338 at #loc208)) | |
#loc522 = loc(callsite(#loc339 at #loc208)) | |
#loc523 = loc(callsite(#loc341 at #loc208)) | |
#loc524 = loc(callsite(#loc342 at #loc208)) | |
#loc525 = loc(callsite(#loc343 at #loc208)) | |
#loc526 = loc(callsite(#loc344 at #loc208)) | |
#loc527 = loc(callsite(#loc345 at #loc208)) | |
#loc528 = loc(callsite(#loc346 at #loc208)) | |
#loc529 = loc(callsite(#loc347 at #loc208)) | |
#loc530 = loc(callsite(#loc348 at #loc208)) | |
#loc531 = loc(callsite(#loc350 at #loc208)) | |
#loc532 = loc(callsite(#loc351 at #loc208)) | |
#loc533 = loc(callsite(#loc352 at #loc208)) | |
#loc534 = loc(callsite(#loc353 at #loc208)) | |
#loc535 = loc(callsite(#loc354 at #loc208)) | |
#loc536 = loc(callsite(#loc355 at #loc208)) | |
#loc537 = loc(callsite(#loc356 at #loc208)) | |
#loc538 = loc(callsite(#loc357 at #loc208)) | |
#loc539 = loc(callsite(#loc358 at #loc208)) | |
#loc540 = loc(callsite(#loc359 at #loc208)) | |
#loc541 = loc(callsite(#loc360 at #loc208)) | |
#loc542 = loc(callsite(#loc361 at #loc208)) | |
#loc543 = loc(callsite(#loc362 at #loc208)) | |
#loc544 = loc(callsite(#loc363 at #loc208)) | |
#loc545 = loc(callsite(#loc364 at #loc208)) | |
#loc546 = loc(callsite(#loc365 at #loc208)) | |
#loc547 = loc(callsite(#loc366 at #loc208)) | |
#loc548 = loc(callsite(#loc367 at #loc208)) | |
#loc549 = loc(callsite(#loc368 at #loc208)) | |
#loc550 = loc(callsite(#loc369 at #loc208)) | |
#loc551 = loc(callsite(#loc397 at #loc59)) | |
#loc552 = loc(callsite(#loc408 at #loc59)) | |
#loc553 = loc(callsite(#loc397 at #loc123)) | |
#loc554 = loc(callsite(#loc408 at #loc123)) | |
#loc555 = loc(callsite(#loc466 at #loc149)) | |
#loc556 = loc(callsite(#loc482 at #loc149)) | |
#loc557 = loc(callsite(#loc466 at #loc208)) | |
#loc558 = loc(callsite(#loc482 at #loc208)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#loc = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0) | |
module { | |
tt.func public @triton_tem_fused_zeros_7(%arg0: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg1: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg2: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg3: !tt.ptr<f32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg4: !tt.ptr<f32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg5: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg6: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg7: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg8: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg9: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg10: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg11: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg12: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg13: !tt.ptr<i32> 
{tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg14: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg15: !tt.ptr<i32> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg16: !tt.ptr<i64> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0), %arg17: !tt.ptr<bf16> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":16:0)) attributes {noinline = false} { | |
%cst = arith.constant dense<1024> : tensor<64x1xi32> loc(#loc1) | |
%cst_0 = arith.constant dense<1024> : tensor<1x64xi32> loc(#loc1) | |
%cst_1 = arith.constant dense<0.000000e+00> : tensor<64xf32> loc(#loc1) | |
%cst_2 = arith.constant dense<0xFF800000> : tensor<64xf32> loc(#loc1) | |
%c2_i32 = arith.constant 2 : i32 loc(#loc1) | |
%c0_i32 = arith.constant 0 : i32 loc(#loc1) | |
%c64_i32 = arith.constant 64 : i32 loc(#loc1) | |
%cst_3 = arith.constant dense<1.44269502> : tensor<128x64xf32> loc(#loc1) | |
%cst_4 = arith.constant dense<0xFF800000> : tensor<128x64xf32> loc(#loc1) | |
%cst_5 = arith.constant dense<1.200000e-01> : tensor<128x64xf32> loc(#loc1) | |
%cst_6 = arith.constant dense<0.000000e+00> : tensor<128x64xf32> loc(#loc1) | |
%cst_7 = arith.constant dense<0.000000e+00> : tensor<128x128xf32> loc(#loc1) | |
%cst_8 = arith.constant dense<65536> : tensor<128x1xi32> loc(#loc1) | |
%cst_9 = arith.constant dense<1.200000e-01> : tensor<128x128xf32> loc(#loc1) | |
%cst_10 = arith.constant dense<1024> : tensor<128x1xi32> loc(#loc1) | |
%cst_11 = arith.constant dense<0.000000e+00> : tensor<128xf32> loc(#loc1) | |
%cst_12 = arith.constant dense<0xFF800000> : tensor<128xf32> loc(#loc1) | |
%c512_i32 = arith.constant 512 : i32 loc(#loc1) | |
%c67108864_i32 = arith.constant 67108864 : i32 loc(#loc1) | |
%c128_i32 = arith.constant 128 : i32 loc(#loc1) | |
%c1024_i32 = arith.constant 1024 : i32 loc(#loc1) | |
%c1_i32 = arith.constant 1 : i32 loc(#loc1) | |
%c8_i32 = arith.constant 8 : i32 loc(#loc1) | |
%c65536_i32 = arith.constant 65536 : i32 loc(#loc1) | |
%0 = tt.get_program_id x : i32 loc(#loc2) | |
%1 = tt.get_program_id z : i32 loc(#loc3) | |
%2 = arith.divsi %1, %c8_i32 : i32 loc(#loc4) | |
%3 = arith.remsi %1, %c8_i32 : i32 loc(#loc5) | |
%4 = arith.muli %3, %c128_i32 : i32 loc(#loc6) | |
%5 = arith.extsi %4 : i32 to i64 loc(#loc7) | |
%6 = arith.muli %2, %c67108864_i32 : i32 loc(#loc8) | |
%7 = arith.addi %4, %6 : i32 loc(#loc9) | |
%8 = arith.extsi %7 : i32 to i64 loc(#loc10) | |
%9 = tt.addptr %arg1, %5 : !tt.ptr<bf16>, i64 loc(#loc11) | |
%10 = tt.addptr %arg2, %5 : !tt.ptr<bf16>, i64 loc(#loc12) | |
%11 = tt.addptr %arg7, %8 : !tt.ptr<bf16>, i64 loc(#loc13) | |
%12 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> loc(#loc14) | |
%13 = arith.cmpi sge, %0, %c512_i32 : i32 loc(#loc15) | |
scf.if %13 { | |
%14 = arith.subi %0, %c512_i32 : i32 loc(#loc17) | |
%15 = arith.divsi %14, %c512_i32 : i32 loc(#loc18) | |
%16 = arith.addi %15, %3 : i32 loc(#loc19) | |
%17 = arith.remsi %14, %c512_i32 : i32 loc(#loc20) | |
%18 = arith.muli %17, %c512_i32 : i32 loc(#loc21) | |
%19 = arith.muli %16, %c128_i32 : i32 loc(#loc22) | |
%20 = arith.addi %19, %6 : i32 loc(#loc23) | |
%21 = arith.extsi %20 : i32 to i64 loc(#loc24) | |
%22 = arith.muli %2, %c8_i32 : i32 loc(#loc25) | |
%23 = arith.addi %22, %16 : i32 loc(#loc26) | |
%24 = arith.muli %23, %c65536_i32 : i32 loc(#loc27) | |
%25 = arith.extsi %24 : i32 to i64 loc(#loc28) | |
%26 = tt.addptr %arg0, %21 : !tt.ptr<bf16>, i64 loc(#loc29) | |
%27 = tt.addptr %arg5, %21 : !tt.ptr<bf16>, i64 loc(#loc30) | |
%28 = tt.addptr %arg6, %21 : !tt.ptr<bf16>, i64 loc(#loc31) | |
%29 = tt.addptr %arg3, %25 : !tt.ptr<f32>, i64 loc(#loc32) | |
%30 = tt.addptr %arg4, %25 : !tt.ptr<f32>, i64 loc(#loc33) | |
%31 = arith.muli %17, %c128_i32 : i32 loc(#loc34) | |
%32 = tt.splat %31 : i32 -> tensor<128xi32> loc(#loc35) | |
%33 = arith.addi %32, %12 : tensor<128xi32> loc(#loc35) | |
%34 = tt.expand_dims %33 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc219) | |
%35 = arith.muli %34, %cst_10 : tensor<128x1xi32> loc(#loc220) | |
%36 = tt.splat %26 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc221) | |
%37 = tt.addptr %36, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc221) | |
%38 = tt.expand_dims %12 {axis = 0 : i32} : tensor<128xi32> -> tensor<1x128xi32> loc(#loc222) | |
%39 = tt.broadcast %37 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc223) | |
%40 = tt.broadcast %38 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc223) | |
%41 = tt.addptr %39, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc223) | |
%42 = tt.load %41 : tensor<128x128x!tt.ptr<bf16>> loc(#loc224) | |
%43 = tt.splat %27 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc225) | |
%44 = tt.addptr %43, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc225) | |
%45 = tt.broadcast %44 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc226) | |
%46 = tt.addptr %45, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc226) | |
%47 = tt.load %46 : tensor<128x128x!tt.ptr<bf16>> loc(#loc227) | |
%48 = tt.splat %30 : !tt.ptr<f32> -> tensor<128x!tt.ptr<f32>> loc(#loc44) | |
%49 = tt.addptr %48, %33 : tensor<128x!tt.ptr<f32>>, tensor<128xi32> loc(#loc44) | |
%50 = tt.load %49 : tensor<128x!tt.ptr<f32>> loc(#loc45) | |
%51 = tt.splat %29 : !tt.ptr<f32> -> tensor<128x!tt.ptr<f32>> loc(#loc46) | |
%52 = tt.addptr %51, %33 : tensor<128x!tt.ptr<f32>>, tensor<128xi32> loc(#loc46) | |
%53 = tt.load %52 : tensor<128x!tt.ptr<f32>> loc(#loc47) | |
%54 = arith.cmpf oeq, %53, %cst_12 : tensor<128xf32> loc(#loc48) | |
%55 = arith.select %54, %cst_11, %53 : tensor<128xi1>, tensor<128xf32> loc(#loc49) | |
%56 = tt.expand_dims %55 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc50) | |
%57 = tt.addptr %arg9, %18 : !tt.ptr<i32>, i32 loc(#loc51) | |
%58 = tt.load %57 : !tt.ptr<i32> loc(#loc52) | |
%59 = arith.muli %58, %c128_i32 : i32 loc(#loc53) | |
%60 = tt.addptr %arg8, %17 : !tt.ptr<i32>, i32 loc(#loc54) | |
%61 = tt.load %60 : !tt.ptr<i32> loc(#loc55) | |
%62 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32> loc(#loc56) | |
%63 = tt.splat %59 : i32 -> tensor<64xi32> loc(#loc57) | |
%64 = arith.addi %63, %62 : tensor<64xi32> loc(#loc57) | |
%65 = tt.expand_dims %64 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc228) | |
%66 = arith.muli %65, %cst_0 : tensor<1x64xi32> loc(#loc229) | |
%67 = tt.splat %9 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc230) | |
%68 = tt.addptr %67, %66 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc230) | |
%69 = tt.expand_dims %12 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc231) | |
%70 = tt.broadcast %68 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc232) | |
%71 = tt.broadcast %69 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc232) | |
%72 = tt.addptr %70, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc232) | |
%73 = tt.splat %10 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc233) | |
%74 = tt.addptr %73, %66 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc233) | |
%75 = tt.broadcast %74 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc234) | |
%76 = tt.addptr %75, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc234) | |
%77 = arith.muli %61, %c2_i32 : i32 loc(#loc235) | |
%78 = arith.minsi %77, %c1024_i32 : i32 loc(#loc236) | |
%79:4 = scf.for %arg18 = %c0_i32 to %78 step %c1_i32 iter_args(%arg19 = %cst_7, %arg20 = %72, %arg21 = %76, %arg22 = %64) -> (tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%104 = tt.load %arg20 : tensor<128x64x!tt.ptr<bf16>> loc(#loc551) | |
%105 = tt.dot %42, %104, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc392) | |
%106 = arith.mulf %105, %cst_5 : tensor<128x64xf32> loc(#loc393) | |
%107 = tt.expand_dims %arg22 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc394) | |
%108 = tt.broadcast %34 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc395) | |
%109 = tt.broadcast %107 : tensor<1x64xi32> -> tensor<128x64xi32> loc(#loc395) | |
%110 = arith.cmpi sge, %108, %109 : tensor<128x64xi32> loc(#loc395) | |
%111 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<128x1x!tt.ptr<i64>> loc(#loc396) | |
%112 = tt.addptr %111, %34 : tensor<128x1x!tt.ptr<i64>>, tensor<128x1xi32> loc(#loc396) | |
%113 = tt.load %112 : tensor<128x1x!tt.ptr<i64>> loc(#loc397) | |
%114 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<1x64x!tt.ptr<i64>> loc(#loc398) | |
%115 = tt.addptr %114, %107 : tensor<1x64x!tt.ptr<i64>>, tensor<1x64xi32> loc(#loc398) | |
%116 = tt.load %115 : tensor<1x64x!tt.ptr<i64>> loc(#loc399) | |
%117 = tt.broadcast %113 : tensor<128x1xi64> -> tensor<128x64xi64> loc(#loc400) | |
%118 = tt.broadcast %116 : tensor<1x64xi64> -> tensor<128x64xi64> loc(#loc400) | |
%119 = arith.cmpi eq, %117, %118 : tensor<128x64xi64> loc(#loc400) | |
%120 = arith.andi %110, %119 : tensor<128x64xi1> loc(#loc401) | |
%121 = arith.select %120, %106, %cst_4 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc402) | |
%122 = arith.mulf %121, %cst_3 : tensor<128x64xf32> loc(#loc403) | |
%123 = tt.broadcast %56 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc404) | |
%124 = arith.subf %122, %123 : tensor<128x64xf32> loc(#loc404) | |
%125 = math.exp2 %124 : tensor<128x64xf32> loc(#loc405) | |
%126 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc552) | |
%127 = tt.dot %47, %126, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc407) | |
%128 = tt.expand_dims %50 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc408) | |
%129 = tt.broadcast %128 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc409) | |
%130 = arith.subf %127, %129 : tensor<128x64xf32> loc(#loc409) | |
%131 = arith.mulf %125, %130 : tensor<128x64xf32> loc(#loc410) | |
%132 = arith.select %120, %131, %cst_6 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc411) | |
%133 = arith.truncf %132 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc412) | |
%134 = tt.trans %104 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc413) | |
%135 = tt.dot %133, %134, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc414) | |
%136 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc415) | |
%137 = tt.addptr %57, %136 : !tt.ptr<i32>, i32 loc(#loc416) | |
%138 = tt.load %137 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc417) | |
%139 = arith.addi %136, %c1_i32 : i32 loc(#loc418) | |
%140 = arith.cmpi slt, %139, %61 : i32 loc(#loc419) | |
%141 = tt.addptr %137, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc420) | |
%142 = tt.load %141, %140 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc421) | |
%143 = arith.addi %arg18, %c1_i32 : i32 loc(#loc422) | |
%144 = arith.remsi %143, %c2_i32 : i32 loc(#loc423) | |
%145 = arith.cmpi eq, %144, %c0_i32 : i32 loc(#loc424) | |
%146 = arith.subi %142, %138 : i32 loc(#loc425) | |
%147 = arith.muli %146, %c128_i32 : i32 loc(#loc426) | |
%148 = arith.subi %147, %c64_i32 : i32 loc(#loc427) | |
%149 = arith.extui %145 : i1 to i32 loc(#loc428) | |
%150 = arith.muli %148, %149 : i32 loc(#loc428) | |
%151 = arith.subi %c1_i32, %149 : i32 loc(#loc429) | |
%152 = arith.muli %151, %c64_i32 : i32 loc(#loc430) | |
%153 = arith.addi %150, %152 : i32 loc(#loc431) | |
%154 = arith.muli %153, %c1024_i32 : i32 loc(#loc279) | |
%155 = tt.splat %154 : i32 -> tensor<128x64xi32> loc(#loc280) | |
%156 = tt.addptr %arg20, %155 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc280) | |
%157 = tt.addptr %arg21, %155 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc281) | |
%158 = tt.splat %153 : i32 -> tensor<64xi32> loc(#loc282) | |
%159 = arith.addi %arg22, %158 : tensor<64xi32> loc(#loc282) | |
scf.yield %135, %156, %157, %159 : tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc283) | |
} loc(#loc237) | |
%80 = tt.addptr %arg13, %18 : !tt.ptr<i32>, i32 loc(#loc117) | |
%81 = tt.load %80 : !tt.ptr<i32> loc(#loc118) | |
%82 = arith.muli %81, %c128_i32 : i32 loc(#loc119) | |
%83 = tt.addptr %arg12, %17 : !tt.ptr<i32>, i32 loc(#loc120) | |
%84 = tt.load %83 : !tt.ptr<i32> loc(#loc121) | |
%85 = tt.splat %82 : i32 -> tensor<64xi32> loc(#loc122) | |
%86 = arith.addi %85, %62 : tensor<64xi32> loc(#loc122) | |
%87 = tt.expand_dims %86 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc284) | |
%88 = arith.muli %87, %cst_0 : tensor<1x64xi32> loc(#loc285) | |
%89 = tt.addptr %67, %88 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc286) | |
%90 = tt.broadcast %89 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc287) | |
%91 = tt.addptr %90, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc287) | |
%92 = tt.addptr %73, %88 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc288) | |
%93 = tt.broadcast %92 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc289) | |
%94 = tt.addptr %93, %71 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc289) | |
%95 = arith.muli %84, %c2_i32 : i32 loc(#loc290) | |
%96 = arith.minsi %95, %c1024_i32 : i32 loc(#loc291) | |
%97:4 = scf.for %arg18 = %c0_i32 to %96 step %c1_i32 iter_args(%arg19 = %79#0, %arg20 = %91, %arg21 = %94, %arg22 = %86) -> (tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%104 = tt.load %arg20 : tensor<128x64x!tt.ptr<bf16>> loc(#loc553) | |
%105 = tt.dot %42, %104, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc432) | |
%106 = arith.mulf %105, %cst_5 : tensor<128x64xf32> loc(#loc433) | |
%107 = arith.mulf %106, %cst_3 : tensor<128x64xf32> loc(#loc434) | |
%108 = tt.broadcast %56 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc435) | |
%109 = arith.subf %107, %108 : tensor<128x64xf32> loc(#loc435) | |
%110 = math.exp2 %109 : tensor<128x64xf32> loc(#loc436) | |
%111 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc554) | |
%112 = tt.dot %47, %111, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc437) | |
%113 = tt.expand_dims %50 {axis = 1 : i32} : tensor<128xf32> -> tensor<128x1xf32> loc(#loc438) | |
%114 = tt.broadcast %113 : tensor<128x1xf32> -> tensor<128x64xf32> loc(#loc439) | |
%115 = arith.subf %112, %114 : tensor<128x64xf32> loc(#loc439) | |
%116 = arith.mulf %110, %115 : tensor<128x64xf32> loc(#loc440) | |
%117 = arith.truncf %116 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc441) | |
%118 = tt.trans %104 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc442) | |
%119 = tt.dot %117, %118, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc443) | |
%120 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc444) | |
%121 = tt.addptr %80, %120 : !tt.ptr<i32>, i32 loc(#loc445) | |
%122 = tt.load %121 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc446) | |
%123 = arith.addi %120, %c1_i32 : i32 loc(#loc447) | |
%124 = arith.cmpi slt, %123, %84 : i32 loc(#loc448) | |
%125 = tt.addptr %121, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc449) | |
%126 = tt.load %125, %124 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc450) | |
%127 = arith.addi %arg18, %c1_i32 : i32 loc(#loc451) | |
%128 = arith.remsi %127, %c2_i32 : i32 loc(#loc452) | |
%129 = arith.cmpi eq, %128, %c0_i32 : i32 loc(#loc453) | |
%130 = arith.subi %126, %122 : i32 loc(#loc454) | |
%131 = arith.muli %130, %c128_i32 : i32 loc(#loc455) | |
%132 = arith.subi %131, %c64_i32 : i32 loc(#loc456) | |
%133 = arith.extui %129 : i1 to i32 loc(#loc457) | |
%134 = arith.muli %132, %133 : i32 loc(#loc457) | |
%135 = arith.subi %c1_i32, %133 : i32 loc(#loc458) | |
%136 = arith.muli %135, %c64_i32 : i32 loc(#loc459) | |
%137 = arith.addi %134, %136 : i32 loc(#loc460) | |
%138 = arith.muli %137, %c1024_i32 : i32 loc(#loc293) | |
%139 = tt.splat %138 : i32 -> tensor<128x64xi32> loc(#loc294) | |
%140 = tt.addptr %arg20, %139 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc294) | |
%141 = tt.addptr %arg21, %139 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc295) | |
%142 = tt.splat %137 : i32 -> tensor<64xi32> loc(#loc296) | |
%143 = arith.addi %arg22, %142 : tensor<64xi32> loc(#loc296) | |
scf.yield %119, %140, %141, %143 : tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<128x64x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc297) | |
} loc(#loc292) | |
%98 = tt.splat %28 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc124) | |
%99 = tt.addptr %98, %35 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc124) | |
%100 = tt.broadcast %99 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc125) | |
%101 = tt.addptr %100, %40 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc125) | |
%102 = arith.mulf %97#0, %cst_9 : tensor<128x128xf32> loc(#loc126) | |
%103 = arith.truncf %102 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc127) | |
tt.store %101, %103 : tensor<128x128x!tt.ptr<bf16>> loc(#loc127) | |
} else { | |
%14 = arith.muli %0, %c128_i32 : i32 loc(#loc128) | |
%15 = tt.splat %14 : i32 -> tensor<128xi32> loc(#loc129) | |
%16 = arith.addi %15, %12 : tensor<128xi32> loc(#loc129) | |
%17 = tt.expand_dims %16 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc298) | |
%18 = arith.muli %17, %cst_10 : tensor<128x1xi32> loc(#loc299) | |
%19 = tt.splat %9 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc300) | |
%20 = tt.addptr %19, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc300) | |
%21 = tt.expand_dims %12 {axis = 0 : i32} : tensor<128xi32> -> tensor<1x128xi32> loc(#loc301) | |
%22 = tt.broadcast %20 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc302) | |
%23 = tt.broadcast %21 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc302) | |
%24 = tt.addptr %22, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc302) | |
%25 = tt.load %24 : tensor<128x128x!tt.ptr<bf16>> loc(#loc303) | |
%26 = tt.splat %10 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc304) | |
%27 = tt.addptr %26, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc304) | |
%28 = tt.broadcast %27 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc305) | |
%29 = tt.addptr %28, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc305) | |
%30 = tt.load %29 : tensor<128x128x!tt.ptr<bf16>> loc(#loc306) | |
%31 = arith.muli %2, %c8_i32 : i32 loc(#loc132) | |
%32 = arith.addi %31, %3 : i32 loc(#loc133) | |
%33 = arith.muli %32, %c65536_i32 : i32 loc(#loc134) | |
%34 = arith.extsi %33 : i32 to i64 loc(#loc135) | |
%35 = tt.addptr %arg0, %8 : !tt.ptr<bf16>, i64 loc(#loc136) | |
%36 = tt.addptr %arg5, %8 : !tt.ptr<bf16>, i64 loc(#loc137) | |
%37 = tt.addptr %arg3, %34 : !tt.ptr<f32>, i64 loc(#loc138) | |
%38 = tt.addptr %arg4, %34 : !tt.ptr<f32>, i64 loc(#loc139) | |
%39 = arith.muli %0, %c512_i32 : i32 loc(#loc140) | |
%40 = tt.addptr %arg11, %39 : !tt.ptr<i32>, i32 loc(#loc141) | |
%41 = tt.load %40 : !tt.ptr<i32> loc(#loc142) | |
%42 = arith.muli %41, %c128_i32 : i32 loc(#loc143) | |
%43 = tt.addptr %arg10, %0 : !tt.ptr<i32>, i32 loc(#loc144) | |
%44 = tt.load %43 : !tt.ptr<i32> loc(#loc145) | |
%45 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32> loc(#loc146) | |
%46 = tt.splat %42 : i32 -> tensor<64xi32> loc(#loc147) | |
%47 = arith.addi %46, %45 : tensor<64xi32> loc(#loc147) | |
%48 = tt.expand_dims %47 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc307) | |
%49 = arith.muli %48, %cst_0 : tensor<1x64xi32> loc(#loc308) | |
%50 = tt.splat %35 : !tt.ptr<bf16> -> tensor<1x64x!tt.ptr<bf16>> loc(#loc309) | |
%51 = tt.addptr %50, %49 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc309) | |
%52 = tt.expand_dims %12 {axis = 1 : i32} : tensor<128xi32> -> tensor<128x1xi32> loc(#loc310) | |
%53 = tt.broadcast %51 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc311) | |
%54 = tt.broadcast %52 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc311) | |
%55 = tt.addptr %53, %54 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc311) | |
%56 = tt.expand_dims %47 {axis = 1 : i32} : tensor<64xi32> -> tensor<64x1xi32> loc(#loc312) | |
%57 = arith.muli %56, %cst : tensor<64x1xi32> loc(#loc313) | |
%58 = tt.splat %36 : !tt.ptr<bf16> -> tensor<64x1x!tt.ptr<bf16>> loc(#loc314) | |
%59 = tt.addptr %58, %57 : tensor<64x1x!tt.ptr<bf16>>, tensor<64x1xi32> loc(#loc314) | |
%60 = tt.broadcast %59 : tensor<64x1x!tt.ptr<bf16>> -> tensor<64x128x!tt.ptr<bf16>> loc(#loc315) | |
%61 = tt.broadcast %21 : tensor<1x128xi32> -> tensor<64x128xi32> loc(#loc315) | |
%62 = tt.addptr %60, %61 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc315) | |
%63 = arith.muli %44, %c2_i32 : i32 loc(#loc316) | |
%64 = arith.minsi %63, %c1024_i32 : i32 loc(#loc317) | |
%65:5 = scf.for %arg18 = %c0_i32 to %64 step %c1_i32 iter_args(%arg19 = %cst_7, %arg20 = %cst_7, %arg21 = %55, %arg22 = %62, %arg23 = %47) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%102 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc555) | |
%103 = tt.splat %37 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc462) | |
%104 = tt.addptr %103, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc462) | |
%105 = tt.load %104 : tensor<64x!tt.ptr<f32>> loc(#loc463) | |
%106 = arith.cmpf oeq, %105, %cst_2 : tensor<64xf32> loc(#loc464) | |
%107 = arith.select %106, %cst_1, %105 : tensor<64xi1>, tensor<64xf32> loc(#loc465) | |
%108 = tt.dot %25, %102, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc466) | |
%109 = arith.mulf %108, %cst_5 : tensor<128x64xf32> loc(#loc467) | |
%110 = tt.expand_dims %arg23 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc468) | |
%111 = tt.broadcast %110 : tensor<1x64xi32> -> tensor<128x64xi32> loc(#loc469) | |
%112 = tt.broadcast %17 : tensor<128x1xi32> -> tensor<128x64xi32> loc(#loc469) | |
%113 = arith.cmpi sge, %111, %112 : tensor<128x64xi32> loc(#loc469) | |
%114 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<1x64x!tt.ptr<i64>> loc(#loc470) | |
%115 = tt.addptr %114, %110 : tensor<1x64x!tt.ptr<i64>>, tensor<1x64xi32> loc(#loc470) | |
%116 = tt.load %115 : tensor<1x64x!tt.ptr<i64>> loc(#loc471) | |
%117 = tt.splat %arg16 : !tt.ptr<i64> -> tensor<128x1x!tt.ptr<i64>> loc(#loc472) | |
%118 = tt.addptr %117, %17 : tensor<128x1x!tt.ptr<i64>>, tensor<128x1xi32> loc(#loc472) | |
%119 = tt.load %118 : tensor<128x1x!tt.ptr<i64>> loc(#loc473) | |
%120 = tt.broadcast %116 : tensor<1x64xi64> -> tensor<128x64xi64> loc(#loc474) | |
%121 = tt.broadcast %119 : tensor<128x1xi64> -> tensor<128x64xi64> loc(#loc474) | |
%122 = arith.cmpi eq, %120, %121 : tensor<128x64xi64> loc(#loc474) | |
%123 = arith.andi %113, %122 : tensor<128x64xi1> loc(#loc475) | |
%124 = arith.select %123, %109, %cst_4 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc476) | |
%125 = arith.mulf %124, %cst_3 : tensor<128x64xf32> loc(#loc477) | |
%126 = tt.expand_dims %107 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc478) | |
%127 = tt.broadcast %126 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc479) | |
%128 = arith.subf %125, %127 : tensor<128x64xf32> loc(#loc479) | |
%129 = math.exp2 %128 : tensor<128x64xf32> loc(#loc480) | |
%130 = tt.load %arg22 : tensor<64x128x!tt.ptr<bf16>> loc(#loc556) | |
%131 = arith.truncf %129 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc482) | |
%132 = tt.dot %131, %130, %arg20 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc483) | |
%133 = tt.splat %38 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc484) | |
%134 = tt.addptr %133, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc484) | |
%135 = tt.load %134 : tensor<64x!tt.ptr<f32>> loc(#loc485) | |
%136 = tt.trans %130 {order = array<i32: 1, 0>} : tensor<64x128xbf16> -> tensor<128x64xbf16> loc(#loc486) | |
%137 = tt.dot %30, %136, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc487) | |
%138 = tt.expand_dims %135 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc488) | |
%139 = tt.broadcast %138 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc489) | |
%140 = arith.subf %137, %139 : tensor<128x64xf32> loc(#loc489) | |
%141 = arith.mulf %129, %140 : tensor<128x64xf32> loc(#loc490) | |
%142 = arith.select %123, %141, %cst_6 : tensor<128x64xi1>, tensor<128x64xf32> loc(#loc491) | |
%143 = arith.truncf %142 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc492) | |
%144 = tt.trans %102 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc493) | |
%145 = tt.dot %143, %144, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc494) | |
%146 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc495) | |
%147 = tt.addptr %40, %146 : !tt.ptr<i32>, i32 loc(#loc496) | |
%148 = tt.load %147 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc497) | |
%149 = arith.addi %146, %c1_i32 : i32 loc(#loc498) | |
%150 = arith.cmpi slt, %149, %44 : i32 loc(#loc499) | |
%151 = tt.addptr %147, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc500) | |
%152 = tt.load %151, %150 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc501) | |
%153 = arith.addi %arg18, %c1_i32 : i32 loc(#loc502) | |
%154 = arith.remsi %153, %c2_i32 : i32 loc(#loc503) | |
%155 = arith.cmpi eq, %154, %c0_i32 : i32 loc(#loc504) | |
%156 = arith.subi %152, %148 : i32 loc(#loc505) | |
%157 = arith.muli %156, %c128_i32 : i32 loc(#loc506) | |
%158 = arith.subi %157, %c64_i32 : i32 loc(#loc507) | |
%159 = arith.extui %155 : i1 to i32 loc(#loc508) | |
%160 = arith.muli %158, %159 : i32 loc(#loc508) | |
%161 = arith.subi %c1_i32, %159 : i32 loc(#loc509) | |
%162 = arith.muli %161, %c64_i32 : i32 loc(#loc510) | |
%163 = arith.addi %160, %162 : i32 loc(#loc511) | |
%164 = arith.muli %163, %c1024_i32 : i32 loc(#loc370) | |
%165 = tt.splat %164 : i32 -> tensor<128x64xi32> loc(#loc371) | |
%166 = tt.addptr %arg21, %165 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc371) | |
%167 = tt.splat %164 : i32 -> tensor<64x128xi32> loc(#loc372) | |
%168 = tt.addptr %arg22, %167 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc372) | |
%169 = tt.splat %163 : i32 -> tensor<64xi32> loc(#loc373) | |
%170 = arith.addi %arg23, %169 : tensor<64xi32> loc(#loc373) | |
scf.yield %145, %132, %166, %168, %170 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc374) | |
} loc(#loc318) | |
%66 = tt.addptr %arg15, %39 : !tt.ptr<i32>, i32 loc(#loc202) | |
%67 = tt.load %66 : !tt.ptr<i32> loc(#loc203) | |
%68 = arith.muli %67, %c128_i32 : i32 loc(#loc204) | |
%69 = tt.addptr %arg14, %0 : !tt.ptr<i32>, i32 loc(#loc205) | |
%70 = tt.load %69 : !tt.ptr<i32> loc(#loc206) | |
%71 = tt.splat %68 : i32 -> tensor<64xi32> loc(#loc207) | |
%72 = arith.addi %71, %45 : tensor<64xi32> loc(#loc207) | |
%73 = tt.expand_dims %72 {axis = 0 : i32} : tensor<64xi32> -> tensor<1x64xi32> loc(#loc375) | |
%74 = arith.muli %73, %cst_0 : tensor<1x64xi32> loc(#loc376) | |
%75 = tt.addptr %50, %74 : tensor<1x64x!tt.ptr<bf16>>, tensor<1x64xi32> loc(#loc377) | |
%76 = tt.broadcast %75 : tensor<1x64x!tt.ptr<bf16>> -> tensor<128x64x!tt.ptr<bf16>> loc(#loc378) | |
%77 = tt.addptr %76, %54 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc378) | |
%78 = tt.expand_dims %72 {axis = 1 : i32} : tensor<64xi32> -> tensor<64x1xi32> loc(#loc379) | |
%79 = arith.muli %78, %cst : tensor<64x1xi32> loc(#loc380) | |
%80 = tt.addptr %58, %79 : tensor<64x1x!tt.ptr<bf16>>, tensor<64x1xi32> loc(#loc381) | |
%81 = tt.broadcast %80 : tensor<64x1x!tt.ptr<bf16>> -> tensor<64x128x!tt.ptr<bf16>> loc(#loc382) | |
%82 = tt.addptr %81, %61 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc382) | |
%83 = arith.muli %70, %c2_i32 : i32 loc(#loc383) | |
%84 = arith.minsi %83, %c1024_i32 : i32 loc(#loc384) | |
%85:5 = scf.for %arg18 = %c0_i32 to %84 step %c1_i32 iter_args(%arg19 = %65#0, %arg20 = %65#1, %arg21 = %77, %arg22 = %82, %arg23 = %72) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32>) : i32 { | |
%102 = tt.load %arg21 : tensor<128x64x!tt.ptr<bf16>> loc(#loc557) | |
%103 = tt.splat %37 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc512) | |
%104 = tt.addptr %103, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc512) | |
%105 = tt.load %104 : tensor<64x!tt.ptr<f32>> loc(#loc513) | |
%106 = arith.cmpf oeq, %105, %cst_2 : tensor<64xf32> loc(#loc514) | |
%107 = arith.select %106, %cst_1, %105 : tensor<64xi1>, tensor<64xf32> loc(#loc515) | |
%108 = tt.dot %25, %102, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc516) | |
%109 = arith.mulf %108, %cst_5 : tensor<128x64xf32> loc(#loc517) | |
%110 = arith.mulf %109, %cst_3 : tensor<128x64xf32> loc(#loc518) | |
%111 = tt.expand_dims %107 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc519) | |
%112 = tt.broadcast %111 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc520) | |
%113 = arith.subf %110, %112 : tensor<128x64xf32> loc(#loc520) | |
%114 = math.exp2 %113 : tensor<128x64xf32> loc(#loc521) | |
%115 = tt.load %arg22 : tensor<64x128x!tt.ptr<bf16>> loc(#loc558) | |
%116 = arith.truncf %114 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc522) | |
%117 = tt.dot %116, %115, %arg20 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc523) | |
%118 = tt.splat %38 : !tt.ptr<f32> -> tensor<64x!tt.ptr<f32>> loc(#loc524) | |
%119 = tt.addptr %118, %arg23 : tensor<64x!tt.ptr<f32>>, tensor<64xi32> loc(#loc524) | |
%120 = tt.load %119 : tensor<64x!tt.ptr<f32>> loc(#loc525) | |
%121 = tt.trans %115 {order = array<i32: 1, 0>} : tensor<64x128xbf16> -> tensor<128x64xbf16> loc(#loc526) | |
%122 = tt.dot %30, %121, %cst_6 : tensor<128x128xbf16> * tensor<128x64xbf16> -> tensor<128x64xf32> loc(#loc527) | |
%123 = tt.expand_dims %120 {axis = 0 : i32} : tensor<64xf32> -> tensor<1x64xf32> loc(#loc528) | |
%124 = tt.broadcast %123 : tensor<1x64xf32> -> tensor<128x64xf32> loc(#loc529) | |
%125 = arith.subf %122, %124 : tensor<128x64xf32> loc(#loc529) | |
%126 = arith.mulf %114, %125 : tensor<128x64xf32> loc(#loc530) | |
%127 = arith.truncf %126 : tensor<128x64xf32> to tensor<128x64xbf16> loc(#loc531) | |
%128 = tt.trans %102 {order = array<i32: 1, 0>} : tensor<128x64xbf16> -> tensor<64x128xbf16> loc(#loc532) | |
%129 = tt.dot %127, %128, %arg19 : tensor<128x64xbf16> * tensor<64x128xbf16> -> tensor<128x128xf32> loc(#loc533) | |
%130 = arith.divsi %arg18, %c2_i32 : i32 loc(#loc534) | |
%131 = tt.addptr %66, %130 : !tt.ptr<i32>, i32 loc(#loc535) | |
%132 = tt.load %131 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc536) | |
%133 = arith.addi %130, %c1_i32 : i32 loc(#loc537) | |
%134 = arith.cmpi slt, %133, %70 : i32 loc(#loc538) | |
%135 = tt.addptr %131, %c1_i32 : !tt.ptr<i32>, i32 loc(#loc539) | |
%136 = tt.load %135, %134 evictionPolicy = evict_last : !tt.ptr<i32> loc(#loc540) | |
%137 = arith.addi %arg18, %c1_i32 : i32 loc(#loc541) | |
%138 = arith.remsi %137, %c2_i32 : i32 loc(#loc542) | |
%139 = arith.cmpi eq, %138, %c0_i32 : i32 loc(#loc543) | |
%140 = arith.subi %136, %132 : i32 loc(#loc544) | |
%141 = arith.muli %140, %c128_i32 : i32 loc(#loc545) | |
%142 = arith.subi %141, %c64_i32 : i32 loc(#loc546) | |
%143 = arith.extui %139 : i1 to i32 loc(#loc547) | |
%144 = arith.muli %142, %143 : i32 loc(#loc547) | |
%145 = arith.subi %c1_i32, %143 : i32 loc(#loc548) | |
%146 = arith.muli %145, %c64_i32 : i32 loc(#loc549) | |
%147 = arith.addi %144, %146 : i32 loc(#loc550) | |
%148 = arith.muli %147, %c1024_i32 : i32 loc(#loc386) | |
%149 = tt.splat %148 : i32 -> tensor<128x64xi32> loc(#loc387) | |
%150 = tt.addptr %arg21, %149 : tensor<128x64x!tt.ptr<bf16>>, tensor<128x64xi32> loc(#loc387) | |
%151 = tt.splat %148 : i32 -> tensor<64x128xi32> loc(#loc388) | |
%152 = tt.addptr %arg22, %151 : tensor<64x128x!tt.ptr<bf16>>, tensor<64x128xi32> loc(#loc388) | |
%153 = tt.splat %147 : i32 -> tensor<64xi32> loc(#loc389) | |
%154 = arith.addi %arg23, %153 : tensor<64xi32> loc(#loc389) | |
scf.yield %129, %117, %150, %152, %154 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x64x!tt.ptr<bf16>>, tensor<64x128x!tt.ptr<bf16>>, tensor<64xi32> loc(#loc390) | |
} loc(#loc385) | |
%86 = tt.splat %11 : !tt.ptr<bf16> -> tensor<128x1x!tt.ptr<bf16>> loc(#loc209) | |
%87 = tt.addptr %86, %18 : tensor<128x1x!tt.ptr<bf16>>, tensor<128x1xi32> loc(#loc209) | |
%88 = tt.broadcast %87 : tensor<128x1x!tt.ptr<bf16>> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc210) | |
%89 = tt.addptr %88, %23 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc210) | |
%90 = arith.truncf %85#1 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc211) | |
tt.store %89, %90 : tensor<128x128x!tt.ptr<bf16>> loc(#loc211) | |
%91 = arith.mulf %85#0, %cst_9 : tensor<128x128xf32> loc(#loc212) | |
%92 = arith.cmpi slt, %17, %cst_8 : tensor<128x1xi32> loc(#loc213) | |
%93 = tt.splat %4 : i32 -> tensor<1x128xi32> loc(#loc214) | |
%94 = arith.addi %21, %93 : tensor<1x128xi32> loc(#loc214) | |
%95 = tt.broadcast %94 : tensor<1x128xi32> -> tensor<128x128xi32> loc(#loc215) | |
%96 = tt.broadcast %18 : tensor<128x1xi32> -> tensor<128x128xi32> loc(#loc215) | |
%97 = arith.addi %95, %96 : tensor<128x128xi32> loc(#loc215) | |
%98 = tt.splat %arg17 : !tt.ptr<bf16> -> tensor<128x128x!tt.ptr<bf16>> loc(#loc216) | |
%99 = tt.addptr %98, %97 : tensor<128x128x!tt.ptr<bf16>>, tensor<128x128xi32> loc(#loc216) | |
%100 = tt.broadcast %92 : tensor<128x1xi1> -> tensor<128x128xi1> loc(#loc217) | |
%101 = arith.truncf %91 : tensor<128x128xf32> to tensor<128x128xbf16> loc(#loc217) | |
tt.store %99, %101, %100 : tensor<128x128x!tt.ptr<bf16>> loc(#loc217) | |
} loc(#loc16) | |
tt.return loc(#loc218) | |
} loc(#loc) | |
} loc(#loc) | |
// Source-location alias table.
// Each `#locN = loc(...)` entry from here on binds a short alias to a location
// attribute; the operations in the module above attach one by ending the op
// with `loc(#locN)`.
// Entries written as loc("path":L:C) are file/line/column locations inside the
// named .py file; `loc(unknown)` carries no position information.
#loc1 = loc(unknown) | |
#loc2 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":107:24) | |
#loc3 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":111:27) | |
#loc4 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":112:23) | |
#loc5 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":113:23) | |
#loc6 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":121:25) | |
#loc7 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":121:59) | |
#loc8 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":125:50) | |
#loc9 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":125:37) | |
#loc10 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":125:61) | |
#loc11 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":128:9) | |
#loc12 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":129:9) | |
#loc13 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":130:10) | |
#loc14 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":133:26) | |
#loc15 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":136:14) | |
#loc16 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":136:7) | |
#loc17 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":137:24) | |
#loc18 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":141:29) | |
#loc19 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":141:44) | |
#loc20 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":142:35) | |
#loc21 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":152:83) | |
#loc22 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":155:30) | |
#loc23 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":155:40) | |
#loc24 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":155:63) | |
#loc25 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":158:30) | |
#loc26 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":158:35) | |
#loc27 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":158:46) | |
#loc28 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":158:56) | |
#loc29 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":160:17) | |
#loc30 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":161:19) | |
#loc31 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":164:19) | |
#loc32 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":165:21) | |
#loc33 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":166:25) | |
#loc34 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":171:36) | |
#loc35 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":172:29) | |
#loc36 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":809:27) | |
#loc37 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":175:107) | |
#loc38 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":809:38) | |
#loc39 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":809:20) | |
#loc40 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":809:56) | |
#loc41 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":809:49) | |
#loc42 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":819:23) | |
#loc43 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":176:111) | |
#loc44 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":182:34) | |
#loc45 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":182:25) | |
#loc46 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":183:33) | |
#loc47 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":183:26) | |
#loc48 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":187:30) | |
#loc49 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":187:50) | |
#loc50 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":188:18) | |
#loc51 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":192:30) | |
#loc52 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":193:27) | |
#loc53 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":193:41) | |
#loc54 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":194:53) | |
#loc55 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":194:39) | |
#loc56 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":196:42) | |
#loc57 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":196:29) | |
#loc58 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":384:26) | |
#loc59 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":204:12) | |
#loc60 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":384:37) | |
#loc61 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":384:18) | |
#loc62 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":384:56) | |
#loc63 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":384:49) | |
#loc64 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":385:18) | |
#loc65 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":385:49) | |
#loc66 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":389:43) | |
#loc67 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":389:63) | |
#loc68 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":424:32) | |
#loc69 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":483:105) | |
#loc70 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":432:16) | |
#loc71 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":484:19) | |
#loc72 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":486:14) | |
#loc73 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":489:36) | |
#loc74 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":505:23) | |
#loc75 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":506:34) | |
#loc76 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":506:23) | |
#loc77 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":507:34) | |
#loc78 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":507:23) | |
#loc79 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":508:23) | |
#loc80 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":509:22) | |
#loc81 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":516:69) | |
#loc82 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":519:27) | |
#loc83 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":520:39) | |
#loc84 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":520:21) | |
#loc85 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":523:104) | |
#loc86 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":525:20) | |
#loc87 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":526:22) | |
#loc88 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":526:19) | |
#loc89 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":526:14) | |
#loc90 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":545:43) | |
#loc91 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":547:15) | |
#loc92 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":549:30) | |
#loc93 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":549:21) | |
#loc94 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":783:33) | |
#loc95 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":438:68) | |
#loc96 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":784:38) | |
#loc97 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":784:24) | |
#loc98 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":785:109) | |
#loc99 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":785:113) | |
#loc100 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":785:55) | |
#loc101 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":785:25) | |
#loc102 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":786:30) | |
#loc103 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":786:35) | |
#loc104 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":786:60) | |
#loc105 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":787:34) | |
#loc106 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":787:48) | |
#loc107 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":787:63) | |
#loc108 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":788:29) | |
#loc109 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":788:47) | |
#loc110 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":788:61) | |
#loc111 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":788:42) | |
#loc112 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":441:32) | |
#loc113 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":441:23) | |
#loc114 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":442:23) | |
#loc115 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":444:23) | |
#loc116 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":444:12) | |
#loc117 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":211:39) | |
#loc118 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":212:31) | |
#loc119 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":212:45) | |
#loc120 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":213:62) | |
#loc121 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":213:43) | |
#loc122 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":215:33) | |
#loc123 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":223:16) | |
#loc124 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":228:24) | |
#loc125 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":228:56) | |
#loc126 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":229:14) | |
#loc127 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":231:30) | |
#loc128 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":249:25) | |
#loc129 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":250:29) | |
#loc130 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":253:107) | |
#loc131 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":254:107) | |
#loc132 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":266:34) | |
#loc133 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":266:39) | |
#loc134 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":266:50) | |
#loc135 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":266:60) | |
#loc136 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":268:21) | |
#loc137 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":269:23) | |
#loc138 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":272:25) | |
#loc139 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":273:29) | |
#loc140 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":279:81) | |
#loc141 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":283:32) | |
#loc142 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":284:30) | |
#loc143 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":284:43) | |
#loc144 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":285:55) | |
#loc145 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":285:42) | |
#loc146 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":287:45) | |
#loc147 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":287:32) | |
#loc148 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":595:26) | |
#loc149 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":295:16) | |
#loc150 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":595:37) | |
#loc151 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":595:18) | |
#loc152 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":595:56) | |
#loc153 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":595:49) | |
#loc154 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":596:27) | |
#loc155 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":596:38) | |
#loc156 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":596:19) | |
#loc157 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":596:51) | |
#loc158 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":599:42) | |
#loc159 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":599:61) | |
#loc160 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":634:32) | |
#loc161 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":692:105) | |
#loc162 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":642:16) | |
#loc163 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":695:28) | |
#loc164 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":695:22) | |
#loc165 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":698:26) | |
#loc166 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":698:46) | |
#loc167 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":699:20) | |
#loc168 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":701:15) | |
#loc169 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":703:36) | |
#loc170 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":720:25) | |
#loc171 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":721:35) | |
#loc172 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":721:24) | |
#loc173 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":722:35) | |
#loc174 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":722:24) | |
#loc175 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":723:25) | |
#loc176 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":724:24) | |
#loc177 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":730:69) | |
#loc178 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":733:27) | |
#loc179 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":734:44) | |
#loc180 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":734:40) | |
#loc181 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":734:22) | |
#loc182 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":735:99) | |
#loc183 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":738:24) | |
#loc184 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":738:43) | |
#loc185 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":740:29) | |
#loc186 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":740:21) | |
#loc187 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":744:29) | |
#loc188 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":744:20) | |
#loc189 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":745:25) | |
#loc190 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":745:22) | |
#loc191 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":745:16) | |
#loc192 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":769:45) | |
#loc193 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":771:24) | |
#loc194 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":771:52) | |
#loc195 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":771:43) | |
#loc196 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":647:66) | |
#loc197 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":650:32) | |
#loc198 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":650:23) | |
#loc199 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":651:23) | |
#loc200 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":653:23) | |
#loc201 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":653:12) | |
#loc202 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":303:41) | |
#loc203 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":304:34) | |
#loc204 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":304:47) | |
#loc205 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":305:64) | |
#loc206 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":305:46) | |
#loc207 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":307:36) | |
#loc208 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":315:20) | |
#loc209 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":320:23) | |
#loc210 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":320:55) | |
#loc211 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":327:30) | |
#loc212 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":331:14) | |
#loc213 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":334:29) | |
#loc214 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":341:55) | |
#loc215 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":341:69) | |
#loc216 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":341:29) | |
#loc217 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":341:99) | |
#loc218 = loc("/tmp/torchinductor_dberard/de/cdehmpx37ndplysdvmriwfhononr4vsoth5rnx5allcqr3m5v3pz.py":136:4) | |
#loc219 = loc(callsite(#loc36 at #loc37)) | |
#loc220 = loc(callsite(#loc38 at #loc37)) | |
#loc221 = loc(callsite(#loc39 at #loc37)) | |
#loc222 = loc(callsite(#loc40 at #loc37)) | |
#loc223 = loc(callsite(#loc41 at #loc37)) | |
#loc224 = loc(callsite(#loc42 at #loc37)) | |
#loc225 = loc(callsite(#loc39 at #loc43)) | |
#loc226 = loc(callsite(#loc41 at #loc43)) | |
#loc227 = loc(callsite(#loc42 at #loc43)) | |
#loc228 = loc(callsite(#loc58 at #loc59)) | |
#loc229 = loc(callsite(#loc60 at #loc59)) | |
#loc230 = loc(callsite(#loc61 at #loc59)) | |
#loc231 = loc(callsite(#loc62 at #loc59)) | |
#loc232 = loc(callsite(#loc63 at #loc59)) | |
#loc233 = loc(callsite(#loc64 at #loc59)) | |
#loc234 = loc(callsite(#loc65 at #loc59)) | |
#loc235 = loc(callsite(#loc66 at #loc59)) | |
#loc236 = loc(callsite(#loc67 at #loc59)) | |
#loc237 = loc(callsite(#loc68 at #loc59)) | |
#loc238 = loc(callsite(#loc42 at #loc69)) | |
#loc239 = loc(callsite(#loc71 at #loc70)) | |
#loc240 = loc(callsite(#loc72 at #loc70)) | |
#loc241 = loc(callsite(#loc73 at #loc70)) | |
#loc242 = loc(callsite(#loc74 at #loc70)) | |
#loc243 = loc(callsite(#loc75 at #loc70)) | |
#loc244 = loc(callsite(#loc76 at #loc70)) | |
#loc245 = loc(callsite(#loc77 at #loc70)) | |
#loc246 = loc(callsite(#loc78 at #loc70)) | |
#loc247 = loc(callsite(#loc79 at #loc70)) | |
#loc248 = loc(callsite(#loc80 at #loc70)) | |
#loc249 = loc(callsite(#loc81 at #loc70)) | |
#loc250 = loc(callsite(#loc82 at #loc70)) | |
#loc251 = loc(callsite(#loc83 at #loc70)) | |
#loc252 = loc(callsite(#loc84 at #loc70)) | |
#loc253 = loc(callsite(#loc42 at #loc85)) | |
#loc254 = loc(callsite(#loc86 at #loc70)) | |
#loc255 = loc(callsite(#loc87 at #loc70)) | |
#loc256 = loc(callsite(#loc88 at #loc70)) | |
#loc257 = loc(callsite(#loc89 at #loc70)) | |
#loc258 = loc(callsite(#loc90 at #loc70)) | |
#loc259 = loc(callsite(#loc91 at #loc70)) | |
#loc260 = loc(callsite(#loc92 at #loc70)) | |
#loc261 = loc(callsite(#loc93 at #loc70)) | |
#loc262 = loc(callsite(#loc94 at #loc95)) | |
#loc263 = loc(callsite(#loc96 at #loc95)) | |
#loc264 = loc(callsite(#loc97 at #loc95)) | |
#loc265 = loc(callsite(#loc98 at #loc95)) | |
#loc266 = loc(callsite(#loc99 at #loc95)) | |
#loc267 = loc(callsite(#loc100 at #loc95)) | |
#loc268 = loc(callsite(#loc101 at #loc95)) | |
#loc269 = loc(callsite(#loc102 at #loc95)) | |
#loc270 = loc(callsite(#loc103 at #loc95)) | |
#loc271 = loc(callsite(#loc104 at #loc95)) | |
#loc272 = loc(callsite(#loc105 at #loc95)) | |
#loc273 = loc(callsite(#loc106 at #loc95)) | |
#loc274 = loc(callsite(#loc107 at #loc95)) | |
#loc275 = loc(callsite(#loc108 at #loc95)) | |
#loc276 = loc(callsite(#loc109 at #loc95)) | |
#loc277 = loc(callsite(#loc110 at #loc95)) | |
#loc278 = loc(callsite(#loc111 at #loc95)) | |
#loc279 = loc(callsite(#loc112 at #loc59)) | |
#loc280 = loc(callsite(#loc113 at #loc59)) | |
#loc281 = loc(callsite(#loc114 at #loc59)) | |
#loc282 = loc(callsite(#loc115 at #loc59)) | |
#loc283 = loc(callsite(#loc116 at #loc59)) | |
#loc284 = loc(callsite(#loc58 at #loc123)) | |
#loc285 = loc(callsite(#loc60 at #loc123)) | |
#loc286 = loc(callsite(#loc61 at #loc123)) | |
#loc287 = loc(callsite(#loc63 at #loc123)) | |
#loc288 = loc(callsite(#loc64 at #loc123)) | |
#loc289 = loc(callsite(#loc65 at #loc123)) | |
#loc290 = loc(callsite(#loc66 at #loc123)) | |
#loc291 = loc(callsite(#loc67 at #loc123)) | |
#loc292 = loc(callsite(#loc68 at #loc123)) | |
#loc293 = loc(callsite(#loc112 at #loc123)) | |
#loc294 = loc(callsite(#loc113 at #loc123)) | |
#loc295 = loc(callsite(#loc114 at #loc123)) | |
#loc296 = loc(callsite(#loc115 at #loc123)) | |
#loc297 = loc(callsite(#loc116 at #loc123)) | |
#loc298 = loc(callsite(#loc36 at #loc130)) | |
#loc299 = loc(callsite(#loc38 at #loc130)) | |
#loc300 = loc(callsite(#loc39 at #loc130)) | |
#loc301 = loc(callsite(#loc40 at #loc130)) | |
#loc302 = loc(callsite(#loc41 at #loc130)) | |
#loc303 = loc(callsite(#loc42 at #loc130)) | |
#loc304 = loc(callsite(#loc39 at #loc131)) | |
#loc305 = loc(callsite(#loc41 at #loc131)) | |
#loc306 = loc(callsite(#loc42 at #loc131)) | |
#loc307 = loc(callsite(#loc148 at #loc149)) | |
#loc308 = loc(callsite(#loc150 at #loc149)) | |
#loc309 = loc(callsite(#loc151 at #loc149)) | |
#loc310 = loc(callsite(#loc152 at #loc149)) | |
#loc311 = loc(callsite(#loc153 at #loc149)) | |
#loc312 = loc(callsite(#loc154 at #loc149)) | |
#loc313 = loc(callsite(#loc155 at #loc149)) | |
#loc314 = loc(callsite(#loc156 at #loc149)) | |
#loc315 = loc(callsite(#loc157 at #loc149)) | |
#loc316 = loc(callsite(#loc158 at #loc149)) | |
#loc317 = loc(callsite(#loc159 at #loc149)) | |
#loc318 = loc(callsite(#loc160 at #loc149)) | |
#loc319 = loc(callsite(#loc42 at #loc161)) | |
#loc320 = loc(callsite(#loc163 at #loc162)) | |
#loc321 = loc(callsite(#loc164 at #loc162)) | |
#loc322 = loc(callsite(#loc165 at #loc162)) | |
#loc323 = loc(callsite(#loc166 at #loc162)) | |
#loc324 = loc(callsite(#loc167 at #loc162)) | |
#loc325 = loc(callsite(#loc168 at #loc162)) | |
#loc326 = loc(callsite(#loc169 at #loc162)) | |
#loc327 = loc(callsite(#loc170 at #loc162)) | |
#loc328 = loc(callsite(#loc171 at #loc162)) | |
#loc329 = loc(callsite(#loc172 at #loc162)) | |
#loc330 = loc(callsite(#loc173 at #loc162)) | |
#loc331 = loc(callsite(#loc174 at #loc162)) | |
#loc332 = loc(callsite(#loc175 at #loc162)) | |
#loc333 = loc(callsite(#loc176 at #loc162)) | |
#loc334 = loc(callsite(#loc177 at #loc162)) | |
#loc335 = loc(callsite(#loc178 at #loc162)) | |
#loc336 = loc(callsite(#loc179 at #loc162)) | |
#loc337 = loc(callsite(#loc180 at #loc162)) | |
#loc338 = loc(callsite(#loc181 at #loc162)) | |
#loc339 = loc(callsite(#loc42 at #loc182)) | |
#loc340 = loc(callsite(#loc183 at #loc162)) | |
#loc341 = loc(callsite(#loc184 at #loc162)) | |
#loc342 = loc(callsite(#loc185 at #loc162)) | |
#loc343 = loc(callsite(#loc186 at #loc162)) | |
#loc344 = loc(callsite(#loc187 at #loc162)) | |
#loc345 = loc(callsite(#loc188 at #loc162)) | |
#loc346 = loc(callsite(#loc189 at #loc162)) | |
#loc347 = loc(callsite(#loc190 at #loc162)) | |
#loc348 = loc(callsite(#loc191 at #loc162)) | |
#loc349 = loc(callsite(#loc192 at #loc162)) | |
#loc350 = loc(callsite(#loc193 at #loc162)) | |
#loc351 = loc(callsite(#loc194 at #loc162)) | |
#loc352 = loc(callsite(#loc195 at #loc162)) | |
#loc353 = loc(callsite(#loc94 at #loc196)) | |
#loc354 = loc(callsite(#loc96 at #loc196)) | |
#loc355 = loc(callsite(#loc97 at #loc196)) | |
#loc356 = loc(callsite(#loc98 at #loc196)) | |
#loc357 = loc(callsite(#loc99 at #loc196)) | |
#loc358 = loc(callsite(#loc100 at #loc196)) | |
#loc359 = loc(callsite(#loc101 at #loc196)) | |
#loc360 = loc(callsite(#loc102 at #loc196)) | |
#loc361 = loc(callsite(#loc103 at #loc196)) | |
#loc362 = loc(callsite(#loc104 at #loc196)) | |
#loc363 = loc(callsite(#loc105 at #loc196)) | |
#loc364 = loc(callsite(#loc106 at #loc196)) | |
#loc365 = loc(callsite(#loc107 at #loc196)) | |
#loc366 = loc(callsite(#loc108 at #loc196)) | |
#loc367 = loc(callsite(#loc109 at #loc196)) | |
#loc368 = loc(callsite(#loc110 at #loc196)) | |
#loc369 = loc(callsite(#loc111 at #loc196)) | |
#loc370 = loc(callsite(#loc197 at #loc149)) | |
#loc371 = loc(callsite(#loc198 at #loc149)) | |
#loc372 = loc(callsite(#loc199 at #loc149)) | |
#loc373 = loc(callsite(#loc200 at #loc149)) | |
#loc374 = loc(callsite(#loc201 at #loc149)) | |
#loc375 = loc(callsite(#loc148 at #loc208)) | |
#loc376 = loc(callsite(#loc150 at #loc208)) | |
#loc377 = loc(callsite(#loc151 at #loc208)) | |
#loc378 = loc(callsite(#loc153 at #loc208)) | |
#loc379 = loc(callsite(#loc154 at #loc208)) | |
#loc380 = loc(callsite(#loc155 at #loc208)) | |
#loc381 = loc(callsite(#loc156 at #loc208)) | |
#loc382 = loc(callsite(#loc157 at #loc208)) | |
#loc383 = loc(callsite(#loc158 at #loc208)) | |
#loc384 = loc(callsite(#loc159 at #loc208)) | |
#loc385 = loc(callsite(#loc160 at #loc208)) | |
#loc386 = loc(callsite(#loc197 at #loc208)) | |
#loc387 = loc(callsite(#loc198 at #loc208)) | |
#loc388 = loc(callsite(#loc199 at #loc208)) | |
#loc389 = loc(callsite(#loc200 at #loc208)) | |
#loc390 = loc(callsite(#loc201 at #loc208)) | |
#loc391 = loc(callsite(#loc238 at #loc70)) | |
#loc392 = loc(callsite(#loc239 at #loc59)) | |
#loc393 = loc(callsite(#loc240 at #loc59)) | |
#loc394 = loc(callsite(#loc241 at #loc59)) | |
#loc395 = loc(callsite(#loc242 at #loc59)) | |
#loc396 = loc(callsite(#loc243 at #loc59)) | |
#loc397 = loc(callsite(#loc244 at #loc59)) | |
#loc398 = loc(callsite(#loc245 at #loc59)) | |
#loc399 = loc(callsite(#loc246 at #loc59)) | |
#loc400 = loc(callsite(#loc247 at #loc59)) | |
#loc401 = loc(callsite(#loc248 at #loc59)) | |
#loc402 = loc(callsite(#loc249 at #loc59)) | |
#loc403 = loc(callsite(#loc250 at #loc59)) | |
#loc404 = loc(callsite(#loc251 at #loc59)) | |
#loc405 = loc(callsite(#loc252 at #loc59)) | |
#loc406 = loc(callsite(#loc253 at #loc70)) | |
#loc407 = loc(callsite(#loc254 at #loc59)) | |
#loc408 = loc(callsite(#loc255 at #loc59)) | |
#loc409 = loc(callsite(#loc256 at #loc59)) | |
#loc410 = loc(callsite(#loc257 at #loc59)) | |
#loc411 = loc(callsite(#loc258 at #loc59)) | |
#loc412 = loc(callsite(#loc259 at #loc59)) | |
#loc413 = loc(callsite(#loc260 at #loc59)) | |
#loc414 = loc(callsite(#loc261 at #loc59)) | |
#loc415 = loc(callsite(#loc262 at #loc59)) | |
#loc416 = loc(callsite(#loc263 at #loc59)) | |
#loc417 = loc(callsite(#loc264 at #loc59)) | |
#loc418 = loc(callsite(#loc265 at #loc59)) | |
#loc419 = loc(callsite(#loc266 at #loc59)) | |
#loc420 = loc(callsite(#loc267 at #loc59)) | |
#loc421 = loc(callsite(#loc268 at #loc59)) | |
#loc422 = loc(callsite(#loc269 at #loc59)) | |
#loc423 = loc(callsite(#loc270 at #loc59)) | |
#loc424 = loc(callsite(#loc271 at #loc59)) | |
#loc425 = loc(callsite(#loc272 at #loc59)) | |
#loc426 = loc(callsite(#loc273 at #loc59)) | |
#loc427 = loc(callsite(#loc274 at #loc59)) | |
#loc428 = loc(callsite(#loc275 at #loc59)) | |
#loc429 = loc(callsite(#loc276 at #loc59)) | |
#loc430 = loc(callsite(#loc277 at #loc59)) | |
#loc431 = loc(callsite(#loc278 at #loc59)) | |
#loc432 = loc(callsite(#loc239 at #loc123)) | |
#loc433 = loc(callsite(#loc240 at #loc123)) | |
#loc434 = loc(callsite(#loc250 at #loc123)) | |
#loc435 = loc(callsite(#loc251 at #loc123)) | |
#loc436 = loc(callsite(#loc252 at #loc123)) | |
#loc437 = loc(callsite(#loc254 at #loc123)) | |
#loc438 = loc(callsite(#loc255 at #loc123)) | |
#loc439 = loc(callsite(#loc256 at #loc123)) | |
#loc440 = loc(callsite(#loc257 at #loc123)) | |
#loc441 = loc(callsite(#loc259 at #loc123)) | |
#loc442 = loc(callsite(#loc260 at #loc123)) | |
#loc443 = loc(callsite(#loc261 at #loc123)) | |
#loc444 = loc(callsite(#loc262 at #loc123)) | |
#loc445 = loc(callsite(#loc263 at #loc123)) | |
#loc446 = loc(callsite(#loc264 at #loc123)) | |
#loc447 = loc(callsite(#loc265 at #loc123)) | |
#loc448 = loc(callsite(#loc266 at #loc123)) | |
#loc449 = loc(callsite(#loc267 at #loc123)) | |
#loc450 = loc(callsite(#loc268 at #loc123)) | |
#loc451 = loc(callsite(#loc269 at #loc123)) | |
#loc452 = loc(callsite(#loc270 at #loc123)) | |
#loc453 = loc(callsite(#loc271 at #loc123)) | |
#loc454 = loc(callsite(#loc272 at #loc123)) | |
#loc455 = loc(callsite(#loc273 at #loc123)) | |
#loc456 = loc(callsite(#loc274 at #loc123)) | |
#loc457 = loc(callsite(#loc275 at #loc123)) | |
#loc458 = loc(callsite(#loc276 at #loc123)) | |
#loc459 = loc(callsite(#loc277 at #loc123)) | |
#loc460 = loc(callsite(#loc278 at #loc123)) | |
#loc461 = loc(callsite(#loc319 at #loc162)) | |
#loc462 = loc(callsite(#loc320 at #loc149)) | |
#loc463 = loc(callsite(#loc321 at #loc149)) | |
#loc464 = loc(callsite(#loc322 at #loc149)) | |
#loc465 = loc(callsite(#loc323 at #loc149)) | |
#loc466 = loc(callsite(#loc324 at #loc149)) | |
#loc467 = loc(callsite(#loc325 at #loc149)) | |
#loc468 = loc(callsite(#loc326 at #loc149)) | |
#loc469 = loc(callsite(#loc327 at #loc149)) | |
#loc470 = loc(callsite(#loc328 at #loc149)) | |
#loc471 = loc(callsite(#loc329 at #loc149)) | |
#loc472 = loc(callsite(#loc330 at #loc149)) | |
#loc473 = loc(callsite(#loc331 at #loc149)) | |
#loc474 = loc(callsite(#loc332 at #loc149)) | |
#loc475 = loc(callsite(#loc333 at #loc149)) | |
#loc476 = loc(callsite(#loc334 at #loc149)) | |
#loc477 = loc(callsite(#loc335 at #loc149)) | |
#loc478 = loc(callsite(#loc336 at #loc149)) | |
#loc479 = loc(callsite(#loc337 at #loc149)) | |
#loc480 = loc(callsite(#loc338 at #loc149)) | |
#loc481 = loc(callsite(#loc339 at #loc162)) | |
#loc482 = loc(callsite(#loc340 at #loc149)) | |
#loc483 = loc(callsite(#loc341 at #loc149)) | |
#loc484 = loc(callsite(#loc342 at #loc149)) | |
#loc485 = loc(callsite(#loc343 at #loc149)) | |
#loc486 = loc(callsite(#loc344 at #loc149)) | |
#loc487 = loc(callsite(#loc345 at #loc149)) | |
#loc488 = loc(callsite(#loc346 at #loc149)) | |
#loc489 = loc(callsite(#loc347 at #loc149)) | |
#loc490 = loc(callsite(#loc348 at #loc149)) | |
#loc491 = loc(callsite(#loc349 at #loc149)) | |
#loc492 = loc(callsite(#loc350 at #loc149)) | |
#loc493 = loc(callsite(#loc351 at #loc149)) | |
#loc494 = loc(callsite(#loc352 at #loc149)) | |
#loc495 = loc(callsite(#loc353 at #loc149)) | |
#loc496 = loc(callsite(#loc354 at #loc149)) | |
#loc497 = loc(callsite(#loc355 at #loc149)) | |
#loc498 = loc(callsite(#loc356 at #loc149)) | |
#loc499 = loc(callsite(#loc357 at #loc149)) | |
#loc500 = loc(callsite(#loc358 at #loc149)) | |
#loc501 = loc(callsite(#loc359 at #loc149)) | |
#loc502 = loc(callsite(#loc360 at #loc149)) | |
#loc503 = loc(callsite(#loc361 at #loc149)) | |
#loc504 = loc(callsite(#loc362 at #loc149)) | |
#loc505 = loc(callsite(#loc363 at #loc149)) | |
#loc506 = loc(callsite(#loc364 at #loc149)) | |
#loc507 = loc(callsite(#loc365 at #loc149)) | |
#loc508 = loc(callsite(#loc366 at #loc149)) | |
#loc509 = loc(callsite(#loc367 at #loc149)) | |
#loc510 = loc(callsite(#loc368 at #loc149)) | |
#loc511 = loc(callsite(#loc369 at #loc149)) | |
#loc512 = loc(callsite(#loc320 at #loc208)) | |
#loc513 = loc(callsite(#loc321 at #loc208)) | |
#loc514 = loc(callsite(#loc322 at #loc208)) | |
#loc515 = loc(callsite(#loc323 at #loc208)) | |
#loc516 = loc(callsite(#loc324 at #loc208)) | |
#loc517 = loc(callsite(#loc325 at #loc208)) | |
#loc518 = loc(callsite(#loc335 at #loc208)) | |
#loc519 = loc(callsite(#loc336 at #loc208)) | |
#loc520 = loc(callsite(#loc337 at #loc208)) | |
#loc521 = loc(callsite(#loc338 at #loc208)) | |
#loc522 = loc(callsite(#loc340 at #loc208)) | |
#loc523 = loc(callsite(#loc341 at #loc208)) | |
#loc524 = loc(callsite(#loc342 at #loc208)) | |
#loc525 = loc(callsite(#loc343 at #loc208)) | |
#loc526 = loc(callsite(#loc344 at #loc208)) | |
#loc527 = loc(callsite(#loc345 at #loc208)) | |
#loc528 = loc(callsite(#loc346 at #loc208)) | |
#loc529 = loc(callsite(#loc347 at #loc208)) | |
#loc530 = loc(callsite(#loc348 at #loc208)) | |
#loc531 = loc(callsite(#loc350 at #loc208)) | |
#loc532 = loc(callsite(#loc351 at #loc208)) | |
#loc533 = loc(callsite(#loc352 at #loc208)) | |
#loc534 = loc(callsite(#loc353 at #loc208)) | |
#loc535 = loc(callsite(#loc354 at #loc208)) | |
#loc536 = loc(callsite(#loc355 at #loc208)) | |
#loc537 = loc(callsite(#loc356 at #loc208)) | |
#loc538 = loc(callsite(#loc357 at #loc208)) | |
#loc539 = loc(callsite(#loc358 at #loc208)) | |
#loc540 = loc(callsite(#loc359 at #loc208)) | |
#loc541 = loc(callsite(#loc360 at #loc208)) | |
#loc542 = loc(callsite(#loc361 at #loc208)) | |
#loc543 = loc(callsite(#loc362 at #loc208)) | |
#loc544 = loc(callsite(#loc363 at #loc208)) | |
#loc545 = loc(callsite(#loc364 at #loc208)) | |
#loc546 = loc(callsite(#loc365 at #loc208)) | |
#loc547 = loc(callsite(#loc366 at #loc208)) | |
#loc548 = loc(callsite(#loc367 at #loc208)) | |
#loc549 = loc(callsite(#loc368 at #loc208)) | |
#loc550 = loc(callsite(#loc369 at #loc208)) | |
#loc551 = loc(callsite(#loc391 at #loc59)) | |
#loc552 = loc(callsite(#loc406 at #loc59)) | |
#loc553 = loc(callsite(#loc391 at #loc123)) | |
#loc554 = loc(callsite(#loc406 at #loc123)) | |
#loc555 = loc(callsite(#loc461 at #loc149)) | |
#loc556 = loc(callsite(#loc481 at #loc149)) | |
#loc557 = loc(callsite(#loc461 at #loc208)) | |
#loc558 = loc(callsite(#loc481 at #loc208)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment