Last active
July 3, 2025 17:56
-
-
Save davidberard98/ec81ec7a5c035db225053074e7e280d6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
metric_id | Samples (3.3) | Samples (3.4) | speedup (3.3) | speedup (3.4) | speedup ((new-old)/old) | speedup (delta) | |
---|---|---|---|---|---|---|---|
tritonbench_ragged_attention_bwd[hstu]-tflops-avg | 0 | 1 | 0 | 98.806091308594 | -1 | -98.806091308594 | |
tritonbench_ragged_attention_bwd[x_(128, 4, 1024, 128, 128, 1.0, 20, 0)-hstu]_tflops | 0 | 1 | 0 | 132.42012023926 | -1 | -132.42012023926 | |
tritonbench_ragged_attention_bwd[x_(128, 4, 256, 128, 128, 1.0, 20, 0)-hstu]_tflops | 0 | 1 | 0 | 65.420997619629 | -1 | -65.420997619629 | |
tritonbench_ragged_attention_bwd[x_(128, 4, 512, 128, 128, 1.0, 20, 0)-hstu]_tflops | 0 | 1 | 0 | 98.577156066895 | -1 | -98.577156066895 | |
tritonbench_ragged_attention_bwd[x_average-hstu]_tflops | 0 | 1 | 0 | 98.806091308594 | -1 | -98.806091308594 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 2304, 6656)-_triton]_speedup | 1 | 1 | 0.60214412212372 | 0.79111462831497 | -0.23886615090629 | -0.18897050619125 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 2304, 6656)-_triton]_tflops | 1 | 1 | 73.069320678711 | 95.622283935547 | -0.23585468081935 | -22.552963256836 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 3584)-triton]_speedup | 1 | 1 | 0.68327975273132 | 0.82453906536102 | -0.1713191267267 | -0.1412593126297 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 7168, 8192)-triton]_tflops | 1 | 1 | 3.7420506477356 | 4.4838314056396 | -0.16543457833206 | -0.74178075790405 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_tflops | 1 | 1 | 348.47463989258 | 416.44131469727 | -0.16320828987416 | -67.966674804688 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_speedup | 1 | 1 | 11.71953868866 | 14.004383087158 | -0.16315209204707 | -2.2848443984985 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 1280, 8192)-triton]_speedup | 1 | 1 | 0.50401866436005 | 0.60148519277573 | -0.16204310527728 | -0.09746652841568 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 1280, 8192)-triton]_tflops | 1 | 1 | 10.874524116516 | 12.977425575256 | -0.16204303746883 | -2.1029014587402 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_tflops | 1 | 1 | 328.60260009766 | 391.91949462891 | -0.16155586899601 | -63.31689453125 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 7168, 8192)-triton]_speedup | 1 | 1 | 0.59562009572983 | 0.71029460430145 | -0.16144640248873 | -0.11467450857162 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 7168, 8192)-triton]_tflops | 1 | 1 | 14.952956199646 | 17.831842422485 | -0.16144636962523 | -2.8788862228394 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_speedup | 1 | 1 | 11.371434211731 | 13.560061454773 | -0.16140245752881 | -2.188627243042 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 3584)-triton]_tflops | 1 | 1 | 15.734259605408 | 18.66505241394 | -0.15702033637708 | -2.9307928085327 | |
tritonbench_gemm_fwd[x_(256, 256, 256)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.87671238183975 | 1.034653544426 | -0.15265125552132 | -0.15794116258621 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_speedup | 1 | 1 | 25.154708862305 | 29.665010452271 | -0.15204112593261 | -4.5103015899658 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-compiled]_tflops | 1 | 1 | 401.48468017578 | 473.37594604492 | -0.15186928374751 | -71.891265869141 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 7168, 8192)-triton]_speedup | 1 | 1 | 0.27478969097137 | 0.32345753908157 | -0.15046131943125 | -0.048667848110199 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 3584)-triton]_tflops | 1 | 1 | 3.9804947376251 | 4.6692314147949 | -0.14750536351389 | -0.6887366771698 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_tflops | 1 | 1 | 290.67184448242 | 338.86947631836 | -0.14223066757024 | -48.197631835938 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_speedup | 1 | 1 | 10.773795127869 | 12.553303718567 | -0.1417561966629 | -1.7795085906982 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_tflops | 1 | 1 | 370.63174438477 | 431.04516601562 | -0.14015566440355 | -60.413421630859 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-compiled]_speedup | 1 | 1 | 23.245239257812 | 27.030340194702 | -0.14003156858646 | -3.7851009368896 | |
tritonbench_flex_attention_bwd[x_average-compiled]_tflops | 1 | 1 | 168.55917358398 | 194.89813232422 | -0.13514218133409 | -26.338958740234 | |
tritonbench_flex_attention_bwd[compiled]-tflops-avg | 1 | 1 | 216.71894836426 | 250.58331298828 | -0.13514213783903 | -33.864364624023 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_speedup | 1 | 1 | 20.280294418335 | 23.408653259277 | -0.13364112861566 | -3.1283588409424 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_speedup | 1 | 1 | 27.321741104126 | 31.118991851807 | -0.1220235785839 | -3.7972507476807 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_speedup | 1 | 1 | 12.472326278687 | 14.193314552307 | -0.12125344416755 | -1.7209882736206 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 4096, 2304)-_triton]_speedup | 1 | 1 | 0.34353530406952 | 0.38920053839684 | -0.11733086114273 | -0.045665234327316 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 3584)-triton]_speedup | 1 | 1 | 0.35412150621414 | 0.40076339244843 | -0.11638260158776 | -0.046641886234283 | |
tritonbench_flex_attention_bwd[compiled]-speedup-avg | 1 | 1 | 14.457441329956 | 16.330188751221 | -0.1146800842167 | -1.8727474212646 | |
tritonbench_flex_attention_bwd[x_average-compiled]_speedup | 1 | 1 | 11.244676589966 | 12.701257705688 | -0.11468006944464 | -1.4565811157227 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-compiled]_tflops | 1 | 1 | 315.00180053711 | 354.81768798828 | -0.11221505803985 | -39.815887451172 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_tflops | 1 | 1 | 227.87390136719 | 256.38534545898 | -0.11120543586746 | -28.511444091797 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 4096, 2304)-_triton]_tflops | 1 | 1 | 1.4736390113831 | 1.6544852256775 | -0.1093066359782 | -0.18084621429443 | |
tritonbench_low_mem_dropout_fwd[x_8192-triton_dropout]_speedup | 1 | 1 | 1.1655173301697 | 1.3066666126251 | -0.10802241451006 | -0.14114928245544 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 1024)-triton]_speedup | 1 | 1 | 0.43308272957802 | 0.48513305187225 | -0.10729081865966 | -0.052050322294235 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 1024)-triton]_speedup | 1 | 1 | 0.7222221493721 | 0.80787402391434 | -0.10602132511605 | -0.085651874542236 | |
tritonbench_low_mem_dropout_fwd[x_2048-torch_dropout]_tflops | 1 | 1 | 0.00067368417512625 | 0.00075294118141755 | -0.10526321078903 | -0.0000792570062913 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 4096, 2048)-_triton]_speedup | 1 | 1 | 0.34268927574158 | 0.38288614153862 | -0.10498386187474 | -0.040196865797043 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 1024)-triton]_speedup | 1 | 1 | 0.44493392109871 | 0.49592167139053 | -0.10281412011872 | -0.050987750291824 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 4096, 2048)-_triton]_tflops | 1 | 1 | 21.902370452881 | 24.332437515259 | -0.099869446324646 | -2.4300670623779 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 1024)-triton]_tflops | 1 | 1 | 3.0795183181763 | 3.4211287498474 | -0.099853135222221 | -0.34161043167114 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 2304, 2304)-_triton]_speedup | 1 | 1 | 0.32127264142036 | 0.35587656497955 | -0.097235746785341 | -0.034603923559189 | |
tritonbench_flex_attention_fwd[compiled]-tflops-avg | 1 | 1 | 218.37632751465 | 241.76184082031 | -0.096729546839632 | -23.385513305664 | |
tritonbench_flex_attention_fwd[x_average-compiled]_tflops | 1 | 1 | 169.84826660156 | 188.03698730469 | -0.096729483724671 | -18.188720703125 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 1280, 8192)-triton]_tflops | 1 | 1 | 3.3046832084656 | 3.6497597694397 | -0.094547746364988 | -0.34507656097412 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 1280, 8192)-triton]_tflops | 1 | 1 | 0.82695269584656 | 0.91307556629181 | -0.094321733736688 | -0.086122870445251 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 1280, 8192)-triton]_speedup | 1 | 1 | 0.25590923428535 | 0.28228333592415 | -0.093431309193118 | -0.026374101638794 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 13312, 2048)-_triton]_speedup | 1 | 1 | 0.44009348750114 | 0.484345048666 | -0.091363711235895 | -0.044251561164856 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 13312, 2048)-_triton]_tflops | 1 | 1 | 3.9834856987 | 4.3551077842712 | -0.085330169534133 | -0.37162208557129 | |
tritonbench_gemm_fwd[x_(640, 640, 640)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.77635782957077 | 0.84444439411163 | -0.080628831235822 | -0.068086564540863 | |
tritonbench_low_mem_dropout_fwd[x_524288-triton_dropout]_speedup | 1 | 1 | 1.0486725568771 | 1.1298701763153 | -0.071864556778523 | -0.081197619438171 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 1024)-triton]_tflops | 1 | 1 | 12.264046669006 | 13.210406303406 | -0.071637435871706 | -0.94635963439941 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 1280, 8192)-triton]_tflops | 1 | 1 | 0.21305592358112 | 0.22946777939796 | -0.07152139555235 | -0.016411855816841 | |
tritonbench_gemm_fwd[x_(896, 896, 896)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.64908254146576 | 0.69907408952713 | -0.071511087036835 | -0.049991548061371 | |
tritonbench_softmax_fwd[x_2304.0-triton_softmax]_speedup | 1 | 1 | 3.6175634860992 | 3.8809525966644 | -0.06786712901146 | -0.26338911056519 | |
tritonbench_gemm_fwd[x_(1024, 1024, 1024)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.64439654350281 | 0.69063180685043 | -0.066946327824775 | -0.046235263347626 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 7168, 8192)-triton]_speedup | 1 | 1 | 0.27390822768211 | 0.29354265332222 | -0.066887811423281 | -0.019634425640106 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 7168, 8192)-triton]_tflops | 1 | 1 | 1.0143769979477 | 1.0870900154114 | -0.066887761301136 | -0.072713017463684 | |
tritonbench_rms_norm_bwd[x_(2048, 2048)-liger_rms]_speedup | 1 | 1 | 0.40108770132065 | 0.42877045273781 | -0.064563104198059 | -0.02768275141716 | |
tritonbench_gemm_fwd[x_(3968, 3968, 3968)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.9412767291069 | 1.0061401128769 | -0.064467545762113 | -0.064863383769989 | |
tritonbench_low_mem_dropout_fwd[x_32-torch_dropout]_tflops | 1 | 1 | 0.000011695906323439 | 0.000012500000593718 | -0.064327538566939 | -8.0409427027917e-7 | |
tritonbench_rms_norm_fwd[x_(2048, 1024)-liger_rms]_speedup | 1 | 1 | 3.7409639358521 | 3.9904458522797 | -0.062519809981907 | -0.24948191642761 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 7168, 8192)-triton]_speedup | 1 | 1 | 1.5582255125046 | 1.6620663404465 | -0.062476945363084 | -0.10384082794189 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 7168, 8192)-triton]_tflops | 1 | 1 | 43.416084289551 | 46.309349060059 | -0.062476904323478 | -2.8932647705078 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_tflops | 1 | 1 | 28.159139633179 | 29.99144744873 | -0.06109434426878 | -1.8323078155518 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 1024)-triton]_speedup | 1 | 1 | 1.6592245101929 | 1.765721321106 | -0.060313487547617 | -0.10649681091309 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 3584)-triton]_tflops | 1 | 1 | 1.0588620901108 | 1.1250815391541 | -0.058857466537994 | -0.066219449043274 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 1280, 8192)-triton]_speedup | 1 | 1 | 0.15772871673107 | 0.16753743588924 | -0.058546432360687 | -0.0098087191581726 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 4096, 6656)-_triton]_speedup | 1 | 1 | 0.60163551568985 | 0.6378778219223 | -0.056817003173481 | -0.036242306232452 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 4096, 6656)-_triton]_tflops | 1 | 1 | 15.924635887146 | 16.818614959717 | -0.0531541434721 | -0.8939790725708 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 2304, 2304)-_triton]_tflops | 1 | 1 | 1.6557754278183 | 1.7427499294281 | -0.049906472604675 | -0.086974501609802 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 3584)-triton]_speedup | 1 | 1 | 1.760048866272 | 1.8471986055374 | -0.047179409406325 | -0.087149739265442 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 1024)-triton]_tflops | 1 | 1 | 39.429412841797 | 41.374145507812 | -0.047003572935383 | -1.9447326660156 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 3584)-triton]_tflops | 1 | 1 | 47.681900024414 | 49.847415924072 | -0.043442891863376 | -2.1655158996582 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 1 | 1 | 0.9772726893425 | 1.0203803777695 | -0.042246685026621 | -0.043107688426971 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 1280, 8192)-triton]_speedup | 1 | 1 | 0.15019506216049 | 0.15651260316372 | -0.040364423538588 | -0.0063175410032272 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 3.0737152099609 | 3.2024571895599 | -0.040200999413419 | -0.128741979599 | |
tritonbench_softmax_fwd[x_2624.0-triton_softmax]_speedup | 1 | 1 | 3.7012090682983 | 3.8553569316864 | -0.0399827736107 | -0.15414786338806 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 512, 16, 512, 128) | noop-eager]_tflops | 1 | 1 | 3.0194535255432 | 3.1452057361603 | -0.039982189136723 | -0.12575221061707 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 1024)-triton]_tflops | 1 | 1 | 0.78840303421021 | 0.82048201560974 | -0.039097726445225 | -0.032078981399536 | |
tritonbench_softmax_fwd[x_2560.0-triton_softmax]_speedup | 1 | 1 | 3.7215909957886 | 3.8708415031433 | -0.038557638496316 | -0.14925050735474 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 5.2428798675537 | 5.4499793052673 | -0.038000041121893 | -0.20709943771362 | |
tritonbench_welford_fwd[x_1024-test_welford]_speedup | 1 | 1 | 0.5749539732933 | 0.59726840257645 | -0.037360806610368 | -0.022314429283142 | |
tritonbench_int4_gemm_fwd[x_average-triton]_speedup | 1 | 1 | 1.4225736856461 | 1.4773001670837 | -0.037044930107681 | -0.054726481437683 | |
tritonbench_int4_gemm_fwd[triton]-speedup-avg | 1 | 1 | 1.4225736856461 | 1.4773001670837 | -0.037044930107681 | -0.054726481437683 | |
tritonbench_rms_norm_bwd[x_(2048, 1024)-liger_rms]_speedup | 1 | 1 | 0.27284902334213 | 0.28308320045471 | -0.036152541359361 | -0.010234177112579 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 256, 16, 256, 128) | noop-eager]_tflops | 1 | 1 | 0.74164092540741 | 0.76901942491531 | -0.035601830878224 | -0.027378499507904 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 11.240477561951 | 11.6508436203 | -0.035222003807054 | -0.41036605834961 | |
tritonbench_rms_norm_fwd[x_(2048, 2048)-liger_rms]_speedup | 1 | 1 | 3.4962546825409 | 3.6165702342987 | -0.033267859868106 | -0.12031555175781 | |
tritonbench_gemm_fwd[x_(1920, 1920, 1920)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.92721170186996 | 0.95847743749619 | -0.032620210349339 | -0.031265735626221 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 1.4185281991959 | 1.4661297798157 | -0.032467508180481 | -0.047601580619812 | |
tritonbench_int4_gemm_fwd[x_average-triton]_tflops | 1 | 1 | 38.422355651855 | 39.672805786133 | -0.031519074829702 | -1.2504501342773 | |
tritonbench_int4_gemm_fwd[triton]-tflops-avg | 1 | 1 | 38.422355651855 | 39.672805786133 | -0.031519074829702 | -1.2504501342773 | |
tritonbench_softmax_fwd[x_2816.0-triton_softmax]_speedup | 1 | 1 | 4.0876941680908 | 4.2162551879883 | -0.030491754925964 | -0.12856101989746 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 1280, 8192)-triton]_tflops | 1 | 1 | 62.10046005249 | 64.004638671875 | -0.02975063462426 | -1.9041786193848 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 1280, 8192)-triton]_speedup | 1 | 1 | 2.1022531986237 | 2.1666667461395 | -0.029729328532244 | -0.064413547515869 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1 | 1 | 1.0305602550507 | 1.0620155334473 | -0.029618472993991 | -0.031455278396606 | |
tritonbench_addmm_fwd[x_(19747, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 400.54473876953 | 412.10546875 | -0.028052842917942 | -11.560729980469 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1 | 1 | 0.97025346755981 | 0.9976818561554 | -0.027492119282671 | -0.027428388595581 | |
tritonbench_grouped_gemm_fwd[x_128-triton]_speedup | 1 | 1 | 0.15998917818069 | 0.16427387297153 | -0.026082630873277 | -0.0042846947908401 | |
tritonbench_softmax_fwd[x_3456.0-triton_softmax]_speedup | 1 | 1 | 4.5356521606445 | 4.6566371917725 | -0.025981201915771 | -0.12098503112793 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 3584)-triton]_speedup | 1 | 1 | 2.1862864494324 | 2.241589307785 | -0.024671271477159 | -0.055302858352661 | |
tritonbench_softmax_fwd[x_2944.0-triton_softmax]_speedup | 1 | 1 | 4.2317380905151 | 4.338481426239 | -0.024603847576319 | -0.10674333572388 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 3584)-triton]_tflops | 1 | 1 | 64.036811828613 | 65.645896911621 | -0.024511586537908 | -1.6090850830078 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 256, 16, 256, 128) | noop-eager]_tflops | 1 | 1 | 5.4229383468628 | 5.5581302642822 | -0.02432327257391 | -0.13519191741943 | |
tritonbench_fp8_gemm_blockwise_fwd[x_average-_triton]_speedup | 1 | 1 | 0.71849012374878 | 0.73637920618057 | -0.024293301985779 | -0.017889082431793 | |
tritonbench_fp8_gemm_blockwise_fwd[_triton]-speedup-avg | 1 | 1 | 0.71849012374878 | 0.73637920618057 | -0.024293301985779 | -0.017889082431793 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 3584)-triton]_speedup | 1 | 1 | 2.2025487422943 | 2.2549140453339 | -0.023222749065718 | -0.052365303039551 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 3584)-triton]_tflops | 1 | 1 | 64.61498260498 | 66.149406433105 | -0.023196335551048 | -1.534423828125 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_speedup | 1 | 1 | 23.340023040771 | 23.890344619751 | -0.023035313543553 | -0.55032157897949 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 3584)-triton]_speedup | 1 | 1 | 2.2062826156616 | 2.2581899166107 | -0.022986242462283 | -0.051907300949097 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 3584)-triton]_tflops | 1 | 1 | 64.738258361816 | 66.260307312012 | -0.022970749939752 | -1.5220489501953 | |
tritonbench_fused_linear_jsd_fwd[x_(1024, 4096)-liger_lm_head_jsd]_speedup | 1 | 1 | 0.1776294708252 | 0.18172663450241 | -0.022545752241734 | -0.0040971636772156 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 3584)-triton]_tflops | 1 | 1 | 64.78099822998 | 66.269020080566 | -0.022454260660213 | -1.4880218505859 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 3584)-triton]_speedup | 1 | 1 | 2.2076418399811 | 2.2583112716675 | -0.022436867903063 | -0.050669431686401 | |
tritonbench_softmax_fwd[x_3392.0-triton_softmax]_speedup | 1 | 1 | 4.5094847679138 | 4.6102938652039 | -0.021866089285738 | -0.10080909729004 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 1280, 8192)-triton]_speedup | 1 | 1 | 2.198029756546 | 2.2459466457367 | -0.021334829694922 | -0.047916889190674 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 7168, 8192)-triton]_speedup | 1 | 1 | 2.1919093132019 | 2.2393078804016 | -0.021166614744912 | -0.047398567199707 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 8192, 13312)-_triton]_tflops | 1 | 1 | 1126.2078857422 | 1150.5166015625 | -0.021128522428359 | -24.308715820312 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 7168, 8192)-triton]_tflops | 1 | 1 | 64.89966583252 | 66.299942016602 | -0.021120322906638 | -1.400276184082 | |
tritonbench_addmm_fwd[x_(35541, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97830325365067 | 0.99926257133484 | -0.020974785091946 | -0.020959317684174 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 1280, 8192)-triton]_tflops | 1 | 1 | 64.70858001709 | 66.09228515625 | -0.020935955473304 | -1.3837051391602 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 1280, 8192)-triton]_tflops | 1 | 1 | 65.200729370117 | 66.590599060059 | -0.020871860436154 | -1.3898696899414 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 8192, 13312)-_triton]_speedup | 1 | 1 | 1.1740809679031 | 1.1989696025848 | -0.020758353362791 | -0.024888634681702 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 7168, 8192)-triton]_speedup | 1 | 1 | 2.2063279151917 | 2.2530641555786 | -0.020743413040966 | -0.046736240386963 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 1280, 8192)-triton]_speedup | 1 | 1 | 2.2030389308929 | 2.2496812343597 | -0.020732849949771 | -0.046642303466797 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 7168, 8192)-triton]_tflops | 1 | 1 | 65.384864807129 | 66.768981933594 | -0.020729942053654 | -1.3841171264648 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 7168, 8192)-triton]_speedup | 1 | 1 | 2.2032639980316 | 2.2498755455017 | -0.02071738926328 | -0.046611547470093 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 1280, 8192)-triton]_tflops | 1 | 1 | 65.363006591797 | 66.745697021484 | -0.02071579879138 | -1.3826904296875 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 7168, 8192)-triton]_tflops | 1 | 1 | 65.285461425781 | 66.660614013672 | -0.020629161735723 | -1.3751525878906 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 7168, 8192)-triton]_tflops | 1 | 1 | 65.39958190918 | 66.768783569336 | -0.020506613824025 | -1.3692016601562 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 7168, 8192)-triton]_speedup | 1 | 1 | 2.2067773342133 | 2.2529644966125 | -0.020500617061981 | -0.046187162399292 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 1280, 8192)-triton]_speedup | 1 | 1 | 2.1848373413086 | 2.2299630641937 | -0.020236085345856 | -0.045125722885132 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 3584)-triton]_speedup | 1 | 1 | 0.34448930621147 | 0.35131821036339 | -0.019437945288555 | -0.0068289041519165 | |
tritonbench_gemm_fwd[x_(3200, 3200, 3200)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.77564442157745 | 0.79094076156616 | -0.019339425570152 | -0.015296339988708 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1 | 1 | 1.0112220048904 | 1.0306122303009 | -0.018814278387517 | -0.019390225410461 | |
tritonbench_rope_fwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.828547000885 | 2.8826596736908 | -0.018771786798024 | -0.054112672805786 | |
tritonbench_addmm_fwd[x_(34238, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.93957775831223 | 0.95723766088486 | -0.018448817147778 | -0.017659902572632 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 13.617871284485 | 13.862194061279 | -0.017625115888176 | -0.24432277679443 | |
tritonbench_layer_norm_bwd[x_1024-liger_layer_norm]_speedup | 1 | 1 | 0.4849428832531 | 0.49359658360481 | -0.017531929189047 | -0.0086537003517151 | |
tritonbench_softmax_fwd[x_2240.0-triton_softmax]_speedup | 1 | 1 | 3.4922120571136 | 3.5535168647766 | -0.017251869062627 | -0.061304807662964 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 1024)-triton]_tflops | 1 | 1 | 62.091121673584 | 63.153854370117 | -0.016827677536592 | -1.0627326965332 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 1024)-triton]_speedup | 1 | 1 | 2.2468917369843 | 2.2852900028229 | -0.016802360221763 | -0.038398265838623 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 1024)-triton]_speedup | 1 | 1 | 2.2665507793427 | 2.3050208091736 | -0.01668966704241 | -0.038470029830933 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 1024)-triton]_tflops | 1 | 1 | 62.818809509277 | 63.872032165527 | -0.016489574866203 | -1.05322265625 | |
tritonbench_addmm_fwd[x_(33961, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.95396971702576 | 0.96959215402603 | -0.016112379762363 | -0.015622437000275 | |
tritonbench_rms_norm_fwd[x_(2048, 8192)-liger_rms]_speedup | 1 | 1 | 4.0878081321716 | 4.1530089378357 | -0.015699654549273 | -0.065200805664062 | |
tritonbench_rms_norm_fwd[liger_rms]-speedup-avg | 1 | 1 | 3.863664150238 | 3.9247057437897 | -0.015553164373718 | -0.061041593551636 | |
tritonbench_rms_norm_fwd[x_average-liger_rms]_speedup | 1 | 1 | 3.863664150238 | 3.9247057437897 | -0.015553164373718 | -0.061041593551636 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 1024)-triton]_speedup | 1 | 1 | 2.2667870521545 | 2.3020849227905 | -0.015333001092418 | -0.035297870635986 | |
tritonbench_addmm_fwd[x_(34533, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 419.75738525391 | 426.29138183594 | -0.015327536188723 | -6.5339965820312 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 1024)-triton]_tflops | 1 | 1 | 62.791000366211 | 63.758075714111 | -0.015167887943117 | -0.96707534790039 | |
tritonbench_addmm_fwd[x_(33961, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 406.67181396484 | 412.80456542969 | -0.014856307266031 | -6.1327514648438 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 1024)-triton]_speedup | 1 | 1 | 2.2711310386658 | 2.3050866127014 | -0.014730715040617 | -0.033955574035645 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 1024)-triton]_tflops | 1 | 1 | 62.93949508667 | 63.874828338623 | -0.014643221379705 | -0.93533325195312 | |
tritonbench_welford_fwd[x_2048-test_welford]_speedup | 1 | 1 | 0.56446379423141 | 0.57284736633301 | -0.014634914279626 | -0.008383572101593 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_tflops | 1 | 1 | 12.847255706787 | 13.036186218262 | -0.014492774827806 | -0.18893051147461 | |
tritonbench_softmax_fwd[x_4160.0-triton_softmax]_speedup | 1 | 1 | 4.6946330070496 | 4.762912273407 | -0.014335612843144 | -0.068279266357422 | |
tritonbench_kl_div_fwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 1 | 1 | 4.5417404174805 | 4.6071724891663 | -0.01420221878813 | -0.065432071685791 | |
tritonbench_swiglu_bwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1 | 1 | 0.91010457277298 | 0.92312401533127 | -0.014103676583061 | -0.013019442558289 | |
tritonbench_addmm_fwd[x_(20224, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.94524836540222 | 0.95875877141953 | -0.014091559232673 | -0.013510406017303 | |
tritonbench_addmm_fwd[x_(34308, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.92780488729477 | 0.9409584403038 | -0.013978888381921 | -0.013153553009033 | |
tritonbench_jsd_fwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 1 | 1 | 4.3638157844543 | 4.4245738983154 | -0.013731969508796 | -0.060758113861084 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 128, 16, 128, 128) | noop-eager]_tflops | 1 | 1 | 0.18656378984451 | 0.1891456246376 | -0.013649984227959 | -0.0025818347930908 | |
tritonbench_softmax_fwd[x_2432.0-triton_softmax]_speedup | 1 | 1 | 3.7875895500183 | 3.8399999141693 | -0.013648532636058 | -0.052410364151001 | |
tritonbench_welford_fwd[x_1536-test_welford]_speedup | 1 | 1 | 0.5701510310173 | 0.57784122228622 | -0.013308485051473 | -0.0076901912689209 | |
tritonbench_gemm_fwd[x_(1536, 1536, 1536)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.71613836288452 | 0.72573834657669 | -0.013227885418281 | -0.0095999836921692 | |
tritonbench_gemm_fwd[x_(384, 384, 384)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.88326847553253 | 0.89495801925659 | -0.013061555371916 | -0.01168954372406 | |
tritonbench_gemm_fwd[x_(2176, 2176, 2176)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.65899455547333 | 0.66747277975082 | -0.012701977570773 | -0.0084782242774963 | |
tritonbench_addmm_fwd[x_(34308, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 411.42822265625 | 416.71337890625 | -0.012682953122052 | -5.28515625 | |
tritonbench_addmm_fwd[x_(34238, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 408.49649047852 | 413.71688842773 | -0.012618285825986 | -5.2203979492188 | |
tritonbench_softmax_fwd[x_4224.0-triton_softmax]_speedup | 1 | 1 | 4.7268881797791 | 4.7869353294373 | -0.01254396508951 | -0.060047149658203 | |
tritonbench_gemm_fwd[x_average-triton_tutorial_matmul]_speedup | 1 | 1 | 0.80918508768082 | 0.81946325302124 | -0.012542557951842 | -0.010278165340424 | |
tritonbench_gemm_fwd[triton_tutorial_matmul]-speedup-avg | 1 | 1 | 0.80918508768082 | 0.81946325302124 | -0.012542557951842 | -0.010278165340424 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 2304, 16384)-_triton]_speedup | 1 | 1 | 0.77092003822327 | 0.78059870004654 | -0.012399023752788 | -0.0096786618232727 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.540071606636 | 1.5593444108963 | -0.012359555801515 | -0.019272804260254 | |
tritonbench_gemm_fwd[x_(3584, 3584, 3584)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.91369438171387 | 0.92469543218613 | -0.011896944755368 | -0.01100105047226 | |
tritonbench_addmm_fwd[x_(20203, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 400.21801757812 | 404.7844543457 | -0.011281156473658 | -4.5664367675781 | |
tritonbench_addmm_fwd[x_(34579, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 414.27392578125 | 418.86679077148 | -0.010964977628747 | -4.5928649902344 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 2304, 13312)-_triton]_speedup | 1 | 1 | 1.0860670804977 | 1.0980268716812 | -0.01089207513215 | -0.011959791183472 | |
tritonbench_swiglu_fwd[x_(4, 8192, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.2292054891586 | 1.2425323724747 | -0.010725582376174 | -0.01332688331604 | |
tritonbench_gemm_fwd[x_(1280, 1280, 1280)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.71816635131836 | 0.72580647468567 | -0.010526391860335 | -0.0076401233673096 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 128, 16, 128, 128) | noop-eager]_tflops | 1 | 1 | 1.3940353393555 | 1.4087373018265 | -0.010436269737407 | -0.014701962471008 | |
tritonbench_gemm_fwd[x_(2304, 2304, 2304)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.68742924928665 | 0.69463467597961 | -0.010372972934012 | -0.0072054266929626 | |
tritonbench_addmm_fwd[x_(33887, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 410.78796386719 | 415.0862121582 | -0.010355073633179 | -4.2982482910156 | |
tritonbench_jsd_fwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 1 | 1 | 4.2700200080872 | 4.3146076202393 | -0.010334105920303 | -0.0445876121521 | |
tritonbench_rope_bwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.6437911987305 | 3.6809666156769 | -0.0100993627022 | -0.037175416946411 | |
tritonbench_gemm_fwd[x_(3840, 3840, 3840)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.83101361989975 | 0.83908796310425 | -0.0096227613307988 | -0.0080743432044983 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 1 | 1 | 1.0134600400925 | 1.0232744216919 | -0.0095911530586283 | -0.0098143815994263 | |
tritonbench_addmm_fwd[x_(19747, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 382.10171508789 | 385.7451171875 | -0.0094451023156786 | -3.6434020996094 | |
tritonbench_layer_norm_fwd[x_2560-liger_layer_norm]_speedup | 1 | 1 | 1.2690168619156 | 1.2811017036438 | -0.0094331634200766 | -0.01208484172821 | |
tritonbench_addmm_fwd[x_(35405, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 431.31640625 | 435.41595458984 | -0.0094152460345774 | -4.0995483398438 | |
tritonbench_gemm_fwd[x_(2688, 2688, 2688)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.92427098751068 | 0.9330188035965 | -0.0093758197070577 | -0.0087478160858154 | |
tritonbench_kl_div_fwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 1 | 1 | 4.5090565681458 | 4.5509958267212 | -0.0092154025563357 | -0.041939258575439 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.4337615966797 | 1.4469134807587 | -0.009089613341693 | -0.013151884078979 | |
tritonbench_addmm_fwd[x_(20120, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 390.24069213867 | 393.8137512207 | -0.0090729667792347 | -3.5730590820312 | |
tritonbench_addmm_fwd[x_(33887, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 429.20040893555 | 433.10528564453 | -0.0090159987384437 | -3.9048767089844 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 13312, 13312)-_triton]_speedup | 1 | 1 | 0.81117695569992 | 0.81849122047424 | -0.0089362776183287 | -0.0073142647743225 | |
tritonbench_softmax_fwd[x_4800.0-triton_softmax]_speedup | 1 | 1 | 4.7331051826477 | 4.7757005691528 | -0.0089191912031208 | -0.042595386505127 | |
tritonbench_cross_entropy_fwd[liger_cross_entropy_loss]-speedup-avg | 1 | 1 | 1.088454246521 | 1.0982412099838 | -0.0089114880901016 | -0.0097869634628296 | |
tritonbench_cross_entropy_fwd[x_average-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.088454246521 | 1.0982412099838 | -0.0089114880901016 | -0.0097869634628296 | |
tritonbench_softmax_fwd[x_4544.0-triton_softmax]_speedup | 1 | 1 | 4.7233815193176 | 4.7651796340942 | -0.0087715716900893 | -0.041798114776611 | |
tritonbench_grouped_gemm_fwd[x_1024-triton]_speedup | 1 | 1 | 0.15219810605049 | 0.15353785455227 | -0.0087258513914011 | -0.0013397485017776 | |
tritonbench_addmm_fwd[x_(35410, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 432.66375732422 | 436.45999145508 | -0.0086977826265437 | -3.7962341308594 | |
tritonbench_addmm_fwd[x_(19735, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 383.07559204102 | 386.43188476562 | -0.0086853410831899 | -3.3562927246094 | |
tritonbench_softmax_fwd[x_7104.0-triton_softmax]_speedup | 1 | 1 | 4.7325577735901 | 4.7731070518494 | -0.0084953632547517 | -0.040549278259277 | |
tritonbench_addmm_fwd[x_(35541, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.00680541992 | 429.56735229492 | -0.0082886812882267 | -3.560546875 | |
tritonbench_softmax_fwd[x_5568.0-triton_softmax]_speedup | 1 | 1 | 4.6962127685547 | 4.7350053787231 | -0.008192727793462 | -0.038792610168457 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_tflops | 1 | 1 | 51.764122009277 | 52.185131072998 | -0.0080676057540563 | -0.4210090637207 | |
tritonbench_addmm_fwd[x_(19410, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 377.21380615234 | 380.21948242188 | -0.0079051085188641 | -3.0056762695312 | |
tritonbench_softmax_fwd[x_3200.0-triton_softmax]_speedup | 1 | 1 | 4.2482690811157 | 4.2807879447937 | -0.0075964668414674 | -0.032518863677979 | |
tritonbench_addmm_fwd[x_(34579, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96564328670502 | 0.97289973497391 | -0.0074585777013137 | -0.0072564482688904 | |
tritonbench_kl_div_fwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 1 | 1 | 4.194197177887 | 4.2250413894653 | -0.0073003335908793 | -0.030844211578369 | |
tritonbench_addmm_fwd[x_(20067, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.95089107751846 | 0.95787394046783 | -0.0072899602487994 | -0.0069828629493713 | |
tritonbench_softmax_fwd[x_8192.0-triton_softmax]_speedup | 1 | 1 | 4.6892170906067 | 4.7234978675842 | -0.0072574981377247 | -0.034280776977539 | |
tritonbench_kl_div_fwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 1 | 1 | 4.4066281318665 | 4.4381508827209 | -0.0071026766974552 | -0.031522750854492 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_tflops | 1 | 1 | 124.69131469727 | 125.57795715332 | -0.0070604943427466 | -0.88664245605469 | |
tritonbench_layer_norm_fwd[x_4608-liger_layer_norm]_speedup | 1 | 1 | 1.2801905870438 | 1.2891948223114 | -0.0069843867752241 | -0.0090042352676392 | |
tritonbench_addmm_fwd[x_(20120, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.93451678752899 | 0.94108283519745 | -0.0069771197846568 | -0.006566047668457 | |
tritonbench_addmm_fwd[x_(35410, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 424.95455932617 | 427.87881469727 | -0.0068343074502596 | -2.9242553710938 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.3602063655853 | 1.3695088624954 | -0.0067925788323449 | -0.0093024969100952 | |
tritonbench_softmax_fwd[x_6464.0-triton_softmax]_speedup | 1 | 1 | 4.8188977241516 | 4.8515009880066 | -0.0067202426497653 | -0.03260326385498 | |
tritonbench_softmax_fwd[x_3264.0-triton_softmax]_speedup | 1 | 1 | 4.3702988624573 | 4.3996157646179 | -0.0066635142087665 | -0.029316902160645 | |
tritonbench_addmm_fwd[x_(19735, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.94630879163742 | 0.95260852575302 | -0.0066131405979395 | -0.0062997341156006 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.2801901102066 | 1.2886205911636 | -0.0065422522461933 | -0.0084304809570312 | |
tritonbench_softmax_fwd[x_4736.0-triton_softmax]_speedup | 1 | 1 | 4.6713008880615 | 4.7011957168579 | -0.0063589840961497 | -0.029894828796387 | |
tritonbench_kl_div_fwd[liger_kl_div]-speedup-avg | 1 | 1 | 4.1495904922485 | 4.1760897636414 | -0.0063454745689461 | -0.026499271392822 | |
tritonbench_kl_div_fwd[x_average-liger_kl_div]_speedup | 1 | 1 | 4.1495904922485 | 4.1760897636414 | -0.0063454745689461 | -0.026499271392822 | |
tritonbench_addmm_fwd[x_(27456, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 406.73596191406 | 409.32583618164 | -0.0063271702850168 | -2.5898742675781 | |
tritonbench_addmm_fwd[x_(34839, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 418.61288452148 | 421.18612670898 | -0.0061095131684571 | -2.5732421875 | |
tritonbench_gemm_fwd[x_(3712, 3712, 3712)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.87552499771118 | 0.88076990842819 | -0.0059549158830488 | -0.0052449107170105 | |
tritonbench_softmax_fwd[x_7424.0-triton_softmax]_speedup | 1 | 1 | 4.7579255104065 | 4.7862777709961 | -0.0059236554889081 | -0.0283522605896 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 8192, 13312)-_triton]_speedup | 1 | 1 | 0.52093267440796 | 0.52389752864838 | -0.005659225475002 | -0.0029648542404175 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 4096, 16384)-_triton]_speedup | 1 | 1 | 1.2475334405899 | 1.2546100616455 | -0.0056404944228819 | -0.007076621055603 | |
tritonbench_addmm_fwd[x_(36032, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 1.0044182538986 | 1.0101033449173 | -0.0056282270990225 | -0.0056850910186768 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 8192, 16384)-_cutlass]_tflops | 1 | 1 | 3.601806640625 | 3.6220242977142 | -0.0055818667759883 | -0.020217657089233 | |
tritonbench_layer_norm_fwd[x_1536-liger_layer_norm]_speedup | 1 | 1 | 1.2841225862503 | 1.2913165092468 | -0.0055709990114793 | -0.007193922996521 | |
tritonbench_addmm_fwd[x_(20068, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 389.53945922852 | 391.70440673828 | -0.0055269929889049 | -2.1649475097656 | |
tritonbench_addmm_fwd[x_(34516, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 416.66650390625 | 418.92932128906 | -0.0054014299496863 | -2.2628173828125 | |
tritonbench_fused_linear_jsd_fwd[x_(2048, 4096)-liger_lm_head_jsd]_speedup | 1 | 1 | 0.29311120510101 | 0.2946372628212 | -0.0051794457550009 | -0.0015260577201843 | |
tritonbench_addmm_fwd[x_(35656, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 427.69805908203 | 429.90051269531 | -0.0051231704737282 | -2.2024536132812 | |
tritonbench_addmm_fwd[x_(34516, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 430.6223449707 | 432.81887817383 | -0.0050749477758288 | -2.196533203125 | |
tritonbench_softmax_fwd[x_7040.0-triton_softmax]_speedup | 1 | 1 | 4.7460007667542 | 4.7701406478882 | -0.0050606225090491 | -0.024139881134033 | |
tritonbench_addmm_fwd[x_(35901, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 434.55938720703 | 436.70962524414 | -0.0049237248570083 | -2.1502380371094 | |
tritonbench_gemm_fwd[x_(3328, 3328, 3328)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.8518425822258 | 0.85602420568466 | -0.0048849359995817 | -0.0041816234588623 | |
tritonbench_softmax_fwd[x_5120.0-triton_softmax]_speedup | 1 | 1 | 4.7266697883606 | 4.7497782707214 | -0.0048651707603458 | -0.02310848236084 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 13312, 13312)-_triton]_tflops | 1 | 1 | 50.016750335693 | 50.25789642334 | -0.0047981731192095 | -0.24114608764648 | |
tritonbench_softmax_fwd[x_6208.0-triton_softmax]_speedup | 1 | 1 | 4.8063020706177 | 4.8293981552124 | -0.0047823939655501 | -0.023096084594727 | |
tritonbench_addmm_fwd[x_(20203, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 388.78323364258 | 390.61779785156 | -0.0046965709680272 | -1.8345642089844 | |
tritonbench_gemm_fwd[x_(3072, 3072, 3072)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.71186918020248 | 0.71522235870361 | -0.0046883021207656 | -0.0033531785011292 | |
tritonbench_softmax_fwd[x_5632.0-triton_softmax]_speedup | 1 | 1 | 4.7012286186218 | 4.7225432395935 | -0.0045133776209775 | -0.02131462097168 | |
tritonbench_addmm_fwd[x_(35791, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 435.58709716797 | 437.53652954102 | -0.0044554734094816 | -1.9494323730469 | |
tritonbench_addmm_fwd[x_(34533, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 417.38412475586 | 419.23913574219 | -0.0044247085450268 | -1.8550109863281 | |
tritonbench_softmax_fwd[x_2880.0-triton_softmax]_speedup | 1 | 1 | 4.2774724960327 | 4.2964482307434 | -0.004416609648619 | -0.018975734710693 | |
tritonbench_softmax_fwd[x_6720.0-triton_softmax]_speedup | 1 | 1 | 4.775661945343 | 4.7968378067017 | -0.0044145460430323 | -0.021175861358643 | |
tritonbench_addmm_fwd[x_(20224, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 388.88293457031 | 390.56307983398 | -0.0043018537860416 | -1.6801452636719 | |
tritonbench_addmm_fwd[x_(20068, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.93328076601028 | 0.93727666139603 | -0.0042633040491913 | -0.0039958953857422 | |
tritonbench_addmm_fwd[x_(33894, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.94539171457291 | 0.94937020540237 | -0.0041906632489921 | -0.0039784908294678 | |
tritonbench_addmm_fwd[x_(27456, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97438991069794 | 0.97847175598145 | -0.0041716536614938 | -0.0040818452835083 | |
tritonbench_addmm_fwd[x_(19410, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.94861668348312 | 0.95258963108063 | -0.0041706811284485 | -0.0039729475975037 | |
tritonbench_addmm_fwd[x_(35917, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 1.0036559104919 | 1.0078086853027 | -0.004120598354978 | -0.004152774810791 | |
tritonbench_addmm_fwd[x_(35656, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96267384290695 | 0.96665042638779 | -0.0041137761617659 | -0.003976583480835 | |
tritonbench_softmax_fwd[x_6400.0-triton_softmax]_speedup | 1 | 1 | 4.8012089729309 | 4.8209953308105 | -0.0041042059827741 | -0.019786357879639 | |
tritonbench_softmax_fwd[x_5504.0-triton_softmax]_speedup | 1 | 1 | 4.7005157470703 | 4.7198567390442 | -0.0040977921668431 | -0.019340991973877 | |
tritonbench_jsd_fwd[liger_jsd]-speedup-avg | 1 | 1 | 1.8437962532043 | 1.8513215780258 | -0.0040648393616721 | -0.0075253248214722 | |
tritonbench_jsd_fwd[x_average-liger_jsd]_speedup | 1 | 1 | 1.8437962532043 | 1.8513215780258 | -0.0040648393616721 | -0.0075253248214722 | |
tritonbench_softmax_fwd[x_7360.0-triton_softmax]_speedup | 1 | 1 | 4.7780427932739 | 4.7974448204041 | -0.0040442418529981 | -0.019402027130127 | |
tritonbench_softmax_fwd[x_5056.0-triton_softmax]_speedup | 1 | 1 | 4.6975698471069 | 4.7166066169739 | -0.0040361156680811 | -0.019036769866943 | |
tritonbench_gemm_fwd[x_(1664, 1664, 1664)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.82526880502701 | 0.82861000299454 | -0.0040322925808938 | -0.0033411979675293 | |
tritonbench_grouped_gemm_fwd[triton]-speedup-avg | 1 | 1 | 0.17178927361965 | 0.17247937619686 | -0.0040010730118934 | -0.00069010257720947 | |
tritonbench_grouped_gemm_fwd[x_average-triton]_speedup | 1 | 1 | 0.17178927361965 | 0.17247937619686 | -0.0040010730118934 | -0.00069010257720947 | |
tritonbench_addmm_fwd[x_(35249, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98031491041183 | 0.98425197601318 | -0.0040000586204524 | -0.0039370656013489 | |
tritonbench_jsd_bwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 1 | 1 | 5.831259727478 | 5.8544912338257 | -0.003968151188515 | -0.023231506347656 | |
tritonbench_addmm_fwd[x_(36032, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 434.86044311523 | 436.57498168945 | -0.0039272488029063 | -1.7145385742188 | |
tritonbench_softmax_fwd[x_2688.0-triton_softmax]_speedup | 1 | 1 | 3.8132045269012 | 3.8280253410339 | -0.0038716604025112 | -0.01482081413269 | |
tritonbench_softmax_fwd[x_4864.0-triton_softmax]_speedup | 1 | 1 | 4.7352232933044 | 4.7533717155457 | -0.003818010315048 | -0.018148422241211 | |
tritonbench_addmm_fwd[x_(34181, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 426.98458862305 | 428.61807250977 | -0.0038110476237129 | -1.6334838867188 | |
tritonbench_addmm_fwd[x_(35380, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 441.4016418457 | 443.08807373047 | -0.0038060872877194 | -1.6864318847656 | |
tritonbench_addmm_fwd[x_(19410, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 397.64617919922 | 399.14297485352 | -0.0037500237974781 | -1.4967956542969 | |
tritonbench_rope_fwd[liger_rotary_pos_emb]-speedup-avg | 1 | 1 | 2.8306114673615 | 2.8411982059479 | -0.0037261527774666 | -0.010586738586426 | |
tritonbench_rope_fwd[x_average-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.8306114673615 | 2.8411982059479 | -0.0037261527774666 | -0.010586738586426 | |
tritonbench_addmm_fwd[x_(34579, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 429.01342773438 | 430.53436279297 | -0.0035326682142794 | -1.5209350585938 | |
tritonbench_layer_norm_bwd[x_9728-liger_layer_norm]_speedup | 1 | 1 | 0.1488119661808 | 0.14933185279369 | -0.0034814180843948 | -0.00051988661289215 | |
tritonbench_softmax_fwd[x_7872.0-triton_softmax]_speedup | 1 | 1 | 4.7300968170166 | 4.7465920448303 | -0.0034751728520015 | -0.016495227813721 | |
tritonbench_addmm_fwd[x_(35561, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 432.78793334961 | 434.29290771484 | -0.0034653440995692 | -1.5049743652344 | |
tritonbench_rope_fwd[x_(8192, 4096)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.774352312088 | 2.7839529514313 | -0.0034485637906797 | -0.0096006393432617 | |
tritonbench_addmm_fwd[x_(35884, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 430.32794189453 | 431.80239868164 | -0.0034146563141176 | -1.4744567871094 | |
tritonbench_rope_fwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.9231371879578 | 2.9330217838287 | -0.0033701065315882 | -0.0098845958709717 | |
tritonbench_jsd_fwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 1 | 1 | 0.59508979320526 | 0.59697759151459 | -0.0031622599175568 | -0.0018877983093262 | |
tritonbench_addmm_fwd[x_average-triton_addmm]_tflops | 1 | 1 | 413.62057495117 | 414.92672729492 | -0.0031479108426332 | -1.30615234375 | |
tritonbench_addmm_fwd[triton_addmm]-tflops-avg | 1 | 1 | 413.62057495117 | 414.92672729492 | -0.0031479108426332 | -1.30615234375 | |
tritonbench_addmm_fwd[x_(35504, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 416.62548828125 | 417.92211914062 | -0.0031025657652226 | -1.296630859375 | |
tritonbench_addmm_fwd[x_(34839, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97116327285767 | 0.97418242692947 | -0.0030991670434088 | -0.0030191540718079 | |
tritonbench_rms_norm_fwd[x_(2048, 16384)-liger_rms]_speedup | 1 | 1 | 3.9816236495972 | 3.9939901828766 | -0.0030962853470291 | -0.012366533279419 | |
tritonbench_softmax_fwd[x_5184.0-triton_softmax]_speedup | 1 | 1 | 4.7012138366699 | 4.7157759666443 | -0.0030879605132573 | -0.014562129974365 | |
tritonbench_addmm_fwd[x_(34839, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 431.04275512695 | 432.34829711914 | -0.0030196533694864 | -1.3055419921875 | |
tritonbench_addmm_fwd[x_(35791, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 430.68328857422 | 431.9518737793 | -0.0029368670032123 | -1.2685852050781 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 4096)-liger_cross_entropy_loss]_speedup | 1 | 1 | 0.59793102741241 | 0.59961903095245 | -0.0028151266936238 | -0.0016880035400391 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 8192, 2048)-_triton]_speedup | 1 | 1 | 1.3148661851883 | 1.3185391426086 | -0.0027856263812407 | -0.0036729574203491 | |
tritonbench_addmm_fwd[x_(35405, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.76956176758 | 427.92358398438 | -0.0026967950820841 | -1.1540222167969 | |
tritonbench_kl_div_bwd[x_(8, 512, 32768)-liger_kl_div]_speedup | 1 | 1 | 1.0351914167404 | 1.037954211235 | -0.0026617691461953 | -0.0027627944946289 | |
tritonbench_jsd_bwd[x_(4, 2048, 32768)-liger_jsd]_speedup | 1 | 1 | 5.7887377738953 | 5.8039908409119 | -0.0026280308557836 | -0.015253067016602 | |
tritonbench_addmm_fwd[x_(15168, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 383.63180541992 | 384.62106323242 | -0.0025720323379747 | -0.9892578125 | |
tritonbench_addmm_fwd[x_(20116, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 417.50250244141 | 418.56262207031 | -0.0025327622988948 | -1.0601196289062 | |
tritonbench_addmm_fwd[x_(35504, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.95179003477097 | 0.95403397083282 | -0.0023520504829616 | -0.0022439360618591 | |
tritonbench_fused_linear_cross_entropy_fwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 0.34759637713432 | 0.34840965270996 | -0.0023342509867683 | -0.00081327557563782 | |
tritonbench_rope_bwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.6788063049316 | 3.6873371601105 | -0.0023135544183807 | -0.008530855178833 | |
tritonbench_fused_linear_jsd_fwd[liger_lm_head_jsd]-speedup-avg | 1 | 1 | 0.32255062460899 | 0.32329457998276 | -0.0023011687167899 | -0.00074395537376404 | |
tritonbench_fused_linear_jsd_fwd[x_average-liger_lm_head_jsd]_speedup | 1 | 1 | 0.32255062460899 | 0.32329457998276 | -0.0023011687167899 | -0.00074395537376404 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 1 | 1 | 1.163771033287 | 1.1664091348648 | -0.0022617291813859 | -0.0026381015777588 | |
tritonbench_addmm_fwd[x_(35503, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98901098966599 | 0.99121099710464 | -0.0022195147603144 | -0.0022000074386597 | |
tritonbench_addmm_fwd[x_(27456, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 417.42630004883 | 418.33178710938 | -0.0021645189020985 | -0.90548706054688 | |
tritonbench_geglu_bwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 1 | 1 | 1.001692533493 | 1.0038558244705 | -0.0021549817461278 | -0.002163290977478 | |
tritonbench_addmm_fwd[x_(20120, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 417.58554077148 | 418.46875 | -0.0021105739162497 | -0.88320922851562 | |
tritonbench_addmm_fwd[x_(19735, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 404.81036376953 | 405.65655517578 | -0.0020859798651185 | -0.84619140625 | |
tritonbench_layer_norm_fwd[x_7680-liger_layer_norm]_speedup | 1 | 1 | 1.5593526363373 | 1.5626120567322 | -0.0020858794611592 | -0.0032594203948975 | |
tritonbench_addmm_fwd[x_(34181, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 413.53625488281 | 414.35189819336 | -0.0019684797248503 | -0.81564331054688 | |
tritonbench_addmm_fwd[x_(35380, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 425.73760986328 | 426.57281494141 | -0.0019579425806582 | -0.835205078125 | |
tritonbench_addmm_fwd[x_(35884, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96121948957443 | 0.96304452419281 | -0.0018950677487184 | -0.0018250346183777 | |
tritonbench_rope_fwd[x_(8192, 8192)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.78497838974 | 2.7901020050049 | -0.0018363541030765 | -0.0051236152648926 | |
tritonbench_addmm_fwd[triton_addmm]-speedup-avg | 1 | 1 | 0.97238206863403 | 0.97404986619949 | -0.0017122301674015 | -0.0016677975654602 | |
tritonbench_addmm_fwd[x_average-triton_addmm]_speedup | 1 | 1 | 0.97238206863403 | 0.97404986619949 | -0.0017122301674015 | -0.0016677975654602 | |
tritonbench_layer_norm_fwd[x_14336-liger_layer_norm]_speedup | 1 | 1 | 1.5907131433487 | 1.5933464765549 | -0.0016527059524872 | -0.0026333332061768 | |
tritonbench_layer_norm_fwd[x_1024-liger_layer_norm]_speedup | 1 | 1 | 1.2959001064301 | 1.2980251312256 | -0.0016371214581383 | -0.0021250247955322 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.8119332790375 | 1.8148946762085 | -0.0016317184737173 | -0.0029613971710205 | |
tritonbench_softmax_fwd[x_7232.0-triton_softmax]_speedup | 1 | 1 | 4.7790474891663 | 4.786684513092 | -0.0015954725875276 | -0.0076370239257812 | |
tritonbench_addmm_fwd[x_(35884, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 447.68954467773 | 448.37219238281 | -0.0015225023243531 | -0.68264770507812 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 32768)-liger_cross_entropy_loss]_speedup | 1 | 1 | 2.0630431175232 | 2.0659890174866 | -0.0014259030122836 | -0.0029458999633789 | |
tritonbench_addmm_fwd[aten_addmm]-tflops-avg | 1 | 1 | 425.37796020508 | 425.97244262695 | -0.001395588921689 | -0.594482421875 | |
tritonbench_addmm_fwd[x_average-aten_addmm]_tflops | 1 | 1 | 425.37796020508 | 425.97244262695 | -0.001395588921689 | -0.594482421875 | |
tritonbench_rms_norm_fwd[x_(2048, 32768)-liger_rms]_speedup | 1 | 1 | 3.5887908935547 | 3.5937695503235 | -0.0013853578252814 | -0.0049786567687988 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 8192, 2048)-_triton]_tflops | 1 | 1 | 991.45135498047 | 992.82647705078 | -0.0013850578143296 | -1.3751220703125 | |
tritonbench_gemm_fwd[x_(2432, 2432, 2432)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.79625672101974 | 0.79734045267105 | -0.0013591830787911 | -0.0010837316513062 | |
tritonbench_addmm_fwd[x_(33887, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.95710057020187 | 0.95839565992355 | -0.0013513100860484 | -0.0012950897216797 | |
tritonbench_softmax_fwd[x_8384.0-triton_softmax]_speedup | 1 | 1 | 4.7408828735352 | 4.7471313476562 | -0.0013162631626316 | -0.0062484741210938 | |
tritonbench_softmax_fwd[x_6784.0-triton_softmax]_speedup | 1 | 1 | 4.7854399681091 | 4.7915558815002 | -0.0012763940445162 | -0.0061159133911133 | |
tritonbench_addmm_fwd[x_(20068, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 417.38723754883 | 417.91757202148 | -0.0012689929980474 | -0.53033447265625 | |
tritonbench_gemm_fwd[x_(1792, 1792, 1792)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.88695651292801 | 0.88807791471481 | -0.0012627290558896 | -0.0011214017868042 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 12.91349697113 | 12.929420471191 | -0.0012315710589284 | -0.015923500061035 | |
tritonbench_softmax_fwd[x_4352.0-triton_softmax]_speedup | 1 | 1 | 4.7289991378784 | 4.7348227500916 | -0.001229953584434 | -0.0058236122131348 | |
tritonbench_addmm_fwd[x_(35917, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 430.40875244141 | 430.9338684082 | -0.0012185534841728 | -0.52511596679688 | |
tritonbench_cross_entropy_bwd[liger_cross_entropy_loss]-speedup-avg | 1 | 1 | 1.8443877696991 | 1.8465194702148 | -0.0011544424795581 | -0.0021317005157471 | |
tritonbench_cross_entropy_bwd[x_average-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.8443877696991 | 1.8465194702148 | -0.0011544424795581 | -0.0021317005157471 | |
tritonbench_softmax_fwd[x_6336.0-triton_softmax]_speedup | 1 | 1 | 4.7975540161133 | 4.803083896637 | -0.0011513187449325 | -0.0055298805236816 | |
tritonbench_flex_attention_bwd[x_average-eager]_tflops | 1 | 1 | 13.841172218323 | 13.85685634613 | -0.001131867677332 | -0.015684127807617 | |
tritonbench_flex_attention_bwd[eager]-tflops-avg | 1 | 1 | 17.795793533325 | 17.815958023071 | -0.0011318218037998 | -0.020164489746094 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 2304, 13312)-_triton]_tflops | 1 | 1 | 979.38598632812 | 980.48657226562 | -0.0011224895563402 | -1.1005859375 | |
tritonbench_softmax_fwd[x_5696.0-triton_softmax]_speedup | 1 | 1 | 4.7087745666504 | 4.7139625549316 | -0.0011005578047756 | -0.00518798828125 | |
tritonbench_rope_bwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.1907494068146 | 2.1930866241455 | -0.001065720480532 | -0.0023372173309326 | |
tritonbench_addmm_fwd[x_(33660, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 438.525390625 | 438.99066162109 | -0.001059865361089 | -0.46527099609375 | |
tritonbench_fused_linear_cross_entropy_bwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 163.07585144043 | 163.24407958984 | -0.001030531397137 | -0.16822814941406 | |
tritonbench_addmm_fwd[x_(35656, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 444.28137207031 | 444.7321472168 | -0.001013587952446 | -0.45077514648438 | |
tritonbench_softmax_fwd[x_8000.0-triton_softmax]_speedup | 1 | 1 | 4.7050309181213 | 4.7097458839417 | -0.0010011083265423 | -0.0047149658203125 | |
tritonbench_swiglu_fwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.0251451730728 | 1.0261573791504 | -0.00098640432563448 | -0.0010122060775757 | |
tritonbench_addmm_fwd[x_(35678, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98730146884918 | 0.9882755279541 | -0.00098561491949103 | -0.00097405910491943 | |
tritonbench_jsd_bwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 1 | 1 | 5.8248076438904 | 5.8303127288818 | -0.00094421778855606 | -0.0055050849914551 | |
tritonbench_softmax_fwd[x_6144.0-triton_softmax]_speedup | 1 | 1 | 4.8020339012146 | 4.8065719604492 | -0.0009441363349931 | -0.0045380592346191 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 128, 16, 128, 128) | noop-compiled]_speedup | 1 | 1 | 68.594589233398 | 68.653251647949 | -0.00085447394176753 | -0.058662414550781 | |
tritonbench_grouped_gemm_fwd[x_512-triton]_speedup | 1 | 1 | 0.19722293317318 | 0.19738560914993 | -0.00082415317638312 | -0.00016267597675323 | |
tritonbench_softmax_fwd[x_8064.0-triton_softmax]_speedup | 1 | 1 | 4.7106871604919 | 4.7144875526428 | -0.00080610927665903 | -0.0038003921508789 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 2304, 16384)-_triton]_tflops | 1 | 1 | 28.586698532104 | 28.608362197876 | -0.00075724942314568 | -0.021663665771484 | |
tritonbench_addmm_fwd[x_(35504, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 437.72833251953 | 438.0578918457 | -0.00075231911650608 | -0.32955932617188 | |
tritonbench_layer_norm_fwd[x_14848-liger_layer_norm]_speedup | 1 | 1 | 1.5790971517563 | 1.5802351236343 | -0.00072012820183022 | -0.0011379718780518 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 29.617115020752 | 29.638286590576 | -0.0007143317735159 | -0.021171569824219 | |
tritonbench_softmax_fwd[x_6656.0-triton_softmax]_speedup | 1 | 1 | 4.8014569282532 | 4.8046636581421 | -0.0006674202643679 | -0.003206729888916 | |
tritonbench_rope_bwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.9488558769226 | 3.9514882564545 | -0.00066617420096354 | -0.0026323795318604 | |
tritonbench_softmax_fwd[x_6080.0-triton_softmax]_speedup | 1 | 1 | 4.6886825561523 | 4.6915149688721 | -0.00060373093521378 | -0.0028324127197266 | |
tritonbench_flex_attention_fwd[x_average-eager]_tflops | 1 | 1 | 5.9016289710999 | 5.9051160812378 | -0.00059052355448506 | -0.0034871101379395 | |
tritonbench_flex_attention_fwd[eager]-tflops-avg | 1 | 1 | 7.5878086090088 | 7.5922918319702 | -0.00059049665906512 | -0.0044832229614258 | |
tritonbench_softmax_fwd[x_8128.0-triton_softmax]_speedup | 1 | 1 | 4.7288055419922 | 4.7313995361328 | -0.00054825091832029 | -0.002593994140625 | |
tritonbench_swiglu_fwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.027358174324 | 1.0279189348221 | -0.00054552988475101 | -0.00056076049804688 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 4096, 16384)-_triton]_tflops | 1 | 1 | 1160.9588623047 | 1161.4886474609 | -0.00045612598746284 | -0.52978515625 | |
tritonbench_softmax_fwd[x_7936.0-triton_softmax]_speedup | 1 | 1 | 4.71284532547 | 4.7148680686951 | -0.00042901374876805 | -0.0020227432250977 | |
tritonbench_softmax_fwd[x_5760.0-triton_softmax]_speedup | 1 | 1 | 4.6898474693298 | 4.6916513442993 | -0.00038448615148571 | -0.0018038749694824 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 8192, 13312)-_cutlass]_tflops | 1 | 1 | 959.22509765625 | 959.58782958984 | -0.00037800805971955 | -0.36273193359375 | |
tritonbench_softmax_fwd[x_5440.0-triton_softmax]_speedup | 1 | 1 | 4.6911587715149 | 4.6928339004517 | -0.00035695466157589 | -0.0016751289367676 | |
tritonbench_addmm_fwd[x_(34516, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.9675914645195 | 0.96790915727615 | -0.00032822579915131 | -0.00031769275665283 | |
tritonbench_rope_fwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.7579574584961 | 2.7588572502136 | -0.000326146529495 | -0.0008997917175293 | |
tritonbench_layer_norm_fwd[x_15360-liger_layer_norm]_speedup | 1 | 1 | 1.5953041315079 | 1.5957839488983 | -0.00030067816559575 | -0.00047981739044189 | |
tritonbench_jsd_bwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 1 | 1 | 6.0247311592102 | 6.0264863967896 | -0.00029125388556104 | -0.0017552375793457 | |
tritonbench_addmm_fwd[x_(33660, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 470.16900634766 | 470.30261230469 | -0.00028408508380705 | -0.13360595703125 | |
tritonbench_rope_fwd[x_(8192, 16384)-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.0554838180542 | 3.0563411712646 | -0.00028051619973253 | -0.00085735321044922 | |
tritonbench_addmm_fwd[x_(35916, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 438.41061401367 | 438.51943969727 | -0.00024816615580116 | -0.10882568359375 | |
tritonbench_addmm_fwd[x_(35678, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 428.37994384766 | 428.48458862305 | -0.00024422062816053 | -0.10464477539062 | |
tritonbench_geglu_fwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1 | 1 | 1.004363656044 | 1.0046082735062 | -0.00024349536890082 | -0.0002446174621582 | |
tritonbench_jsd_bwd[x_average-liger_jsd]_speedup | 1 | 1 | 5.909158706665 | 5.9105486869812 | -0.00023516942161795 | -0.0013899803161621 | |
tritonbench_jsd_bwd[liger_jsd]-speedup-avg | 1 | 1 | 5.909158706665 | 5.9105486869812 | -0.00023516942161795 | -0.0013899803161621 | |
tritonbench_kl_div_fwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 1 | 1 | 3.3659336566925 | 3.3666036128998 | -0.00019900062030122 | -0.00066995620727539 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-eager]_tflops | 1 | 1 | 29.741235733032 | 29.746559143066 | -0.00017895885062123 | -0.0053234100341797 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-eager]_tflops | 1 | 1 | 15.882374763489 | 15.884665489197 | -0.00014420988024996 | -0.0022907257080078 | |
tritonbench_softmax_fwd[x_7552.0-triton_softmax]_speedup | 1 | 1 | 4.7670950889587 | 4.7675132751465 | -0.000087715788841992 | -0.00041818618774414 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 2048, 16, 2048, 128) | noop-eager]_tflops | 1 | 1 | 28.888723373413 | 28.89012336731 | -0.000048459256427701 | -0.0013999938964844 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 27.634229660034 | 27.634943008423 | -0.000025813275187666 | -0.00071334838867188 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 29.539953231812 | 29.540601730347 | -0.000021952786915984 | -0.00064849853515625 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 29.343978881836 | 29.344501495361 | -0.000017809589488962 | -0.00052261352539062 | |
tritonbench_geglu_bwd[x_(8, 1024, 4096)-liger_geglu]_speedup | 1 | 1 | 1.0050340890884 | 1.0050485134125 | -0.000014351868435355 | -0.000014424324035645 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 29.635786056519 | 29.635967254639 | -0.0000061141287733346 | -0.00018119812011719 | |
tritonbench_layer_norm_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_kl_div_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_jsd_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_kl_div_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_layer_norm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_jsd_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 25.10485458374 | 25.10485458374 | 0 | 0 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 3.7033462524414 | 3.7033462524414 | 0 | 0 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 27.862516403198 | 27.862516403198 | 0 | 0 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 21.575637817383 | 21.575637817383 | 0 | 0 | |
tritonbench_welford_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_swiglu_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_swiglu_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_rms_norm_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_rms_norm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_low_mem_dropout_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_rope_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_rope_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_softmax_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_bf16_flex_attention_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_bf16_flex_attention_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_bf16_ragged_attention_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_cross_entropy_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_cross_entropy_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_embedding_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_embedding_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_addmm_fwd[x_(35249, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.45736694336 | 426.45736694336 | 0 | 0 | |
tritonbench_fused_linear_jsd_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fused_linear_cross_entropy_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_geglu_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fused_linear_cross_entropy_bwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_geglu_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_int4_gemm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fp16_addmm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fp16_gemm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fp16_grouped_gemm_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_fp8_gemm_blockwise_fwd-pass | 1 | 1 | 1 | 1 | 0 | 0 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 29.635152816772 | 29.634744644165 | 0.000013773447766227 | 0.00040817260742188 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 29.342685699463 | 29.342220306396 | 0.00001586086743084 | 0.00046539306640625 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 29.669473648071 | 29.668956756592 | 0.000017421963425706 | 0.00051689147949219 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 29.336462020874 | 29.335664749146 | 0.000027177557942976 | 0.00079727172851562 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 29.608737945557 | 29.60733795166 | 0.000047285368876126 | 0.0013999938964844 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 27.712841033936 | 27.710382461548 | 0.000088723870596407 | 0.0024585723876953 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 7168, 8192)-tinygemm]_tflops | 1 | 1 | 29.631248474121 | 29.628580093384 | 0.00009006104001253 | 0.0026683807373047 | |
tritonbench_softmax_fwd[x_5824.0-triton_softmax]_speedup | 1 | 1 | 4.7046537399292 | 4.7039470672607 | 0.00015022972375166 | 0.00070667266845703 | |
tritonbench_softmax_fwd[x_average-triton_softmax]_speedup | 1 | 1 | 4.6013903617859 | 4.6006407737732 | 0.00016293121970498 | 0.00074958801269531 | |
tritonbench_softmax_fwd[triton_softmax]-speedup-avg | 1 | 1 | 4.6013903617859 | 4.6006407737732 | 0.00016293121970498 | 0.00074958801269531 | |
tritonbench_int4_gemm_fwd[x_(1, 4096, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 29.290222167969 | 29.285425186157 | 0.00016380099592308 | 0.0047969818115234 | |
tritonbench_int4_gemm_fwd[x_(4, 4096, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 27.700441360474 | 27.695796966553 | 0.00016769309532805 | 0.0046443939208984 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 4096, 16, 4096, 128) | noop-eager]_tflops | 1 | 1 | 15.898512840271 | 15.895292282104 | 0.00020261081767774 | 0.0032205581665039 | |
tritonbench_int4_gemm_fwd[x_(64, 4096, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 27.71559715271 | 27.709959030151 | 0.00020346917700091 | 0.0056381225585938 | |
tritonbench_addmm_fwd[x_(35503, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.27874755859 | 426.17468261719 | 0.00024418377170407 | 0.10406494140625 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 131072)-liger_cross_entropy_loss]_speedup | 1 | 1 | 2.1176381111145 | 2.1171009540558 | 0.00025372293073071 | 0.00053715705871582 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 8192, 16384)-_triton]_tflops | 1 | 1 | 2.4049909114838 | 2.404301404953 | 0.0002867804050446 | 0.00068950653076172 | |
tritonbench_rope_fwd[x_(512, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.7601881027222 | 2.759259223938 | 0.00033664063750197 | 0.00092887878417969 | |
tritonbench_addmm_fwd[x_(20211, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97277027368546 | 0.97239744663239 | 0.0003834101522595 | 0.00037282705307007 | |
tritonbench_geglu_bwd[liger_geglu]-speedup-avg | 1 | 1 | 1.0043283700943 | 1.0039280653 | 0.00039873852335417 | 0.00040030479431152 | |
tritonbench_geglu_bwd[x_average-liger_geglu]_speedup | 1 | 1 | 1.0043283700943 | 1.0039280653 | 0.00039873852335417 | 0.00040030479431152 | |
tritonbench_addmm_fwd[x_(20211, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 403.13772583008 | 402.97424316406 | 0.00040569011243001 | 0.16348266601562 | |
tritonbench_softmax_fwd[x_7616.0-triton_softmax]_speedup | 1 | 1 | 4.748733997345 | 4.7467141151428 | 0.00042553272709318 | 0.0020198822021484 | |
tritonbench_int4_gemm_fwd[x_(16, 4096, 1280, 8192)-tinygemm]_tflops | 1 | 1 | 29.663259506226 | 29.64923286438 | 0.00047308616414675 | 0.014026641845703 | |
tritonbench_addmm_fwd[x_(35844, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96374332904816 | 0.96328711509705 | 0.00047360121812164 | 0.00045621395111084 | |
tritonbench_addmm_fwd[x_(33894, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 411.78656005859 | 411.58316040039 | 0.00049418848430353 | 0.20339965820312 | |
tritonbench_geglu_bwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 1 | 1 | 1.0048246383667 | 1.0042968988419 | 0.00052548158363318 | 0.00052773952484131 | |
tritonbench_layer_norm_fwd[x_12800-liger_layer_norm]_speedup | 1 | 1 | 1.5407487154007 | 1.5398589372635 | 0.00057783094001342 | 0.00088977813720703 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 512, 16, 512, 128) | noop-eager]_tflops | 1 | 1 | 18.300754547119 | 18.289531707764 | 0.00061362092451524 | 0.011222839355469 | |
tritonbench_softmax_fwd[x_2496.0-triton_softmax]_speedup | 1 | 1 | 3.7682402133942 | 3.7656903266907 | 0.00067713658911833 | 0.0025498867034912 | |
tritonbench_fused_linear_cross_entropy_fwd[x_(8192, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 0.31438690423965 | 0.31417319178581 | 0.00068023771419638 | 0.00021371245384216 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-eager]_tflops | 1 | 1 | 26.981693267822 | 26.961772918701 | 0.00073883676645302 | 0.019920349121094 | |
tritonbench_addmm_fwd[x_(35678, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 433.88967895508 | 433.56793212891 | 0.00074209092123588 | 0.32174682617188 | |
tritonbench_addmm_fwd[x_(33660, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 1.0721590518951 | 1.0713270902634 | 0.0007765710765051 | 0.0008319616317749 | |
tritonbench_addmm_fwd[x_(20211, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 392.16040039062 | 391.85113525391 | 0.00078924139525169 | 0.30926513671875 | |
tritonbench_addmm_fwd[x_(19632, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 390.47985839844 | 390.16418457031 | 0.00080907946092656 | 0.315673828125 | |
tritonbench_int4_gemm_fwd[x_average-tinygemm]_tflops | 1 | 1 | 21.576118469238 | 21.558313369751 | 0.00082590410399579 | 0.017805099487305 | |
tritonbench_int4_gemm_fwd[tinygemm]-tflops-avg | 1 | 1 | 21.576118469238 | 21.558313369751 | 0.00082590410399579 | 0.017805099487305 | |
tritonbench_softmax_fwd[x_6848.0-triton_softmax]_speedup | 1 | 1 | 4.7963123321533 | 4.7923483848572 | 0.00082714088747551 | 0.0039639472961426 | |
tritonbench_softmax_fwd[x_5376.0-triton_softmax]_speedup | 1 | 1 | 4.7339677810669 | 4.7299752235413 | 0.00084409692164214 | 0.0039925575256348 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 65536)-liger_cross_entropy_loss]_speedup | 1 | 1 | 2.1033871173859 | 2.1014928817749 | 0.00090137617281009 | 0.0018942356109619 | |
tritonbench_softmax_fwd[x_6528.0-triton_softmax]_speedup | 1 | 1 | 4.8149876594543 | 4.810601234436 | 0.00091182469810861 | 0.0043864250183105 | |
tritonbench_softmax_fwd[x_6912.0-triton_softmax]_speedup | 1 | 1 | 4.7793526649475 | 4.774956703186 | 0.00092062861188698 | 0.0043959617614746 | |
tritonbench_kl_div_bwd[x_(8, 512, 4096)-liger_kl_div]_speedup | 1 | 1 | 0.91202181577682 | 0.91116416454315 | 0.0009412697152144 | 0.0008576512336731 | |
tritonbench_layer_norm_fwd[x_3072-liger_layer_norm]_speedup | 1 | 1 | 1.2606941461563 | 1.2594681978226 | 0.00097338570029772 | 0.0012259483337402 | |
tritonbench_kl_div_bwd[x_(8, 512, 16384)-liger_kl_div]_speedup | 1 | 1 | 1.0149542093277 | 1.0138709545135 | 0.0010684346063229 | 0.0010832548141479 | |
tritonbench_addmm_fwd[x_(35561, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.97512817383 | 426.45443725586 | 0.001220976668268 | 0.52069091796875 | |
tritonbench_layer_norm_fwd[x_15872-liger_layer_norm]_speedup | 1 | 1 | 1.6051309108734 | 1.6031517982483 | 0.0012345135546644 | 0.0019791126251221 | |
tritonbench_softmax_fwd[x_7680.0-triton_softmax]_speedup | 1 | 1 | 4.7407875061035 | 4.7349338531494 | 0.0012362692142379 | 0.0058536529541016 | |
tritonbench_embedding_fwd[x_average-liger_embedding]_speedup | 1 | 1 | 1.0626790523529 | 1.0613275766373 | 0.0012733822670652 | 0.0013514757156372 | |
tritonbench_embedding_fwd[liger_embedding]-speedup-avg | 1 | 1 | 1.0626790523529 | 1.0613275766373 | 0.0012733822670652 | 0.0013514757156372 | |
tritonbench_addmm_fwd[x_(33961, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 426.29425048828 | 425.75073242188 | 0.0012766109956276 | 0.54351806640625 | |
tritonbench_addmm_fwd[x_(34308, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 443.44262695312 | 442.86053466797 | 0.0013143918673915 | 0.58209228515625 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 8192, 2048)-_cutlass]_tflops | 1 | 1 | 754.03216552734 | 752.974609375 | 0.0014045044005157 | 1.0575561523438 | |
tritonbench_fused_linear_cross_entropy_bwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 85.001419067383 | 84.879432678223 | 0.0014371725318029 | 0.12198638916016 | |
tritonbench_fused_linear_cross_entropy_fwd[liger_lm_head_ce]-speedup-avg | 1 | 1 | 0.31769463419914 | 0.31723618507385 | 0.00144513503459 | 0.00045844912528992 | |
tritonbench_fused_linear_cross_entropy_fwd[x_average-liger_lm_head_ce]_speedup | 1 | 1 | 0.31769463419914 | 0.31723618507385 | 0.00144513503459 | 0.00045844912528992 | |
tritonbench_softmax_fwd[x_4416.0-triton_softmax]_speedup | 1 | 1 | 4.7143659591675 | 4.7074527740479 | 0.0014685617575903 | 0.0069131851196289 | |
tritonbench_addmm_fwd[x_(35791, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98874205350876 | 0.98723614215851 | 0.0015253810977358 | 0.0015059113502502 | |
tritonbench_addmm_fwd[x_(20116, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 391.24346923828 | 390.62539672852 | 0.0015822640180131 | 0.61807250976562 | |
tritonbench_layer_norm_fwd[x_12288-liger_layer_norm]_speedup | 1 | 1 | 1.5310435295105 | 1.5285838842392 | 0.0016091006170234 | 0.0024596452713013 | |
tritonbench_softmax_fwd[x_3072.0-triton_softmax]_speedup | 1 | 1 | 4.4426422119141 | 4.4354839324951 | 0.001613866610248 | 0.0071582794189453 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 2304, 2048)-_triton]_speedup | 1 | 1 | 0.32097947597504 | 0.32044562697411 | 0.0016659581407673 | 0.00053384900093079 | |
tritonbench_addmm_fwd[x_(36032, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 432.9475402832 | 432.20822143555 | 0.0017105617408217 | 0.73931884765625 | |
tritonbench_kl_div_bwd[x_(8, 512, 131072)-liger_kl_div]_speedup | 1 | 1 | 1.0521993637085 | 1.0503873825073 | 0.0017250599458331 | 0.0018119812011719 | |
tritonbench_layer_norm_fwd[x_11776-liger_layer_norm]_speedup | 1 | 1 | 1.5060023069382 | 1.5033905506134 | 0.0017372440738718 | 0.0026117563247681 | |
tritonbench_addmm_fwd[x_(34181, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96850395202637 | 0.96671587228775 | 0.0018496435094062 | 0.0017880797386169 | |
tritonbench_addmm_fwd[x_(35380, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.96451300382614 | 0.9627268910408 | 0.0018552642519504 | 0.0017861127853394 | |
tritonbench_addmm_fwd[x_(35410, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98218202590942 | 0.98033910989761 | 0.0018798760482001 | 0.0018429160118103 | |
tritonbench_addmm_fwd[x_(35916, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 431.34292602539 | 430.5016784668 | 0.0019541098227301 | 0.84124755859375 | |
tritonbench_softmax_fwd[x_3008.0-triton_softmax]_speedup | 1 | 1 | 4.4129900932312 | 4.403892993927 | 0.0020656949014756 | 0.0090970993041992 | |
tritonbench_kl_div_bwd[x_average-liger_kl_div]_speedup | 1 | 1 | 1.0077267885208 | 1.005628824234 | 0.002086221313716 | 0.0020979642868042 | |
tritonbench_kl_div_bwd[liger_kl_div]-speedup-avg | 1 | 1 | 1.0077267885208 | 1.005628824234 | 0.002086221313716 | 0.0020979642868042 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 8192, 6656)-_triton]_speedup | 1 | 1 | 0.69043576717377 | 0.68897634744644 | 0.0021182435837377 | 0.0014594197273254 | |
tritonbench_addmm_fwd[x_(35916, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98387885093689 | 0.98171627521515 | 0.002202852062595 | 0.0021625757217407 | |
tritonbench_softmax_fwd[x_3840.0-triton_softmax]_speedup | 1 | 1 | 4.7897810935974 | 4.7790694236755 | 0.0022413714830777 | 0.010711669921875 | |
tritonbench_softmax_fwd[x_4672.0-triton_softmax]_speedup | 1 | 1 | 4.7278776168823 | 4.717125415802 | 0.0022793969064938 | 0.010752201080322 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1 | 1 | 1.1008973121643 | 1.0982532501221 | 0.0024075157910459 | 0.0026440620422363 | |
tritonbench_addmm_fwd[x_(35503, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 431.01516723633 | 429.95355224609 | 0.0024691387818253 | 1.0616149902344 | |
tritonbench_cross_entropy_bwd[x_(8, 2048, 8192)-liger_cross_entropy_loss]_speedup | 1 | 1 | 1.5365635156631 | 1.5327256917953 | 0.002503920883131 | 0.0038378238677979 | |
tritonbench_softmax_fwd[x_5248.0-triton_softmax]_speedup | 1 | 1 | 4.7398023605347 | 4.7278161048889 | 0.0025352626624706 | 0.011986255645752 | |
tritonbench_geglu_fwd[liger_geglu]-speedup-avg | 1 | 1 | 0.98137181997299 | 0.97881311178207 | 0.0026140926803274 | 0.002558708190918 | |
tritonbench_geglu_fwd[x_average-liger_geglu]_speedup | 1 | 1 | 0.98137181997299 | 0.97881311178207 | 0.0026140926803274 | 0.002558708190918 | |
tritonbench_fused_linear_cross_entropy_bwd[x_(32768, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 558.11975097656 | 556.66003417969 | 0.0026222769863946 | 1.459716796875 | |
tritonbench_jsd_bwd[x_(4, 2048, 4096)-liger_jsd]_speedup | 1 | 1 | 5.7843570709229 | 5.7690997123718 | 0.0026446688931907 | 0.015257358551025 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1 | 1 | 1.0563923120499 | 1.053574681282 | 0.0026743531501665 | 0.0028176307678223 | |
tritonbench_softmax_fwd[x_5312.0-triton_softmax]_speedup | 1 | 1 | 4.72811460495 | 4.7154197692871 | 0.0026921963014888 | 0.012694835662842 | |
tritonbench_fused_linear_jsd_fwd[x_(8192, 4096)-liger_lm_head_jsd]_speedup | 1 | 1 | 0.44636809825897 | 0.4451567530632 | 0.0027211654938059 | 0.0012113451957703 | |
tritonbench_kl_div_bwd[x_(8, 512, 65536)-liger_kl_div]_speedup | 1 | 1 | 1.0487148761749 | 1.0458581447601 | 0.0027314712125229 | 0.0028567314147949 | |
tritonbench_softmax_fwd[x_3776.0-triton_softmax]_speedup | 1 | 1 | 4.7573251724243 | 4.7442035675049 | 0.0027658182733366 | 0.013121604919434 | |
tritonbench_geglu_fwd[x_(8, 4096, 4096)-liger_geglu]_speedup | 1 | 1 | 0.96102768182755 | 0.95834523439407 | 0.0027990408228697 | 0.0026824474334717 | |
tritonbench_softmax_fwd[x_8256.0-triton_softmax]_speedup | 1 | 1 | 4.7324299812317 | 4.7190856933594 | 0.0028277273903062 | 0.013344287872314 | |
tritonbench_layer_norm_bwd[x_3584-liger_layer_norm]_speedup | 1 | 1 | 1.2481770515442 | 1.2446568012238 | 0.0028282899486617 | 0.0035202503204346 | |
tritonbench_addmm_fwd[x_(35917, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 428.84094238281 | 427.59494018555 | 0.0029139778799182 | 1.2460021972656 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1 | 1 | 1.0150814056396 | 1.0121160745621 | 0.0029298330024634 | 0.0029653310775757 | |
tritonbench_kl_div_fwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 1 | 1 | 3.879988193512 | 3.8685746192932 | 0.0029503306364646 | 0.01141357421875 | |
tritonbench_layer_norm_bwd[x_10240-liger_layer_norm]_speedup | 1 | 1 | 0.15419176220894 | 0.15373694896698 | 0.0029583860289585 | 0.00045481324195862 | |
tritonbench_geglu_fwd[x_(8, 2048, 4096)-liger_geglu]_speedup | 1 | 1 | 0.94575214385986 | 0.94280004501343 | 0.0031312035484613 | 0.0029520988464355 | |
tritonbench_layer_norm_bwd[x_9216-liger_layer_norm]_speedup | 1 | 1 | 0.14446890354156 | 0.14401112496853 | 0.0031787722867677 | 0.00045777857303619 | |
tritonbench_addmm_fwd[x_(35901, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 432.11267089844 | 430.7419128418 | 0.0031823187290903 | 1.3707580566406 | |
tritonbench_geglu_bwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1 | 1 | 1.0057621002197 | 1.0025110244751 | 0.0032429326613452 | 0.0032510757446289 | |
tritonbench_addmm_fwd[x_(15168, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 405.9772644043 | 404.65557861328 | 0.0032661993578463 | 1.3216857910156 | |
tritonbench_int4_gemm_fwd[x_(4, 1, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 6.9212937355042 | 6.8985266685486 | 0.0033002796175834 | 0.022767066955566 | |
tritonbench_softmax_fwd[x_4992.0-triton_softmax]_speedup | 1 | 1 | 4.7355527877808 | 4.7197246551514 | 0.0033536135655962 | 0.015828132629395 | |
tritonbench_layer_norm_fwd[x_5632-liger_layer_norm]_speedup | 1 | 1 | 1.4522352218628 | 1.4473438262939 | 0.0033795670938622 | 0.0048913955688477 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 8192, 13312)-_triton]_tflops | 1 | 1 | 57.791149139404 | 57.592765808105 | 0.00344458767547 | 0.19838333129883 | |
tritonbench_jsd_bwd[x_(4, 2048, 8192)-liger_jsd]_speedup | 1 | 1 | 6.2010579109192 | 6.1789107322693 | 0.0035843176264449 | 0.022147178649902 | |
tritonbench_fused_linear_cross_entropy_fwd[x_(4096, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 0.27612632513046 | 0.27513551712036 | 0.003601163602836 | 0.00099080801010132 | |
tritonbench_layer_norm_fwd[x_6144-liger_layer_norm]_speedup | 1 | 1 | 1.5052447319031 | 1.499564409256 | 0.0037879817712619 | 0.0056803226470947 | |
tritonbench_fused_linear_jsd_fwd[x_(4096, 4096)-liger_lm_head_jsd]_speedup | 1 | 1 | 0.37309375405312 | 0.37165760993958 | 0.0038641590408283 | 0.0014361441135406 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 4096, 6656)-_cutlass]_tflops | 1 | 1 | 26.468908309937 | 26.366516113281 | 0.0038834177490631 | 0.10239219665527 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 27.091236114502 | 26.985410690308 | 0.0039215791602662 | 0.10582542419434 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 2304, 6656)-_cutlass]_tflops | 1 | 1 | 121.34855651855 | 120.87033081055 | 0.0039565185666397 | 0.47822570800781 | |
tritonbench_softmax_fwd[x_6016.0-triton_softmax]_speedup | 1 | 1 | 4.7411432266235 | 4.7222995758057 | 0.0039903548081564 | 0.018843650817871 | |
tritonbench_addmm_fwd[x_(35249, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 435.0207824707 | 433.28070068359 | 0.0040160611454976 | 1.7400817871094 | |
tritonbench_flex_attention_fwd[x_average-compiled]_speedup | 1 | 1 | 35.26859664917 | 35.127269744873 | 0.0040232817786103 | 0.14132690429688 | |
tritonbench_flex_attention_fwd[compiled]-speedup-avg | 1 | 1 | 45.34534072876 | 45.163631439209 | 0.0040233542733464 | 0.18170928955078 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 8192, 6656)-_cutlass]_tflops | 1 | 1 | 22.346702575684 | 22.255491256714 | 0.0040983736515908 | 0.091211318969727 | |
tritonbench_addmm_fwd[x_(35605, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.99926996231079 | 0.99518299102783 | 0.0041067535516638 | 0.004086971282959 | |
tritonbench_addmm_fwd[x_(20116, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.93710452318192 | 0.93325436115265 | 0.0041255226758449 | 0.0038501620292664 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16, 13312, 13312)-_cutlass]_tflops | 1 | 1 | 61.659481048584 | 61.403099060059 | 0.0041753916732219 | 0.25638198852539 | |
tritonbench_softmax_fwd[x_4928.0-triton_softmax]_speedup | 1 | 1 | 4.7366433143616 | 4.7164945602417 | 0.0042719765415865 | 0.020148754119873 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 13312, 6656)-_cutlass]_tflops | 1 | 1 | 956.03118896484 | 951.93872070312 | 0.0042990879273153 | 4.0924682617188 | |
tritonbench_addmm_fwd[x_(19632, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97862237691879 | 0.97439938783646 | 0.0043339406151651 | 0.0042229890823364 | |
tritonbench_fused_linear_cross_entropy_fwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 0.33266890048981 | 0.3312264084816 | 0.0043550030168847 | 0.0014424920082092 | |
tritonbench_softmax_fwd[x_7488.0-triton_softmax]_speedup | 1 | 1 | 4.7796564102173 | 4.7587189674377 | 0.0043998065283553 | 0.020937442779541 | |
tritonbench_low_mem_dropout_fwd[x_32768-triton_dropout]_speedup | 1 | 1 | 1.3006535768509 | 1.2948718070984 | 0.0044651290736328 | 0.0057817697525024 | |
tritonbench_softmax_fwd[x_7168.0-triton_softmax]_speedup | 1 | 1 | 4.7505497932434 | 4.7290358543396 | 0.0045493287778874 | 0.021513938903809 | |
tritonbench_fp8_gemm_blockwise_fwd[x_average-_cutlass]_tflops | 1 | 1 | 310.30810546875 | 308.90057373047 | 0.0045565850567484 | 1.4075317382812 | |
tritonbench_fp8_gemm_blockwise_fwd[_cutlass]-tflops-avg | 1 | 1 | 310.30810546875 | 308.90057373047 | 0.0045565850567484 | 1.4075317382812 | |
tritonbench_layer_norm_bwd[x_12800-liger_layer_norm]_speedup | 1 | 1 | 0.17960648238659 | 0.17878346145153 | 0.0046034511714705 | 0.00082302093505859 | |
tritonbench_layer_norm_fwd[x_11264-liger_layer_norm]_speedup | 1 | 1 | 1.4942582845688 | 1.4874007701874 | 0.0046104012575876 | 0.0068575143814087 | |
tritonbench_addmm_fwd[x_(35561, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98656892776489 | 0.98195117712021 | 0.0047026275361535 | 0.0046177506446838 | |
tritonbench_addmm_fwd[x_(33894, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 435.57244873047 | 433.53283691406 | 0.0047046305210107 | 2.0396118164062 | |
tritonbench_fp8_gemm_blockwise_fwd[x_average-_triton]_tflops | 1 | 1 | 338.03369140625 | 336.43594360352 | 0.0047490401460116 | 1.5977478027344 | |
tritonbench_fp8_gemm_blockwise_fwd[_triton]-tflops-avg | 1 | 1 | 338.03369140625 | 336.43594360352 | 0.0047490401460116 | 1.5977478027344 | |
tritonbench_geglu_fwd[x_(8, 8192, 4096)-liger_geglu]_speedup | 1 | 1 | 1.0143437385559 | 1.00949883461 | 0.0047993160366497 | 0.0048449039459229 | |
tritonbench_softmax_fwd[x_8320.0-triton_softmax]_speedup | 1 | 1 | 4.7263035774231 | 4.7037205696106 | 0.0048010946820273 | 0.0225830078125 | |
tritonbench_layer_norm_bwd[x_11776-liger_layer_norm]_speedup | 1 | 1 | 0.16982352733612 | 0.16898182034492 | 0.0049810505619929 | 0.00084170699119568 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 13312, 2304)-_cutlass]_tflops | 1 | 1 | 835.38006591797 | 831.22351074219 | 0.0050005264794182 | 4.1565551757812 | |
tritonbench_welford_fwd[x_3072-test_welford]_speedup | 1 | 1 | 0.63485509157181 | 0.6316431760788 | 0.0050850157409296 | 0.0032119154930115 | |
tritonbench_softmax_fwd[x_5888.0-triton_softmax]_speedup | 1 | 1 | 4.7194061279297 | 4.6952924728394 | 0.0051357088466419 | 0.024113655090332 | |
tritonbench_addmm_fwd[x_(19632, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 382.13229370117 | 380.17572021484 | 0.0051464977437866 | 1.9565734863281 | |
tritonbench_layer_norm_fwd[liger_layer_norm]-speedup-avg | 1 | 1 | 1.4466648101807 | 1.4392035007477 | 0.0051843324652193 | 0.0074613094329834 | |
tritonbench_layer_norm_fwd[x_average-liger_layer_norm]_speedup | 1 | 1 | 1.4466648101807 | 1.4392035007477 | 0.0051843324652193 | 0.0074613094329834 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(16384, 4096, 16384)-_cutlass]_tflops | 1 | 1 | 930.60339355469 | 925.77667236328 | 0.0052136993029699 | 4.8267211914062 | |
tritonbench_softmax_fwd[x_7296.0-triton_softmax]_speedup | 1 | 1 | 4.7907791137695 | 4.765398979187 | 0.0053259201786394 | 0.02538013458252 | |
tritonbench_softmax_fwd[x_7744.0-triton_softmax]_speedup | 1 | 1 | 4.7621541023254 | 4.7358260154724 | 0.0055593441919131 | 0.026328086853027 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1 | 1 | 1.0932704210281 | 1.0871613025665 | 0.0056193303120583 | 0.0061091184616089 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 13312, 2048)-_cutlass]_tflops | 1 | 1 | 155.12362670898 | 154.24597167969 | 0.0056899705045098 | 0.87765502929688 | |
tritonbench_softmax_fwd[x_6592.0-triton_softmax]_speedup | 1 | 1 | 4.8254308700562 | 4.7980446815491 | 0.0057077810493083 | 0.02738618850708 | |
tritonbench_layer_norm_fwd[x_9728-liger_layer_norm]_speedup | 1 | 1 | 1.3799800872803 | 1.3721449375153 | 0.0057101473399763 | 0.0078351497650146 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 4096, 2048)-_cutlass]_tflops | 1 | 1 | 63.913204193115 | 63.550060272217 | 0.005714297033597 | 0.36314392089844 | |
tritonbench_softmax_fwd[x_5952.0-triton_softmax]_speedup | 1 | 1 | 4.7139296531677 | 4.6870794296265 | 0.0057285616649769 | 0.02685022354126 | |
tritonbench_addmm_fwd[x_(15168, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 1.058247089386 | 1.0520889759064 | 0.0058532249844259 | 0.0061581134796143 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 8192, 16384)-_triton]_speedup | 1 | 1 | 0.66771787405014 | 0.66380047798157 | 0.0059014661762292 | 0.003917396068573 | |
tritonbench_addmm_fwd[x_(34238, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 434.76602172852 | 432.19869995117 | 0.0059401422948144 | 2.5673217773438 | |
tritonbench_addmm_fwd[x_(20067, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 390.7541809082 | 388.4465637207 | 0.0059406296850632 | 2.3076171875 | |
tritonbench_softmax_fwd[x_6976.0-triton_softmax]_speedup | 1 | 1 | 4.7836909294128 | 4.7551832199097 | 0.0059950811955707 | 0.028507709503174 | |
tritonbench_softmax_fwd[x_6272.0-triton_softmax]_speedup | 1 | 1 | 4.8474683761597 | 4.8185377120972 | 0.0060040339603171 | 0.0289306640625 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1 | 1 | 1.02791929245 | 1.0216972827911 | 0.0060898758992642 | 0.0062220096588135 | |
tritonbench_layer_norm_bwd[x_11264-liger_layer_norm]_speedup | 1 | 1 | 0.16335791349411 | 0.16234821081161 | 0.0062193643985812 | 0.0010097026824951 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 8192, 6656)-_triton]_tflops | 1 | 1 | 15.42896270752 | 15.333506584167 | 0.006225329009257 | 0.095456123352051 | |
tritonbench_layer_norm_fwd[x_13824-liger_layer_norm]_speedup | 1 | 1 | 1.5709091424942 | 1.5611670017242 | 0.0062402938053384 | 0.0097421407699585 | |
tritonbench_addmm_fwd[x_(35605, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 426.35815429688 | 423.67538452148 | 0.0063321351048531 | 2.6827697753906 | |
tritonbench_softmax_fwd[x_4608.0-triton_softmax]_speedup | 1 | 1 | 4.73885679245 | 4.7087230682373 | 0.0063995532920408 | 0.030133724212646 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 8192, 2304)-_cutlass]_tflops | 1 | 1 | 247.53269958496 | 245.92008972168 | 0.0065574547614484 | 1.6126098632812 | |
tritonbench_layer_norm_fwd[x_8192-liger_layer_norm]_speedup | 1 | 1 | 1.5726380348206 | 1.5623528957367 | 0.0065831087918281 | 0.010285139083862 | |
tritonbench_layer_norm_fwd[x_10752-liger_layer_norm]_speedup | 1 | 1 | 1.4465304613113 | 1.4370265007019 | 0.0066136293274995 | 0.009503960609436 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 13312, 2048)-_cutlass]_tflops | 1 | 1 | 9.0514535903931 | 8.9917469024658 | 0.0066401655401213 | 0.059706687927246 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1 | 1 | 1.1679855585098 | 1.1602795124054 | 0.0066415428541487 | 0.0077060461044312 | |
tritonbench_addmm_fwd[x_(20203, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.97142863273621 | 0.96500200033188 | 0.006659708893989 | 0.0064266324043274 | |
tritonbench_layer_norm_bwd[x_12288-liger_layer_norm]_speedup | 1 | 1 | 0.17285060882568 | 0.17169557511806 | 0.0067272188396496 | 0.0011550337076187 | |
tritonbench_jsd_fwd[x_(4, 2048, 65536)-liger_jsd]_speedup | 1 | 1 | 0.58741188049316 | 0.58348619937897 | 0.006727975945918 | 0.0039256811141968 | |
tritonbench_addmm_fwd[x_(35405, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.98945820331573 | 0.9827926158905 | 0.0067822929450811 | 0.0066655874252319 | |
tritonbench_low_mem_dropout_fwd[x_128-triton_dropout]_speedup | 1 | 1 | 1.1777778863907 | 1.1698113679886 | 0.006810088036499 | 0.0079665184020996 | |
tritonbench_layer_norm_fwd[x_6656-liger_layer_norm]_speedup | 1 | 1 | 1.5326796770096 | 1.5223033428192 | 0.0068162066642725 | 0.010376334190369 | |
tritonbench_softmax_fwd[x_7808.0-triton_softmax]_speedup | 1 | 1 | 4.7610359191895 | 4.7274060249329 | 0.007113815500345 | 0.033629894256592 | |
tritonbench_addmm_fwd[x_(35844, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 447.98620605469 | 444.82025146484 | 0.0071173796143901 | 3.1659545898438 | |
tritonbench_welford_fwd[test_welford]-speedup-avg | 1 | 1 | 0.62961786985397 | 0.62512421607971 | 0.007188417371578 | 0.0044936537742615 | |
tritonbench_welford_fwd[x_average-test_welford]_speedup | 1 | 1 | 0.62961786985397 | 0.62512421607971 | 0.007188417371578 | 0.0044936537742615 | |
tritonbench_jsd_fwd[x_(4, 2048, 131072)-liger_jsd]_speedup | 1 | 1 | 0.58466857671738 | 0.58046960830688 | 0.0072337437660854 | 0.0041989684104919 | |
tritonbench_gemm_fwd[x_(2816, 2816, 2816)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.91614627838135 | 0.90929508209229 | 0.0075346237145569 | 0.0068511962890625 | |
tritonbench_addmm_fwd[x_(35844, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 431.74371337891 | 428.48962402344 | 0.0075943247468012 | 3.2540893554688 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 13312, 2304)-_triton]_speedup | 1 | 1 | 1.1847976446152 | 1.1757529973984 | 0.0076926422784464 | 0.0090446472167969 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1 | 1 | 1.1233299970627 | 1.1147540807724 | 0.0076931014994276 | 0.0085759162902832 | |
tritonbench_layer_norm_fwd[x_13312-liger_layer_norm]_speedup | 1 | 1 | 1.5643110275269 | 1.5521242618561 | 0.0078516688194817 | 0.012186765670776 | |
tritonbench_welford_fwd[x_2560-test_welford]_speedup | 1 | 1 | 0.62982392311096 | 0.62474364042282 | 0.0081317877597002 | 0.0050802826881409 | |
tritonbench_addmm_fwd[x_(35901, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.99436968564987 | 0.98633480072021 | 0.0081462044366578 | 0.008034884929657 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 2304, 2048)-_cutlass]_tflops | 1 | 1 | 0.60806596279144 | 0.60309201478958 | 0.0082474114726871 | 0.0049739480018616 | |
tritonbench_softmax_fwd[x_4288.0-triton_softmax]_speedup | 1 | 1 | 4.7428908348083 | 4.7028603553772 | 0.00851194303173 | 0.040030479431152 | |
tritonbench_layer_norm_bwd[x_15360-liger_layer_norm]_speedup | 1 | 1 | 0.20112508535385 | 0.19937302172184 | 0.0087878671691893 | 0.0017520636320114 | |
tritonbench_kl_div_bwd[x_(8, 512, 8192)-liger_kl_div]_speedup | 1 | 1 | 0.98327893018723 | 0.97453808784485 | 0.0089692157252742 | 0.0087408423423767 | |
tritonbench_fused_linear_cross_entropy_bwd[x_average-liger_lm_head_ce]_speedup | 1 | 1 | 278.9919128418 | 276.50030517578 | 0.0090112293526462 | 2.4916076660156 | |
tritonbench_fused_linear_cross_entropy_bwd[liger_lm_head_ce]-speedup-avg | 1 | 1 | 278.9919128418 | 276.50030517578 | 0.0090112293526462 | 2.4916076660156 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4, 4096, 2304)-_cutlass]_tflops | 1 | 1 | 4.2896289825439 | 4.2509841918945 | 0.0090907867225428 | 0.038644790649414 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 8192, 13312)-_cutlass]_tflops | 1 | 1 | 110.93784332275 | 109.9313583374 | 0.0091555767214524 | 1.0064849853516 | |
tritonbench_layer_norm_bwd[x_8704-liger_layer_norm]_speedup | 1 | 1 | 0.14031882584095 | 0.13901317119598 | 0.0093923088994595 | 0.0013056546449661 | |
tritonbench_layer_norm_bwd[x_1536-liger_layer_norm]_speedup | 1 | 1 | 0.628637611866 | 0.62275296449661 | 0.0094494088424637 | 0.0058846473693848 | |
tritonbench_cross_entropy_fwd[x_(8, 2048, 16384)-liger_cross_entropy_loss]_speedup | 1 | 1 | 0.78207284212112 | 0.77467256784439 | 0.0095527769846368 | 0.0074002742767334 | |
tritonbench_layer_norm_bwd[x_14848-liger_layer_norm]_speedup | 1 | 1 | 0.1967901289463 | 0.19489553570747 | 0.0097210704799018 | 0.0018945932388306 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 2304, 13312)-_cutlass]_tflops | 1 | 1 | 901.77294921875 | 892.953125 | 0.0098771413323068 | 8.81982421875 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(1, 2304, 2048)-_triton]_tflops | 1 | 1 | 0.19517670571804 | 0.19325819611549 | 0.0099271836388257 | 0.0019185096025467 | |
tritonbench_addmm_fwd[x_(20224, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 411.408203125 | 407.36322021484 | 0.0099296713827599 | 4.0449829101562 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 1 | 1 | 1.0869565010071 | 1.0760413408279 | 0.010143811176195 | 0.010915160179138 | |
tritonbench_gemm_fwd[x_(3456, 3456, 3456)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.92233914136887 | 0.9130026102066 | 0.010226182332764 | 0.009336531162262 | |
tritonbench_softmax_fwd[x_4480.0-triton_softmax]_speedup | 1 | 1 | 4.7839775085449 | 4.7355508804321 | 0.010226186844048 | 0.048426628112793 | |
tritonbench_layer_norm_bwd[x_10752-liger_layer_norm]_speedup | 1 | 1 | 0.1592473089695 | 0.15762677788734 | 0.010280810810658 | 0.0016205310821533 | |
tritonbench_addmm_fwd[x_(35605, 512, 1536)-triton_addmm]_tflops | 1 | 1 | 426.04690551758 | 421.63455200195 | 0.010464876501878 | 4.412353515625 | |
tritonbench_swiglu_bwd[x_(4, 2048, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.0346910953522 | 1.0239633321762 | 0.010476706380848 | 0.010727763175964 | |
tritonbench_gemm_fwd[x_(2048, 2048, 2048)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.96544277667999 | 0.95528894662857 | 0.010629066825548 | 0.010153830051422 | |
tritonbench_layer_norm_fwd[x_10240-liger_layer_norm]_speedup | 1 | 1 | 1.4120078086853 | 1.3971049785614 | 0.010666936524159 | 0.014902830123901 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 13312, 2048)-_triton]_speedup | 1 | 1 | 0.41280093789101 | 0.40843445062637 | 0.010690790793815 | 0.0043664872646332 | |
tritonbench_addmm_fwd[x_(34533, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.99434614181519 | 0.98345673084259 | 0.011072587772383 | 0.010889410972595 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 1024, 16, 1024, 128) | noop-eager]_tflops | 1 | 1 | 11.484484672546 | 11.357600212097 | 0.011171766753515 | 0.12688446044922 | |
tritonbench_layer_norm_fwd[x_9216-liger_layer_norm]_speedup | 1 | 1 | 1.3294117450714 | 1.3143794536591 | 0.011436797319454 | 0.015032291412354 | |
tritonbench_gemm_fwd[x_(2560, 2560, 2560)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.80434787273407 | 0.79514420032501 | 0.011574846933796 | 0.0092036724090576 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(32, 2304, 16384)-_cutlass]_tflops | 1 | 1 | 37.081275939941 | 36.649257659912 | 0.011787913524422 | 0.4320182800293 | |
tritonbench_gemm_fwd[x_(2944, 2944, 2944)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.71892529726028 | 0.71019679307938 | 0.012290261327514 | 0.0087285041809082 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(4096, 13312, 2304)-_triton]_tflops | 1 | 1 | 989.75634765625 | 977.31353759766 | 0.012731646068445 | 12.442810058594 | |
tritonbench_layer_norm_fwd[x_8704-liger_layer_norm]_speedup | 1 | 1 | 1.2826479673386 | 1.2664103507996 | 0.012821765495481 | 0.016237616539001 | |
tritonbench_addmm_fwd[x_(35541, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 435.45474243164 | 429.88433837891 | 0.012957913455839 | 5.5704040527344 | |
tritonbench_addmm_fwd[x_(20067, 512, 1536)-aten_addmm]_tflops | 1 | 1 | 410.93472290039 | 405.52990722656 | 0.013327785639269 | 5.4048156738281 | |
tritonbench_layer_norm_bwd[x_15872-liger_layer_norm]_speedup | 1 | 1 | 0.20774175226688 | 0.2049452662468 | 0.013645038362197 | 0.0027964860200882 | |
tritonbench_swiglu_fwd[liger_swiglu]-speedup-avg | 1 | 1 | 1.0763176679611 | 1.061812877655 | 0.013660401574828 | 0.014504790306091 | |
tritonbench_swiglu_fwd[x_average-liger_swiglu]_speedup | 1 | 1 | 1.0763176679611 | 1.061812877655 | 0.013660401574828 | 0.014504790306091 | |
tritonbench_layer_norm_fwd[x_7168-liger_layer_norm]_speedup | 1 | 1 | 1.5588009357452 | 1.53737616539 | 0.013935932426655 | 0.021424770355225 | |
tritonbench_layer_norm_bwd[x_14336-liger_layer_norm]_speedup | 1 | 1 | 0.19314520061016 | 0.19044855237007 | 0.014159457798605 | 0.0026966482400894 | |
tritonbench_int4_gemm_fwd[x_(64, 1, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 23.763761520386 | 23.431865692139 | 0.014164293727512 | 0.33189582824707 | |
tritonbench_swiglu_bwd[x_average-liger_swiglu]_speedup | 1 | 1 | 1.0506772994995 | 1.0357780456543 | 0.014384600936201 | 0.014899253845215 | |
tritonbench_swiglu_bwd[liger_swiglu]-speedup-avg | 1 | 1 | 1.0506772994995 | 1.0357780456543 | 0.014384600936201 | 0.014899253845215 | |
tritonbench_low_mem_dropout_fwd[x_32768-torch_dropout]_tflops | 1 | 1 | 0.010291457176208 | 0.010138614103198 | 0.015075341802607 | 0.00015284307301044 | |
tritonbench_layer_norm_bwd[x_13824-liger_layer_norm]_speedup | 1 | 1 | 0.18860530853271 | 0.18571864068508 | 0.015543231616304 | 0.0028866678476334 | |
tritonbench_softmax_fwd[x_3712.0-triton_softmax]_speedup | 1 | 1 | 4.7741432189941 | 4.698703289032 | 0.016055478569642 | 0.075439929962158 | |
tritonbench_softmax_fwd[x_3968.0-triton_softmax]_speedup | 1 | 1 | 4.7921810150146 | 4.7158250808716 | 0.016191426279313 | 0.076355934143066 | |
tritonbench_softmax_fwd[x_4096.0-triton_softmax]_speedup | 1 | 1 | 4.8334412574768 | 4.7562065124512 | 0.016238728243495 | 0.077234745025635 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(64, 13312, 2048)-_triton]_tflops | 1 | 1 | 64.035171508789 | 62.999366760254 | 0.016441510475446 | 1.0358047485352 | |
tritonbench_low_mem_dropout_fwd[x_131072-triton_dropout]_speedup | 1 | 1 | 1.1437125205994 | 1.125 | 0.01663335164388 | 0.018712520599365 | |
tritonbench_welford_fwd[x_8192-test_welford]_speedup | 1 | 1 | 0.68730229139328 | 0.6760168671608 | 0.016693998006116 | 0.011285424232483 | |
tritonbench_layer_norm_fwd[x_5120-liger_layer_norm]_speedup | 1 | 1 | 1.3843469619751 | 1.3613030910492 | 0.016927803277184 | 0.023043870925903 | |
tritonbench_softmax_fwd[x_3904.0-triton_softmax]_speedup | 1 | 1 | 4.7625088691711 | 4.682758808136 | 0.017030571998839 | 0.079750061035156 | |
tritonbench_layer_norm_bwd[x_13312-liger_layer_norm]_speedup | 1 | 1 | 0.18425744771957 | 0.1811715811491 | 0.01703284008949 | 0.0030858665704727 | |
tritonbench_layer_norm_fwd[x_3584-liger_layer_norm]_speedup | 1 | 1 | 1.3205400705338 | 1.2983927726746 | 0.017057471610513 | 0.022147297859192 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 3584)-tinygemm]_tflops | 1 | 1 | 23.027549743652 | 22.636953353882 | 0.017254812680149 | 0.39059638977051 | |
tritonbench_grouped_gemm_fwd[x_256-triton]_speedup | 1 | 1 | 0.17774686217308 | 0.17472016811371 | 0.017323094935452 | 0.0030266940593719 | |
tritonbench_rms_norm_bwd[x_(2048, 4096)-liger_rms]_speedup | 1 | 1 | 0.6856546998024 | 0.67370247840881 | 0.017741097556617 | 0.011952221393585 | |
tritonbench_layer_norm_bwd[x_2560-liger_layer_norm]_speedup | 1 | 1 | 0.97717136144638 | 0.95987576246262 | 0.018018580799865 | 0.017295598983765 | |
tritonbench_softmax_fwd[x_4032.0-triton_softmax]_speedup | 1 | 1 | 4.8149633407593 | 4.7282252311707 | 0.018344749953282 | 0.086738109588623 | |
tritonbench_welford_fwd[x_6144-test_welford]_speedup | 1 | 1 | 0.66931861639023 | 0.65720987319946 | 0.018424469388777 | 0.012108743190765 | |
tritonbench_softmax_fwd[x_3520.0-triton_softmax]_speedup | 1 | 1 | 4.7058825492859 | 4.6194090843201 | 0.018719594516827 | 0.08647346496582 | |
tritonbench_layer_norm_fwd[x_2048-liger_layer_norm]_speedup | 1 | 1 | 1.3134160041809 | 1.2892655134201 | 0.018731976082055 | 0.024150490760803 | |
tritonbench_low_mem_dropout_fwd[x_512-torch_dropout]_tflops | 1 | 1 | 0.00020000000949949 | 0.00019631901523098 | 0.018750064858342 | 0.0000036809942685068 | |
tritonbench_softmax_fwd[x_3136.0-triton_softmax]_speedup | 1 | 1 | 4.2041449546814 | 4.1261353492737 | 0.018906215818016 | 0.078009605407715 | |
tritonbench_addmm_fwd[x_(19747, 512, 1536)-triton_addmm]_speedup | 1 | 1 | 0.95395517349243 | 0.93603491783142 | 0.019144858081286 | 0.017920255661011 | |
tritonbench_low_mem_dropout_fwd[x_32768-triton_dropout]_tflops | 1 | 1 | 0.013385620899498 | 0.01312820520252 | 0.019607836182295 | 0.00025741569697857 | |
tritonbench_rms_norm_fwd[x_(2048, 4096)-liger_rms]_speedup | 1 | 1 | 4.2865428924561 | 4.2004504203796 | 0.020496009584761 | 0.086092472076416 | |
tritonbench_softmax_fwd[x_3584.0-triton_softmax]_speedup | 1 | 1 | 4.7383332252502 | 4.6401305198669 | 0.021163780838242 | 0.098202705383301 | |
tritonbench_welford_fwd[x_4096-test_welford]_speedup | 1 | 1 | 0.60431778430939 | 0.59127241373062 | 0.022063215323131 | 0.013045370578766 | |
tritonbench_low_mem_dropout_fwd[x_524288-triton_dropout]_tflops | 1 | 1 | 0.14499114453793 | 0.14185281097889 | 0.022123872888943 | 0.0031383335590363 | |
tritonbench_softmax_fwd[x_3328.0-triton_softmax]_speedup | 1 | 1 | 4.5794034004211 | 4.4784641265869 | 0.022538814866238 | 0.10093927383423 | |
tritonbench_softmax_fwd[x_3648.0-triton_softmax]_speedup | 1 | 1 | 4.7673664093018 | 4.659574508667 | 0.023133421395938 | 0.10779190063477 | |
tritonbench_gemm_fwd[x_(1408, 1408, 1408)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.71864950656891 | 0.70172679424286 | 0.024115813255084 | 0.01692271232605 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1 | 1 | 1.0256623029709 | 1.0008090734482 | 0.024833137690365 | 0.024853229522705 | |
tritonbench_embedding_fwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 1 | 1 | 1.0267640352249 | 1.0008952617645 | 0.025845634851726 | 0.025868773460388 | |
tritonbench_welford_fwd[x_5120-test_welford]_speedup | 1 | 1 | 0.68712955713272 | 0.66927117109299 | 0.026683333768238 | 0.017858386039734 | |
tritonbench_rms_norm_bwd[x_(2048, 8192)-liger_rms]_speedup | 1 | 1 | 1.1387900114059 | 1.1091717481613 | 0.026703045126895 | 0.029618263244629 | |
tritonbench_swiglu_bwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.0493154525757 | 1.0216253995895 | 0.027103919888122 | 0.027690052986145 | |
tritonbench_embedding_fwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 1 | 1 | 1.08231985569 | 1.0529681444168 | 0.027875212967103 | 0.029351711273193 | |
tritonbench_fused_linear_cross_entropy_bwd[x_(16384, 4096)-liger_lm_head_ce]_speedup | 1 | 1 | 309.77059936523 | 301.21765136719 | 0.028394577672411 | 8.5529479980469 | |
tritonbench_layer_norm_fwd[x_4096-liger_layer_norm]_speedup | 1 | 1 | 1.382669210434 | 1.3443751335144 | 0.028484666195401 | 0.038294076919556 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 256, 16, 256, 128) | noop-compiled]_speedup | 1 | 1 | 69.525161743164 | 67.595268249512 | 0.028550718765234 | 1.9298934936523 | |
tritonbench_swiglu_bwd[x_(4, 4096, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.2085978984833 | 1.1743993759155 | 0.029120010849026 | 0.034198522567749 | |
tritonbench_gemm_fwd[x_(1152, 1152, 1152)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.6414048075676 | 0.62298023700714 | 0.0295748877187 | 0.018424570560455 | |
tritonbench_low_mem_dropout_fwd[x_2048-triton_dropout]_tflops | 1 | 1 | 0.00081012659939006 | 0.00078527606092393 | 0.031645608089577 | 0.000024850538466126 | |
tritonbench_welford_fwd[x_7168-test_welford]_speedup | 1 | 1 | 0.67386239767075 | 0.65312790870667 | 0.031746444590218 | 0.020734488964081 | |
tritonbench_gemm_fwd[x_(4096, 4096, 4096)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.93079632520676 | 0.90207272768021 | 0.031841775773908 | 0.02872359752655 | |
tritonbench_rms_norm_bwd[liger_rms]-speedup-avg | 1 | 1 | 0.67494732141495 | 0.65355312824249 | 0.03273520123756 | 0.021394193172455 | |
tritonbench_rms_norm_bwd[x_average-liger_rms]_speedup | 1 | 1 | 0.67494732141495 | 0.65355312824249 | 0.03273520123756 | 0.021394193172455 | |
tritonbench_layer_norm_bwd[x_3072-liger_layer_norm]_speedup | 1 | 1 | 1.1580902338028 | 1.1213291883469 | 0.032783455418768 | 0.036761045455933 | |
tritonbench_low_mem_dropout_fwd[x_8192-triton_dropout]_tflops | 1 | 1 | 0.0035310345701873 | 0.0034133330918849 | 0.034482857410638 | 0.00011770147830248 | |
tritonbench_layer_norm_bwd[x_4096-liger_layer_norm]_speedup | 1 | 1 | 1.3873783349991 | 1.3410683870316 | 0.03453213006537 | 0.046309947967529 | |
tritonbench_rope_bwd[x_(2048, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.2819972038269 | 2.2028198242188 | 0.03594364765454 | 0.079177379608154 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 8192, 2304)-_triton]_speedup | 1 | 1 | 0.3853442966938 | 0.37189581990242 | 0.036161946630404 | 0.013448476791382 | |
tritonbench_softmax_fwd[x_2752.0-triton_softmax]_speedup | 1 | 1 | 4.084876537323 | 3.9392590522766 | 0.036965704238778 | 0.14561748504639 | |
tritonbench_int4_gemm_fwd[x_(16, 1, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 16.980989456177 | 16.352062225342 | 0.03846164613172 | 0.62892723083496 | |
tritonbench_low_mem_dropout_fwd[x_average-triton_dropout]_speedup | 1 | 1 | 1.1832580566406 | 1.1369156837463 | 0.040761486147839 | 0.046342372894287 | |
tritonbench_low_mem_dropout_fwd[triton_dropout]-speedup-avg | 1 | 1 | 1.1832580566406 | 1.1369156837463 | 0.040761486147839 | 0.046342372894287 | |
tritonbench_rms_norm_bwd[x_(2048, 32768)-liger_rms]_speedup | 1 | 1 | 0.41984125971794 | 0.40266972780228 | 0.04264420871513 | 0.017171531915665 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(128, 8192, 2304)-_triton]_tflops | 1 | 1 | 95.385314941406 | 91.456657409668 | 0.042956495929437 | 3.9286575317383 | |
tritonbench_low_mem_dropout_fwd[x_average-triton_dropout]_tflops | 1 | 1 | 0.026509527117014 | 0.025263078510761 | 0.049338745700439 | 0.0012464486062527 | |
tritonbench_low_mem_dropout_fwd[triton_dropout]-tflops-avg | 1 | 1 | 0.026509527117014 | 0.025263078510761 | 0.049338745700439 | 0.0012464486062527 | |
tritonbench_layer_norm_bwd[x_2048-liger_layer_norm]_speedup | 1 | 1 | 0.82723355293274 | 0.78804343938828 | 0.049730905157824 | 0.039190113544464 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(8, 2304, 2304)-_cutlass]_tflops | 1 | 1 | 5.1538019180298 | 4.8970627784729 | 0.05242716934026 | 0.25673913955688 | |
tritonbench_softmax_fwd[x_2368.0-triton_softmax]_speedup | 1 | 1 | 3.7730867862701 | 3.5819070339203 | 0.053373733751156 | 0.19117975234985 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 13312, 6656)-_triton]_speedup | 1 | 1 | 1.1307787895203 | 1.0638449192047 | 0.06291694316272 | 0.066933870315552 | |
tritonbench_gemm_fwd[x_(768, 768, 768)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.74530833959579 | 0.69999998807907 | 0.064726217554743 | 0.045308351516724 | |
tritonbench_gemm_fwd[x_(512, 512, 512)-triton_tutorial_matmul]_speedup | 1 | 1 | 0.84999996423721 | 0.7978338599205 | 0.065384670841006 | 0.052166104316711 | |
tritonbench_fp8_gemm_blockwise_fwd[x_(2048, 13312, 6656)-_triton]_tflops | 1 | 1 | 1081.0598144531 | 1012.7152099609 | 0.067486499481748 | 68.344604492188 | |
tritonbench_softmax_fwd[x_2176.0-triton_softmax]_speedup | 1 | 1 | 3.6578948497772 | 3.4172413349152 | 0.070423330188365 | 0.24065351486206 | |
tritonbench_rope_bwd[liger_rotary_pos_emb]-speedup-avg | 1 | 1 | 3.2496359348297 | 3.0274174213409 | 0.073402006582344 | 0.22221851348877 | |
tritonbench_rope_bwd[x_average-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.2496359348297 | 3.0274174213409 | 0.073402006582344 | 0.22221851348877 | |
tritonbench_int4_gemm_fwd[x_(1, 1, 8192, 1024)-tinygemm]_tflops | 1 | 1 | 1.8204443454742 | 1.6912516355515 | 0.076388815955625 | 0.12919270992279 | |
tritonbench_swiglu_fwd[x_(4, 1024, 4096)-liger_swiglu]_speedup | 1 | 1 | 1.023561835289 | 0.9506431221962 | 0.076704613319397 | 0.072918713092804 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 131072)-liger_embedding]_speedup | 1 | 1 | 1.475198507309 | 1.3675272464752 | 0.078734271007222 | 0.10767126083374 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 131072)-liger_embedding]_speedup | 1 | 1 | 1.2366671562195 | 1.1448746919632 | 0.08017686555625 | 0.091792464256287 | |
tritonbench_jsd_fwd[x_(4, 2048, 16384)-liger_jsd]_speedup | 1 | 1 | 0.66177201271057 | 0.60781478881836 | 0.088772476229328 | 0.053957223892212 | |
tritonbench_low_mem_dropout_fwd[x_524288-torch_dropout]_tflops | 1 | 1 | 0.13826160132885 | 0.12554788589478 | 0.10126586635421 | 0.012713715434074 | |
tritonbench_low_mem_dropout_fwd[x_average-torch_dropout]_tflops | 1 | 1 | 0.024426048621535 | 0.022153681144118 | 0.10257290707735 | 0.002272367477417 | |
tritonbench_low_mem_dropout_fwd[torch_dropout]-tflops-avg | 1 | 1 | 0.024426048621535 | 0.022153681144118 | 0.10257290707735 | 0.002272367477417 | |
tritonbench_rms_norm_bwd[x_(2048, 16384)-liger_rms]_speedup | 1 | 1 | 1.1314612627029 | 1.0239211320877 | 0.10502774798286 | 0.10754013061523 | |
tritonbench_low_mem_dropout_fwd[x_131072-torch_dropout]_tflops | 1 | 1 | 0.042890053242445 | 0.03792592510581 | 0.13089010018306 | 0.0049641281366348 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 65536)-liger_embedding]_speedup | 1 | 1 | 1.8207459449768 | 1.5952908992767 | 0.14132535063184 | 0.22545504570007 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 65536)-liger_embedding]_speedup | 1 | 1 | 1.4066215753555 | 1.2307544946671 | 0.14289371393769 | 0.17586708068848 | |
tritonbench_rope_bwd[x_(8192, 2048)-liger_rotary_pos_emb]_speedup | 1 | 1 | 3.6184692382812 | 3.1576962471008 | 0.14592061906001 | 0.46077299118042 | |
tritonbench_low_mem_dropout_fwd[x_131072-triton_dropout]_tflops | 1 | 1 | 0.049053888767958 | 0.042666666209698 | 0.14970053031254 | 0.00638722255826 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_tflops | 1 | 1 | 207.05648803711 | 179.83586120605 | 0.15136373050682 | 27.220626831055 | |
tritonbench_low_mem_dropout_fwd[x_2048-triton_dropout]_speedup | 1 | 1 | 1.2025316953659 | 1.0429447889328 | 0.1530156803376 | 0.15958690643311 | |
tritonbench_low_mem_dropout_fwd[x_32-triton_dropout]_tflops | 1 | 1 | 0.000014925372852304 | 0.000012903225979244 | 0.15671638056347 | 0.0000020221468730597 | |
tritonbench_low_mem_dropout_fwd[x_512-triton_dropout]_speedup | 1 | 1 | 1.1510791778564 | 0.99390250444412 | 0.15814093707333 | 0.15717667341232 | |
tritonbench_low_mem_dropout_fwd[x_8192-torch_dropout]_tflops | 1 | 1 | 0.0030295855831355 | 0.0026122450362891 | 0.15976316962947 | 0.00041734054684639 | |
tritonbench_low_mem_dropout_fwd[x_128-torch_dropout]_tflops | 1 | 1 | 0.000050314465624979 | 0.000043010750232497 | 0.16981139257048 | 0.0000073037153924815 | |
tritonbench_low_mem_dropout_fwd[x_128-triton_dropout]_tflops | 1 | 1 | 0.000059259262343403 | 0.000050314465624979 | 0.17777783401487 | 0.0000089447967184242 | |
tritonbench_low_mem_dropout_fwd[x_512-triton_dropout]_tflops | 1 | 1 | 0.00023021583911031 | 0.00019512196013238 | 0.17985612154635 | 0.000035093878977932 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 512, 16, 512, 128) | noop-compiled]_speedup | 1 | 1 | 68.307334899902 | 56.955291748047 | 0.19931498555171 | 11.352043151855 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 32768)-liger_embedding]_speedup | 1 | 1 | 2.2547099590302 | 1.8318628072739 | 0.23082905012169 | 0.42284715175629 | |
tritonbench_low_mem_dropout_fwd[x_32-triton_dropout]_speedup | 1 | 1 | 1.2761194705963 | 1.0322580337524 | 0.23624077398302 | 0.24386143684387 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 32768)-liger_embedding]_speedup | 1 | 1 | 1.6281609535217 | 1.316552400589 | 0.23668526432623 | 0.31160855293274 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 16384)-liger_embedding]_speedup | 1 | 1 | 2.6422991752625 | 2.0475790500641 | 0.29045038587387 | 0.59472012519836 | |
tritonbench_embedding_bwd[liger_embedding]-speedup-avg | 1 | 1 | 1.7441607713699 | 1.3459738492966 | 0.2958355560039 | 0.39818692207336 | |
tritonbench_embedding_bwd[x_average-liger_embedding]_speedup | 1 | 1 | 1.7441607713699 | 1.3459738492966 | 0.2958355560039 | 0.39818692207336 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 4096)-liger_embedding]_speedup | 1 | 1 | 2.2862944602966 | 1.7267206907272 | 0.32406733328349 | 0.5595737695694 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 8192)-liger_embedding]_speedup | 1 | 1 | 2.5408017635345 | 1.8995307683945 | 0.33759442374397 | 0.64127099514008 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 16384)-liger_embedding]_speedup | 1 | 1 | 1.8265942335129 | 1.3648246526718 | 0.33833619574287 | 0.46176958084106 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 8192)-liger_embedding]_speedup | 1 | 1 | 1.5812239646912 | 1.1168842315674 | 0.41574562519532 | 0.46433973312378 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 1024)-liger_embedding]_speedup | 1 | 1 | 0.87206310033798 | 0.61563575267792 | 0.41652445710738 | 0.25642734766006 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 2048)-liger_embedding]_speedup | 1 | 1 | 0.98931086063385 | 0.69035649299622 | 0.43304346474695 | 0.29895436763763 | |
tritonbench_embedding_bwd[x_(8, 2048, 4096, 4096)-liger_embedding]_speedup | 1 | 1 | 1.2360243797302 | 0.85892456769943 | 0.43903717068057 | 0.37709981203079 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 2048)-liger_embedding]_speedup | 1 | 1 | 2.1009476184845 | 1.4227942228317 | 0.47663490951142 | 0.67815339565277 | |
tritonbench_embedding_bwd[x_(32, 512, 768, 1024)-liger_embedding]_speedup | 1 | 1 | 2.0089087486267 | 1.3054693937302 | 0.53884017371452 | 0.70343935489655 | |
tritonbench_rope_bwd[x_(8192, 1024)-liger_rotary_pos_emb]_speedup | 1 | 1 | 2.9531304836273 | 1.7136546373367 | 0.72329384187756 | 1.2394758462906 | |
tritonbench_layer_norm_bwd[liger_layer_norm]-speedup-avg | 1 | 1 | 0.67075318098068 | 0.37378814816475 | 0.79447418082674 | 0.29696503281593 | |
tritonbench_layer_norm_bwd[x_average-liger_layer_norm]_speedup | 1 | 1 | 0.67075318098068 | 0.37378814816475 | 0.79447418082674 | 0.29696503281593 | |
tritonbench_layer_norm_bwd[x_8192-liger_layer_norm]_speedup | 1 | 1 | 1.4726729393005 | 0.29377841949463 | 4.0128697057936 | 1.1788945198059 | |
tritonbench_layer_norm_bwd[x_4608-liger_layer_norm]_speedup | 1 | 1 | 1.1561365127563 | 0.22226889431477 | 4.2015218608101 | 0.93386761844158 | |
tritonbench_layer_norm_bwd[x_7680-liger_layer_norm]_speedup | 1 | 1 | 1.4936227798462 | 0.2869755923748 | 4.2047031856823 | 1.2066471874714 | |
tritonbench_layer_norm_bwd[x_5120-liger_layer_norm]_speedup | 1 | 1 | 1.222773194313 | 0.23219281435013 | 4.2661973960538 | 0.99058037996292 | |
tritonbench_layer_norm_bwd[x_6144-liger_layer_norm]_speedup | 1 | 1 | 1.3486423492432 | 0.25371152162552 | 4.3156527563371 | 1.0949308276176 | |
tritonbench_layer_norm_bwd[x_7168-liger_layer_norm]_speedup | 1 | 1 | 1.4414019584656 | 0.27081799507141 | 4.3224009655839 | 1.1705839633942 | |
tritonbench_layer_norm_bwd[x_5632-liger_layer_norm]_speedup | 1 | 1 | 1.2834794521332 | 0.24102148413658 | 4.3251661640498 | 1.0424579679966 | |
tritonbench_layer_norm_bwd[x_6656-liger_layer_norm]_speedup | 1 | 1 | 1.3878934383392 | 0.2594730257988 | 4.3488929497259 | 1.1284204125404 | |
tritonbench_fused_linear_jsd_bwd-pass | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-compiled]_speedup | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-compiled]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-eager]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_(8, 16, 16384, 16, 16384, 128) | noop-compiled]_speedup | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_(8, 16, 16384, 16, 16384, 128) | noop-compiled]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_fwd[x_(8, 16, 16384, 16, 16384, 128) | noop-eager]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-compiled]_speedup | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-compiled]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_ (8, 16, 8192, 16, 8192, 128) | noop-eager]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_(8, 16, 16384, 16, 16384, 128) | noop-compiled]_speedup | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_(8, 16, 16384, 16, 16384, 128) | noop-compiled]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 | |
tritonbench_flex_attention_bwd[x_(8, 16, 16384, 16, 16384, 128) | noop-eager]_tflops | 1 | 1 | 0 | 0 | 3.4028235e+38 | 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment