Last active
May 29, 2021 12:09
-
-
Save genkuroki/6123aef79488bc20b52047656fc6f015 to your computer and use it in GitHub Desktop.
Octavian!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "https://discourse.julialang.org/t/intel-c-c-compiler-performance-versus-julia/61929/18" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "versioninfo()", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Julia Version 1.7.0-DEV.1129\nCommit 9117b4d6d6 (2021-05-20 16:42 UTC)\nPlatform Info:\n OS: Windows (x86_64-w64-mingw32)\n CPU: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz\n WORD_SIZE: 64\n LIBM: libopenlibm\n LLVM: libLLVM-11.0.1 (ORCJIT, skylake)\nEnvironment:\n JULIA_NUM_THREADS = 12\n JULIA_PYTHONCALL_EXE = C:\\Users\\genkuroki\\.julia\\conda\\3\\python.exe\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "using LinearAlgebra\nusing BLASBenchmarksCPU\nusing Octavian\nusing BenchmarkHistograms", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "M = K = N = 512\nA = rand(M, K)\nB = rand(K, N)\nC1 = @time(A * B)\nC0 = similar(C1);", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " 0.607834 seconds (2.53 M allocations: 136.127 MiB, 6.48% gc time, 99.52% compilation time)\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "@benchmark mul!($C0, $A, $B)", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "samples: 2404; evals/sample: 1; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1.36e6 - 1.64e6 ] \u001b[32m▋\u001b[39m40\n (1.64e6 - 1.93e6 ] \u001b[32m█▉\u001b[39m124\n (1.93e6 - 2.21e6 ] \u001b[32m██████████████████████████████ \u001b[39m2053\n (2.21e6 - 2.5e6 ] \u001b[32m██▎\u001b[39m148\n (2.5e6 - 2.78e6 ] \u001b[32m▌\u001b[39m28\n (2.78e6 - 3.06e6 ] \u001b[32m▏\u001b[39m2\n (3.06e6 - 3.35e6 ] \u001b[32m▏\u001b[39m1\n (3.35e6 - 3.63e6 ] \u001b[32m▏\u001b[39m1\n (3.63e6 - 3.92e6 ] \u001b[32m▏\u001b[39m3\n (3.92e6 - 4.2e6 ] \u001b[32m \u001b[39m0\n (4.2e6 - 4.48e6 ] \u001b[32m▏\u001b[39m1\n (4.48e6 - 4.77e6 ] \u001b[32m \u001b[39m0\n (4.77e6 - 1.145e7] \u001b[32m▏\u001b[39m3\n\n Counts\n\nmin: 1.359 ms (0.00% GC); mean: 2.065 ms (0.00% GC); median: 2.046 ms (0.00% GC); max: 11.455 ms (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm\n@benchmark gemmmkl!($C0, $A, $B)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": "samples: 2801; evals/sample: 1; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1.03e6 - 1.19e6 ] \u001b[32m▎\u001b[39m10\n (1.19e6 - 1.34e6 ] \u001b[32m▋\u001b[39m35\n (1.34e6 - 1.5e6 ] \u001b[32m█▏\u001b[39m63\n (1.5e6 - 1.65e6 ] \u001b[32m█▉\u001b[39m108\n (1.65e6 - 1.81e6 ] \u001b[32m██████████████████████████████ \u001b[39m1745\n (1.81e6 - 1.96e6 ] \u001b[32m████████████▊\u001b[39m737\n (1.96e6 - 2.12e6 ] \u001b[32m█▌\u001b[39m83\n (2.12e6 - 2.27e6 ] \u001b[32m▎\u001b[39m11\n (2.27e6 - 2.42e6 ] \u001b[32m▏\u001b[39m5\n (2.42e6 - 2.58e6 ] \u001b[32m \u001b[39m0\n (2.58e6 - 2.73e6 ] \u001b[32m \u001b[39m0\n (2.73e6 - 2.89e6 ] \u001b[32m▏\u001b[39m1\n (2.89e6 - 2.869e7] \u001b[32m▏\u001b[39m3\n\n Counts\n\nmin: 1.035 ms (0.00% GC); mean: 1.771 ms (0.00% GC); median: 1.757 ms (0.00% GC); max: 28.691 ms (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# MKL dgemm_direct\n@benchmark gemmmkl_direct!($C0, $A, $B)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "samples: 2736; evals/sample: 1; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1.07e6 - 1.39e6] \u001b[32m█\u001b[39m67\n (1.39e6 - 1.71e6] \u001b[32m█████▎\u001b[39m376\n (1.71e6 - 2.03e6] \u001b[32m██████████████████████████████ \u001b[39m2187\n (2.03e6 - 2.35e6] \u001b[32m█▎\u001b[39m90\n (2.35e6 - 2.67e6] \u001b[32m▏\u001b[39m1\n (2.67e6 - 2.99e6] \u001b[32m▏\u001b[39m1\n (2.99e6 - 3.31e6] \u001b[32m▏\u001b[39m4\n (3.31e6 - 3.63e6] \u001b[32m▏\u001b[39m3\n (3.63e6 - 3.95e6] \u001b[32m▏\u001b[39m1\n (3.95e6 - 4.27e6] \u001b[32m▏\u001b[39m1\n (4.27e6 - 4.59e6] \u001b[32m▏\u001b[39m2\n (4.59e6 - 4.91e6] \u001b[32m \u001b[39m0\n (4.91e6 - 5.23e6] \u001b[32m▏\u001b[39m3\n\n Counts\n\nmin: 1.074 ms (0.00% GC); mean: 1.811 ms (0.00% GC); median: 1.803 ms (0.00% GC); max: 5.230 ms (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Octavian.jl\n@benchmark matmul!($C0, $A, $B)", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "samples: 2801; evals/sample: 1; memory estimate: 0 bytes; allocs estimate: 0\nns\n\n (1.05e6 - 1.16e6 ] \u001b[32m▍\u001b[39m11\n (1.16e6 - 1.27e6 ] \u001b[32m▍\u001b[39m16\n (1.27e6 - 1.38e6 ] \u001b[32m█▎\u001b[39m50\n (1.38e6 - 1.5e6 ] \u001b[32m█▍\u001b[39m58\n (1.5e6 - 1.61e6 ] \u001b[32m█\u001b[39m39\n (1.61e6 - 1.72e6 ] \u001b[32m█████████████▊\u001b[39m587\n (1.72e6 - 1.83e6 ] \u001b[32m██████████████████████████████ \u001b[39m1290\n (1.83e6 - 1.94e6 ] \u001b[32m██████████████▉\u001b[39m637\n (1.94e6 - 2.05e6 ] \u001b[32m██▎\u001b[39m95\n (2.05e6 - 2.17e6 ] \u001b[32m▎\u001b[39m9\n (2.17e6 - 2.28e6 ] \u001b[32m▏\u001b[39m5\n (2.28e6 - 2.39e6 ] \u001b[32m▏\u001b[39m1\n (2.39e6 - 2.999e7] \u001b[32m▏\u001b[39m3\n\n Counts\n\nmin: 1.049 ms (0.00% GC); mean: 1.773 ms (0.00% GC); median: 1.774 ms (0.00% GC); max: 29.993 ms (0.00% GC)." | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"@webio": { | |
"lastKernelId": null, | |
"lastCommId": null | |
}, | |
"kernelspec": { | |
"name": "julia-1.7-depwarn-o3", | |
"display_name": "Julia 1.7.0-DEV depwarn -O3", | |
"language": "julia" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"name": "julia", | |
"mimetype": "application/julia", | |
"version": "1.7.0" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"gist": { | |
"id": "6123aef79488bc20b52047656fc6f015", | |
"data": { | |
"description": "Octavian!", | |
"public": true | |
} | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/6123aef79488bc20b52047656fc6f015" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment