[[wg:dynamo:Performance_results|Back to performance benchmark lists]] \\ ===== Modules ===== shtns: gcc/4.7.1 mkl/13.0.2.146 xshells: intel/13.0.2.146 impi/4.1.0.030 Using MKL FFTW wrappers ===== Flags ===== shtns: ./configure --enable-mkl ; make xshells: mpiicpc -mt_mpi -O3 -march=native -xHost -complex-limited-range -ipo -prec-div -prec-sqrt -DXS_MKL -DXS_VEC=0 -DXS_MPI -fopenmp -Wunknown-pragmas -lshtns -mkl -lrt -lm -o xsbig_hyb ===== Single Node, Strong Scaling ===== Run xsbig for 200 iterations, controlling the number of threads with the OMP_NUM_THREADS environment variable. All times are in seconds. ^ Cores, \\ Processes, \\ Threads ^^^ Problem \\ Description ^^ \\ Timing (seconds) ^^^^ \\ Metrics ^^^ ^ C ^ P ^ T ^ $ l_{max} $ ^ $ (N_r,N_{\theta},N_{\phi}) $ ^ Total ^ Solver ^ Nonlinear ^ Comm ^ Efficiency ^ SUs per $10^4$ iters ^ Hours per $10^4$ Iters ^ | 1 | 1 | 1 | 47 | (73,72,144) | ~~=round((28.186132-6.065899)/200,5)~~ | ~~=round(1.663185/200,5)~~ | ~~=round(20.456966/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 2 | 1 | 2 | 47 | (73,72,144) | ~~=round((17.141443-5.883230)/200,5)~~ | ~~=round(0.807704/200,5)~~ | ~~=round(10.450453/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 4 | 1 | 4 | 47 | (73,72,144) | ~~=round((11.701083-5.867437)/200,5)~~ | ~~=round(0.465078/200,5)~~ | ~~=round(5.368498/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 8 | 1 | 8 | 47 | (73,72,144) | ~~=round((9.172621-5.966595)/200,5)~~ | ~~=round(0.334851/200,5)~~ | ~~=round(2.871116/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 1 | 16 | 47 | (73,72,144) | ~~=round((7.909697-5.993874)/200,5)~~ | 
~~=round(0.287537/200,5)~~ | ~~=round(1.628188/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 1 | 32 | 47 | (73,72,144) | ~~=round((10.045061-6.194584)/200,5)~~ | ~~=round(0.648611/200,5)~~ | ~~=round(3.201726/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 1 | 64 | 47 | (73,72,144) | ~~=round((11.377265-6.261969)/200,5)~~ | ~~=round(0.859792/200,5)~~ | ~~=round(4.255332/200,5)~~ | 0 | ~~=round(r2c5/cell(5,row())/cell(0,row()),5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | {{wg:dynamo:Performance_results:xshells:xshell_elapsed.png?480}}\\ Elapsed (wall clock) time for the strong scaling. The number of OpenMP threads is shown by the numbers. {{wg:dynamo:Performance_results:xshells:xshell_efficiency.png?480}}\\ Parallel efficiency for the strong scaling. The number of OpenMP threads is shown by the numbers. ===== Multiple Nodes, Strong Scaling ===== Note: The code decomposes the domain by radial shell, so it only scales up to $N_r$ cores. 
^ Cores, \\ Processes, \\ Threads ^^^ Problem \\ Description ^^ \\ Timing (seconds) ^^^^ \\ Metrics ^^^ ^ C ^ P ^ T ^ $ l_{max} $ ^ $ (N_r,N_{\theta},N_{\phi}) $ ^ Total ^ Solver ^ Nonlinear ^ Comm ^ Efficiency ^ SUs per $10^4$ iters ^ Hours per $10^4$ Iters ^ | 16 | 1 | 16 | 255 | (512,384,768) | ~~=round((859.350923-22.831441)/200,5)~~ | ~~=round(76.209655/200,5)~~ | ~~=round(760.282343/200,5)~~ | ~~=round(0.022280/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 2 | 8 | 255 | (512,384,768) | ~~=round((848.852199-23.129282)/200,5)~~ | ~~=round(68.842306/200,5)~~ | ~~=round(753.802079/200,5)~~ | ~~=round(3.039999/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 4 | 4 | 255 | (512,384,768) | ~~=round((850.342050-23.438608)/200,5)~~ | ~~=round(65.531879/200,5)~~ | ~~=round(753.857355/200,5)~~ | ~~=round(7.500653/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 8 | 2 | 255 | (512,384,768) | ~~=round((842.758523-24.838339)/200,5)~~ | ~~=round(64.422257/200,5)~~ | ~~=round(738.216189/200,5)~~ | ~~=round(15.269050/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 16 | 16 | 1 | 255 | (512,384,768) | ~~=round((840.370934-25.750419)/200,5)~~ | ~~=round(60.882702/200,5)~~ | ~~=round(747.078117/200,5)~~ | ~~=round(6.646923/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 32 | 2 | 16 | 255 | (512,384,768) | ~~=round((462.982147-23.106554)/200,5)~~ | ~~=round(41.293572/200,5)~~ | ~~=round(385.654245/200,5)~~ | ~~=round(12.924984/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | 
~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 32 | 4 | 8 | 255 | (512,384,768) | ~~=round((449.089038-23.283358)/200,5)~~ | ~~=round(37.199738/200,5)~~ | ~~=round(382.146451/200,5)~~ | ~~=round(6.440859/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 32 | 8 | 4 | 255 | (512,384,768) | ~~=round((447.214890-21.450560)/200,5)~~ | ~~=round(34.283895/200,5)~~ | ~~=round(382.324790/200,5)~~ | ~~=round(9.138399/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 32 | 16 | 2 | 255 | (512,384,768) | ~~=round((440.657854-23.042687)/200,5)~~ | ~~=round(34.042211/200,5)~~ | ~~=round(372.518307/200,5)~~ | ~~=round(11.039663/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 32 | 32 | 1 | 255 | (512,384,768) | ~~=round((440.160001-25.277095)/200,5)~~ | ~~=round(30.403780/200,5)~~ | ~~=round(370.255018/200,5)~~ | ~~=round(14.209307/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 64 | 4 | 16 | 255 | (512,384,768) | ~~=round((254.265684-26.628211)/200,5)~~ | ~~=round(21.511166/200,5)~~ | ~~=round(197.155969/200,5)~~ | ~~=round(8.966534/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 64 | 8 | 8 | 255 | (512,384,768) | ~~=round((247.611258-25.237391)/200,5)~~ | ~~=round(21.271763/200,5)~~ | ~~=round(193.930848/200,5)~~ | ~~=round(7.154851/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 64 | 16 | 4 | 255 | (512,384,768) | ~~=round((246.711896-25.608422)/200,5)~~ | ~~=round(17.621858/200,5)~~ 
| ~~=round(194.061829/200,5)~~ | ~~=round(9.404258/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 64 | 32 | 2 | 255 | (512,384,768) | ~~=round((246.215753-28.312167)/200,5)~~ | ~~=round(17.802353/200,5)~~ | ~~=round(190.309489/200,5)~~ | ~~=round(9.777235/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 64 | 64 | 1 | 255 | (512,384,768) | ~~=round((244.037148-29.216646)/200,5)~~ | ~~=round(14.897830/200,5)~~ | ~~=round(187.399777/200,5)~~ | ~~=round(12.508480/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 128 | 8 | 16 | 255 | (512,384,768) | ~~=round((151.495191-28.189163)/200,5)~~ | ~~=round(13.011174/200,5)~~ | ~~=round(104.016242/200,5)~~ | ~~=round(6.273581/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 128 | 16 | 8 | 255 | (512,384,768) | ~~=round((146.824696-27.376640)/200,5)~~ | ~~=round(13.501653/200,5)~~ | ~~=round(100.672446/200,5)~~ | ~~=round(5.259515/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 128 | 32 | 4 | 255 | (512,384,768) | ~~=round((140.656962-25.415327)/200,5)~~ | ~~=round(10.581474/200,5)~~ | ~~=round(99.275023/200,5)~~ | ~~=round(5.371883/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 128 | 64 | 2 | 255 | (512,384,768) | ~~=round((145.036535-32.628063)/200,5)~~ | ~~=round(9.759590/200,5)~~ | ~~=round(97.822146/200,5)~~ | ~~=round(4.811928/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | 
~~=round(cell(5,row())*1e4/3600,5)~~ | | 128 | 128 | 1 | 255 | (512,384,768) | ~~=round((157.296720-33.558296)/200,5)~~ | ~~=round(11.448583/200,5)~~ | ~~=round(93.071674/200,5)~~ | ~~=round(19.203873/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 256 | 16 | 16 | 255 | (512,384,768) | ~~=round((98.671364-30.542630)/200,5)~~ | ~~=round(7.058917/200,5)~~ | ~~=round(55.422816/200,5)~~ | ~~=round(5.641392/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 256 | 32 | 8 | 255 | (512,384,768) | ~~=round((93.635622-29.550634)/200,5)~~ | ~~=round(7.110833/200,5)~~ | ~~=round(52.662469/200,5)~~ | ~~=round(4.297148/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 256 | 64 | 4 | 255 | (512,384,768) | ~~=round((100.346066-33.338712)/200,5)~~ | ~~=round(7.868357/200,5)~~ | ~~=round(51.579834/200,5)~~ | ~~=round(7.546415/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 256 | 128 | 2 | 255 | (512,384,768) | ~~=round((94.808098-33.185216)/200,5)~~ | ~~=round(5.529006/200,5)~~ | ~~=round(50.040530/200,5)~~ | ~~=round(6.040481/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 256 | 256 | 1 | 255 | (512,384,768) | ~~=round((107.513423-33.821557)/200,5)~~ | ~~=round(13.092046/200,5)~~ | ~~=round(47.496520/200,5)~~ | ~~=round(13.078156/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 512 | 32 | 16 | 255 | (512,384,768) | ~~=round((88.701216-30.827860)/200,5)~~ | ~~=round(5.631932/200,5)~~ | ~~=round(30.094913/200,5)~~ | 
~~=round(22.142136/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 512 | 64 | 8 | 255 | (512,384,768) | ~~=round((65.560378-29.305240)/200,5)~~ | ~~=round(5.778026/200,5)~~ | ~~=round(27.482326/200,5)~~ | ~~=round(2.981619/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 512 | 128 | 4 | 255 | (512,384,768) | ~~=round((66.245907-32.008877)/200,5)~~ | ~~=round(3.544521/200,5)~~ | ~~=round(26.707292/200,5)~~ | ~~=round(3.972123/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | | 512 | 256 | 2 | 255 | (512,384,768) | ~~=round((68.773548-32.536994)/200,5)~~ | ~~=round(5.841073/200,5)~~ | ~~=round(26.214258/200,5)~~ | ~~=round(4.158451/200,5)~~ | ~~=round(r2c5/cell(5,row())/cell(0,row())*16,5)~~ | ~~=round(16*1*cell(5,row())/0.36,5)~~ | ~~=round(cell(5,row())*1e4/3600,5)~~ | ===== Multiple Nodes, Weak Scaling ===== ^ Cores, \\ Processes, \\ Threads ^^^ Problem \\ Description ^^ \\ Timing (seconds) ^^^^ ^ C ^ P ^ T ^ $ l_{max} $ ^ $ (N_r,N_{\theta},N_{\phi}) $ ^ Total ^ Solver ^ Nonlinear ^ Comm ^ | 16 | 16 | 1 | 31 | (512,48,96) | ~~=round((13.064167-8.214126)/200,5)~~ | ~~=round(1.235317/200,5)~~ | ~~=round(3.405381/200,5)~~ | ~~=round(0.206263/200,5)~~ | | 16 | 8 | 2 | 31 | (512,48,96) | ~~=round((12.354198-7.711143)/200,5)~~ | ~~=round(1.092247/200,5)~~ | ~~=round(3.396501/200,5)~~ | ~~=round(0.152480/200,5)~~ | | 16 | 4 | 4 | 31 | (512,48,96) | ~~=round((12.372488-7.771630)/200,5)~~ | ~~=round(1.033719/200,5)~~ | ~~=round(3.460506/200,5)~~ | ~~=round(0.105363/200,5)~~ | | 16 | 2 | 8 | 31 | (512,48,96) | ~~=round((11.985949-7.466198)/200,5)~~ | ~~=round(1.019756/200,5)~~ | ~~=round(3.438194/200,5)~~ | ~~=round(0.060761/200,5)~~ | | 16 | 1 | 16 | 31 | (512,48,96) | 
~~=round((15.377444-6.532855)/200,5)~~ | ~~=round(2.038502/200,5)~~ | ~~=round(6.791328/200,5)~~ | ~~=round(0.013441/200,5)~~ | | 32 | 32 | 1 | 44 | (512,68,136) | ~~=round((30.339614-22.825182)/200,5)~~ | ~~=round(1.300490/200,5)~~ | ~~=round(5.608838/200,5)~~ | ~~=round(0.601638/200,5)~~ | | 32 | 16 | 2 | 44 | (512,68,136) | ~~=round((21.523638-12.824732)/200,5)~~ | ~~=round(1.701563/200,5)~~ | ~~=round(5.628580/200,5)~~ | ~~=round(1.366347/200,5)~~ | | 32 | 8 | 4 | 44 | (512,68,136) | ~~=round((20.556051-12.789290)/200,5)~~ | ~~=round(1.484704/200,5)~~ | ~~=round(5.664852/200,5)~~ | ~~=round(0.615543/200,5)~~ | | 32 | 4 | 8 | 44 | (512,68,136) | ~~=round((20.193742-12.643133)/200,5)~~ | ~~=round(1.373998/200,5)~~ | ~~=round(5.663763/200,5)~~ | ~~=round(0.511608/200,5)~~ | | 32 | 2 | 16 | 44 | (512,68,136) | ~~=round((19.265632-11.280716)/200,5)~~ | ~~=round(1.792451/200,5)~~ | ~~=round(5.737147/200,5)~~ | ~~=round(0.453910/200,5)~~ | | 64 | 64 | 1 | 63 | (512,96,192) | ~~=round((38.338288-30.039571)/200,5)~~ | ~~=round(2.202186/200,5)~~ | ~~=round(4.570570/200,5)~~ | ~~=round(1.521923/200,5)~~ | | 64 | 32 | 2 | 63 | (512,96,192) | ~~=round((30.879573-18.344755)/200,5)~~ | ~~=round(6.511942/200,5)~~ | ~~=round(4.665371/200,5)~~ | ~~=round(1.353689/200,5)~~ | | 64 | 16 | 4 | 63 | (512,96,192) | ~~=round((23.818460-17.402759)/200,5)~~ | ~~=round(1.011504/200,5)~~ | ~~=round(4.705575/200,5)~~ | ~~=round(0.696205/200,5)~~ | | 64 | 8 | 8 | 63 | (512,96,192) | ~~=round((24.980080-17.286759)/200,5)~~ | ~~=round(2.567645/200,5)~~ | ~~=round(4.716895/200,5)~~ | ~~=round(0.407248/200,5)~~ | | 64 | 4 | 16 | 63 | (512,96,192) | ~~=round((34.780781-21.312230)/200,5)~~ | ~~=round(2.644489/200,5)~~ | ~~=round(4.992765/200,5)~~ | ~~=round(5.829854/200,5)~~ | | 256 | 256 | 1 | 127 | (512,192,384) | ~~=round((50.998166-38.885887)/200,5)~~ | ~~=round(2.172063/200,5)~~ | ~~=round(7.480504/200,5)~~ | ~~=round(2.447529/200,5)~~ | | 256 | 128 | 2 | 127 | (512,192,384) | 
~~=round((41.109527-23.600952)/200,5)~~ | ~~=round(3.398640/200,5)~~ | ~~=round(8.009624/200,5)~~ | ~~=round(6.092880/200,5)~~ | | 256 | 64 | 4 | 127 | (512,192,384) | ~~=round((35.387780-21.886452)/200,5)~~ | ~~=round(3.804355/200,5)~~ | ~~=round(8.214271/200,5)~~ | ~~=round(1.475551/200,5)~~ | | 256 | 32 | 8 | 127 | (512,192,384) | ~~=round((32.320670-20.356467)/200,5)~~ | ~~=round(2.206726/200,5)~~ | ~~=round(8.526686/200,5)~~ | ~~=round(1.224153/200,5)~~ | | 256 | 16 | 16 | 127 | (512,192,384) | ~~=round((38.963833-19.123765)/200,5)~~ | ~~=round(1.702003/200,5)~~ | ~~=round(9.128950/200,5)~~ | ~~=round(9.007131/200,5)~~ | {{wg:dynamo:Performance_results:xshells:xshell_weak_sph.png?480}}\\ Elapsed (wall clock) time for the weak scaling in the horizontal resolution. The number of OpenMP threads is shown by the numbers. The ideal scaling for the Legendre transform ($O(N_{core}^{1/2})$) is plotted with dotted lines. \\ ===== Multiple Nodes, Radial Weak Scaling ===== ^ Cores, \\ Processes, \\ Threads ^^^ Problem \\ Description ^^ \\ Timing (seconds) ^^^^ ^ C ^ P ^ T ^ $ l_{max} $ ^ $ (N_r,N_{\theta},N_{\phi}) $ ^ Total ^ Solver ^ Nonlinear ^ Comm ^ | 128 | 128 | 1 | 255 | (256,384,768) | ~~=round((91.239987-30.271653)/200,5)~~ | ~~=round(4.953100/200,5)~~ | ~~=round(47.411526/200,5)~~ | ~~=round(8.579195/200,5)~~ | | 128 | 64 | 2 | 255 | (256,384,768) | ~~=round((88.905977-27.988788)/200,5)~~ | ~~=round(6.197121/200,5)~~ | ~~=round(50.273175/200,5)~~ | ~~=round(4.433635/200,5)~~ | | 128 | 32 | 4 | 255 | (256,384,768) | ~~=round((86.367076-24.808740)/200,5)~~ | ~~=round(6.678480/200,5)~~ | ~~=round(52.059182/200,5)~~ | ~~=round(2.806119/200,5)~~ | | 128 | 16 | 8 | 255 | (256,384,768) | ~~=round((88.379539-23.948661)/200,5)~~ | ~~=round(7.659652/200,5)~~ | ~~=round(52.999319/200,5)~~ | ~~=round(3.759789/200,5)~~ | | 128 | 8 | 16 | 255 | (256,384,768) | ~~=round((119.482079-27.460995)/200,5)~~ | ~~=round(8.873161/200,5)~~ | ~~=round(76.775907/200,5)~~ | ~~=round(6.367574/200,5)~~ 
| | 256 | 256 | 1 | 255 | (512,384,768) | ~~=round((98.558874-34.626150)/200,5)~~ | ~~=round(7.228361/200,5)~~ | ~~=round(47.099982/200,5)~~ | ~~=round(9.580092/200,5)~~ | | 256 | 128 | 2 | 255 | (512,384,768) | ~~=round((99.248560-36.934203)/200,5)~~ | ~~=round(7.069448/200,5)~~ | ~~=round(49.699599/200,5)~~ | ~~=round(5.522140/200,5)~~ | | 256 | 64 | 4 | 255 | (512,384,768) | ~~=round((94.058046-31.654451)/200,5)~~ | ~~=round(6.035897/200,5)~~ | ~~=round(51.851893/200,5)~~ | ~~=round(4.503150/200,5)~~ | | 256 | 32 | 8 | 255 | (512,384,768) | ~~=round((97.659647-31.460999)/200,5)~~ | ~~=round(9.987284/200,5)~~ | ~~=round(53.318506/200,5)~~ | ~~=round(2.880901/200,5)~~ | | 256 | 16 | 16 | 255 | (512,384,768) | ~~=round((124.057170-32.648420)/200,5)~~ | ~~=round(9.341497/200,5)~~ | ~~=round(54.760837/200,5)~~ | ~~=round(27.300918/200,5)~~ | | 512 | 512 | 1 | 255 | (1024,384,768) | ~~=round((124.609683-50.033968)/200,5)~~ | ~~=round(10.615772/200,5)~~ | ~~=round(47.544274/200,5)~~ | ~~=round(16.391531/200,5)~~ | | 512 | 256 | 2 | 255 | (1024,384,768) | ~~=round((109.206953-39.685455)/200,5)~~ | ~~=round(12.087525/200,5)~~ | ~~=round(49.967884/200,5)~~ | ~~=round(7.439618/200,5)~~ | | 512 | 128 | 4 | 255 | (1024,384,768) | ~~=round((116.266887-45.299175)/200,5)~~ | ~~=round(7.803739/200,5)~~ | ~~=round(51.160648/200,5)~~ | ~~=round(11.978144/200,5)~~ | | 512 | 64 | 8 | 255 | (1024,384,768) | ~~=round((115.030547-44.241709)/200,5)~~ | ~~=round(9.336811/200,5)~~ | ~~=round(53.376249/200,5)~~ | ~~=round(8.062798/200,5)~~ | | 512 | 32 | 16 | 255 | (1024,384,768) | ~~=round((127.792559-31.552505)/200,5)~~ | ~~=round(10.531005/200,5)~~ | ~~=round(54.739074/200,5)~~ | ~~=round(30.963714/200,5)~~ | | 1024 | 1024 | 1 | 255 | (2048,384,768) | ~~=round((177.724102-91.397830)/200,5)~~ | ~~=round(15.856123/200,5)~~ | ~~=round(47.771289/200,5)~~ | ~~=round(22.655236/200,5)~~ | | 1024 | 512 | 2 | 255 | (2048,384,768) | ~~=round((161.012686-81.654744)/200,5)~~ | 
~~=round(19.332428/200,5)~~ | ~~=round(50.192340/200,5)~~ | ~~=round(9.786829/200,5)~~ | | 1024 | 256 | 4 | 255 | (2048,384,768) | ~~=round((157.210049-72.209183)/200,5)~~ | ~~=round(23.350893/200,5)~~ | ~~=round(51.680042/200,5)~~ | ~~=round(9.946280/200,5)~~ | | 1024 | 128 | 8 | 255 | (2048,384,768) | ~~=round((134.970394-58.261670)/200,5)~~ | ~~=round(16.894690/200,5)~~ | ~~=round(53.005553/200,5)~~ | ~~=round(6.794183/200,5)~~ | | 1024 | 64 | 16 | 255 | (2048,384,768) | ~~=round((163.016260-60.792946)/200,5)~~ | ~~=round(17.449489/200,5)~~ | ~~=round(55.361737/200,5)~~ | ~~=round(29.403454/200,5)~~ | | 2048 | 2048 | 1 | 255 | (4096,384,768) | ~~=round((253.066812-143.339576)/200,5)~~ | ~~=round(41.993384/200,5)~~ | ~~=round(47.376132/200,5)~~ | ~~=round(20.311372/200,5)~~ | | 2048 | 1024 | 2 | 255 | (4096,384,768) | ~~=round((325.287676-140.241705)/200,5)~~ | ~~=round(40.214649/200,5)~~ | ~~=round(50.562518/200,5)~~ | ~~=round(94.245942/200,5)~~ | | 2048 | 512 | 4 | 255 | (4096,384,768) | ~~=round((216.159854-117.218754)/200,5)~~ | ~~=round(29.652316/200,5)~~ | ~~=round(51.823570/200,5)~~ | ~~=round(17.420396/200,5)~~ | | 2048 | 256 | 8 | 255 | (4096,384,768) | ~~=round((190.641111-98.377670)/200,5)~~ | ~~=round(28.234432/200,5)~~ | ~~=round(52.236980/200,5)~~ | ~~=round(11.767126/200,5)~~ | | 2048 | 128 | 16 | 255 | (4096,384,768) | ~~=round((297.717424-101.714319)/200,5)~~ | ~~=round(88.564046/200,5)~~ | ~~=round(56.215918/200,5)~~ | ~~=round(51.213984/200,5)~~ | | 4096 | 2048 | 2 | 255 | (8192,384,768) | ~~=round((421.018749-218.138518)/200,5)~~ | ~~=round(106.659275/200,5)~~ | ~~=round(50.279087/200,5)~~ | ~~=round(45.851727/200,5)~~ | | 4096 | 1024 | 4 | 255 | (8192,384,768) | ~~=round((405.636015-222.338224)/200,5)~~ | ~~=round(89.220186/200,5)~~ | ~~=round(52.248461/200,5)~~ | ~~=round(41.784402/200,5)~~ | | 4096 | 512 | 8 | 255 | (8192,384,768) | ~~=round((322.045192-167.179405)/200,5)~~ | ~~=round(84.970569/200,5)~~ | ~~=round(52.858212/200,5)~~ | 
~~=round(17.011353/200,5)~~ | | 4096 | 256 | 16 | 255 | (8192,384,768) | ~~=round((363.077617-191.225878)/200,5)~~ | ~~=round(63.638498/200,5)~~ | ~~=round(55.744545/200,5)~~ | ~~=round(52.458868/200,5)~~ | {{wg:dynamo:Performance_results:xshells:xshell_weak_r.png?480}}\\ Elapsed (wall clock) time for the weak scaling in the radial resolution. The number of OpenMP threads is shown by the numbers. The $O(N_{core}^{1/2})$ scaling is plotted with a dotted line. \\ [[wg:dynamo:Performance_results|Back to performance benchmark lists]] \\ [[wg:dynamo:Performance_results:xshells:files|files]]