Skip to content

Commit ee8ad84

Browse files
danieljvickers, Daniel Vickers, Daniel Vickers, Daniel Vickers, wilfonba
authored
AMD Flang Weno Optimizations (#1374)
Co-authored-by: Daniel Vickers <danieljvickers@login09.frontier.olcf.ornl.gov>
Co-authored-by: Daniel Vickers <danieljvickers@frontier00007.frontier.olcf.ornl.gov>
Co-authored-by: Daniel Vickers <danieljvickers@login12.frontier.olcf.ornl.gov>
Co-authored-by: wilfonba <bwilfong3@gatech.edu>
Co-authored-by: Daniel Vickers <danieljvickers@frontier01665.frontier.olcf.ornl.gov>
1 parent e9910d9 commit ee8ad84

2 files changed

Lines changed: 80 additions & 69 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ exit 0
636636
target_compile_options(${a_target} PRIVATE -fopenmp)
637637
target_link_options(${a_target} PRIVATE -fopenmp)
638638
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
639-
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
639+
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
640640
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
641641
endif()
642642
endif()

src/simulation/m_weno.fpp

Lines changed: 79 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ contains
941941
if (weno_order == 3 .or. dummy) then
942942
#:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
943943
if (weno_dir == ${WENO_DIR}$) then
944-
$:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau]')
944+
$:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau, q]')
945945
do l = is3_weno%beg, is3_weno%end
946946
do k = is2_weno%beg, is2_weno%end
947947
do j = is1_weno%beg, is1_weno%end
@@ -962,24 +962,25 @@ contains
962962
beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) + weno_eps
963963

964964
if (wenojs) then
965-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
966-
& j)/(beta(0:weno_num_stencils)**2._wp)
965+
do q = 0, weno_num_stencils
966+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
967+
end do
967968
else if (mapped_weno) then
968-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
969-
& j)/(beta(0:weno_num_stencils)**2._wp)
969+
do q = 0, weno_num_stencils
970+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
971+
end do
970972
omega = alpha/sum(alpha)
971-
alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
972-
& j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
973-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
974-
& *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
975-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
976-
& - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
973+
do q = 0, weno_num_stencils
974+
alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
975+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
976+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
977+
end do
977978
else if (wenoz) then
978979
! Borges, et al. (2008)
979-
980980
tau = abs(beta(1) - beta(0))
981-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
982-
& j)*(1._wp + tau/beta(0:weno_num_stencils))
981+
do q = 0, weno_num_stencils
982+
alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + tau/beta(q))
983+
end do
983984
end if
984985

985986
omega = alpha/sum(alpha)
@@ -992,21 +993,23 @@ contains
992993
poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1)
993994

994995
if (wenojs) then
995-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
996-
& j)/(beta(0:weno_num_stencils)**2._wp)
996+
do q = 0, weno_num_stencils
997+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
998+
end do
997999
else if (mapped_weno) then
998-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
999-
& j)/(beta(0:weno_num_stencils)**2._wp)
1000+
do q = 0, weno_num_stencils
1001+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
1002+
end do
10001003
omega = alpha/sum(alpha)
1001-
alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
1002-
& j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
1003-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
1004-
& *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
1005-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
1006-
& - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
1004+
do q = 0, weno_num_stencils
1005+
alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
1006+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
1007+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
1008+
end do
10071009
else if (wenoz) then
1008-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
1009-
& j)*(1._wp + tau/beta(0:weno_num_stencils))
1010+
do q = 0, weno_num_stencils
1011+
alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + tau/beta(q))
1012+
end do
10101013
end if
10111014

10121015
omega = alpha/sum(alpha)
@@ -1057,18 +1060,19 @@ contains
10571060
& 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) + weno_eps
10581061

10591062
if (wenojs) then
1060-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
1061-
& j)/(beta(0:weno_num_stencils)**2._wp)
1063+
do q = 0, weno_num_stencils
1064+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
1065+
end do
10621066
else if (mapped_weno) then
1063-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
1064-
& j)/(beta(0:weno_num_stencils)**2._wp)
1067+
do q = 0, weno_num_stencils
1068+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
1069+
end do
10651070
omega = alpha/sum(alpha)
1066-
alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
1067-
& j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
1068-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
1069-
& *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
1070-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
1071-
& - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
1071+
do q = 0, weno_num_stencils
1072+
alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
1073+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
1074+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
1075+
end do
10721076
else if (wenoz) then
10731077
! Borges, et al. (2008)
10741078

@@ -1100,7 +1104,9 @@ contains
11001104
end do
11011105
end if
11021106

1103-
omega = alpha/sum(alpha)
1107+
omega(0) = alpha(0)/(alpha(0) + alpha(1) + alpha(2))
1108+
omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2))
1109+
omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2))
11041110

11051111
vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2)
11061112

@@ -1114,18 +1120,19 @@ contains
11141120
& 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2)
11151121

11161122
if (wenojs) then
1117-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
1118-
& j)/(beta(0:weno_num_stencils)**2._wp)
1123+
do q = 0, weno_num_stencils
1124+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
1125+
end do
11191126
else if (mapped_weno) then
1120-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
1121-
& j)/(beta(0:weno_num_stencils)**2._wp)
1127+
do q = 0, weno_num_stencils
1128+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
1129+
end do
11221130
omega = alpha/sum(alpha)
1123-
alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
1124-
& j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
1125-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
1126-
& *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
1127-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
1128-
& - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
1131+
do q = 0, weno_num_stencils
1132+
alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
1133+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
1134+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
1135+
end do
11291136
else if (wenoz) then
11301137
$:GPU_LOOP(parallelism='[seq]')
11311138
do q = 0, weno_num_stencils
@@ -1138,7 +1145,9 @@ contains
11381145
end do
11391146
end if
11401147

1141-
omega = alpha/sum(alpha)
1148+
omega(0) = alpha(0)/(alpha(0) + alpha(1) + alpha(2))
1149+
omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2))
1150+
omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2))
11421151

11431152
vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2)
11441153
end do
@@ -1252,18 +1261,19 @@ contains
12521261
end if
12531262

12541263
if (wenojs) then
1255-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
1256-
& j)/(beta(0:weno_num_stencils)**2._wp)
1264+
do q = 0, weno_num_stencils
1265+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
1266+
end do
12571267
else if (mapped_weno) then
1258-
alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, &
1259-
& j)/(beta(0:weno_num_stencils)**2._wp)
1268+
do q = 0, weno_num_stencils
1269+
alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp)
1270+
end do
12601271
omega = alpha/sum(alpha)
1261-
alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, &
1262-
& j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, &
1263-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
1264-
& *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, &
1265-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
1266-
& - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils,j))))
1272+
do q = 0, weno_num_stencils
1273+
alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, &
1274+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, &
1275+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j))))
1276+
end do
12671277
else if (wenoz) then
12681278
! Castro, et al. (2010) Don & Borges (2013) also helps
12691279
tau = abs(beta(3) - beta(0)) ! Equation 50
@@ -1327,18 +1337,19 @@ contains
13271337
end if
13281338

13291339
if (wenojs) then
1330-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
1331-
& j)/(beta(0:weno_num_stencils)**2._wp)
1340+
do q = 0, weno_num_stencils
1341+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
1342+
end do
13321343
else if (mapped_weno) then
1333-
alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, &
1334-
& j)/(beta(0:weno_num_stencils)**2._wp)
1344+
do q = 0, weno_num_stencils
1345+
alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp)
1346+
end do
13351347
omega = alpha/sum(alpha)
1336-
alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, &
1337-
& j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, &
1338-
& j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) &
1339-
& *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, &
1340-
& j)**2._wp + omega(0:weno_num_stencils)*(1._wp &
1341-
& - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils,j))))
1348+
do q = 0, weno_num_stencils
1349+
alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, &
1350+
& j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, &
1351+
& j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j))))
1352+
end do
13421353
else if (wenoz) then
13431354
$:GPU_LOOP(parallelism='[seq]')
13441355
do q = 0, weno_num_stencils

0 commit comments

Comments
 (0)