Skip to content

Commit 3e1e943

Browse files
committed
Use c-style arrays again instead of std::array to be more consistent (since c-style arrays HAVE to be used in the hierarchical kernels due to a icpx internal compiler SegFault) and to be sure that the code generation stays the same as before. Disable the respective clang-tidy warnings for these files.
1 parent 61cd504 commit 3e1e943

18 files changed

Lines changed: 46 additions & 78 deletions

File tree

include/plssvm/backends/SYCL/kernel/cg_explicit/basic/blas.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include "sycl/sycl.hpp" // sycl::item
2121

22-
#include <array> // std::array
2322
#include <cstddef> // std::size_t
2423

2524
namespace plssvm::sycl::detail::basic {
@@ -77,7 +76,7 @@ class device_kernel_symm {
7776
const auto j_idx = (idx.get_id(0) + grid_x_offset_ * THREAD_BLOCK_SIZE_uz) * INTERNAL_BLOCK_SIZE_uz; // device_num_rows
7877

7978
// create a work-item private array used for internal caching
80-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
79+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
8180

8281
// iterate over all values using blocking
8382
for (std::size_t dim_block = 0; dim_block < (num_rows_ - device_row_offset_); dim_block += THREAD_BLOCK_SIZE_uz) {
@@ -216,7 +215,7 @@ class device_kernel_symm_mirror {
216215
const auto j_idx = (idx.get_id(0) + grid_x_offset_ * THREAD_BLOCK_SIZE_uz) * INTERNAL_BLOCK_SIZE_uz; // num_mirror_rows
217216

218217
// create a work-item private array used for internal caching
219-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
218+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
220219

221220
// iterate over the remaining values using blocking to be able to cache them for faster memory accesses
222221
for (std::size_t dim_block = 0; dim_block < device_num_rows_; dim_block += THREAD_BLOCK_SIZE_uz) {

include/plssvm/backends/SYCL/kernel/cg_explicit/basic/kernel_matrix_assembly.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include "sycl/sycl.hpp" // sycl::item
2323

24-
#include <array> // std::array
2524
#include <cstddef> // std::size_t
2625
#include <tuple> // std::tuple, std::make_tuple
2726

@@ -87,7 +86,7 @@ class device_kernel_assembly {
8786
// only calculate the upper triangular matrix
8887
if (i_idx >= j_idx) {
8988
// create a private memory array used for internal caching
90-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
89+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
9190

9291
// iterate over all features using blocking
9392
for (std::size_t feature_block = 0; feature_block < num_features_; feature_block += THREAD_BLOCK_SIZE_uz) {

include/plssvm/backends/SYCL/kernel/cg_explicit/hierarchical/kernel_matrix_assembly.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include "sycl/sycl.hpp" // sycl::group, sycl::private_memory, sycl::h_item
2323

24-
#include <array> // std::array
2524
#include <cstddef> // std::size_t
2625
#include <tuple> // std::tuple, std::make_tuple
2726

include/plssvm/backends/SYCL/kernel/cg_explicit/scoped/blas.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ class device_kernel_symm {
7070
void operator()(T group) const {
7171
::sycl::memory_environment(group,
7272
// the indices used in the current work-item
73-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // A_cache
74-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // B_cache
73+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // A_cache
74+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // B_cache
7575

7676
// create two local memory arrays used for caching
77-
::sycl::require_private_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>>(),
77+
::sycl::require_private_mem<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]>(),
7878
[&](auto &A_cache, auto &B_cache, auto &temp) {
7979
// initialize private temp matrix to zero
8080
::sycl::distribute_items_and_wait(group, [&](::sycl::s_item<2> idx) {
@@ -257,11 +257,11 @@ class device_kernel_symm_mirror {
257257
void operator()(T group) const {
258258
::sycl::memory_environment(group,
259259
// the indices used in the current work-item
260-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // A_cache
261-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // B_cache
260+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // A_cache
261+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // B_cache
262262

263263
// create a private memory array used for internal caching
264-
::sycl::require_private_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>>(),
264+
::sycl::require_private_mem<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]>(),
265265
[&](auto &A_cache, auto &B_cache, auto &temp) {
266266
// initialize private temp matrix to zero
267267
::sycl::distribute_items_and_wait(group, [&](::sycl::s_item<2> idx) {

include/plssvm/backends/SYCL/kernel/cg_explicit/scoped/kernel_matrix_assembly.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include "sycl/sycl.hpp" // sycl::memory_environment, sycl::require_local_mem, sycl::require_private_mem, sycl::distribute_items_and_wait, sycl::s_item
2323

24-
#include <array> // std::array
2524
#include <cstddef> // std::size_t
2625
#include <tuple> // std::tuple, std::make_tuple
2726

@@ -79,11 +78,11 @@ class device_kernel_assembly {
7978
void operator()(T group) const {
8079
::sycl::memory_environment(group,
8180
// create two local memory arrays used for caching
82-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // data_i_cache
83-
::sycl::require_local_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>, static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // data_j_cache
81+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // data_i_cache
82+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // data_j_cache
8483

8584
// create a private memory array used for internal caching
86-
::sycl::require_private_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>>(), // temp
85+
::sycl::require_private_mem<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]>(), // temp
8786
[&](auto &data_i_cache, auto &data_j_cache, auto &temp) {
8887
// only calculate the upper triangular matrix -> can't use get_local_id() since all work-items in a work-group must progress further
8988
if (group[1] + grid_y_offset_ >= group[0] + grid_x_offset_) {

include/plssvm/backends/SYCL/kernel/cg_explicit/work_group/blas.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include "sycl/sycl.hpp" // sycl::handler, sycl::range, sycl::nd_item, sycl::local_accessor
2121

22-
#include <array> // std::array
2322
#include <cstddef> // std::size_t
2423

2524
namespace plssvm::sycl::detail::work_group {
@@ -87,7 +86,7 @@ class device_kernel_symm {
8786
const auto blockIdx_y = static_cast<std::size_t>(nd_idx.get_group(1)) + grid_y_offset_; // current work-group in global range y-dimension + offsets if the global range is too large
8887

8988
// create a work-item private array used for internal caching
90-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
89+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
9190

9291
{
9392
// calculate the indices used in the current work-item, pays attention to coalesced memory accesses
@@ -246,7 +245,7 @@ class device_kernel_symm_mirror {
246245
const auto blockIdx_y = static_cast<std::size_t>(nd_idx.get_group(1)) + grid_y_offset_; // current work-group in global range y-dimension + offsets if the global range is too large
247246

248247
// create a work-item private array used for internal caching
249-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
248+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
250249

251250
{
252251
// calculate the indices used in the current thread, pays attention to coalesced memory accesses

include/plssvm/backends/SYCL/kernel/cg_explicit/work_group/kernel_matrix_assembly.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include "sycl/sycl.hpp" // sycl::handler, sycl::range, sycl::nd_item, sycl::local_accessor
2323

24-
#include <array> // std::array
2524
#include <cstddef> // std::size_t
2625
#include <tuple> // std::tuple, std::make_tuple
2726

@@ -97,7 +96,7 @@ class device_kernel_assembly {
9796
// only calculate the upper triangular matrix -> can't use get_local_id() since all work-items in a work-group must progress further
9897
if (blockIdx_y >= blockIdx_x) {
9998
// create a private memory array used for internal caching
100-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
99+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
101100

102101
{
103102
// calculate the indices used in the current work-item, pays attention to coalesced memory accesses

include/plssvm/backends/SYCL/kernel/cg_implicit/basic/kernel_matrix_assembly_blas.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
#include "sycl/sycl.hpp" // sycl::item
2424

25-
#include <array> // std::array
2625
#include <cstddef> // std::size_t
2726
#include <tuple> // std::tuple, std::make_tuple
2827

@@ -93,7 +92,7 @@ class device_kernel_assembly_symm {
9392
// only calculate the upper triangular matrix
9493
if (i_idx >= j_idx) {
9594
// create a work-item private array used for internal caching
96-
std::array<std::array<real_type, INTERNAL_BLOCK_SIZE_uz>, INTERNAL_BLOCK_SIZE_uz> temp{};
95+
real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{};
9796

9897
//*************************************************************************//
9998
// inplace kernel matrix construction //

include/plssvm/backends/SYCL/kernel/cg_implicit/hierarchical/kernel_matrix_assembly_blas.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
#include "sycl/sycl.hpp" // sycl::group, sycl::private_memory, sycl::h_item
2525

26-
#include <array> // std::array
2726
#include <cstddef> // std::size_t
2827
#include <tuple> // std::tuple, std::make_tuple
2928

include/plssvm/backends/SYCL/kernel/cg_implicit/scoped/kernel_matrix_assembly_blas.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
#include "sycl/sycl.hpp" // sycl::memory_environment, sycl::require_local_mem, sycl::require_private_mem, sycl::distribute_items_and_wait, sycl::s_item
2424

25-
#include <array> // std::array
2625
#include <cstddef> // std::size_t
2726
#include <tuple> // std::tuple, std::make_tuple
2827

@@ -92,11 +91,11 @@ class device_kernel_assembly_symm {
9291
::sycl::require_private_mem<std::size_t>(), // device_num_rows
9392

9493
// create two local memory arrays used for caching
95-
::sycl::require_local_mem<std::array<real_type, static_cast<std::size_t>(THREAD_BLOCK_SIZE) * static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // cache_one
96-
::sycl::require_local_mem<std::array<real_type, static_cast<std::size_t>(THREAD_BLOCK_SIZE) * static_cast<std::size_t>(INTERNAL_BLOCK_SIZE) * static_cast<std::size_t>(THREAD_BLOCK_SIZE)>>(), // cache_two
94+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // cache_one
95+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(), // cache_two
9796

9897
// create a private memory array used for internal caching
99-
::sycl::require_private_mem<std::array<std::array<real_type, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>, static_cast<std::size_t>(INTERNAL_BLOCK_SIZE)>>(),
98+
::sycl::require_private_mem<real_type[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]>(),
10099
[&](auto &i_idx, auto &j_idx, auto &i_idx_linear, auto &j_idx_linear, auto &cache_one, auto &cache_two, auto &temp) {
101100
// initialize private and local variables
102101
::sycl::distribute_items_and_wait(group, [&](::sycl::s_item<2> idx) {

0 commit comments

Comments
 (0)