11#include < stdexcept>
2- #include < stdio.h >
2+ #include < cstdio >
33#include < fstream>
44#include < functional>
55#include < sstream>
@@ -16,7 +16,6 @@ class CudaMav
1616 int blockDimY;
1717 int blockDimZ;
1818 int warpSize ;
19- int statusMessage;
2019 unsigned int originalSize_read;
2120 unsigned int currentSize_read;
2221 unsigned int originalSize_write;
@@ -28,16 +27,17 @@ class CudaMav
2827 struct MemoryAccessLog {
2928 // Store the address which was addressed
3029 T* address;
30+
3131 // Store the thread id which accessed the address
32- int threadId;
32+ unsigned int threadId;
3333 // Store the block id which accessed the address
34- int blockId;
34+ unsigned int blockId;
3535
3636 // Constructor which stores the accessed address together with the block id and thread id
3737 __host__ __device__ MemoryAccessLog (T* address, int blockId, int threadId) : address(address), threadId(threadId), blockId(blockId) {}
3838
3939 // Empty constructor
40- __host__ __device__ MemoryAccessLog () : address(nullptr ), threadId(- 1 ), blockId(- 1 ) {}
40+ __host__ __device__ MemoryAccessLog () : address(nullptr ), threadId(0 ), blockId(0 ) {}
4141 };
4242
4343private:
@@ -80,13 +80,23 @@ private:
8080 AccessProxy () = delete ;
8181
8282 // Overload the assignment operator so we can write to the array
83- __device__ void operator = (T value) {
83+ __device__ AccessProxy & operator = (const T & value) {
8484 cudaMav->set (index, value);
85+ return *this ;
86+ }
87+
88+ // When the result of one array access is assigned to another array access, an AccessProxy is assigned to an AccessProxy.
89+ // For this reason we need to define the assignment operator for AccessProxy as well, so that the underlying value is read from the source and written to the target
90+ __device__ AccessProxy &operator = (const AccessProxy &other) {
91+ if (this != &other) {
92+ cudaMav->set (index, other.cudaMav ->get (other.index ));
93+ }
94+ return *this ;
8595 }
8696
87- // Overload the cast operator so we can read from the array
88- // Leaving the explicit out, wont throw an error, but might result in unexpected behaviour
89- __device__ explicit operator T () {
97+ // Overload the conversion operator, so we can read from the array
98+ // Leaving out the explicit does not cause an error, but the implicit conversion might result in unexpected behaviour
99+ __device__ /* explicit*/ operator T () const {
90100 return cudaMav->get (index);
91101 }
92102 };
@@ -230,7 +240,7 @@ public:
230240 __host__ CudaMav (T* array_data, unsigned int size = 100000 )
231241 {
232242
233- h_constantData = new GlobalSettings{ -1 , -1 , -1 , -1 , -1 , -1 , -1 , 0 , size, 0 , size, 0 };
243+ h_constantData = new GlobalSettings{ -1 , -1 , -1 , -1 , -1 , -1 , -1 , size, 0 , size, 0 };
234244
235245 // Allocate the memory on the device for the d_constantData and check if it was successful
236246 checkCudaError (cudaMalloc (&d_constantData, sizeof (GlobalSettings)), " Could not allocate array to store kernel data on device." );
@@ -277,12 +287,11 @@ public:
277287 d_constantData->blockDimZ = blockDim .z ;
278288 // Store the warp size
279289 d_constantData->warpSize = warpSize ;
280- };
281-
290+ }
282291
283292 // Get the block and thread id
284- int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
285- int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
293+ unsigned int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
294+ unsigned int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
286295
287296 // Get the address of the data
288297 T* address = &d_data[index];
@@ -323,11 +332,11 @@ public:
323332 d_constantData->blockDimZ = blockDim .z ;
324333 // Store the warp size
325334 d_constantData->warpSize = warpSize ;
326- };
335+ }
327336
328337 // Get the block and thread id
329- int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
330- int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
338+ unsigned int blockId = blockIdx .x + blockIdx .y * gridDim .x + gridDim .x * gridDim .y * blockIdx .z ;
339+ unsigned int threadId = threadIdx .x + threadIdx .y * blockDim .x + blockDim .x * blockDim .y * threadIdx .z ;
331340
332341 // Get the address of the data
333342 T* address = &d_data[index];
0 commit comments