You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: include/cuda_mav.cuh
+77-78Lines changed: 77 additions & 78 deletions
Original file line number
Diff line number
Diff line change
@@ -10,7 +10,7 @@
10
10
11
11
// Define a custom template class which holds the data for the CUDA kernel
12
12
template <typename T>
13
-
classCudaMav
13
+
classCudaMemAccessLogger
14
14
{
15
15
structGlobalSettings {
16
16
int gridDimX;
@@ -20,28 +20,55 @@ class CudaMav
20
20
int blockDimY;
21
21
int blockDimZ;
22
22
intwarpSize;
23
-
unsignedint originalSize_read;
24
-
unsignedint currentSize_read;
25
-
unsignedint originalSize_write;
26
-
unsignedint currentSize_write;
27
-
23
+
unsignedint originalSize;
24
+
unsignedint currentSize;
28
25
};
29
26
30
27
// Have a struct to store logging data
31
28
structMemoryAccessLog {
29
+
private:
32
30
// Store the address which was addressed
33
31
T* address;
34
-
35
-
// Store the thread id which accessed the address
36
-
unsignedint threadId;
32
+
// Store the thread id which accessed the address, additionally the uppermost bit is used to store if the access was a read or write, 0 for read, 1 for write
33
+
unsignedint threadId_accessType;
37
34
// Store the block id which accessed the address
38
35
unsignedint blockId;
36
+
public:
39
37
40
38
// Constructor which decomposes the block and thread id into the packed long
41
-
__host____device__MemoryAccessLog(T* address, int blockId, int threadId) : address(address), threadId(threadId), blockId(blockId) {}
39
+
__host____device__MemoryAccessLog(T* address, int blockId, int threadId, bool read = true) : address(address), threadId_accessType(threadId), blockId(blockId) {
40
+
// Set the uppermost bit to 1 if the access was a write, 0 if it was a read
// Implement a proxy class so we can both read and write from the array when accessing the array operator
75
96
classAccessProxy {
76
-
// Have a reference to the CudaMav class
77
-
CudaMav<T>* cudaMav;
97
+
// Have a reference to the CudaMemAccessLogger class
98
+
CudaMemAccessLogger<T>* cudaMav;
78
99
// Have a reference to the index
79
100
int index;
80
101
81
102
public:
82
-
// Constructor which takes a reference to the CudaMav class and the index
83
-
__device__AccessProxy(CudaMav<T>* cudaMav, int index) : cudaMav(cudaMav), index(index) {}
103
+
// Constructor which takes a reference to the CudaMemAccessLogger class and the index
104
+
__device__AccessProxy(CudaMemAccessLogger<T>* cudaMav, int index) : cudaMav(cudaMav), index(index) {}
84
105
AccessProxy() = delete;
85
106
86
107
// Overload the assignment operator so we can write to the array
@@ -125,18 +146,10 @@ private:
125
146
// First fetch the h_constantData from the device
126
147
checkCudaError(cudaMemcpy(h_constantData, d_constantData, sizeof(GlobalSettings), cudaMemcpyDeviceToHost), "Could not copy constant data from device.");
127
148
128
-
// Copy the data back from the device for reading
129
-
checkCudaError(cudaMemcpy(h_memoryAccessLog_reading, d_memoryAccessLog_reading, sizeof(MemoryAccessLog) * h_constantData->originalSize_read, cudaMemcpyDeviceToHost), "Could not copy memory access logs from device.");
130
-
// Copy the data back from the device for writing
131
-
checkCudaError(cudaMemcpy(h_memoryAccessLog_writing, d_memoryAccessLog_writing, sizeof(MemoryAccessLog) * h_constantData->originalSize_write, cudaMemcpyDeviceToHost), "Could not copy memory access logs from device.");
149
+
// Copy the data back from the device
150
+
checkCudaError(cudaMemcpy(h_memoryAccessLog, d_memoryAccessLog, sizeof(MemoryAccessLog) * h_constantData->originalSize_read, cudaMemcpyDeviceToHost), "Could not copy memory access logs from device.");
132
151
133
152
134
-
// Free up the memory on the device
135
-
checkCudaError(cudaFree(d_constantData), "Could not free constant data on device.");
136
-
checkCudaError(cudaFree(d_memoryAccessLog_reading), "Could not free memory access logs (reading) on device.");
137
-
checkCudaError(cudaFree(d_memoryAccessLog_writing), "Could not free memory access logs (writing) on device.");
138
-
checkCudaError(cudaFree(d_this), "Could not free class instance pointer on device.");
139
-
140
153
// Set the fetched from device flag to true
141
154
fetchedFromDevice = true;
142
155
}
@@ -231,17 +244,15 @@ private:
231
244
public:
232
245
233
246
// Constructor to create an empty class
234
-
__device____host__CudaMav() {
247
+
__device____host__CudaMemAccessLogger() {
235
248
// Set the data pointer to null
236
249
d_data = nullptr;
237
-
// Set the memory access log pointer to null for reading
238
-
d_memoryAccessLog_reading = nullptr;
239
-
// Set the memory access log pointer to null for writing
240
-
d_memoryAccessLog_writing = nullptr;
250
+
// Set the memory access log pointer to null
251
+
d_memoryAccessLog = nullptr;
241
252
}
242
253
243
254
// Constructor which allocates the memory on the device
// Allocate the memory on the device for the d_memoryAccessLog_reading and check if it was successful
258
-
checkCudaError(cudaMalloc(&d_memoryAccessLog_reading, sizeof(MemoryAccessLog) * size), "Could not allocate array to store memory access logs on device. (reading)");
259
-
// Also allocate the memory on the host for the h_memoryAccessLog_reading and check if it was successful
260
-
h_memoryAccessLog_reading = new MemoryAccessLog[size];
268
+
// Allocate the memory on the device for the d_memoryAccessLog and check if it was successful
269
+
checkCudaError(cudaMalloc(&d_memoryAccessLog, sizeof(MemoryAccessLog) * size), "Could not allocate array to store memory access logs on device. (reading)");
270
+
// Also allocate the memory on the host for the h_memoryAccessLog
271
+
h_memoryAccessLog = new MemoryAccessLog[size];
261
272
262
273
// Copy the empty data to the device
263
-
checkCudaError(cudaMemcpy(d_memoryAccessLog_reading, h_memoryAccessLog_reading, sizeof(MemoryAccessLog) * size, cudaMemcpyHostToDevice), "Could not copy memory access logs to device. (reading)");
264
-
265
-
// To the same for writing
266
-
checkCudaError(cudaMalloc(&d_memoryAccessLog_writing, sizeof(MemoryAccessLog) * size), "Could not allocate array to store memory access logs on device. (writing)");
267
-
h_memoryAccessLog_writing = new MemoryAccessLog[size];
268
-
checkCudaError(cudaMemcpy(d_memoryAccessLog_writing, h_memoryAccessLog_writing, sizeof(MemoryAccessLog) * size, cudaMemcpyHostToDevice), "Could not copy memory access logs to device. (writing)");
274
+
checkCudaError(cudaMemcpy(d_memoryAccessLog, h_memoryAccessLog, sizeof(MemoryAccessLog) * size, cudaMemcpyHostToDevice), "Could not copy memory access logs to device.");
269
275
270
276
// Now we finished initializing the class, so we need to create the copy of this class on the device
271
277
// Allocate the memory on the device for the d_this and check if it was successful
272
-
checkCudaError(cudaMalloc(&d_this, sizeof(CudaMav<T>)), "Could not allocate array to store this class on device.");
278
+
checkCudaError(cudaMalloc(&d_this, sizeof(CudaMemAccessLogger<T>)), "Could not allocate array to store this class on device.");
273
279
// Copy the empty data to the device
274
-
checkCudaError(cudaMemcpy(d_this, this, sizeof(CudaMav<T>), cudaMemcpyHostToDevice), "Could not copy this class to device.");
280
+
checkCudaError(cudaMemcpy(d_this, this, sizeof(CudaMemAccessLogger<T>), cudaMemcpyHostToDevice), "Could not copy this class to device.");
275
281
276
282
}
277
283
278
-
__device__ T get(unsignedint index) {
279
-
// Atomically increase the currentSize by 1
280
-
int current_index = atomicAdd(&d_constantData->currentSize_read, 1);
284
+
__device__intgetStorageIndex() {// Atomically increase the currentSize by 1
285
+
int current_index = atomicAdd(&d_constantData->currentSize, 1);
281
286
282
287
// First check if the currentSize is zero, if so we need to initialize the additional data variables, needed later to restore the data
0 commit comments