/* **********************************************************
 * Conway's Game of Life example
 *
 * The host builds a random grid and advances it TIME_STEPS
 * generations on the GPU. Every generation is one kernel launch
 * that reads the current grid from one global-memory buffer and
 * writes the next grid into a second buffer; the two buffers are
 * swapped between launches. No shared memory and no in-kernel
 * barrier is used: the launch boundary is the only
 * synchronisation between generations.
 *
 * ********************************************************** */
#include <stdio.h>
#include <stdlib.h>

#define CONWAYS_ROWS 20
#define CONWAYS_COLS 20
#define TIME_STEPS 5

/* Abort with a readable message when a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Matrices are stored in row-major order:
// M(row, col) = *(M.elements + row * M.width + col)
typedef struct {
    int width;
    int height;
    char* elements;
} Matrix;

/*
 * Computes one Game of Life generation on a toroidal (wrap-around) grid.
 *
 * Launch layout: 1-D grid with one block per row (gridDim.x >= height) and
 * 1-D blocks with one thread per column (blockDim.x >= width), so width is
 * limited by the device's maximum threads per block. Uses no shared memory.
 *
 * conwayGrid : current generation (device memory, width*height chars,
 *              0 = dead, 1 = alive). Only read.
 * result     : next generation (device memory, same dimensions). Must not
 *              alias conwayGrid.elements, because neighbouring threads read
 *              cells that other threads are writing.
 */
__global__ void conway(Matrix conwayGrid, Matrix result) {
    const int row = blockIdx.x;
    const int col = threadIdx.x;
    const int width = conwayGrid.width;
    const int height = conwayGrid.height;

    /* Threads launched beyond the grid edge have nothing to do. */
    if (row >= height || col >= width) {
        return;
    }

    /* Wrap around both the top/bottom and the sides. Rows wrap on the
     * height and columns on the width, so non-square grids work too. */
    const int rowAbove = (row == 0) ? height - 1 : row - 1;
    const int rowBelow = (row == height - 1) ? 0 : row + 1;
    const int colLeft = (col == 0) ? width - 1 : col - 1;
    const int colRight = (col == width - 1) ? 0 : col + 1;

    const char* cells = conwayGrid.elements;
    const int neighbours =
        cells[rowAbove * width + colLeft] + cells[rowAbove * width + col] +
        cells[rowAbove * width + colRight] +
        cells[row * width + colLeft] + cells[row * width + colRight] +
        cells[rowBelow * width + colLeft] + cells[rowBelow * width + col] +
        cells[rowBelow * width + colRight];

    const bool alive = cells[row * width + col] > 0;

    /* A live cell survives with 2 or 3 live neighbours; a dead cell is born
     * with exactly 3. Every other case is dead in the next generation. */
    const bool aliveNext = (neighbours == 3) || (alive && neighbours == 2);
    result.elements[row * width + col] = aliveNext ? 1 : 0;
}

/* Prints a host-side grid, one text line per row, cells separated by spaces. */
static void printGrid(const Matrix* grid) {
    for (int r = 0; r < grid->height; r++) {
        for (int c = 0; c < grid->width; c++) {
            printf("%d ", grid->elements[r * grid->width + c]);
        }
        printf("\n");
    }
}

/*
 * Builds a random CONWAYS_ROWS x CONWAYS_COLS grid, runs TIME_STEPS
 * generations on the GPU and prints the grid after every generation.
 * Returns 0 on success; exits with EXIT_FAILURE on allocation or CUDA errors.
 */
int main() {
    /* Lets set up our initial grid */
    Matrix conwayGrid;
    conwayGrid.height = CONWAYS_ROWS;
    conwayGrid.width = CONWAYS_COLS;

    /* One byte per cell; the same size is used for host and device buffers. */
    const size_t size =
        (size_t)conwayGrid.width * (size_t)conwayGrid.height * sizeof(char);

    /* This is a flat, row-major array */
    conwayGrid.elements = (char*)malloc(size);
    if (conwayGrid.elements == NULL) {
        fprintf(stderr, "Failed to allocate %lu bytes for the host grid\n",
                (unsigned long)size);
        return EXIT_FAILURE;
    }

    /* Lets randomize an initial state for our game */
    for (int r = 0; r < conwayGrid.height; r++) {
        for (int c = 0; c < conwayGrid.width; c++) {
            conwayGrid.elements[r * conwayGrid.width + c] = rand() % 2;
        }
    }
    printf("Initial State\n");
    printGrid(&conwayGrid);

    /* Allocate the current and next generation buffers on the device. */
    Matrix d_conwayGrid;
    Matrix d_conwayResult;
    d_conwayGrid.width = conwayGrid.width;
    d_conwayGrid.height = conwayGrid.height;
    d_conwayResult.width = conwayGrid.width;
    d_conwayResult.height = conwayGrid.height;
    CUDA_CHECK(cudaMalloc(&d_conwayGrid.elements, size));
    CUDA_CHECK(cudaMalloc(&d_conwayResult.elements, size));

    /* Upload the initial state once; later generations stay on the device. */
    CUDA_CHECK(cudaMemcpy(d_conwayGrid.elements, conwayGrid.elements, size,
                          cudaMemcpyHostToDevice));

    for (int step = 0; step < TIME_STEPS; step++) {
        /* One block per row, one thread per column. */
        conway<<<conwayGrid.height, conwayGrid.width>>>(d_conwayGrid,
                                                        d_conwayResult);
        CUDA_CHECK(cudaGetLastError()); /* reports a bad launch configuration */

        /* Blocking copy on the default stream: it waits for the kernel and
         * reports any error raised while the kernel was running. */
        CUDA_CHECK(cudaMemcpy(conwayGrid.elements, d_conwayResult.elements,
                              size, cudaMemcpyDeviceToHost));

        /* Heading text is unchanged so the program's output format stays
         * the same; it is printed after every generation. */
        printf("\n\nFinal State\n");
        printGrid(&conwayGrid);

        /* The generation just computed becomes the input of the next one. */
        char* previous = d_conwayGrid.elements;
        d_conwayGrid.elements = d_conwayResult.elements;
        d_conwayResult.elements = previous;
    }

    CUDA_CHECK(cudaFree(d_conwayGrid.elements));
    CUDA_CHECK(cudaFree(d_conwayResult.elements));
    free(conwayGrid.elements);
    return 0;
}