/** * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ //////////////////////////////////////////////////////////////////////////////// // // simpleCUFFT_2d_MGPU.cu // // This sample code demonstrate the use of CUFFT library for 2D data on multiple GPU. // Example showing the use of CUFFT for solving 2D-POISSON equation using FFT on multiple GPU. // For reference we have used the equation given in http://www.bu.edu/pasi/files/2011/07/ // Lecture83.pdf // //////////////////////////////////////////////////////////////////////////////// // System includes #include #include #include #include // CUDA runtime #include //CUFFT Header file #include // helper functions and utilities to work with CUDA #include #include // Complex data type typedef float2 Complex; // Data configuration const int GPU_COUNT = 2; const int BSZ_Y = 4; const int BSZ_X = 4; // Forward Declaration void solvePoissonEquation(cudaLibXtDesc *, cudaLibXtDesc *, float **, int, int ); __global__ void solvePoisson(cufftComplex *, cufftComplex *, float *, int, int, int n_gpu); /////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { printf("\nPoisson equation using CUFFT library on Multiple GPUs is starting...\n\n"); int GPU_N ; checkCudaErrors(cudaGetDeviceCount(&GPU_N)); if (GPU_N < GPU_COUNT) { printf("No. of GPU on node %d\n", GPU_N); printf("Two GPUs are required to run simpleCUFFT_2d_MGPU sample code\n"); exit(EXIT_WAIVED); } int *major_minor = (int *) malloc(sizeof(int)*GPU_N*2); int found2IdenticalGPUs = 0; int nGPUs = 2; int *whichGPUs ; whichGPUs = (int*) malloc(sizeof(int) * nGPUs); for(int i=0; idescriptor->GPUs[i]; cudaSetDevice(device) ; solvePoisson<<>>((cufftComplex*) d_ft->descriptor->data[i], (cufftComplex*) d_ft_k->descriptor->data[i], k[i], N, i, nGPUs); } // Wait for device to finish all operation for(int i=0; i< nGPUs ; i++) { device = d_ft_k->descriptor->GPUs[i]; cudaSetDevice( device ); cudaDeviceSynchronize(); // Check if kernel execution generated and error getLastCudaError("Kernel execution failed [ solvePoisson ]"); } } //////////////////////////////////////////////////////////////////////////////// // Kernel for Solving Poisson equation on GPU //////////////////////////////////////////////////////////////////////////////// __global__ void solvePoisson(cufftComplex *ft, cufftComplex *ft_k, float *k, int N ,int gpu_id, int n_gpu) { int i = threadIdx.x + blockIdx.x*blockDim.x; int j = threadIdx.y + blockIdx.y*blockDim.y; int index = j*N+i; if (i