/** * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) #define WINDOWS_LEAN_AND_MEAN #define NOMINMAX #include #pragma warning(disable : 4819) #endif #include #include #include #include #include #include #include #include #include #include #include inline int cudaDeviceInit(int argc, const char **argv) { int deviceCount; checkCudaErrors(cudaGetDeviceCount(&deviceCount)); if (deviceCount == 0) { std::cerr << "CUDA error: no devices supporting CUDA." << std::endl; exit(EXIT_FAILURE); } int dev = findCudaDevice(argc, argv); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); std::cerr << "cudaSetDevice GPU" << dev << " = " << deviceProp.name << std::endl; checkCudaErrors(cudaSetDevice(dev)); return dev; } bool printfNPPinfo(int argc, char *argv[]) { const NppLibraryVersion *libVer = nppGetLibVersion(); printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); int driverVersion, runtimeVersion; cudaDriverGetVersion(&driverVersion); cudaRuntimeGetVersion(&runtimeVersion); printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10); printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10); // Min spec is SM 1.0 devices bool bVal = checkCudaCapabilities(1, 0); return bVal; } int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); try { std::string sFilename; char *filePath; cudaDeviceInit(argc, (const char **)argv); if (printfNPPinfo(argc, argv) == false) { exit(EXIT_SUCCESS); } if (checkCmdLineFlag(argc, (const char **)argv, "input")) { getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath); } else { filePath = sdkFindFilePath("Lena.pgm", argv[0]); } if (filePath) { sFilename = filePath; } else { sFilename = "Lena.pgm"; } // if we specify the filename at the command line, then we only test // sFilename[0]. int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "cannyEdgeDetectionNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "cannyEdgeDetectionNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { exit(EXIT_FAILURE); } std::string sResultFilename = sFilename; std::string::size_type dot = sResultFilename.rfind('.'); if (dot != std::string::npos) { sResultFilename = sResultFilename.substr(0, dot); } sResultFilename += "_cannyEdgeDetection.pgm"; if (checkCmdLineFlag(argc, (const char **)argv, "output")) { char *outputFilePath; getCmdLineArgumentString(argc, (const char **)argv, "output", &outputFilePath); sResultFilename = outputFilePath; } // declare a host image object for an 8-bit grayscale image npp::ImageCPU_8u_C1 oHostSrc; // load gray-scale image from disk npp::loadImage(sFilename, oHostSrc); // declare a device image and copy construct from the host image, // i.e. upload host to device npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc); NppiSize oSrcSize = {(int)oDeviceSrc.width(), (int)oDeviceSrc.height()}; NppiPoint oSrcOffset = {0, 0}; // create struct with ROI size NppiSize oSizeROI = {(int)oDeviceSrc.width(), (int)oDeviceSrc.height()}; // allocate device image of appropriately reduced size npp::ImageNPP_8u_C1 oDeviceDst(oSizeROI.width, oSizeROI.height); int nBufferSize = 0; Npp8u *pScratchBufferNPP = 0; // get necessary scratch buffer size and allocate that much device memory NPP_CHECK_NPP(nppiFilterCannyBorderGetBufferSize(oSizeROI, &nBufferSize)); cudaMalloc((void **)&pScratchBufferNPP, nBufferSize); // now run the canny edge detection filter // Using nppiNormL2 will produce larger magnitude values allowing for finer // control of threshold values while nppiNormL1 will be slightly faster. // Also, selecting the sobel gradient filter allows up to a 5x5 kernel size // which can produce more precise results but is a bit slower. Commonly // nppiNormL2 and sobel gradient filter size of 3x3 are used. Canny // recommends that the high threshold value should be about 3 times the low // threshold value. The threshold range will depend on the range of // magnitude values that the sobel gradient filter generates for a // particular image. Npp16s nLowThreshold = 72; Npp16s nHighThreshold = 256; if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) { NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset, oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL, NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2, NPP_BORDER_REPLICATE, pScratchBufferNPP)); } // free scratch buffer memory cudaFree(pScratchBufferNPP); // declare a host image for the result npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size()); // and copy the device result data into it oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch()); saveImage(sResultFilename, oHostDst); std::cout << "Saved image: " << sResultFilename << std::endl; nppiFree(oDeviceSrc.data()); nppiFree(oDeviceDst.data()); exit(EXIT_SUCCESS); } catch (npp::Exception &rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); return -1; } return 0; }