/** * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ /* Bicubic texture filtering sample sgreen 6/2008 This sample demonstrates how to efficiently implement bicubic texture filtering in CUDA. Bicubic filtering is a higher order interpolation method that produces smoother results than bilinear interpolation: http://en.wikipedia.org/wiki/Bicubic It requires reading a 4 x 4 pixel neighbourhood rather than the 2 x 2 area required by bilinear filtering. Current graphics hardware doesn't support bicubic filtering natively, but it is possible to compose a bicubic filter using just 4 bilinear lookups by offsetting the sample position within each texel and weighting the samples correctly. The only disadvantage to this method is that the hardware only maintains 9-bits of filtering precision within each texel. See "Fast Third-Order Texture Filtering", Sigg & Hadwiger, GPU Gems 2: http://developer.nvidia.com/object/gpu_gems_2_home.html v1.1 - updated to include the brute force method using 16 texture lookups. v1.2 - added Catmull-Rom interpolation Example performance results from GeForce 8800 GTS: Bilinear - 5500 MPixels/sec Bicubic - 1400 MPixels/sec Fast Bicubic - 2100 MPixels/sec */ // OpenGL Graphics includes #include #if defined(__APPLE__) || defined(MACOSX) #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #else #include #endif // Includes #include #include #include #include // CUDA system and GL includes #include #include // Helper functions #include // CUDA SDK Helper functions #include // CUDA device initialization helper functions typedef unsigned int uint; typedef unsigned char uchar; #define USE_BUFFER_TEX 0 #ifndef MAX #define MAX(a,b) ((a < b) ? b : a) #endif // Auto-Verification Code const int frameCheckNumber = 4; int fpsCount = 0; // FPS count for averaging int fpsLimit = 4; // FPS limit for sampling int g_Index = 0; unsigned int frameCount = 0; unsigned int g_TotalErrors = 0; StopWatchInterface *timer = 0; bool g_Verify = false; int *pArgc = NULL; char **pArgv = NULL; #define MAX_EPSILON_ERROR 5.0f #define REFRESH_DELAY 10 //ms static const char *sSDKsample = "CUDA BicubicTexture"; // Define the files that are to be save and the reference images for validation const char *sFilterMode[] = { "Nearest", "Bilinear", "Bicubic", "Fast Bicubic", "Catmull-Rom", NULL }; const char *sOriginal[] = { "0_nearest.ppm", "1_bilinear.ppm", "2_bicubic.ppm", "3_fastbicubic.ppm", "4_catmull-rom.ppm", NULL }; const char *sReference[] = { "0_nearest.ppm", "1_bilinear.ppm", "2_bicubic.ppm", "3_fastbicubic.ppm", "4_catmull-rom.ppm", NULL }; const char *srcImageFilename = "lena_bw.pgm"; char *dumpFilename = NULL; uint width = 512, height = 512; uint imageWidth, imageHeight; dim3 blockSize(16, 16); dim3 gridSize(width / blockSize.x, height / blockSize.y); enum eFilterMode { MODE_NEAREST, MODE_BILINEAR, MODE_BICUBIC, MODE_FAST_BICUBIC, MODE_CATMULL_ROM, NUM_MODES }; eFilterMode g_FilterMode = MODE_FAST_BICUBIC; bool drawCurves = false; GLuint pbo = 0; // OpenGL pixel buffer object struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange GLuint displayTex = 0; GLuint bufferTex = 0; GLuint fprog; // fragment program (shader) float tx = 9.0f, ty = 10.0f; // image translation float scale = 1.0f / 16.0f; // image scale float cx, cy; // image centre void display(); void initGLBuffers(); void runBenchmark(int iterations); void cleanup(); #define GL_TEXTURE_TYPE GL_TEXTURE_RECTANGLE_ARB //#define GL_TEXTURE_TYPE GL_TEXTURE_2D extern "C" void initGL(int *argc, char **argv); extern "C" void loadImageData(int argc, char **argv); extern "C" void initTexture(int imageWidth, int imageHeight, uchar *h_data); extern "C" void freeTexture(); extern "C" void render(int width, int height, float tx, float ty, float scale, float cx, float cy, dim3 blockSize, dim3 gridSize, eFilterMode filter_mode, uchar4 *output); // w0, w1, w2, and w3 are the four cubic B-spline basis functions float bspline_w0(float a) { return (1.0f/6.0f)*(-a*a*a + 3.0f*a*a - 3.0f*a + 1.0f); } float bspline_w1(float a) { return (1.0f/6.0f)*(3.0f*a*a*a - 6.0f*a*a + 4.0f); } float bspline_w2(float a) { return (1.0f/6.0f)*(-3.0f*a*a*a + 3.0f*a*a + 3.0f*a + 1.0f); } __host__ __device__ float bspline_w3(float a) { return (1.0f/6.0f)*(a*a*a); } void computeFPS() { frameCount++; fpsCount++; if (fpsCount == fpsLimit-1) { g_Verify = true; } if (fpsCount == fpsLimit) { char fps[256]; float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); sprintf(fps, "%s %s <%s>: %3.1f fps", "", sSDKsample, sFilterMode[g_FilterMode], ifps); glutSetWindowTitle(fps); fpsCount = 0; sdkResetTimer(&timer); } } void plotCurve(float(*func)(float)) { const int steps = 100; glBegin(GL_LINE_STRIP); for (int i=0; i= '1' && key <= '5') { printf("> FilterMode[%d] = %s\n", g_FilterMode+1, sFilterMode[g_FilterMode]); } } int ox, oy; int buttonState = 0; void mouse(int button, int state, int x, int y) { if (state == GLUT_DOWN) { buttonState |= 1<\n", sSDKsample, sFilterMode[g_FilterMode]); render(imageWidth, imageHeight, tx, ty, scale, cx, cy, blockSize, gridSize, filter_mode, d_output); // check if kernel execution generated an error getLastCudaError("Error: render (bicubicTexture) Kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); cudaMemcpy(h_result, d_output, imageWidth*imageHeight*4, cudaMemcpyDeviceToHost); sdkSavePPM4ub(dump_filename, (unsigned char *)h_result, imageWidth, imageHeight); checkCudaErrors(cudaFree(d_output)); free(h_result); } #if USE_BUFFER_TEX // fragment program for reading from buffer texture static const char *shaderCode = "!!NVfp4.0\n" "INT PARAM width = program.local[0];\n" "INT TEMP index;\n" "FLR.S index, fragment.texcoord;\n" "MAD.S index.x, index.y, width, index.x;\n" // compute 1D index from 2D coords "TXF result.color, index.x, texture[0], BUFFER;\n" "END"; #endif GLuint compileASMShader(GLenum program_type, const char *code) { GLuint program_id; glGenProgramsARB(1, &program_id); glBindProgramARB(program_type, program_id); glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei) strlen(code), (GLubyte *) code); GLint error_pos; glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos); if (error_pos != -1) { const GLubyte *error_string; error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB); fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos, error_string); return 0; } return program_id; } void initialize(int argc, char **argv) { printf("[%s] (OpenGL Mode)\n", sSDKsample); initGL(&argc, argv); // use command-line specified CUDA device, otherwise use device with highest Gflops/s int devID = findCudaDevice(argc, (const char **)argv); // get number of SMs on this GPU cudaDeviceProp deviceProps; checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID)); printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount); // Create the timer (for fps measurement) sdkCreateTimer(&timer); // load image from disk loadImageData(argc, argv); printf("\n" "\tControls\n" "\t=/- : Zoom in/out\n" "\tb : Run Benchmark g_FilterMode\n" "\tc : Draw Bicubic Spline Curve\n" "\t[esc] - Quit\n\n" "\tPress number keys to change filtering g_FilterMode:\n\n" "\t1 : nearest filtering\n" "\t2 : bilinear filtering\n" "\t3 : bicubic filtering\n" "\t4 : fast bicubic filtering\n" "\t5 : Catmull-Rom filtering\n\n" ); initGLBuffers(); #if USE_BUFFER_TEX fprog = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shaderCode); if (!fprog) { exit(EXIT_SUCCESS); } #endif } void initGL(int *argc, char **argv) { // initialize GLUT callback functions glutInit(argc, argv); glutInitDisplayMode(GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH); glutInitWindowSize(width, height); glutCreateWindow("CUDA bicubic texture filtering"); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMouseFunc(mouse); glutMotionFunc(motion); glutReshapeFunc(reshape); glutTimerFunc(REFRESH_DELAY, timerEvent, 0); #if defined (__APPLE__) || defined(MACOSX) atexit(cleanup); #else glutCloseFunc(cleanup); #endif initMenus(); if (!isGLVersionSupported(2,0) || !areGLExtensionsSupported("GL_ARB_pixel_buffer_object")) { fprintf(stderr, "Required OpenGL extensions are missing."); exit(EXIT_FAILURE); } #if USE_BUFFER_TEX if (!areGLExtensionsSupported("GL_EXT_texture_buffer_object")) { fprintf(stderr, "OpenGL extension: GL_EXT_texture_buffer_object missing.\n"); exit(EXIT_FAILURE); } if (!areGLExtensionsSupported("GL_NV_gpu_program4")) { fprintf(stderr, "OpenGL extension: GL_NV_gpu_program4 missing.\n"); exit(EXIT_FAILURE); } #endif } void loadImageData(int argc, char **argv) { // load image from disk uchar *h_data = NULL; char *srcImagePath = NULL; if ((srcImagePath = sdkFindFilePath(srcImageFilename, argv[0])) == NULL) { printf("bicubicTexture loadImageData() could not find <%s>\nExiting...\n", srcImageFilename); exit(EXIT_FAILURE); } sdkLoadPGM(srcImagePath, &h_data, &imageWidth, &imageHeight); printf("Loaded '%s', %d x %d pixels\n", srcImageFilename, imageWidth, imageHeight); cx = imageWidth * 0.5f; cy = imageHeight * 0.5f; // initialize texture initTexture(imageWidth, imageHeight, h_data); } void printHelp() { printf("bicubicTexture Usage:\n"); printf("\t-file=output.ppm (output file to save to disk)\n"); printf("\t-mode=n (0=Nearest, 1=Bilinear, 2=Bicubic, 3=Fast-Bicubic, 4=Catmull-Rom\n"); } //////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { pArgc = &argc; pArgv = argv; // parse arguments char *filename; #if defined(__linux__) setenv ("DISPLAY", ":0", 0); #endif printf("Starting bicubicTexture\n"); if (checkCmdLineFlag(argc, (const char **) argv, "help")) { printHelp(); exit(EXIT_SUCCESS); } if (checkCmdLineFlag(argc, (const char **) argv, "mode")) { g_FilterMode = (eFilterMode)getCmdLineArgumentInt(argc, (const char **) argv, "mode"); if (g_FilterMode < 0 || g_FilterMode >= NUM_MODES) { printf("Invalid Mode setting %d\n", g_FilterMode); exit(EXIT_FAILURE); } } if (getCmdLineArgumentString(argc, (const char **) argv, "file", &filename)) { dumpFilename = filename; fpsLimit = frameCheckNumber; // Running CUDA kernel (bicubicFiltering) without visualization (QA Testing/Verification) runAutoTest(argc, argv, (const char *)dumpFilename, g_FilterMode); } else { // This runs the CUDA kernel (bicubicFiltering) + OpenGL visualization initialize(argc, argv); glutMainLoop(); } exit(EXIT_SUCCESS); }