/* * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ /** ************************************************************************** * \file DCT8x8_Gold.cpp * \brief Contains DCT, IDCT and quantization routines, used in JPEG internal * data processing. Host code. * * This sample implements forward and inverse Discrete Cosine Transform to blocks * of image pixels (of 8x8 size), as in JPEG standard. The data processing is done * using floating point representation. * The routine that performs quantization of coefficients is also included. */ #include "Common.h" #include "BmpUtil.h" /** * This unitary matrix performs DCT of rows of the matrix to the left */ const float DCTv8matrix[BLOCK_SIZE2] = { 0.3535533905932738f, 0.4903926402016152f, 0.4619397662556434f, 0.4157348061512726f, 0.3535533905932738f, 0.2777851165098011f, 0.1913417161825449f, 0.0975451610080642f, 0.3535533905932738f, 0.4157348061512726f, 0.1913417161825449f, -0.0975451610080641f, -0.3535533905932737f, -0.4903926402016152f, -0.4619397662556434f, -0.2777851165098011f, 0.3535533905932738f, 0.2777851165098011f, -0.1913417161825449f, -0.4903926402016152f, -0.3535533905932738f, 0.0975451610080642f, 0.4619397662556433f, 0.4157348061512727f, 0.3535533905932738f, 0.0975451610080642f, -0.4619397662556434f, -0.2777851165098011f, 0.3535533905932737f, 0.4157348061512727f, -0.1913417161825450f, -0.4903926402016153f, 0.3535533905932738f, -0.0975451610080641f, -0.4619397662556434f, 0.2777851165098009f, 0.3535533905932738f, -0.4157348061512726f, -0.1913417161825453f, 0.4903926402016152f, 0.3535533905932738f, -0.2777851165098010f, -0.1913417161825452f, 0.4903926402016153f, -0.3535533905932733f, -0.0975451610080649f, 0.4619397662556437f, -0.4157348061512720f, 0.3535533905932738f, -0.4157348061512727f, 0.1913417161825450f, 0.0975451610080640f, -0.3535533905932736f, 0.4903926402016152f, -0.4619397662556435f, 0.2777851165098022f, 0.3535533905932738f, -0.4903926402016152f, 0.4619397662556433f, -0.4157348061512721f, 0.3535533905932733f, -0.2777851165098008f, 0.1913417161825431f, -0.0975451610080625f }; /** * This unitary matrix performs DCT of columns of the matrix to the right */ const float DCTv8matrixT[BLOCK_SIZE2] = { 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.3535533905932738f, 0.4903926402016152f, 0.4157348061512726f, 0.2777851165098011f, 0.0975451610080642f, -0.0975451610080641f, -0.2777851165098010f, -0.4157348061512727f, -0.4903926402016152f, 0.4619397662556434f, 0.1913417161825449f, -0.1913417161825449f, -0.4619397662556434f, -0.4619397662556434f, -0.1913417161825452f, 0.1913417161825450f, 0.4619397662556433f, 0.4157348061512726f, -0.0975451610080641f, -0.4903926402016152f, -0.2777851165098011f, 0.2777851165098009f, 0.4903926402016153f, 0.0975451610080640f, -0.4157348061512721f, 0.3535533905932738f, -0.3535533905932737f, -0.3535533905932738f, 0.3535533905932737f, 0.3535533905932738f, -0.3535533905932733f, -0.3535533905932736f, 0.3535533905932733f, 0.2777851165098011f, -0.4903926402016152f, 0.0975451610080642f, 0.4157348061512727f, -0.4157348061512726f, -0.0975451610080649f, 0.4903926402016152f, -0.2777851165098008f, 0.1913417161825449f, -0.4619397662556434f, 0.4619397662556433f, -0.1913417161825450f, -0.1913417161825453f, 0.4619397662556437f, -0.4619397662556435f, 0.1913417161825431f, 0.0975451610080642f, -0.2777851165098011f, 0.4157348061512727f, -0.4903926402016153f, 0.4903926402016152f, -0.4157348061512720f, 0.2777851165098022f, -0.0975451610080625f }; /** * JPEG quality=0_of_12 quantization matrix */ float Q[BLOCK_SIZE2] = { 32.f, 33.f, 51.f, 81.f, 66.f, 39.f, 34.f, 17.f, 33.f, 36.f, 48.f, 47.f, 28.f, 23.f, 12.f, 12.f, 51.f, 48.f, 47.f, 28.f, 23.f, 12.f, 12.f, 12.f, 81.f, 47.f, 28.f, 23.f, 12.f, 12.f, 12.f, 12.f, 66.f, 28.f, 23.f, 12.f, 12.f, 12.f, 12.f, 12.f, 39.f, 23.f, 12.f, 12.f, 12.f, 12.f, 12.f, 12.f, 34.f, 12.f, 12.f, 12.f, 12.f, 12.f, 12.f, 12.f, 17.f, 12.f, 12.f, 12.f, 12.f, 12.f, 12.f, 12.f }; /** ************************************************************************** * Performs multiplication of two 8x8 matrices * * \param M1 [IN] - Pointer to the first matrix * \param M1Stride [IN] - Stride of the first matrix * \param M2 [IN] - Pointer to the second matrix * \param M2Stride [IN] - Stride of the second matrix * \param Mres [OUT] - Pointer to the result matrix * \param MresStride [IN] - Stride of the result matrix * * \return None */ void mult8x8(const float *M1, int M1Stride, const float *M2, int M2Stride, float *Mres, int MresStride) { for (int i = 0; i < BLOCK_SIZE; i++) { for (int j = 0; j < BLOCK_SIZE; j++) { float accumul = 0; for (int k = 0; k < BLOCK_SIZE; k++) { accumul += M1[i*M1Stride+k] * M2[k*M2Stride+j]; } Mres[i*MresStride+j] = accumul; } } } /** ************************************************************************** * Performs 8x8 block-wise Forward Discrete Cosine Transform of the given * image plane and outputs result to the plane of coefficients. * 1st version. * * \param fSrc [IN] - Source image plane * \param fDst [OUT] - Destination coefficients plane * \param Stride [IN] - Stride of both planes * \param Size [IN] - Size of planes * * \return None */ extern "C" void computeDCT8x8Gold1(const float *fSrc, float *fDst, int Stride, ROI Size) { float tmpblock[BLOCK_SIZE2]; //perform block wise DCT //DCT(A) = DCTv8matrixT * A * DCTv8matrix for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE) { for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE) { // tmpblock = DCTv8matrixT * A mult8x8(DCTv8matrixT, BLOCK_SIZE, fSrc + i * Stride + j, Stride, tmpblock, BLOCK_SIZE); // DCT(A) = tmpblock * DCTv8matrix mult8x8(tmpblock, BLOCK_SIZE, DCTv8matrix, BLOCK_SIZE, fDst + i * Stride + j, Stride); } } } /** ************************************************************************** * Performs 8x8 block-wise Inverse Discrete Cosine Transform of the given * coefficients plane and outputs result to the image plane. * 1st version. * * \param fSrc [IN] - Source coefficients plane * \param fDst [OUT] - Destination image plane * \param Stride [IN] - Stride of both planes * \param Size [IN] - Size of planes * * \return None */ extern "C" void computeIDCT8x8Gold1(const float *fSrc, float *fDst, int Stride, ROI Size) { float tmpblock[BLOCK_SIZE2]; //perform block wise IDCT //IDCT(A) = DCTv8matrix * A * DCTv8matrixT for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE) { for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE) { // tmpblock = DCTv8matrix * A mult8x8(DCTv8matrix, BLOCK_SIZE, fSrc + i * Stride + j, Stride, tmpblock, BLOCK_SIZE); // DCT(A) = tmpblock * DCTv8matrixT; mult8x8(tmpblock, BLOCK_SIZE, DCTv8matrixT, BLOCK_SIZE, fDst + i * Stride + j, Stride); } } } /** ************************************************************************** * Performs in-place quantization of given coefficients plane using * predefined quantization matrices (float elements) * * \param fSrcDst [IN/OUT] - Coefficients plane * \param Stride [IN] - Stride of SrcDst * \param Size [IN] - Size of the plane * * \return None */ extern "C" void quantizeGoldFloat(float *fSrcDst, int Stride, ROI Size) { //perform block wise in-place quantization using Q //Q(A) = round(A ./ Q) .* Q; for (int i=0; i>1; temp /= quant; temp = -temp; } else { temp += quant>>1; temp /= quant; } fSrcDst[i*Stride+j] = temp * quant; } } } float C_a = 1.387039845322148f; //!< a = (2^0.5) * cos( pi / 16); Used in forward and inverse DCT. float C_b = 1.306562964876377f; //!< b = (2^0.5) * cos( pi / 8); Used in forward and inverse DCT. float C_c = 1.175875602419359f; //!< c = (2^0.5) * cos(3 * pi / 16); Used in forward and inverse DCT. float C_d = 0.785694958387102f; //!< d = (2^0.5) * cos(5 * pi / 16); Used in forward and inverse DCT. float C_e = 0.541196100146197f; //!< e = (2^0.5) * cos(3 * pi / 8); Used in forward and inverse DCT. float C_f = 0.275899379282943f; //!< f = (2^0.5) * cos(7 * pi / 16); Used in forward and inverse DCT. /** * Normalization constant that is used in forward and inverse DCT */ float C_norm = 0.3535533905932737f; // 1 / (8^0.5) /** ************************************************************************** * Performs DCT of vector of 8 elements. * * \param FirstIn [IN] - Pointer to the first element of input vector * \param StepIn [IN] - Value to add to ptr to access other input elements * \param FirstOut [OUT] - Pointer to the first element of output vector * \param StepOut [IN] - Value to add to ptr to access other output elements * * \return None */ void SubroutineDCTvector(float *FirstIn, int StepIn, float *FirstOut, int StepOut) { float X07P = FirstIn[0*StepIn] + FirstIn[7*StepIn]; float X16P = FirstIn[1*StepIn] + FirstIn[6*StepIn]; float X25P = FirstIn[2*StepIn] + FirstIn[5*StepIn]; float X34P = FirstIn[3*StepIn] + FirstIn[4*StepIn]; float X07M = FirstIn[0*StepIn] - FirstIn[7*StepIn]; float X61M = FirstIn[6*StepIn] - FirstIn[1*StepIn]; float X25M = FirstIn[2*StepIn] - FirstIn[5*StepIn]; float X43M = FirstIn[4*StepIn] - FirstIn[3*StepIn]; float X07P34PP = X07P + X34P; float X07P34PM = X07P - X34P; float X16P25PP = X16P + X25P; float X16P25PM = X16P - X25P; FirstOut[0*StepOut] = C_norm * (X07P34PP + X16P25PP); FirstOut[2*StepOut] = C_norm * (C_b * X07P34PM + C_e * X16P25PM); FirstOut[4*StepOut] = C_norm * (X07P34PP - X16P25PP); FirstOut[6*StepOut] = C_norm * (C_e * X07P34PM - C_b * X16P25PM); FirstOut[1*StepOut] = C_norm * (C_a * X07M - C_c * X61M + C_d * X25M - C_f * X43M); FirstOut[3*StepOut] = C_norm * (C_c * X07M + C_f * X61M - C_a * X25M + C_d * X43M); FirstOut[5*StepOut] = C_norm * (C_d * X07M + C_a * X61M + C_f * X25M - C_c * X43M); FirstOut[7*StepOut] = C_norm * (C_f * X07M + C_d * X61M + C_c * X25M + C_a * X43M); } /** ************************************************************************** * Performs IDCT of vector of 8 elements. * * \param FirstIn [IN] - Pointer to the first element of input vector * \param StepIn [IN] - Value to add to ptr to access other input elements * \param FirstOut [OUT] - Pointer to the first element of output vector * \param StepOut [IN] - Value to add to ptr to access other output elements * * \return None */ void SubroutineIDCTvector(float *FirstIn, int StepIn, float *FirstOut, int StepOut) { float Y04P = FirstIn[0*StepIn] + FirstIn[4*StepIn]; float Y2b6eP = C_b * FirstIn[2*StepIn] + C_e * FirstIn[6*StepIn]; float Y04P2b6ePP = Y04P + Y2b6eP; float Y04P2b6ePM = Y04P - Y2b6eP; float Y7f1aP3c5dPP = C_f * FirstIn[7*StepIn] + C_a * FirstIn[1*StepIn] + C_c * FirstIn[3*StepIn] + C_d * FirstIn[5*StepIn]; float Y7a1fM3d5cMP = C_a * FirstIn[7*StepIn] - C_f * FirstIn[1*StepIn] + C_d * FirstIn[3*StepIn] - C_c * FirstIn[5*StepIn]; float Y04M = FirstIn[0*StepIn] - FirstIn[4*StepIn]; float Y2e6bM = C_e * FirstIn[2*StepIn] - C_b * FirstIn[6*StepIn]; float Y04M2e6bMP = Y04M + Y2e6bM; float Y04M2e6bMM = Y04M - Y2e6bM; float Y1c7dM3f5aPM = C_c * FirstIn[1*StepIn] - C_d * FirstIn[7*StepIn] - C_f * FirstIn[3*StepIn] - C_a * FirstIn[5*StepIn]; float Y1d7cP3a5fMM = C_d * FirstIn[1*StepIn] + C_c * FirstIn[7*StepIn] - C_a * FirstIn[3*StepIn] + C_f * FirstIn[5*StepIn]; FirstOut[0*StepOut] = C_norm * (Y04P2b6ePP + Y7f1aP3c5dPP); FirstOut[7*StepOut] = C_norm * (Y04P2b6ePP - Y7f1aP3c5dPP); FirstOut[4*StepOut] = C_norm * (Y04P2b6ePM + Y7a1fM3d5cMP); FirstOut[3*StepOut] = C_norm * (Y04P2b6ePM - Y7a1fM3d5cMP); FirstOut[1*StepOut] = C_norm * (Y04M2e6bMP + Y1c7dM3f5aPM); FirstOut[5*StepOut] = C_norm * (Y04M2e6bMM - Y1d7cP3a5fMM); FirstOut[2*StepOut] = C_norm * (Y04M2e6bMM + Y1d7cP3a5fMM); FirstOut[6*StepOut] = C_norm * (Y04M2e6bMP - Y1c7dM3f5aPM); } /** ************************************************************************** * Performs 8x8 block-wise Forward Discrete Cosine Transform of the given * image plane and outputs result to the plane of coefficients. * 2nd version. * * \param fSrc [IN] - Source image plane * \param fDst [OUT] - Destination coefficients plane * \param Stride [IN] - Stride of both planes * \param Size [IN] - Size of planes * * \return None */ extern "C" void computeDCT8x8Gold2(const float *fSrc, float *fDst, int Stride, ROI Size) { for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE) { for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE) { //process rows for (int k = 0; k < BLOCK_SIZE; k++) { SubroutineDCTvector((float *)fSrc + (i+k) * Stride + j, 1, fDst + (i+k) * Stride + j, 1); } //process columns for (int k = 0; k < BLOCK_SIZE; k++) { SubroutineDCTvector(fDst + i * Stride + (j+k), Stride, fDst + i * Stride + (j+k), Stride); } } } } /** ************************************************************************** * Performs 8x8 block-wise Inverse Discrete Cosine Transform of the given * coefficients plane and outputs result to the image plane * 2nd version. * * \param fSrc [IN] - Source coefficients plane * \param fDst [OUT] - Destination image plane * \param Stride [IN] - Stride of both planes * \param Size [IN] - Size of planes * * \return None */ extern "C" void computeIDCT8x8Gold2(const float *fSrc, float *fDst, int Stride, ROI Size) { for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE) { for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE) { //process rows for (int k = 0; k < BLOCK_SIZE; k++) { SubroutineIDCTvector((float *)fSrc + (i+k) * Stride + j, 1, fDst + (i+k) * Stride + j, 1); } //process columns for (int k = 0; k < BLOCK_SIZE; k++) { SubroutineIDCTvector(fDst + i * Stride + (j+k), Stride, fDst + i * Stride + (j+k), Stride); } } } }