/*
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/*
 * This file contains simple wrapper functions that call the CUDA kernels
 */

#define HELPERGL_EXTERN_GL_FUNC_IMPLEMENTATION

// NOTE(review): the six system header names below were missing from the copy
// under review (only bare `#include` tokens survived). They are restored to
// the usual layout for this kind of sample -- confirm against the build.
#include <helper_gl.h>
#include <helper_cuda.h>
#include <cstdlib>
#include <cstdio>
#include <string.h>
#include <cuda_gl_interop.h>

#include "thrust/device_ptr.h"
#include "thrust/for_each.h"
#include "thrust/iterator/zip_iterator.h"
#include "thrust/sequence.h"   // thrust::sequence is used by calcDepth()
#include "thrust/sort.h"

#include "particles_kernel_device.cuh"
#include "ParticleSystem.cuh"

extern "C"
{

    // 3D CUDA array that backs the noise texture built by createNoiseTexture().
    cudaArray *noiseArray;

    // Copy the host-side simulation parameters into the device symbol `params`
    // (declared outside this file; the original comment describes it as
    // constant memory).
    //
    // hostParams: pointer to the host copy of the parameters; sizeof(SimParams)
    //             bytes are read from it.
    void setParameters(SimParams *hostParams)
    {
        checkCudaErrors(cudaMemcpyToSymbol(params, hostParams, sizeof(SimParams)));
    }

    // Round a / b up to the nearest higher integer value.
    //
    // Precondition: b != 0 (both the modulo and the division use b).
    int iDivUp(int a, int b)
    {
        return (a % b != 0) ? (a / b + 1) : (a / b);
    }

    // Compute grid and thread block size for a given number of elements.
    //
    // n:          number of elements to cover
    // blockSize:  upper bound for the number of threads per block
    // numBlocks:  (out) number of blocks needed to cover n elements
    // numThreads: (out) threads per block, min(blockSize, n)
    //
    // When n or blockSize is not positive there is nothing sensible to launch;
    // both outputs are set to 0 instead of dividing by zero in iDivUp().
    void computeGridSize(int n, int blockSize, int &numBlocks, int &numThreads)
    {
        if (n <= 0 || blockSize <= 0)
        {
            numThreads = 0;
            numBlocks  = 0;
            return;
        }

        numThreads = (blockSize < n) ? blockSize : n;
        numBlocks  = iDivUp(n, numThreads);
    }

    // Pseudo-random float in [0, 1], driven by the C library rand().
    inline float frand()
    {
        return rand() / (float) RAND_MAX;
    }

    // Create a 3D texture containing random values.
    //
    // Fills a host buffer with four floats per texel, uploads it into the
    // global `noiseArray`, and creates the texture object `noiseTex` (declared
    // outside this file) with normalized coordinates, linear filtering and
    // wrap addressing on all three axes.
    //
    // w, h, d: texture extent in texels.
    void createNoiseTexture(int w, int h, int d)
    {
        cudaExtent size = make_cudaExtent(w, h, d);
        size_t elements = size.width*size.height*size.depth;

        // four float channels per texel
        float *volumeData = (float *)malloc(elements*4*sizeof(float));

        if (volumeData == NULL && elements != 0)
        {
            // stop with a message rather than writing through a null pointer
            fprintf(stderr, "createNoiseTexture: failed to allocate %lu bytes\n",
                    (unsigned long)(elements*4*sizeof(float)));
            exit(EXIT_FAILURE);
        }

        float *ptr = volumeData;

        // NOTE(review): the body of this loop was truncated in the copy under
        // review. It is reconstructed as four signed random samples in [-1, 1]
        // per texel -- confirm the intended value range.
        for (size_t i=0; i<elements; i++)
        {
            *ptr++ = frand()*2.0f-1.0f;
            *ptr++ = frand()*2.0f-1.0f;
            *ptr++ = frand()*2.0f-1.0f;
            *ptr++ = frand()*2.0f-1.0f;
        }

        // float4 texels, matching the sizeof(float4) row pitch used below
        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
        checkCudaErrors(cudaMalloc3DArray(&noiseArray, &channelDesc, size));

        cudaMemcpy3DParms copyParams = { 0 };
        copyParams.srcPtr   = make_cudaPitchedPtr((void *)volumeData,
                                                  size.width*sizeof(float4),
                                                  size.width, size.height);
        copyParams.dstArray = noiseArray;
        copyParams.extent   = size;
        copyParams.kind     = cudaMemcpyHostToDevice;
        checkCudaErrors(cudaMemcpy3D(&copyParams));

        // the host staging buffer is no longer needed after the blocking copy
        free(volumeData);

        cudaResourceDesc texRes;
        memset(&texRes,0,sizeof(cudaResourceDesc));

        texRes.resType           = cudaResourceTypeArray;
        texRes.res.array.array   = noiseArray;

        cudaTextureDesc texDescr;
        memset(&texDescr,0,sizeof(cudaTextureDesc));

        texDescr.normalizedCoords = true;
        texDescr.filterMode       = cudaFilterModeLinear;
        texDescr.addressMode[0]   = cudaAddressModeWrap;
        texDescr.addressMode[1]   = cudaAddressModeWrap;
        texDescr.addressMode[2]   = cudaAddressModeWrap;
        texDescr.readMode         = cudaReadModeElementType;

        checkCudaErrors(cudaCreateTextureObject(&noiseTex, &texRes, &texDescr, NULL));
    }

    // Apply integrate_functor(deltaTime, noiseTex) to every particle.
    //
    // The functor (defined outside this file) receives one zipped tuple
    // (newPos[i], newVel[i], oldPos[i], oldVel[i]) per particle.
    //
    // oldPos/oldVel, newPos/newVel: device pointers to numParticles float4
    //                               elements each.
    // deltaTime:    time step forwarded to the functor.
    // numParticles: number of tuples to process.
    void integrateSystem(float4 *oldPos, float4 *newPos,
                         float4 *oldVel, float4 *newVel,
                         float deltaTime,
                         int numParticles)
    {
        thrust::device_ptr<float4> d_newPos(newPos);
        thrust::device_ptr<float4> d_newVel(newVel);
        thrust::device_ptr<float4> d_oldPos(oldPos);
        thrust::device_ptr<float4> d_oldVel(oldVel);

        thrust::for_each(
            thrust::make_zip_iterator(thrust::make_tuple(d_newPos, d_newVel, d_oldPos, d_oldVel)),
            thrust::make_zip_iterator(thrust::make_tuple(d_newPos+numParticles, d_newVel+numParticles, d_oldPos+numParticles, d_oldVel+numParticles)),
            integrate_functor(deltaTime, noiseTex));
    }

    // Apply calcDepth_functor(sortVector) to every (pos[i], keys[i]) pair and
    // reset `indices` to the identity sequence 0, 1, ..., numParticles-1.
    //
    // pos:          device pointer to numParticles positions.
    // keys:         (output) device pointer to numParticles sort keys.
    // indices:      (output) device pointer to numParticles indices.
    // sortVector:   vector forwarded to the functor.
    // numParticles: number of particles to process.
    void calcDepth(float4 *pos,
                   float   *keys,        // output
                   uint    *indices,     // output
                   float3   sortVector,
                   int      numParticles)
    {
        thrust::device_ptr<float4> d_pos(pos);
        thrust::device_ptr<float> d_keys(keys);
        thrust::device_ptr<uint> d_indices(indices);

        thrust::for_each(
            thrust::make_zip_iterator(thrust::make_tuple(d_pos, d_keys)),
            thrust::make_zip_iterator(thrust::make_tuple(d_pos+numParticles, d_keys+numParticles)),
            calcDepth_functor(sortVector));

        thrust::sequence(d_indices, d_indices + numParticles);
    }

    // Sort `sortKeys` in place (Thrust's default ascending order) and apply the
    // same permutation to `indices`.
    //
    // sortKeys, indices: device pointers to numParticles elements each.
    void sortParticles(float *sortKeys, uint *indices, uint numParticles)
    {
        thrust::sort_by_key(thrust::device_ptr<float>(sortKeys),
                            thrust::device_ptr<float>(sortKeys + numParticles),
                            thrust::device_ptr<uint>(indices));
    }

}   // extern "C"