btGpu3DGridBroadphaseSharedCode.h

Go to the documentation of this file.
00001 /*
00002 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
00003 Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
00004 
00005 This software is provided 'as-is', without any express or implied warranty.
00006 In no event will the authors be held liable for any damages arising from the use of this software.
00007 Permission is granted to anyone to use this software for any purpose, 
00008 including commercial applications, and to alter it and redistribute it freely, 
00009 subject to the following restrictions:
00010 
00011 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
00012 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
00013 3. This notice may not be removed or altered from any source distribution.
00014 */
00015 
00016 //----------------------------------------------------------------------------------------
00017 
00018 //----------------------------------------------------------------------------------------
00019 //----------------------------------------------------------------------------------------
00020 //----------------------------------------------------------------------------------------
00021 //----------------------------------------------------------------------------------------
00022 //               K E R N E L    F U N C T I O N S 
00023 //----------------------------------------------------------------------------------------
00024 //----------------------------------------------------------------------------------------
00025 //----------------------------------------------------------------------------------------
00026 //----------------------------------------------------------------------------------------
00027 //----------------------------------------------------------------------------------------
00028 //----------------------------------------------------------------------------------------
00029 
// Map a position to integer cell coordinates of the uniform grid.
// Per axis: subtract the world origin, divide by the cell size, round down.
// Only p.x, p.y and p.z are read. The result is NOT clamped to the grid
// extents, so it may be negative or exceed the grid size.
BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
{
    int3 cell;
    cell.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
    cell.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
    cell.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
    return cell;
} // bt3DGrid_calcGridPos()
00039 
00040 //----------------------------------------------------------------------------------------
00041 
// Convert integer cell coordinates into a linear cell index.
// Each coordinate is first clamped to [0, gridSize - 1] on its axis, so
// out-of-range cells map onto the nearest edge cell. The index is then
// (z * gridSizeY) * gridSizeX + y * gridSizeX + x, with the products
// evaluated through BT_GPU___mul24.
BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
{
    const int cx = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
    const int cy = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
    const int cz = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
    return BT_GPU___mul24(BT_GPU___mul24(cz, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX)
         + BT_GPU___mul24(cy, BT_GPU_params.m_gridSizeX)
         + cx;
} // bt3DGrid_calcGridHash()
00050 
00051 //----------------------------------------------------------------------------------------
00052 
// Kernel: one thread per body. Computes the grid cell hash of the centre of
// the body's AABB and stores (hash, body index) in pHash[index].
//   pAABB     - two entries per body: [2*i] is the min corner, [2*i + 1] the max corner
//   pHash     - output, one uint2 per body: x = cell hash, y = body index
//   numBodies - number of bodies; threads with a larger index do nothing
BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    if(index < (int)numBodies)
    {
        bt3DGrid3F1U lo = pAABB[index*2];
        bt3DGrid3F1U hi = pAABB[index*2 + 1];
        // centre of the box; the w component is left unset and is not read downstream
        float4 centre;
        centre.x = (lo.fx + hi.fx) * 0.5f;
        centre.y = (lo.fy + hi.fy) * 0.5f;
        centre.z = (lo.fz + hi.fz) * 0.5f;
        // cell coordinates -> clamped linear cell index
        uint cellHash = bt3DGrid_calcGridHash(bt3DGrid_calcGridPos(centre));
        // remember which body this hash belongs to
        pHash[index] = BT_GPU_make_uint2(cellHash, index);
    }
} // calcHashAABBD()
00073 
00074 //----------------------------------------------------------------------------------------
00075 
// Kernel: one thread per entry of pHash. Whenever an entry's cell hash
// differs from the hash of the entry before it (or it is the very first
// entry), the entry's position is written to cellStart[hash].
//   pHash     - (hash, body index) pairs; presumably already sorted by hash
//               by the caller, otherwise "first entry of a cell" is meaningless
//   cellStart - output, indexed by cell hash; cells with no entry are not written here
//   numBodies - number of entries in pHash
// The staging buffer holds 257 words, so the kernel must be launched with
// at most 256 threads per block.
BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    // Hash values are staged in shared memory so that each thread can look at
    // its predecessor's hash without loading two hash values per thread.
    // Slot t+1 belongs to thread t; slot 0 is the last entry of the previous block.
    BT_GPU___shared__ uint sharedHash[257];
    // Threads past the end of the data must NOT return early: every thread
    // of the block has to reach the barrier below, so the work is guarded instead.
    const bool isActive = (index < (int)numBodies);
    uint hash = 0;
    if(isActive)
    {
        hash = pHash[index].x;
        sharedHash[BT_GPU_threadIdx.x + 1] = hash;
        if((index > 0) && (BT_GPU_threadIdx.x == 0))
        {
            // first thread in block must load the neighbor body hash itself
            sharedHash[0] = pHash[index - 1].x;
        }
    }
    BT_GPU___syncthreads();
    if(isActive && ((index == 0) || (hash != sharedHash[BT_GPU_threadIdx.x])))
    {
        cellStart[hash] = index;
    }
} // findCellStartD()
00101 
00102 //----------------------------------------------------------------------------------------
00103 
// Returns 1 when the two axis-aligned boxes (min0,max0) and (min1,max1)
// overlap or touch on all three axes, 0 otherwise. Only the fx/fy/fz
// members of the arguments are read.
BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
{
    // The intervals on an axis intersect when each one starts no later than
    // the other one ends. Comparisons are kept as "<=" on purpose.
    const bool overlapX = (min0.fx <= max1.fx) && (min1.fx <= max0.fx);
    const bool overlapY = (min0.fy <= max1.fy) && (min1.fy <= max0.fy);
    const bool overlapZ = (min0.fz <= max1.fz) && (min1.fz <= max0.fz);
    return overlapX && overlapY && overlapZ;
} // cudaTestAABBOverlap()
00110  
00111 //----------------------------------------------------------------------------------------
00112 
// Tests the body at sorted position 'index' against the bodies stored in one
// grid cell and records overlaps in that body's pair list.
//   gridPos            - cell to examine; cells outside the grid are skipped
//   index              - position of the current body in pHash
//   pHash              - (cell hash, body index) pairs
//   pCellStart         - first pHash position per cell hash, 0xffffffff for an empty cell
//   pAABB              - two entries per body (min at 2*i, max at 2*i + 1); the uw member
//                        of the min entry is used as the body's handle index
//   pPairBuff          - pair lists of all bodies; entries are handle indices plus flag bits
//   pPairBuffStartCurr - per handle: x = start of its list in pPairBuff, y = entries in use
//   numBodies          - number of entries in pHash
// A pair is handled only by the body with the larger body index, so each pair
// is examined once and a body is never tested against itself.
BT_GPU___device__ void findPairsInCell( int3    gridPos,
                                        uint    index,
                                        uint2*  pHash,
                                        uint*   pCellStart,
                                        bt3DGrid3F1U* pAABB,
                                        uint*   pPairBuff,
                                        uint2*  pPairBuffStartCurr,
                                        uint    numBodies)
{
    // neighbour cells beyond the grid border do not exist
    if((gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1))
    {
        return;
    }
    if((gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1))
    {
        return;
    }
    if((gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1))
    {
        return;
    }
    uint cellHash = bt3DGrid_calcGridHash(gridPos);
    // first sorted entry belonging to this cell
    uint firstInCell = pCellStart[cellHash];
    if(firstInCell == 0xffffffff)
    {
        return;   // cell empty
    }
    // the body owned by this thread
    uint selfBody = pHash[index].y;
    bt3DGrid3F1U selfMin = BT_GPU_FETCH(pAABB, selfBody*2);
    bt3DGrid3F1U selfMax = BT_GPU_FETCH(pAABB, selfBody*2 + 1);
    uint selfHandle = selfMin.uw;
    // this body's pair list: [listStart, listStart + listCount)
    uint2 listInfo = pPairBuffStartCurr[selfHandle];
    uint listStart = listInfo.x;
    uint listCount = listInfo.y;
    // the next handle's list start bounds this one; one slot is kept in reserve
    uint2 nextListInfo = pPairBuffStartCurr[selfHandle+1];
    uint listLimit = nextListInfo.x - listStart - 1;
    // never scan more than m_maxBodiesPerCell entries, nor past the end of pHash
    uint scanEnd = firstInCell + BT_GPU_params.m_maxBodiesPerCell;
    if(scanEnd > numBodies)
    {
        scanEnd = numBodies;
    }
    for(uint cursor = firstInCell; cursor < scanEnd; cursor++)
    {
        uint2 entry = pHash[cursor];
        if(entry.x != cellHash)
        {
            break;   // no longer in same bucket
        }
        uint otherBody = entry.y;
        if(otherBody >= selfBody)
        {
            continue;   // self, or a pair that the other body's thread handles
        }
        bt3DGrid3F1U otherMin = BT_GPU_FETCH(pAABB, otherBody*2);
        bt3DGrid3F1U otherMax = BT_GPU_FETCH(pAABB, otherBody*2 + 1);
        if(!cudaTestAABBOverlap(selfMin, selfMax, otherMin, otherMax))
        {
            continue;
        }
        uint otherHandle = otherMin.uw;
        // is the pair already in the list? then just mark it as found again
        bool alreadyListed = false;
        for(uint k = 0; k < listCount; k++)
        {
            uint storedHandle = pPairBuff[listStart+k] & (~BT_3DGRID_PAIR_ANY_FLG);
            if(storedHandle == otherHandle)
            {
                pPairBuff[listStart+k] |= BT_3DGRID_PAIR_FOUND_FLG;
                alreadyListed = true;
                break;
            }
        }
        if(alreadyListed)
        {
            continue;
        }
        if(listCount >= listLimit)
        { // not a good solution, but let's avoid crash
            break;
        }
        // append as a new pair
        pPairBuff[listStart+listCount] = otherHandle | BT_3DGRID_PAIR_NEW_FLG;
        listCount++;
    }
    // publish the updated entry count
    pPairBuffStartCurr[selfHandle] = BT_GPU_make_uint2(listStart, listCount);
} // findPairsInCell()
00188 
00189 //----------------------------------------------------------------------------------------
00190 
// Kernel: one thread per entry of pHash. Locates the grid cell containing the
// centre of the body's AABB and calls findPairsInCell for that cell and its
// 26 neighbours (x varies fastest, then y, then z).
//   pAABB              - two entries per body: min at 2*i, max at 2*i + 1
//   pHash              - (cell hash, body index) pairs
//   pCellStart         - first pHash position per cell hash
//   pPairBuff          - pair lists, updated by findPairsInCell
//   pPairBuffStartCurr - per-handle list start / entry count, updated by findPairsInCell
//   numBodies          - number of entries in pHash; extra threads do nothing
BT_GPU___global__ void findOverlappingPairsD(   bt3DGrid3F1U*   pAABB, uint2* pHash, uint* pCellStart,
                                                uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    if(index < (int)numBodies)
    {
        // pHash[index].y is the body's index into the AABB array
        uint bodyIndex = pHash[index].y;
        bt3DGrid3F1U lo = BT_GPU_FETCH(pAABB, bodyIndex*2);
        bt3DGrid3F1U hi = BT_GPU_FETCH(pAABB, bodyIndex*2 + 1);
        float4 centre;
        centre.x = (lo.fx + hi.fx) * 0.5f;
        centre.y = (lo.fy + hi.fy) * 0.5f;
        centre.z = (lo.fz + hi.fz) * 0.5f;
        // cell that holds the box centre
        int3 homeCell = bt3DGrid_calcGridPos(centre);
        // visit the 3x3x3 block of cells around it
        for(int dz = -1; dz <= 1; dz++)
        {
            for(int dy = -1; dy <= 1; dy++)
            {
                for(int dx = -1; dx <= 1; dx++)
                {
                    findPairsInCell(homeCell + BT_GPU_make_int3(dx, dy, dz), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
                }
            }
        }
    }
} // findOverlappingPairsD()
00218 
00219 //----------------------------------------------------------------------------------------
00220 
// Kernel: one thread per entry of pHash. Tests the thread's body against every
// "large" body and records overlaps in the body's pair list.
//   pAABB              - two entries per body (min at 2*i, max at 2*i + 1); the large
//                        bodies occupy body indices numBodies .. numBodies + numLarge - 1.
//                        The uw member of a min entry is used as the handle index.
//   pHash              - (cell hash, body index) pairs; only the body index is used
//   pCellStart         - not used by this kernel
//   pPairBuff          - pair lists of all bodies; entries are handle indices plus flag bits
//   pPairBuffStartCurr - per handle: x = start of its list in pPairBuff, y = entries in use
//   numBodies          - number of entries in pHash; extra threads do nothing
//   numLarge           - number of large bodies
BT_GPU___global__ void findPairsLargeD( bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff,
                                        uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    if(index >= (int)numBodies)
    {
        return;
    }
    uint2 sortedData = pHash[index];
    uint unsorted_indx = sortedData.y;
    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
    bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
    uint handleIndex =  min0.uw;
    // this body's pair list: [start, start + curr)
    uint2 start_curr = pPairBuffStartCurr[handleIndex];
    uint start = start_curr.x;
    uint curr = start_curr.y;
    // the next handle's list start bounds this one; one slot is kept in reserve
    uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
    uint curr_max = start_curr_next.x - start - 1;
    for(uint i = 0; i < numLarge; i++)
    {
        // large bodies are stored after the regular ones
        uint indx2 = numBodies + i;
        bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
        bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
        if(cudaTestAABBOverlap(min0, max0, min1, max1))
        {
            uint k;
            uint handleIndex2 =  min1.uw;
            // already listed? then just mark the pair as found again
            for(k = 0; k < curr; k++)
            {
                uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
                if(old_pair == handleIndex2)
                {
                    pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
                    break;
                }
            }
            if(k == curr)
            {
                // Check capacity BEFORE writing, exactly as findPairsInCell does,
                // so a full list never receives an entry that is not counted.
                if(curr >= curr_max)
                { // not a good solution, but let's avoid crash
                    break;
                }
                pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
                curr++;
            }
        }
    }
    // publish the updated entry count
    pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
    return;
} // findPairsLargeD()
00271 
00272 //----------------------------------------------------------------------------------------
00273 
// Kernel: one thread per body. Counts the entries of the body's pair list
// that do NOT carry BT_3DGRID_PAIR_FOUND_FLG and stores the count in
// pPairScan[index + 1] (slot 0 is not written by this kernel).
//   pPairBuff          - pair lists of all bodies
//   pPairBuffStartCurr - per handle: x = start of its list in pPairBuff, y = entries in use
//   pPairScan          - output, one count per body, shifted by one slot
//   pAABB              - the uw member of entry 2*index is used as the body's handle index
//   numBodies          - number of bodies; extra threads do nothing
BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr,
                                                uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    if(index < (int)numBodies)
    {
        bt3DGrid3F1U lo = pAABB[index * 2];
        uint handle = lo.uw;
        uint2 listInfo = pPairBuffStartCurr[handle];
        uint* pList = pPairBuff + listInfo.x;
        uint listCount = listInfo.y;
        uint changed = 0;
        for(uint k = 0; k < listCount; k++)
        {
            // an entry without the FOUND flag counts as a change
            if(!(pList[k] & BT_3DGRID_PAIR_FOUND_FLG))
            {
                changed++;
            }
        }
        pPairScan[index+1] = changed;
    }
} // computePairCacheChangesD()
00298 
00299 //----------------------------------------------------------------------------------------
00300 
// Kernel: one thread per body. Walks the body's pair list once and
//   * copies every entry without BT_3DGRID_PAIR_FOUND_FLG (flags included)
//     to pPairOut, starting at offset pPairScan[index];
//   * compacts the list in place so that only entries carrying any of the
//     BT_3DGRID_PAIR_ANY_FLG bits remain, with those bits cleared.
// The new entry count is written back to pPairBuffStartCurr.
//   pPairBuff          - pair lists of all bodies, compacted in place
//   pPairBuffStartCurr - per handle: x = start of its list, y = entries in use (updated)
//   pPairScan          - per-body output offsets into pPairOut
//   pPairOut           - output buffer for the entries lacking the FOUND flag
//   pAABB              - the uw member of entry 2*index is used as the body's handle index
//   numBodies          - number of bodies; extra threads do nothing
BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
                                                   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
{
    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
    if(index < (int)numBodies)
    {
        bt3DGrid3F1U lo = pAABB[index * 2];
        uint handle = lo.uw;
        uint2 listInfo = pPairBuffStartCurr[handle];
        uint listStart = listInfo.x;
        uint listCount = listInfo.y;
        uint* pList = pPairBuff + listStart;
        uint* pChanged = pPairOut + pPairScan[index];
        uint kept = 0;
        for(uint k = 0; k < listCount; k++)
        {
            uint entry = pList[k];
            if(!(entry & BT_3DGRID_PAIR_FOUND_FLG))
            {
                // report the entry, flags untouched
                *pChanged = entry;
                pChanged++;
            }
            if(entry & BT_3DGRID_PAIR_ANY_FLG)
            {
                // keep it, stripped of its flags; kept never runs ahead of k
                pList[kept] = entry & (~BT_3DGRID_PAIR_ANY_FLG);
                kept++;
            }
        }
        pPairBuffStartCurr[handle] = BT_GPU_make_uint2(listStart, kept);
    }
} // squeezeOverlappingPairBuffD()
00334 
00335 
00336 //----------------------------------------------------------------------------------------
00337 //----------------------------------------------------------------------------------------
00338 //----------------------------------------------------------------------------------------
00339 //----------------------------------------------------------------------------------------
00340 //               E N D   O F    K E R N E L    F U N C T I O N S 
00341 //----------------------------------------------------------------------------------------
00342 //----------------------------------------------------------------------------------------
00343 //----------------------------------------------------------------------------------------
00344 //----------------------------------------------------------------------------------------
00345 
00346 extern "C"
00347 {
00348 
00349 //----------------------------------------------------------------------------------------
00350 
// Host entry point: launches calcHashAABBD over numBodies bodies.
//   pAABB     - passed through to the kernel
//   hash      - passed to the kernel reinterpreted as uint2*
//   numBodies - number of bodies to process
void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash, unsigned int numBodies)
{
    // launch configuration for numBodies items with a requested block size of 256
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 256, gridBlocks, blockThreads);
    // run the kernel
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
    // report a failed kernel invocation
    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
} // calcHashAABB()
00360 
00361 //----------------------------------------------------------------------------------------
00362 
// Host entry point: fills cellStart with the 0xffffffff "empty cell" marker
// and then launches findCellStartD over numBodies hash entries.
//   hash      - passed to the kernel reinterpreted as uint2*
//   cellStart - numCells entries, reset here and then written by the kernel
//   numBodies - number of hash entries
//   numCells  - number of entries in cellStart
void BT_GPU_PREF(findCellStart(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells))
{
    // launch configuration for numBodies items with a requested block size of 256
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 256, gridBlocks, blockThreads);
    // every cell starts out marked as empty
    BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
} // findCellStart()
00371 
00372 //----------------------------------------------------------------------------------------
00373 
// Host entry point: launches findOverlappingPairsD over numBodies bodies.
// When B_CUDA_USE_TEX is enabled, the AABB array (2 entries per body) is bound
// to the pAABBTex texture for the duration of the launch.
//   pAABB              - passed through to the kernel
//   pHash              - passed to the kernel reinterpreted as uint2*
//   pCellStart         - passed through to the kernel
//   pPairBuff          - passed through to the kernel
//   pPairBuffStartCurr - passed to the kernel reinterpreted as uint2*
//   numBodies          - number of bodies to process
void BT_GPU_PREF(findOverlappingPairs(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int*        pPairBuffStartCurr, unsigned int        numBodies))
{
#if B_CUDA_USE_TEX
    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
#endif
    // launch configuration for numBodies items with a requested block size of 64
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 64, gridBlocks, blockThreads);
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
#if B_CUDA_USE_TEX
    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findOverlappingPairs()
00387 
00388 //----------------------------------------------------------------------------------------
00389 
// Host entry point: launches findPairsLargeD over numBodies bodies.
// When B_CUDA_USE_TEX is enabled, the AABB array covering both the regular
// and the large bodies (2 entries each) is bound to the pAABBTex texture for
// the duration of the launch.
//   pAABB              - passed through to the kernel
//   pHash              - passed to the kernel reinterpreted as uint2*
//   pCellStart         - passed through to the kernel
//   pPairBuff          - passed through to the kernel
//   pPairBuffStartCurr - passed to the kernel reinterpreted as uint2*
//   numBodies          - number of regular bodies (one thread each)
//   numLarge           - number of large bodies
void BT_GPU_PREF(findPairsLarge(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge))
{
#if B_CUDA_USE_TEX
    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
#endif
    // launch configuration for numBodies items with a requested block size of 64
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 64, gridBlocks, blockThreads);
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
#if B_CUDA_USE_TEX
    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
#endif
} // findPairsLarge()
00403 
00404 //----------------------------------------------------------------------------------------
00405 
// Host entry point: launches computePairCacheChangesD over numBodies bodies.
//   pPairBuff          - passed through to the kernel
//   pPairBuffStartCurr - passed to the kernel reinterpreted as uint2*
//   pPairScan          - passed through to the kernel
//   pAABB              - passed through to the kernel
//   numBodies          - number of bodies to process
void BT_GPU_PREF(computePairCacheChanges(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies))
{
    // launch configuration for numBodies items with a requested block size of 256
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 256, gridBlocks, blockThreads);
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));
    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
} // computePairCacheChanges()
00413 
00414 //----------------------------------------------------------------------------------------
00415 
// Host entry point: launches squeezeOverlappingPairBuffD over numBodies bodies.
//   pPairBuff          - passed through to the kernel
//   pPairBuffStartCurr - passed to the kernel reinterpreted as uint2*
//   pPairScan          - passed through to the kernel
//   pPairOut           - passed through to the kernel
//   pAABB              - passed through to the kernel
//   numBodies          - number of bodies to process
void BT_GPU_PREF(squeezeOverlappingPairBuff(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies))
{
    // launch configuration for numBodies items with a requested block size of 256
    int gridBlocks, blockThreads;
    BT_GPU_PREF(computeGridSize)(numBodies, 256, gridBlocks, blockThreads);
    BT_GPU_EXECKERNEL(gridBlocks, blockThreads, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));
    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
} // btCuda_squeezeOverlappingPairBuff()
00423 
00424 //------------------------------------------------------------------------------------------------
00425 
00426 } // extern "C"
00427 
00428 //------------------------------------------------------------------------------------------------
00429 //------------------------------------------------------------------------------------------------
00430 //------------------------------------------------------------------------------------------------