btSoftBodySolver_OpenCLSIMDAware.cpp

Go to the documentation of this file.
00001 /*
00002 Bullet Continuous Collision Detection and Physics Library
00003 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
00004 
00005 This software is provided 'as-is', without any express or implied warranty.
00006 In no event will the authors be held liable for any damages arising from the use of this software.
00007 Permission is granted to anyone to use this software for any purpose, 
00008 including commercial applications, and to alter it and redistribute it freely, 
00009 subject to the following restrictions:
00010 
00011 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
00012 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
00013 3. This notice may not be removed or altered from any source distribution.
00014 */
00015 
00016 
00017 #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
00018 #include "vectormath/vmInclude.h"
00019 #include <stdio.h> //@todo: remove the debugging printf at some stage
00020 #include "btSoftBodySolver_OpenCLSIMDAware.h"
00021 #include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
00022 #include "BulletSoftBody/btSoftBody.h"
00023 #include "BulletCollision/CollisionShapes/btCapsuleShape.h"
00024 #include <limits.h>
00025 
// Launch-geometry constants used by the host-side dispatch code in this file.
// Work-items per wavefront: seeds m_wavefrontSize in the link-data constructor and
// converts a wave count into a work-item count in solveLinksForPosition().
00026 #define WAVEFRONT_SIZE 32
// Factor applied to WAVEFRONT_SIZE to obtain the work-group size; also scales the
// size-only (scratch) kernel arguments of the link solver.
00027 #define WAVEFRONT_BLOCK_MULTIPLIER 2
// Work-group size in work-items; optimize() also pads per-body vertex counts to a multiple of it.
00028 #define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
// Seeds m_linksPerWorkItem in the link-data constructor.
00029 #define LINKS_PER_SIMD_LANE 16
00030 
// Local work size passed to clEnqueueNDRangeKernel by the dispatch functions in this file.
00031 static const size_t workGroupSize = GROUP_SIZE;
00032 
00033 
00034 //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
00035 
// Turns its argument into a string literal.
// NOTE(review): presumably the included .cl files wrap their kernel source in MSTRINGIFY(...) - confirm against the OpenCLC10 sources.
00037 #define MSTRINGIFY(A) #A
// Kernel source strings. None of the declarations below has an initializer or a ';' of its own:
// the #include on the following line supplies both, so each .cl file has to expand to a complete
// string expression followed by ';'. Do not separate a declaration from its #include.
// NOTE(review): UpdateConstantsCLString, VSolveLinksCLString and OutputToVertexArrayCLString are not
// passed to any compileCLKernelFromString() call in the visible part of this file.
00038 static const char* UpdatePositionsFromVelocitiesCLString = 
00039 #include "OpenCLC10/UpdatePositionsFromVelocities.cl"
00040 static const char* SolvePositionsCLString = 
00041 #include "OpenCLC10/SolvePositionsSIMDBatched.cl"
00042 static const char* UpdateNodesCLString = 
00043 #include "OpenCLC10/UpdateNodes.cl"
00044 static const char* UpdatePositionsCLString = 
00045 #include "OpenCLC10/UpdatePositions.cl"
00046 static const char* UpdateConstantsCLString = 
00047 #include "OpenCLC10/UpdateConstants.cl"
00048 static const char* IntegrateCLString = 
00049 #include "OpenCLC10/Integrate.cl"
00050 static const char* ApplyForcesCLString = 
00051 #include "OpenCLC10/ApplyForces.cl"
00052 static const char* UpdateFixedVertexPositionsCLString = 
00053 #include "OpenCLC10/UpdateFixedVertexPositions.cl"
00054 static const char* UpdateNormalsCLString = 
00055 #include "OpenCLC10/UpdateNormals.cl"
00056 static const char* VSolveLinksCLString = 
00057 #include "OpenCLC10/VSolveLinks.cl"
00058 static const char* SolveCollisionsAndUpdateVelocitiesCLString =
00059 #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
00060 static const char* OutputToVertexArrayCLString =
00061 #include "OpenCLC10/OutputToVertexArray.cl"
00062 
00063 
00064 
00065 btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue,  cl_context ctx) :
00066         m_cqCommandQue(queue),
00067         m_wavefrontSize( WAVEFRONT_SIZE ),
00068         m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
00069         m_maxBatchesWithinWave( 0 ),
00070         m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
00071         m_numWavefronts( 0 ),
00072         m_maxVertex( 0 ),
00073         m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),
00074         m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
00075         m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
00076         m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
00077         m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
00078         m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
00079         m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
00080         m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
00081 {
00082 }
00083 
00084 btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
00085 {
00086 }
00087 
00088 static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
00089 {
00090         Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
00091         return outVec;
00092 }
00093 
00095 void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
00096 {
00097         int previousSize = m_links.size();
00098         int newSize = previousSize + numLinks;
00099 
00100         btSoftBodyLinkData::createLinks( numLinks );
00101 
00102         // Resize the link addresses array as well
00103         m_linkAddresses.resize( newSize );
00104 }
00105 
00107 void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt( 
00108         const LinkDescription &link, 
00109         int linkIndex )
00110 {
00111         btSoftBodyLinkData::setLinkAt( link, linkIndex );
00112 
00113         if( link.getVertex0() > m_maxVertex )
00114                 m_maxVertex = link.getVertex0();
00115         if( link.getVertex1() > m_maxVertex )
00116                 m_maxVertex = link.getVertex1();
00117 
00118         // Set the link index correctly for initialisation
00119         m_linkAddresses[linkIndex] = linkIndex;
00120 }
00121 
00122 bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
00123 {
00124         return m_onGPU;
00125 }
00126 
00127 bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
00128 {
00129         bool success = true;
00130         success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
00131         success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
00132         success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
00133         success = success && m_clLinkStrength.moveToGPU();
00134         success = success && m_clLinksMassLSC.moveToGPU();
00135         success = success && m_clLinksRestLengthSquared.moveToGPU();
00136         success = success && m_clLinksRestLength.moveToGPU();
00137         success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
00138 
00139         if( success ) {
00140                 m_onGPU = true;
00141         }
00142 
00143         return success;
00144 }
00145 
00146 bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
00147 {
00148         bool success = true;
00149         success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
00150         success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
00151         success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
00152         success = success && m_clLinkStrength.moveFromGPU();
00153         success = success && m_clLinksMassLSC.moveFromGPU();
00154         success = success && m_clLinksRestLengthSquared.moveFromGPU();
00155         success = success && m_clLinksRestLength.moveFromGPU();
00156         success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
00157 
00158         if( success ) {
00159                 m_onGPU = false;
00160         }
00161 
00162         return success;
00163 }
00164 
00165 
00166 
00167 
00168 
00169 
00170 
00171 
00172 btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
00173         btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
00174         m_linkData(queue, ctx)
00175 {
00176         // Initial we will clearly need to update solver constants
00177         // For now this is global for the cloths linked with this solver - we should probably make this body specific 
00178         // for performance in future once we understand more clearly when constants need to be updated
00179         m_updateSolverConstants = true;
00180 
00181         m_shadersInitialized = false;
00182 }
00183 
00184 btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
00185 {
00186         releaseKernels();
00187 }
00188 
00189 void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
00190 {
00191         if( forceUpdate || m_softBodySet.size() != softBodies.size() )
00192         {
00193                 // Have a change in the soft body set so update, reloading all the data
00194                 getVertexData().clear();
00195                 getTriangleData().clear();
00196                 getLinkData().clear();
00197                 m_softBodySet.resize(0);
00198                 m_anchorIndex.clear();
00199 
00200                 int maxPiterations = 0;
00201                 int maxViterations = 0;
00202 
00203                 for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
00204                 {
00205                         btSoftBody *softBody = softBodies[ softBodyIndex ];
00206                         using Vectormath::Aos::Matrix3;
00207                         using Vectormath::Aos::Point3;
00208 
00209                         // Create SoftBody that will store the information within the solver
00210                         btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
00211                         m_softBodySet.push_back( newSoftBody );
00212 
00213                         m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
00214                         m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
00215                         m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
00216                         m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
00217                         m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
00218                         m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
00219                         // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
00220                         m_perClothFriction.push_back(softBody->m_cfg.kDF);
00221                         m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
00222 
00223                         // Add space for new vertices and triangles in the default solver for now
00224                         // TODO: Include space here for tearing too later
00225                         int firstVertex = getVertexData().getNumVertices();
00226                         int numVertices = softBody->m_nodes.size();
00227                         // Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
00228                         // maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
00229                         int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
00230                         // Allocate space for new vertices in all the vertex arrays
00231                         getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
00232 
00233 
00234                         int firstTriangle = getTriangleData().getNumTriangles();
00235                         int numTriangles = softBody->m_faces.size();
00236                         int maxTriangles = numTriangles;
00237                         getTriangleData().createTriangles( maxTriangles );
00238 
00239                         // Copy vertices from softbody into the solver
00240                         for( int vertex = 0; vertex < numVertices; ++vertex )
00241                         {
00242                                 Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
00243                                 btSoftBodyVertexData::VertexDescription desc;
00244 
00245                                 // TODO: Position in the softbody might be pre-transformed
00246                                 // or we may need to adapt for the pose.
00247                                 //desc.setPosition( cloth.getMeshTransform()*multPoint );
00248                                 desc.setPosition( multPoint );
00249 
00250                                 float vertexInverseMass = softBody->m_nodes[vertex].m_im;
00251                                 desc.setInverseMass(vertexInverseMass);
00252                                 getVertexData().setVertexAt( desc, firstVertex + vertex );
00253 
00254                                 m_anchorIndex.push_back(-1);
00255                         }
00256                         for( int vertex = numVertices; vertex < maxVertices; ++vertex )
00257                         {
00258                                 m_anchorIndex.push_back(-1.0);
00259                         }
00260 
00261                         // Copy triangles similarly
00262                         // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
00263                         for( int triangle = 0; triangle < numTriangles; ++triangle )
00264                         {
00265                                 // Note that large array storage is relative to the array not to the cloth
00266                                 // So we need to add firstVertex to each value
00267                                 int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
00268                                 int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
00269                                 int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
00270                                 btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
00271                                 getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
00272                                 
00273                                 // Increase vertex triangle counts for this triangle            
00274                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
00275                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
00276                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
00277                         }
00278 
00279                         int firstLink = getLinkData().getNumLinks();
00280                         int numLinks = softBody->m_links.size();
00281                         int maxLinks = numLinks;
00282                         
00283                         // Allocate space for the links
00284                         getLinkData().createLinks( numLinks );
00285 
00286                         // Add the links
00287                         for( int link = 0; link < numLinks; ++link )
00288                         {
00289                                 int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
00290                                 int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
00291 
00292                                 btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
00293                                 newLink.setLinkStrength(1.f);
00294                                 getLinkData().setLinkAt(newLink, firstLink + link);
00295                         }
00296                         
00297                         newSoftBody->setFirstVertex( firstVertex );
00298                         newSoftBody->setFirstTriangle( firstTriangle );
00299                         newSoftBody->setNumVertices( numVertices );
00300                         newSoftBody->setMaxVertices( maxVertices );
00301                         newSoftBody->setNumTriangles( numTriangles );
00302                         newSoftBody->setMaxTriangles( maxTriangles );
00303                         newSoftBody->setFirstLink( firstLink );
00304                         newSoftBody->setNumLinks( numLinks );
00305 
00306                         // Find maximum piterations and viterations
00307                         int piterations = softBody->m_cfg.piterations;
00308 
00309             if ( piterations > maxPiterations )
00310                   maxPiterations = piterations;
00311 
00312             int viterations = softBody->m_cfg.viterations;
00313 
00314                         if ( viterations > maxViterations )
00315                   maxViterations = viterations;
00316 
00317                         // zero mass
00318                         for( int vertex = 0; vertex < numVertices; ++vertex )
00319                         {
00320                                 if ( softBody->m_nodes[vertex].m_im == 0 )
00321                                 {
00322                                         AnchorNodeInfoCL nodeInfo;
00323                                         nodeInfo.clVertexIndex = firstVertex + vertex;
00324                                         nodeInfo.pNode = &softBody->m_nodes[vertex];
00325 
00326                                         m_anchorNodeInfoArray.push_back(nodeInfo);
00327                                 }
00328                         }                       
00329 
00330                         // anchor position
00331                         if ( numVertices > 0 )
00332                         {
00333                                 for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
00334                                 {
00335                                         btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
00336                                         btSoftBody::Node* firstNode = &softBody->m_nodes[0];
00337 
00338                                         AnchorNodeInfoCL nodeInfo;
00339                                         nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
00340                                         nodeInfo.pNode = anchorNode;
00341 
00342                                         m_anchorNodeInfoArray.push_back(nodeInfo);
00343                                 }
00344                         }                       
00345                 }
00346 
00347                 m_anchorPosition.clear();               
00348                 m_anchorPosition.resize(m_anchorNodeInfoArray.size());
00349 
00350                 for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
00351                 {
00352                         const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
00353                         m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
00354                         getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
00355                 }
00356                 
00357                 updateConstants(0.f);
00358 
00359                 // set position and velocity iterations
00360                 setNumberOfPositionIterations(maxPiterations);
00361                 setNumberOfVelocityIterations(maxViterations);
00362 
00363                 // set wind velocity
00364                 m_perClothWindVelocity.resize( m_softBodySet.size() );
00365                 for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
00366                 {
00367                         btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();                     
00368                         m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
00369                 }
00370 
00371                 m_clPerClothWindVelocity.changedOnCPU();
00372 
00373                 // generate batches
00374                 m_linkData.generateBatches();           
00375                 m_triangleData.generateBatches();
00376 
00377                 // Build the shaders to match the batching parameters
00378                 buildShaders();
00379         }
00380 }
00381 
00382 
00383 btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
00384 {
00385         // TODO: Consider setting link data to "changed" here
00386         return m_linkData;
00387 }
00388 
00389 
00390 
00391 
00392 void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
00393 {                       
00394 
00395         using namespace Vectormath::Aos;
00396 
00397         if( m_updateSolverConstants )
00398         {
00399                 m_updateSolverConstants = false;
00400 
00401                 // Will have to redo this if we change the structure (tear, maybe) or various other possible changes
00402 
00403                 // Initialise link constants
00404                 const int numLinks = m_linkData.getNumLinks();
00405                 for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
00406                 {
00407                         btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
00408                         m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
00409                         float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
00410                         float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
00411                         float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
00412                         float massLSC = (invMass0 + invMass1)/linearStiffness;
00413                         m_linkData.getMassLSC(linkIndex) = massLSC;
00414                         float restLength = m_linkData.getRestLength(linkIndex);
00415                         float restLengthSquared = restLength*restLength;
00416                         m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
00417                 }
00418         }
00419 
00420 }
00421 
00422 
00423 
00424 void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
00425 {
00426 
00427         using Vectormath::Aos::Vector3;
00428         using Vectormath::Aos::Point3;
00429         using Vectormath::Aos::lengthSqr;
00430         using Vectormath::Aos::dot;
00431 
00432         // Prepare links
00433         int numLinks = m_linkData.getNumLinks();
00434         int numVertices = m_vertexData.getNumVertices();
00435 
00436         float kst = 1.f;
00437         float ti = 0.f;
00438 
00439 
00440         m_clPerClothDampingFactor.moveToGPU();
00441         m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
00442 
00443 
00444         // Ensure data is on accelerator
00445         m_linkData.moveToAccelerator();
00446         m_vertexData.moveToAccelerator();
00447 
00448         
00449         //prepareLinks();       
00450 
00451         prepareCollisionConstraints();
00452 
00453         // Solve drift
00454         for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
00455         {
00456 
00457                 for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
00458                 {
00459                         int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
00460                         int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
00461                         solveLinksForPosition( startWave, numWaves, kst, ti );
00462                 }
00463         } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
00464 
00465         
00466         // At this point assume that the force array is blank - we will overwrite it
00467         solveCollisionsAndUpdateVelocities( 1.f/solverdt );
00468 }
00469 
00470 
00472 // Kernel dispatches
00473 
00474 
00475 void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
00476 {
00477         cl_int ciErrNum;
00478         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
00479         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
00480         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
00481         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
00482         
00483         
00484         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
00485         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
00486         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
00487         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
00488 
00489         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
00490         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
00491         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
00492 
00493         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
00494         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
00495         ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
00496 
00497         size_t  numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
00498         
00499         ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
00500         
00501         if( ciErrNum!= CL_SUCCESS ) 
00502         {
00503                 btAssert( 0 &&  "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
00504         }
00505 
00506 } // solveLinksForPosition
00507 
00508 void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
00509 {
00510         // Copy kernel parameters to GPU
00511         m_vertexData.moveToAccelerator();
00512         m_clPerClothFriction.moveToGPU();
00513         m_clPerClothDampingFactor.moveToGPU();
00514         m_clPerClothCollisionObjects.moveToGPU();
00515         m_clCollisionObjectDetails.moveToGPU();
00516         
00517         cl_int ciErrNum;
00518         int numVerts = m_vertexData.getNumVertices();
00519         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
00520         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);
00521         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
00522         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
00523         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
00524         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
00525         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
00526         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
00527         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
00528         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
00529         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
00530         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
00531         ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 12, sizeof(cl_mem),&m_vertexData.m_clVertexInverseMass.m_buffer);
00532         size_t  numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
00533         
00534         if (numWorkItems)
00535         {
00536                 ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
00537                 
00538                 if( ciErrNum != CL_SUCCESS ) 
00539                 {
00540                         btAssert( 0 &&  "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
00541                 }
00542         }
00543 
00544 } // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities
00545 
00546 // End kernel dispatches
00548 
00549 
00550 
00551 bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
00552 {
00553         releaseKernels();
00554 
00555         if( m_shadersInitialized )
00556                 return true;
00557 
00558         const char* additionalMacros="";
00559 
00560         m_currentCLFunctions->clearKernelCompilationFailures();
00561 
00562         char *wavefrontMacros = new char[256];
00563 
00564         sprintf(
00565                 wavefrontMacros, 
00566                 "-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d", 
00567                 m_linkData.getMaxVerticesPerWavefront(),
00568                 m_linkData.getMaxBatchesPerWavefront(),
00569                 m_linkData.getWavefrontSize(),
00570                 WAVEFRONT_BLOCK_MULTIPLIER,
00571                 WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
00572         
00573         m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
00574         m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
00575         m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
00576         m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
00577         m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
00578         m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
00579         m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
00580         m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
00581 
00582         // TODO: Rename to UpdateSoftBodies
00583         m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
00584         m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
00585         m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
00586 
00587         delete [] wavefrontMacros;
00588 
00589         if( m_currentCLFunctions->getKernelCompilationFailures()==0)
00590         {
00591                 m_shadersInitialized = true;
00592         }
00593 
00594         return m_shadersInitialized;
00595 }
00596 
00597 
00598 
00599 
00600 static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
00601 {
00602         Vectormath::Aos::Transform3 outTransform;
00603         outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
00604         outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
00605         outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
00606         outTransform.setCol(3, toVector3(transform.getOrigin()));
00607         return outTransform;    
00608 }
00609 
00610 
00611 static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
00612 {
00613         // A per-batch map of truth values stating whether a given vertex is in that batch
00614         // This allows us to significantly optimize the batching
00615         btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
00616 
00617         for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
00618         {
00619                 btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
00620 
00621                 int batch = 0;
00622                 bool placed = false;
00623                 while( batch < wavefrontBatches.size() && !placed )
00624                 {
00625                         // Test the current batch, see if this wave shares any vertex with the waves in the batch
00626                         bool foundSharedVertex = false;
00627                         for( int link = 0; link < wavefront.size(); ++link )
00628                         {
00629                                 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00630                                 if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
00631                                 {
00632                                         foundSharedVertex = true;
00633                                 }
00634                         }
00635 
00636                         if( !foundSharedVertex )
00637                         {
00638                                 wavefrontBatches[batch].push_back( waveIndex ); 
00639                                 // Insert vertices into this batch too
00640                                 for( int link = 0; link < wavefront.size(); ++link )
00641                                 {
00642                                         btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00643                                         (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
00644                                         (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
00645                                 }
00646                                 placed = true;
00647                         }
00648                         batch++;
00649                 }
00650                 if( batch == wavefrontBatches.size() && !placed )
00651                 {
00652                         wavefrontBatches.resize( batch + 1 );
00653                         wavefrontBatches[batch].push_back( waveIndex );
00654 
00655                         // And resize map as well
00656                         mapOfVerticesInBatches.resize( batch + 1 );
00657                         
00658                         // Resize maps with total number of vertices
00659                         mapOfVerticesInBatches[batch].resize( numVertices+1, false );
00660 
00661                         // Insert vertices into this batch too
00662                         for( int link = 0; link < wavefront.size(); ++link )
00663                         {
00664                                 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00665                                 (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
00666                                 (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
00667                         }
00668                 }
00669         }
00670         mapOfVerticesInBatches.clear();
00671 }
00672 
00673 // Function to remove an object from a vector maintaining correct ordering of the vector
00674 template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
00675 {
00676         int currentSize = vectorToUpdate.size();
00677         for( int i = indexToRemove; i < (currentSize-1); ++i )
00678         {
00679                 vectorToUpdate[i] = vectorToUpdate[i+1];
00680         }
00681         if( currentSize > 0 )
00682                 vectorToUpdate.resize( currentSize - 1 );
00683 }
00684 
00688 template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
00689 {
00690         vectorToUpdate.resize( vectorToUpdate.size() + 1 );
00691         for( int i = (vectorToUpdate.size() - 1); i > index; --i )
00692         {
00693                 vectorToUpdate[i] = vectorToUpdate[i-1];
00694         }
00695         vectorToUpdate[index] = element;
00696 }
00697 
00702 template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
00703 {
00704         int index = 0;
00705         while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
00706         {
00707                 index++;
00708         }
00709         if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
00710                 insertAtIndex( vectorToUpdate, index, element );
00711 }
00712 
00713 static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
00714 {
00715         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00716         {
00717                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00718                 numLinksPerVertex[nodes.vertex0]++;
00719                 numLinksPerVertex[nodes.vertex1]++;
00720         }
00721         int maxLinksPerVertex = 0;
00722         for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
00723         {
00724                 maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
00725         }
00726         maxLinks = maxLinksPerVertex;
00727 
00728         btAlignedObjectArray< int > linksFoundPerVertex;
00729         linksFoundPerVertex.resize( numVertices, 0 );
00730 
00731         listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
00732 
00733         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00734         {
00735                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00736                 {
00737                         // Do vertex 0
00738                         int vertexIndex = nodes.vertex0;
00739                         int linkForVertex = linksFoundPerVertex[nodes.vertex0];
00740                         int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
00741 
00742                         listOfLinksPerVertex[linkAddress] = linkIndex;
00743 
00744                         linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
00745                 }
00746                 {
00747                         // Do vertex 1
00748                         int vertexIndex = nodes.vertex1;
00749                         int linkForVertex = linksFoundPerVertex[nodes.vertex1];
00750                         int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
00751 
00752                         listOfLinksPerVertex[linkAddress] = linkIndex;
00753 
00754                         linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
00755                 }
00756         }
00757 }
00758 
00759 static void computeBatchingIntoWavefronts( 
00760         btSoftBodyLinkData &linkData, 
00761         int wavefrontSize, 
00762         int linksPerWorkItem, 
00763         int maxLinksPerWavefront, 
00764         btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
00765         btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
00766         btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
00767         )
00768 {
00769         
00770 
00771         // Attempt generation of larger batches of links.
00772         btAlignedObjectArray< bool > processedLink;
00773         processedLink.resize( linkData.getNumLinks() );
00774         btAlignedObjectArray< int > listOfLinksPerVertex;
00775         int maxLinksPerVertex = 0;
00776 
00777         // Count num vertices
00778         int numVertices = 0;
00779         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00780         {
00781                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00782                 numVertices = btMax( numVertices, nodes.vertex0 + 1 );
00783                 numVertices = btMax( numVertices, nodes.vertex1 + 1 );
00784         }
00785 
00786         // Need list of links per vertex
00787         // Compute valence of each vertex
00788         btAlignedObjectArray <int> numLinksPerVertex;
00789         numLinksPerVertex.resize(0);
00790         numLinksPerVertex.resize( numVertices, 0 );
00791 
00792         generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
00793 
00794         if (!numVertices)
00795                 return;
00796 
00797         for( int vertex = 0; vertex < 10; ++vertex )
00798         {
00799                 for( int link = 0; link < numLinksPerVertex[vertex]; ++link )
00800                 {
00801                         int linkAddress = vertex * maxLinksPerVertex + link;
00802                 }
00803         }
00804 
00805 
00806         // At this point we know what links we have for each vertex so we can start batching
00807         
00808         // We want a vertex to start with, let's go with 0
00809         int currentVertex = 0;
00810         int linksProcessed = 0;
00811 
00812         btAlignedObjectArray <int> verticesToProcess;
00813 
00814         while( linksProcessed < linkData.getNumLinks() )
00815         {
00816                 // Next wavefront
00817                 int nextWavefront = linksForWavefronts.size();
00818                 linksForWavefronts.resize( nextWavefront + 1 );
00819                 btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
00820                 verticesForWavefronts.resize( nextWavefront + 1 );
00821                 btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
00822 
00823                 linksForWavefront.resize(0);
00824 
00825                 // Loop to find enough links to fill the wavefront
00826                 // Stopping if we either run out of links, or fill it
00827                 while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
00828                 {
00829                         // Go through the links for the current vertex
00830                         for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
00831                         {
00832                                 int linkAddress = currentVertex * maxLinksPerVertex + link;
00833                                 int linkIndex = listOfLinksPerVertex[linkAddress];
00834                                 
00835                                 // If we have not already processed this link, add it to the wavefront
00836                                 // Claim it as another processed link
00837                                 // Add the vertex at the far end to the list of vertices to process.
00838                                 if( !processedLink[linkIndex] )
00839                                 {
00840                                         linksForWavefront.push_back( linkIndex );
00841                                         linksProcessed++;
00842                                         processedLink[linkIndex] = true;
00843                                         int v0 = linkData.getVertexPair(linkIndex).vertex0;
00844                                         int v1 = linkData.getVertexPair(linkIndex).vertex1;
00845                                         if( v0 == currentVertex )
00846                                                 verticesToProcess.push_back( v1 );
00847                                         else
00848                                                 verticesToProcess.push_back( v0 );
00849                                 }
00850                         }
00851                         if( verticesToProcess.size() > 0 )
00852                         {
00853                                 // Get the element on the front of the queue and remove it
00854                                 currentVertex = verticesToProcess[0];
00855                                 removeFromVector( verticesToProcess, 0 );
00856                         } else {                
00857                                 // If we've not yet processed all the links, find the first unprocessed one
00858                                 // and select one of its vertices as the current vertex
00859                                 if( linksProcessed < linkData.getNumLinks() )
00860                                 {
00861                                         int searchLink = 0;
00862                                         while( processedLink[searchLink] )
00863                                                 searchLink++;
00864                                         currentVertex = linkData.getVertexPair(searchLink).vertex0;
00865                                 }       
00866                         }
00867                 }
00868 
00869                 // We have either finished or filled a wavefront
00870                 for( int link = 0; link < linksForWavefront.size(); ++link )
00871                 {
00872                         int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
00873                         int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
00874                         insertUniqueAndOrderedIntoVector( vertexSet, v0 );
00875                         insertUniqueAndOrderedIntoVector( vertexSet, v1 );
00876                 }
00877                 // Iterate over links mapped to the wave and batch those
00878                 // We can run a batch on each cycle trivially
00879                 
00880                 batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
00881                 btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
00882                 
00883 
00884                 for( int link = 0; link < linksForWavefront.size(); ++link )
00885                 {
00886                         int linkIndex = linksForWavefront[link];
00887                         btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
00888                         
00889                         int batch = 0;
00890                         bool placed = false;
00891                         while( batch < batchesWithinWave.size() && !placed )
00892                         {
00893                                 bool foundSharedVertex = false;
00894                                 if( batchesWithinWave[batch].size() >= wavefrontSize )
00895                                 {
00896                                         // If we have already filled this batch, move on to another
00897                                         foundSharedVertex = true;
00898                                 } else {
00899                                         for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
00900                                         {
00901                                                 btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
00902 
00903                                                 if( vertices.vertex0 == vertices2.vertex0 ||
00904                                                         vertices.vertex1 == vertices2.vertex0 ||
00905                                                         vertices.vertex0 == vertices2.vertex1 ||
00906                                                         vertices.vertex1 == vertices2.vertex1 )
00907                                                 {
00908                                                         foundSharedVertex = true;
00909                                                         break;
00910                                                 }
00911                                         }
00912                                 }
00913                                 if( !foundSharedVertex )
00914                                 {
00915                                         batchesWithinWave[batch].push_back( linkIndex );
00916                                         placed = true;
00917                                 } else {
00918                                         ++batch;
00919                                 }
00920                         }
00921                         if( batch == batchesWithinWave.size() && !placed )
00922                         {
00923                                 batchesWithinWave.resize( batch + 1 );
00924                                 batchesWithinWave[batch].push_back( linkIndex );
00925                         }
00926                 }
00927                 
00928         }
00929 
00930 }
00931 
00932 void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
00933 {
00934         btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
00935         btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
00936         btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
00937         btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
00938 
00939         // Group the links into wavefronts
00940         computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
00941 
00942 
00943         // Batch the wavefronts
00944         generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
00945 
00946         m_numWavefronts = linksForWavefronts.size();
00947 
00948         // At this point we have a description of which links we need to process in each wavefront
00949 
00950         // First correctly fill the batch ranges vector
00951         int numBatches = wavefrontBatches.size();
00952         m_wavefrontBatchStartLengths.resize(0);
00953         int prefixSum = 0;
00954         for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
00955         {
00956                 int wavesInBatch = wavefrontBatches[batchIndex].size();
00957                 int nextPrefixSum = prefixSum + wavesInBatch;
00958                 m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
00959 
00960                 prefixSum += wavesInBatch;
00961         }
00962         
00963         // Also find max number of batches within a wave
00964         m_maxBatchesWithinWave = 0;
00965         m_maxVerticesWithinWave = 0;
00966         m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
00967         for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
00968         {
00969                 // See if the number of batches in this wave is greater than the current maxium
00970                 int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
00971                 int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
00972                 m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
00973                 m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
00974         }
00975         
00976         // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
00977         m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
00978 
00979         // Now we know the maximum number of vertices per-wave we can resize the global vertices array
00980         m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
00981 
00982         // Grab backup copies of all the link data arrays for the sorting process
00983         btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>                          m_links_Backup(m_links);
00984         btAlignedObjectArray<float>                                                                                     m_linkStrength_Backup(m_linkStrength);
00985         btAlignedObjectArray<float>                                                                                     m_linksMassLSC_Backup(m_linksMassLSC);
00986         btAlignedObjectArray<float>                                                                                     m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
00987         //btAlignedObjectArray<Vectormath::Aos::Vector3>                                                m_linksCLength_Backup(m_linksCLength);
00988         //btAlignedObjectArray<float>                                                                                   m_linksLengthRatio_Backup(m_linksLengthRatio);
00989         btAlignedObjectArray<float>                                                                                     m_linksRestLength_Backup(m_linksRestLength);
00990         btAlignedObjectArray<float>                                                                                     m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
00991 
00992         // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
00993         m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00994         m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00995         m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00996         m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00997         m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00998         m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00999         m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); 
01000                 
01001         // Then re-order links into wavefront blocks
01002 
01003         // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
01004         int wavefrontCount = 0;
01005 
01006         // Iterate over batches of wavefronts, then wavefronts in the batch
01007         for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
01008         {
01009                 btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
01010                 int wavefrontsInBatch = batch.size();
01011 
01012                 
01013                 for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
01014                 {       
01015 
01016                         int originalWavefrontIndex = batch[wavefrontIndex];
01017                         btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
01018                         int verticesUsedByWavefront = wavefrontVertices.size();
01019 
01020                         // Copy the set of vertices into the correctly structured array for use on the device
01021                         // Fill the non-vertices with -1s
01022                         // so we can mask out those reads
01023                         for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
01024                         {
01025                                 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
01026                         }
01027                         for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
01028                         {
01029                                 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
01030                         }
01031 
01032                         // Obtain the set of batches within the current wavefront
01033                         btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
01034                         // Set the size of the batches for use in the solver, correctly ordered
01035                         NumBatchesVerticesPair batchesAndVertices;
01036                         batchesAndVertices.numBatches = batchesWithinWavefront.size();
01037                         batchesAndVertices.numVertices = verticesUsedByWavefront;
01038                         m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
01039                         
01040 
01041                         // Now iterate over batches within the wavefront to structure the links correctly
01042                         for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
01043                         {
01044                                 btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
01045                                 int wavefrontBatchSize = linksInBatch.size();
01046 
01047                                 int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
01048 
01049                                 for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
01050                                 {
01051                                         int originalLinkAddress = linksInBatch[linkIndex];
01052                                         // Reorder simple arrays trivially
01053                                         m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
01054                                         m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
01055                                         m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
01056                                         m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
01057                                         m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
01058                                         m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
01059 
01060                                         // The local address is more complicated. We need to work out where a given vertex will end up
01061                                         // by searching the set of vertices for this link and using the index as the local address
01062                                         btSoftBodyLinkData::LinkNodePair localPair;
01063                                         btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
01064                                         localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
01065                                         localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
01066                                         m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
01067                                 }
01068                                 for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
01069                                 {
01070                                         // Put 0s into these arrays for padding for cleanliness
01071                                         m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
01072                                         m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
01073                                         m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
01074                                         m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
01075                                         m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
01076                                         m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
01077 
01078 
01079                                         // For local addresses of junk data choose a set of addresses just above the range of valid ones 
01080                                         // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
01081                                         // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
01082                                         // off the end of that range so we need no control
01083                                         btSoftBodyLinkData::LinkNodePair localPair;
01084                                         localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
01085                                         localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
01086                                         m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
01087                                 }
01088 
01089                         }
01090 
01091                         
01092                         wavefrontCount++;
01093                 }
01094 
01095         
01096         }
01097 
01098 } // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
01099 
01100 
01101