btSoftBodySolver_DX11SIMDAware.cpp

Go to the documentation of this file.
00001 /*
00002 Bullet Continuous Collision Detection and Physics Library
00003 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
00004 
00005 This software is provided 'as-is', without any express or implied warranty.
00006 In no event will the authors be held liable for any damages arising from the use of this software.
00007 Permission is granted to anyone to use this software for any purpose, 
00008 including commercial applications, and to alter it and redistribute it freely, 
00009 subject to the following restrictions:
00010 
00011 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
00012 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
00013 3. This notice may not be removed or altered from any source distribution.
00014 */
00015 
00016 #include <cstdio>
00017 
00018 
00019 #define WAVEFRONT_SIZE 32
00020 #define WAVEFRONT_BLOCK_MULTIPLIER 2
00021 #define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
00022 #define LINKS_PER_SIMD_LANE 16
00023 
00024 #define STRINGIFY( S ) STRINGIFY2( S )
00025 #define STRINGIFY2( S ) #S
00026 
00027 #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
00028 #include "vectormath/vmInclude.h"
00029 
00030 #include "btSoftBodySolverLinkData_DX11SIMDAware.h"
00031 #include "btSoftBodySolver_DX11SIMDAware.h"
00032 #include "btSoftBodySolverVertexBuffer_DX11.h"
00033 #include "BulletSoftBody/btSoftBody.h"
00034 #include "BulletCollision/CollisionShapes/btCapsuleShape.h"
00035 
00036 #define MSTRINGIFY(A) #A
00037 static char* UpdatePositionsFromVelocitiesHLSLString = 
00038 #include "HLSL/UpdatePositionsFromVelocities.hlsl"
00039 static char* SolvePositionsSIMDBatchedHLSLString = 
00040 #include "HLSL/SolvePositionsSIMDBatched.hlsl"
00041 static char* UpdateNodesHLSLString = 
00042 #include "HLSL/UpdateNodes.hlsl"
00043 static char* UpdatePositionsHLSLString = 
00044 #include "HLSL/UpdatePositions.hlsl"
00045 static char* UpdateConstantsHLSLString = 
00046 #include "HLSL/UpdateConstants.hlsl"
00047 static char* IntegrateHLSLString = 
00048 #include "HLSL/Integrate.hlsl"
00049 static char* ApplyForcesHLSLString = 
00050 #include "HLSL/ApplyForces.hlsl"
00051 static char* UpdateNormalsHLSLString = 
00052 #include "HLSL/UpdateNormals.hlsl"
00053 static char* OutputToVertexArrayHLSLString = 
00054 #include "HLSL/OutputToVertexArray.hlsl"
00055 static char* VSolveLinksHLSLString = 
00056 #include "HLSL/VSolveLinks.hlsl"
00057 static char* ComputeBoundsHLSLString = 
00058 #include "HLSL/ComputeBounds.hlsl"
00059 static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
00060 #include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
00061 
00062 
00063 
00064 btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
00065                 m_d3dDevice( d3dDevice ),
00066                 m_d3dDeviceContext( d3dDeviceContext ),
00067                 m_wavefrontSize( WAVEFRONT_SIZE ),
00068                 m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
00069                 m_maxBatchesWithinWave( 0 ),
00070                 m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
00071                 m_numWavefronts( 0 ),
00072                 m_maxVertex( 0 ),
00073                 m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ),
00074                 m_dx11WavefrontVerticesGlobalAddresses( d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ),
00075                 m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ),
00076                 m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ),
00077                 m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ),
00078                 m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ),
00079                 m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ),
00080                 m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true )
00081 {
00082         m_d3dDevice = d3dDevice;
00083         m_d3dDeviceContext = d3dDeviceContext;
00084 }
00085 
00086 btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware()
00087 {
00088 }
00089 
00090 static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
00091 {
00092         Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
00093         return outVec;
00094 }
00095 
00096 void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks )
00097 {
00098         int previousSize = m_links.size();
00099         int newSize = previousSize + numLinks;
00100 
00101         btSoftBodyLinkData::createLinks( numLinks );
00102 
00103         // Resize the link addresses array as well
00104         m_linkAddresses.resize( newSize );
00105 }
00106 
00107 void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
00108 {
00109         btSoftBodyLinkData::setLinkAt( link, linkIndex );
00110 
00111         if( link.getVertex0() > m_maxVertex )
00112                 m_maxVertex = link.getVertex0();
00113         if( link.getVertex1() > m_maxVertex )
00114                 m_maxVertex = link.getVertex1();
00115 
00116         // Set the link index correctly for initialisation
00117         m_linkAddresses[linkIndex] = linkIndex;
00118 }
00119 
00120 bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator()
00121 {
00122         return m_onGPU;
00123 }
00124 
00125 bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator()
00126 {
00127         bool success = true;
00128 
00129         success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU();
00130         success = success && m_dx11WavefrontVerticesGlobalAddresses.moveToGPU();
00131         success = success && m_dx11LinkVerticesLocalAddresses.moveToGPU();
00132         success = success && m_dx11LinkStrength.moveToGPU();
00133         success = success && m_dx11LinksMassLSC.moveToGPU();
00134         success = success && m_dx11LinksRestLengthSquared.moveToGPU();
00135         success = success && m_dx11LinksRestLength.moveToGPU();
00136         success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
00137 
00138         if( success )
00139                 m_onGPU = true;
00140 
00141         return success;
00142 }
00143 
00144 bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator()
00145 {
00146         bool success = true;
00147         success = success && m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU();
00148         success = success && m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU();
00149         success = success && m_dx11LinkVerticesLocalAddresses.moveFromGPU();
00150         success = success && m_dx11LinkStrength.moveFromGPU();
00151         success = success && m_dx11LinksMassLSC.moveFromGPU();
00152         success = success && m_dx11LinksRestLengthSquared.moveFromGPU();
00153         success = success && m_dx11LinksRestLength.moveFromGPU();
00154         success = success && m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
00155 
00156         if( success )
00157                 m_onGPU = false;
00158 
00159         return success;
00160 }
00161 
00162 
00163 
00164 
00165 
00166 
00167 
00168 
00169 
00170 
00171 
00172 
00173 
00174 
00175 
00176 btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
00177         btDX11SoftBodySolver( dx11Device, dx11Context, dx11CompileFromMemory ),
00178         m_linkData(m_dx11Device, m_dx11Context)
00179 {
00180         // Initial we will clearly need to update solver constants
00181         // For now this is global for the cloths linked with this solver - we should probably make this body specific 
00182         // for performance in future once we understand more clearly when constants need to be updated
00183         m_updateSolverConstants = true;
00184 
00185         m_shadersInitialized = false;
00186 }
00187 
00188 btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver()
00189 {
00190         releaseKernels();
00191 }
00192 
00193 
00194 btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData()
00195 {
00196         // TODO: Consider setting link data to "changed" here
00197         return m_linkData;
00198 }
00199 
00200 
00201 
00202 void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
00203 {
00204         if(forceUpdate || m_softBodySet.size() != softBodies.size() )
00205         {
00206                 // Have a change in the soft body set so update, reloading all the data
00207                 getVertexData().clear();
00208                 getTriangleData().clear();
00209                 getLinkData().clear();
00210                 m_softBodySet.resize(0);
00211 
00212 
00213                 for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
00214                 {
00215                         btSoftBody *softBody = softBodies[ softBodyIndex ];
00216                         using Vectormath::Aos::Matrix3;
00217                         using Vectormath::Aos::Point3;
00218 
00219                         // Create SoftBody that will store the information within the solver
00220                         btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
00221                         m_softBodySet.push_back( newSoftBody );
00222 
00223                         m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
00224                         m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
00225                         m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
00226                         m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
00227                         m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
00228                         m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
00229                         // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
00230                         m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
00231                         m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
00232                         m_perClothFriction.push_back( softBody->getFriction() );
00233                         m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
00234 
00235                         // Add space for new vertices and triangles in the default solver for now
00236                         // TODO: Include space here for tearing too later
00237                         int firstVertex = getVertexData().getNumVertices();
00238                         int numVertices = softBody->m_nodes.size();
00239                         // Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
00240                         // maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
00241                         int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
00242                         // Allocate space for new vertices in all the vertex arrays
00243                         getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
00244 
00245                         int firstTriangle = getTriangleData().getNumTriangles();
00246                         int numTriangles = softBody->m_faces.size();
00247                         int maxTriangles = numTriangles;
00248                         getTriangleData().createTriangles( maxTriangles );
00249 
00250                         // Copy vertices from softbody into the solver
00251                         for( int vertex = 0; vertex < numVertices; ++vertex )
00252                         {
00253                                 Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
00254                                 btSoftBodyVertexData::VertexDescription desc;
00255 
00256                                 // TODO: Position in the softbody might be pre-transformed
00257                                 // or we may need to adapt for the pose.
00258                                 //desc.setPosition( cloth.getMeshTransform()*multPoint );
00259                                 desc.setPosition( multPoint );
00260 
00261                                 float vertexInverseMass = softBody->m_nodes[vertex].m_im;
00262                                 desc.setInverseMass(vertexInverseMass);
00263                                 getVertexData().setVertexAt( desc, firstVertex + vertex );
00264                         }
00265 
00266                         // Copy triangles similarly
00267                         // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
00268                         for( int triangle = 0; triangle < numTriangles; ++triangle )
00269                         {
00270                                 // Note that large array storage is relative to the array not to the cloth
00271                                 // So we need to add firstVertex to each value
00272                                 int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
00273                                 int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
00274                                 int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
00275                                 btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
00276                                 getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
00277                                 
00278                                 // Increase vertex triangle counts for this triangle            
00279                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
00280                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
00281                                 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
00282                         }
00283 
00284                         int firstLink = getLinkData().getNumLinks();
00285                         int numLinks = softBody->m_links.size();
00286                         int maxLinks = numLinks;
00287                         
00288                         // Allocate space for the links
00289                         getLinkData().createLinks( numLinks );
00290 
00291                         // Add the links
00292                         for( int link = 0; link < numLinks; ++link )
00293                         {
00294                                 int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
00295                                 int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
00296 
00297                                 btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
00298                                 newLink.setLinkStrength(1.f);
00299                                 getLinkData().setLinkAt(newLink, firstLink + link);
00300                         }
00301                         
00302                         newSoftBody->setFirstVertex( firstVertex );
00303                         newSoftBody->setFirstTriangle( firstTriangle );
00304                         newSoftBody->setNumVertices( numVertices );
00305                         newSoftBody->setMaxVertices( maxVertices );
00306                         newSoftBody->setNumTriangles( numTriangles );
00307                         newSoftBody->setMaxTriangles( maxTriangles );
00308                         newSoftBody->setFirstLink( firstLink );
00309                         newSoftBody->setNumLinks( numLinks );
00310                 }
00311 
00312 
00313 
00314                 updateConstants(0.f);
00315 
00316 
00317                 m_linkData.generateBatches();           
00318                 m_triangleData.generateBatches();
00319 
00320                 
00321                 // Build the shaders to match the batching parameters
00322                 buildShaders();
00323         }
00324 
00325 }
00326 
00327 
00328 
00329 void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt )
00330 {
00331 
00332         //std::cerr << "'GPU' solve constraints\n";
00333         using Vectormath::Aos::Vector3;
00334         using Vectormath::Aos::Point3;
00335         using Vectormath::Aos::lengthSqr;
00336         using Vectormath::Aos::dot;
00337 
00338         // Prepare links
00339         int numLinks = m_linkData.getNumLinks();
00340         int numVertices = m_vertexData.getNumVertices();
00341 
00342         float kst = 1.f;
00343         float ti = 0.f;
00344 
00345 
00346         m_dx11PerClothDampingFactor.moveToGPU();
00347         m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
00348 
00349         
00350 
00351         // Ensure data is on accelerator
00352         m_linkData.moveToAccelerator();
00353         m_vertexData.moveToAccelerator();
00354 
00355 
00356         
00357         prepareCollisionConstraints();
00358 
00359 
00360         // Solve drift
00361         for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
00362         {
00363 
00364                 for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
00365                 {
00366                         int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
00367                         int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
00368 
00369                         solveLinksForPosition( startWave, numWaves, kst, ti );
00370                 }       
00371 
00372         } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
00373 
00374 
00375 
00376         
00377         // At this point assume that the force array is blank - we will overwrite it
00378         solveCollisionsAndUpdateVelocities( 1.f/solverdt );
00379 
00380 } // btDX11SIMDAwareSoftBodySolver::solveConstraints
00381 
00382 
00383 void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep )
00384 {
00385         using namespace Vectormath::Aos;
00386 
00387         if( m_updateSolverConstants )
00388         {
00389                 m_updateSolverConstants = false;
00390 
00391                 // Will have to redo this if we change the structure (tear, maybe) or various other possible changes
00392 
00393                 // Initialise link constants
00394                 const int numLinks = m_linkData.getNumLinks();
00395                 for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
00396                 {
00397                         btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
00398                         m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
00399                         float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
00400                         float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
00401                         float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
00402                         float massLSC = (invMass0 + invMass1)/linearStiffness;
00403                         m_linkData.getMassLSC(linkIndex) = massLSC;
00404                         float restLength = m_linkData.getRestLength(linkIndex);
00405                         float restLengthSquared = restLength*restLength;
00406                         m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
00407                 }
00408         }
00409 } // btDX11SIMDAwareSoftBodySolver::updateConstants
00410 
00412 // Kernel dispatches
00413 
00414 
00415 void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
00416 {
00417 
00418 
00419         m_vertexData.moveToAccelerator();
00420         m_linkData.moveToAccelerator();
00421 
00422         // Copy kernel parameters to GPU
00423         SolvePositionsFromLinksKernelCB constBuffer;
00424 
00425         // Set the first wave of the batch and the number of waves
00426         constBuffer.startWave = startWave;
00427         constBuffer.numWaves = numWaves;
00428 
00429         constBuffer.kst = kst;
00430         constBuffer.ti = ti;
00431         
00432         D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
00433         m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
00434         memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) );  
00435         m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
00436         m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
00437 
00438         // Set resources and dispatch
00439         m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) );
00440         m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) );
00441         m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
00442         m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) );
00443         m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
00444         m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
00445         
00446         m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
00447 
00448         // Execute the kernel
00449         m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
00450 
00451         int     numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER );
00452         m_dx11Context->Dispatch(numBlocks , 1, 1 );
00453 
00454         {
00455                 // Tidy up 
00456                 ID3D11ShaderResourceView* pViewNULL = NULL;
00457                 m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
00458                 m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
00459                 m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
00460                 m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL );
00461                 m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL );
00462                 m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL );
00463 
00464                 ID3D11UnorderedAccessView* pUAViewNULL = NULL;
00465                 m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
00466 
00467                 ID3D11Buffer *pBufferNull = NULL;
00468                 m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
00469         }       
00470 } // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition
00471 
00472 
00473 
00474 // End kernel dispatches
00476 
00477 
00478 
00479 
00480 
00481 
00482 
00483 
00484 
00485 bool btDX11SIMDAwareSoftBodySolver::buildShaders()
00486 {
00487         // Ensure current kernels are released first
00488         releaseKernels();
00489 
00490         bool returnVal = true;
00491 
00492 
00493         if( m_shadersInitialized )
00494                 return true;
00495 
00496         
00497         updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
00498         if( !updatePositionsFromVelocitiesKernel.constBuffer )
00499                 returnVal = false;
00500         
00501         char maxVerticesPerWavefront[20];
00502         char maxBatchesPerWavefront[20];
00503         char waveFrontSize[20];
00504         char waveFrontBlockMultiplier[20];
00505         char blockSize[20];
00506 
00507         sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront());
00508         sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront());
00509         sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize());    
00510         sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER);
00511         sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
00512         
00513         D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 };
00514 
00515         solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros );
00516         if( !solvePositionsFromLinksKernel.constBuffer )
00517                 returnVal = false;
00518 
00519         updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
00520         if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
00521                 returnVal = false;
00522         updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB));
00523         if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
00524                 returnVal = false;
00525         integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
00526         if( !integrateKernel.constBuffer )
00527                 returnVal = false;
00528         applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
00529         if( !applyForcesKernel.constBuffer )
00530                 returnVal = false;
00531         solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
00532         if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
00533                 returnVal = false;
00534         resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
00535         if( !resetNormalsAndAreasKernel.constBuffer )
00536                 returnVal = false;
00537         normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
00538         if( !normalizeNormalsAndAreasKernel.constBuffer )
00539                 returnVal = false;
00540         updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
00541         if( !updateSoftBodiesKernel.constBuffer )
00542                 returnVal = false;
00543         
00544         computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
00545         if( !computeBoundsKernel.constBuffer )
00546                 returnVal = false;
00547 
00548         if( returnVal )
00549                 m_shadersInitialized = true;
00550 
00551         return returnVal;
00552 } // btDX11SIMDAwareSoftBodySolver::buildShaders
00553 
00554 static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
00555 {
00556         Vectormath::Aos::Transform3 outTransform;
00557         outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
00558         outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
00559         outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
00560         outTransform.setCol(3, toVector3(transform.getOrigin()));
00561         return outTransform;    
00562 }
00563 
00564 
00565 
00566 
00567 
00568 
00569 
00570 
00571 
00572 
00573 
00574 
00575 static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
00576 {
00577         // A per-batch map of truth values stating whether a given vertex is in that batch
00578         // This allows us to significantly optimize the batching
00579         btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
00580 
00581         for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
00582         {
00583                 btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
00584 
00585                 int batch = 0;
00586                 bool placed = false;
00587                 while( batch < wavefrontBatches.size() && !placed )
00588                 {
00589                         // Test the current batch, see if this wave shares any vertex with the waves in the batch
00590                         bool foundSharedVertex = false;
00591                         for( int link = 0; link < wavefront.size(); ++link )
00592                         {
00593                                 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00594                                 if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
00595                                 {
00596                                         foundSharedVertex = true;
00597                                 }
00598                         }
00599 
00600                         if( !foundSharedVertex )
00601                         {
00602                                 wavefrontBatches[batch].push_back( waveIndex ); 
00603                                 // Insert vertices into this batch too
00604                                 for( int link = 0; link < wavefront.size(); ++link )
00605                                 {
00606                                         btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00607                                         (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
00608                                         (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
00609                                 }
00610                                 placed = true;
00611                         }
00612                         batch++;
00613                 }
00614                 if( batch == wavefrontBatches.size() && !placed )
00615                 {
00616                         wavefrontBatches.resize( batch + 1 );
00617                         wavefrontBatches[batch].push_back( waveIndex );
00618 
00619                         // And resize map as well
00620                         mapOfVerticesInBatches.resize( batch + 1 );
00621                         
00622                         // Resize maps with total number of vertices
00623                         mapOfVerticesInBatches[batch].resize( numVertices+1, false );
00624 
00625                         // Insert vertices into this batch too
00626                         for( int link = 0; link < wavefront.size(); ++link )
00627                         {
00628                                 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
00629                                 (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
00630                                 (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
00631                         }
00632                 }
00633         }
00634         mapOfVerticesInBatches.clear();
00635 }
00636 
00637 // Function to remove an object from a vector maintaining correct ordering of the vector
00638 template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
00639 {
00640         int currentSize = vectorToUpdate.size();
00641         for( int i = indexToRemove; i < (currentSize-1); ++i )
00642         {
00643                 vectorToUpdate[i] = vectorToUpdate[i+1];
00644         }
00645         if( currentSize > 0 )
00646                 vectorToUpdate.resize( currentSize - 1 );
00647 }
00648 
00652 template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
00653 {
00654         vectorToUpdate.resize( vectorToUpdate.size() + 1 );
00655         for( int i = (vectorToUpdate.size() - 1); i > index; --i )
00656         {
00657                 vectorToUpdate[i] = vectorToUpdate[i-1];
00658         }
00659         vectorToUpdate[index] = element;
00660 }
00661 
00666 template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
00667 {
00668         int index = 0;
00669         while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
00670         {
00671                 index++;
00672         }
00673         if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
00674                 insertAtIndex( vectorToUpdate, index, element );
00675 }
00676 
00677 static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
00678 {
00679         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00680         {
00681                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00682                 numLinksPerVertex[nodes.vertex0]++;
00683                 numLinksPerVertex[nodes.vertex1]++;
00684         }
00685         int maxLinksPerVertex = 0;
00686         for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
00687         {
00688                 maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
00689         }
00690         maxLinks = maxLinksPerVertex;
00691 
00692         btAlignedObjectArray< int > linksFoundPerVertex;
00693         linksFoundPerVertex.resize( numVertices, 0 );
00694 
00695         listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
00696 
00697         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00698         {
00699                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00700                 {
00701                         // Do vertex 0
00702                         int vertexIndex = nodes.vertex0;
00703                         int linkForVertex = linksFoundPerVertex[nodes.vertex0];
00704                         int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
00705 
00706                         listOfLinksPerVertex[linkAddress] = linkIndex;
00707 
00708                         linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
00709                 }
00710                 {
00711                         // Do vertex 1
00712                         int vertexIndex = nodes.vertex1;
00713                         int linkForVertex = linksFoundPerVertex[nodes.vertex1];
00714                         int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
00715 
00716                         listOfLinksPerVertex[linkAddress] = linkIndex;
00717 
00718                         linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
00719                 }
00720         }
00721 }
00722 
00723 static void computeBatchingIntoWavefronts( 
00724         btSoftBodyLinkData &linkData, 
00725         int wavefrontSize, 
00726         int linksPerWorkItem, 
00727         int maxLinksPerWavefront, 
00728         btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
00729         btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
00730         btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
00731         )
00732 {
00733         
00734 
00735         // Attempt generation of larger batches of links.
00736         btAlignedObjectArray< bool > processedLink;
00737         processedLink.resize( linkData.getNumLinks() );
00738         btAlignedObjectArray< int > listOfLinksPerVertex;
00739         int maxLinksPerVertex = 0;
00740 
00741         // Count num vertices
00742         int numVertices = 0;
00743         for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
00744         {
00745                 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
00746                 numVertices = btMax( numVertices, nodes.vertex0 + 1 );
00747                 numVertices = btMax( numVertices, nodes.vertex1 + 1 );
00748         }
00749 
00750         // Need list of links per vertex
00751         // Compute valence of each vertex
00752         btAlignedObjectArray <int> numLinksPerVertex;
00753         numLinksPerVertex.resize(0);
00754         numLinksPerVertex.resize( numVertices, 0 );
00755 
00756         generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
00757 
00758 
00759         // At this point we know what links we have for each vertex so we can start batching
00760         
00761         // We want a vertex to start with, let's go with 0
00762         int currentVertex = 0;
00763         int linksProcessed = 0;
00764 
00765         btAlignedObjectArray <int> verticesToProcess;
00766 
00767         while( linksProcessed < linkData.getNumLinks() )
00768         {
00769                 // Next wavefront
00770                 int nextWavefront = linksForWavefronts.size();
00771                 linksForWavefronts.resize( nextWavefront + 1 );
00772                 btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
00773                 verticesForWavefronts.resize( nextWavefront + 1 );
00774                 btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
00775 
00776                 linksForWavefront.resize(0);
00777 
00778                 // Loop to find enough links to fill the wavefront
00779                 // Stopping if we either run out of links, or fill it
00780                 while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
00781                 {
00782                         // Go through the links for the current vertex
00783                         for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
00784                         {
00785                                 int linkAddress = currentVertex * maxLinksPerVertex + link;
00786                                 int linkIndex = listOfLinksPerVertex[linkAddress];
00787                                 
00788                                 // If we have not already processed this link, add it to the wavefront
00789                                 // Claim it as another processed link
00790                                 // Add the vertex at the far end to the list of vertices to process.
00791                                 if( !processedLink[linkIndex] )
00792                                 {
00793                                         linksForWavefront.push_back( linkIndex );
00794                                         linksProcessed++;
00795                                         processedLink[linkIndex] = true;
00796                                         int v0 = linkData.getVertexPair(linkIndex).vertex0;
00797                                         int v1 = linkData.getVertexPair(linkIndex).vertex1;
00798                                         if( v0 == currentVertex )
00799                                                 verticesToProcess.push_back( v1 );
00800                                         else
00801                                                 verticesToProcess.push_back( v0 );
00802                                 }
00803                         }
00804                         if( verticesToProcess.size() > 0 )
00805                         {
00806                                 // Get the element on the front of the queue and remove it
00807                                 currentVertex = verticesToProcess[0];
00808                                 removeFromVector( verticesToProcess, 0 );
00809                         } else {                
00810                                 // If we've not yet processed all the links, find the first unprocessed one
00811                                 // and select one of its vertices as the current vertex
00812                                 if( linksProcessed < linkData.getNumLinks() )
00813                                 {
00814                                         int searchLink = 0;
00815                                         while( processedLink[searchLink] )
00816                                                 searchLink++;
00817                                         currentVertex = linkData.getVertexPair(searchLink).vertex0;
00818                                 }       
00819                         }
00820                 }
00821 
00822                 // We have either finished or filled a wavefront
00823                 for( int link = 0; link < linksForWavefront.size(); ++link )
00824                 {
00825                         int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
00826                         int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
00827                         insertUniqueAndOrderedIntoVector( vertexSet, v0 );
00828                         insertUniqueAndOrderedIntoVector( vertexSet, v1 );
00829                 }
00830                 // Iterate over links mapped to the wave and batch those
00831                 // We can run a batch on each cycle trivially
00832                 
00833                 batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
00834                 btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
00835                 
00836 
00837                 for( int link = 0; link < linksForWavefront.size(); ++link )
00838                 {
00839                         int linkIndex = linksForWavefront[link];
00840                         btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
00841                         
00842                         int batch = 0;
00843                         bool placed = false;
00844                         while( batch < batchesWithinWave.size() && !placed )
00845                         {
00846                                 bool foundSharedVertex = false;
00847                                 if( batchesWithinWave[batch].size() >= wavefrontSize )
00848                                 {
00849                                         // If we have already filled this batch, move on to another
00850                                         foundSharedVertex = true;
00851                                 } else {
00852                                         for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
00853                                         {
00854                                                 btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
00855 
00856                                                 if( vertices.vertex0 == vertices2.vertex0 ||
00857                                                         vertices.vertex1 == vertices2.vertex0 ||
00858                                                         vertices.vertex0 == vertices2.vertex1 ||
00859                                                         vertices.vertex1 == vertices2.vertex1 )
00860                                                 {
00861                                                         foundSharedVertex = true;
00862                                                         break;
00863                                                 }
00864                                         }
00865                                 }
00866                                 if( !foundSharedVertex )
00867                                 {
00868                                         batchesWithinWave[batch].push_back( linkIndex );
00869                                         placed = true;
00870                                 } else {
00871                                         ++batch;
00872                                 }
00873                         }
00874                         if( batch == batchesWithinWave.size() && !placed )
00875                         {
00876                                 batchesWithinWave.resize( batch + 1 );
00877                                 batchesWithinWave[batch].push_back( linkIndex );
00878                         }
00879                 }
00880                 
00881         }
00882 
00883 }
00884 
00885 void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
00886 {
00887         btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
00888         btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
00889         btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
00890         btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
00891 
00892         // Group the links into wavefronts
00893         computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
00894 
00895 
00896         // Batch the wavefronts
00897         generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
00898 
00899         m_numWavefronts = linksForWavefronts.size();
00900 
00901         // At this point we have a description of which links we need to process in each wavefront
00902 
00903         // First correctly fill the batch ranges vector
00904         int numBatches = wavefrontBatches.size();
00905         m_wavefrontBatchStartLengths.resize(0);
00906         int prefixSum = 0;
00907         for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
00908         {
00909                 int wavesInBatch = wavefrontBatches[batchIndex].size();
00910                 int nextPrefixSum = prefixSum + wavesInBatch;
00911                 m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
00912 
00913                 prefixSum += wavesInBatch;
00914         }
00915         
00916         // Also find max number of batches within a wave
00917         m_maxBatchesWithinWave = 0;
00918         m_maxVerticesWithinWave = 0;
00919         m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
00920         for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
00921         {
00922                 // See if the number of batches in this wave is greater than the current maxium
00923                 int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
00924                 int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
00925                 m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
00926                 m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
00927         }
00928         
00929         // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
00930         m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
00931 
00932         // Now we know the maximum number of vertices per-wave we can resize the global vertices array
00933         m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
00934 
00935         // Grab backup copies of all the link data arrays for the sorting process
00936         btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>                          m_links_Backup(m_links);
00937         btAlignedObjectArray<float>                                                                                     m_linkStrength_Backup(m_linkStrength);
00938         btAlignedObjectArray<float>                                                                                     m_linksMassLSC_Backup(m_linksMassLSC);
00939         btAlignedObjectArray<float>                                                                                     m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
00940         //btAlignedObjectArray<Vectormath::Aos::Vector3>                                                m_linksCLength_Backup(m_linksCLength);
00941         //btAlignedObjectArray<float>                                                                                   m_linksLengthRatio_Backup(m_linksLengthRatio);
00942         btAlignedObjectArray<float>                                                                                     m_linksRestLength_Backup(m_linksRestLength);
00943         btAlignedObjectArray<float>                                                                                     m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
00944 
00945         // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
00946         m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00947         m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00948         m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00949         m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00950         m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00951         m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
00952         m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts ); 
00953                 
00954         // Then re-order links into wavefront blocks
00955 
00956         // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
00957         int wavefrontCount = 0;
00958 
00959         // Iterate over batches of wavefronts, then wavefronts in the batch
00960         for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
00961         {
00962                 btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
00963                 int wavefrontsInBatch = batch.size();
00964 
00965                 
00966                 for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
00967                 {       
00968 
00969                         int originalWavefrontIndex = batch[wavefrontIndex];
00970                         btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
00971                         int verticesUsedByWavefront = wavefrontVertices.size();
00972 
00973                         // Copy the set of vertices into the correctly structured array for use on the device
00974                         // Fill the non-vertices with -1s
00975                         // so we can mask out those reads
00976                         for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
00977                         {
00978                                 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
00979                         }
00980                         for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
00981                         {
00982                                 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
00983                         }
00984 
00985                         // Obtain the set of batches within the current wavefront
00986                         btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
00987                         // Set the size of the batches for use in the solver, correctly ordered
00988                         NumBatchesVerticesPair batchesAndVertices;
00989                         batchesAndVertices.numBatches = batchesWithinWavefront.size();
00990                         batchesAndVertices.numVertices = verticesUsedByWavefront;
00991                         m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
00992                         
00993 
00994                         // Now iterate over batches within the wavefront to structure the links correctly
00995                         for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
00996                         {
00997                                 btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
00998                                 int wavefrontBatchSize = linksInBatch.size();
00999 
01000                                 int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
01001 
01002                                 for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
01003                                 {
01004                                         int originalLinkAddress = linksInBatch[linkIndex];
01005                                         // Reorder simple arrays trivially
01006                                         m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
01007                                         m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
01008                                         m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
01009                                         m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
01010                                         m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
01011                                         m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
01012 
01013                                         // The local address is more complicated. We need to work out where a given vertex will end up
01014                                         // by searching the set of vertices for this link and using the index as the local address
01015                                         btSoftBodyLinkData::LinkNodePair localPair;
01016                                         btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
01017                                         localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
01018                                         localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
01019                                         m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
01020                                 }
01021                                 for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
01022                                 {
01023                                         // Put 0s into these arrays for padding for cleanliness
01024                                         m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
01025                                         m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
01026                                         m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
01027                                         m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
01028                                         m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
01029                                         m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
01030 
01031 
01032                                         // For local addresses of junk data choose a set of addresses just above the range of valid ones 
01033                                         // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
01034                                         // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
01035                                         // off the end of that range so we need no control
01036                                         btSoftBodyLinkData::LinkNodePair localPair;
01037                                         localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
01038                                         localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
01039                                         m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
01040                                 }
01041 
01042                         }
01043 
01044                         
01045                         wavefrontCount++;
01046                 }
01047 
01048         
01049         }
01050 
01051 } // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()