00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef BT_MATRIX3x3_H
00017 #define BT_MATRIX3x3_H
00018
00019 #include "btVector3.h"
00020 #include "btQuaternion.h"
00021 #include <stdio.h>
00022
#ifdef BT_USE_SSE

// Sign mask: XOR-ing a vector with this negates lane 0 (the -0.0f) and
// leaves lanes 1..3 unchanged.  Used by setRotation()'s SSE path.
const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
#endif

#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
// The three rows of the 3x3 identity matrix, as SIMD vectors (w lane zero).
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif

// Serialization alias: resolves to the float or double struct depending on
// the build's precision setting.
#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif //BT_USE_DOUBLE_PRECISION
00039
00040
/**@brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra
* in combination with btQuaternion, btTransform and btVector3.
* Make sure to only include a pure orthogonal matrix without scaling. */
ATTRIBUTE_ALIGNED16(class) btMatrix3x3 {

	///Data storage for the matrix, each vector is a row of the matrix
	btVector3 m_el[3];

public:
	/** @brief No initialization constructor */
	btMatrix3x3 () {}

	/**@brief Constructor from Quaternion */
	explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }

	/** @brief Constructor with row major formatting */
	btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
		const btScalar& yx, const btScalar& yy, const btScalar& yz,
		const btScalar& zx, const btScalar& zy, const btScalar& zz)
	{
		setValue(xx, xy, xz,
			yx, yy, yz,
			zx, zy, zz);
	}

#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	/**@brief Construct from three SIMD rows; the raw 128-bit values are stored as-is
	* (including the unused w lanes). */
	SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
	{
		m_el[0].mVec128 = v0;
		m_el[1].mVec128 = v1;
		m_el[2].mVec128 = v2;
	}

	/**@brief Construct from three row vectors. */
	SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
	{
		m_el[0] = v0;
		m_el[1] = v1;
		m_el[2] = v2;
	}

	/** @brief Copy constructor (copies the raw 128-bit rows). */
	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
	{
		m_el[0].mVec128 = rhs.m_el[0].mVec128;
		m_el[1].mVec128 = rhs.m_el[1].mVec128;
		m_el[2].mVec128 = rhs.m_el[2].mVec128;
	}

	/** @brief Assignment operator (copies the raw 128-bit rows). */
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
	{
		m_el[0].mVec128 = m.m_el[0].mVec128;
		m_el[1].mVec128 = m.m_el[1].mVec128;
		m_el[2].mVec128 = m.m_el[2].mVec128;

		return *this;
	}

#else

	/** @brief Copy constructor */
	SIMD_FORCE_INLINE btMatrix3x3 (const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
	}

	/** @brief Assignment operator */
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
		return *this;
	}

#endif

	/** @brief Get a column of the matrix as a vector
	*  @param i Column number 0 indexed (no bounds check) */
	SIMD_FORCE_INLINE btVector3 getColumn(int i) const
	{
		return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
	}

	/** @brief Get a row of the matrix as a vector
	*  @param i Row number 0 indexed */
	SIMD_FORCE_INLINE const btVector3& getRow(int i) const
	{
		btFullAssert(0 <= i && i < 3);
		return m_el[i];
	}

	/** @brief Get a mutable reference to a row of the matrix as a vector
	*  @param i Row number 0 indexed */
	SIMD_FORCE_INLINE btVector3& operator[](int i)
	{
		btFullAssert(0 <= i && i < 3);
		return m_el[i];
	}

	/** @brief Get a const reference to a row of the matrix as a vector
	*  @param i Row number 0 indexed */
	SIMD_FORCE_INLINE const btVector3& operator[](int i) const
	{
		btFullAssert(0 <= i && i < 3);
		return m_el[i];
	}

	/** @brief Multiply by the target matrix on the right
	*  @param m Rotation matrix to be applied
	* Equivalent to this = this * m */
	btMatrix3x3& operator*=(const btMatrix3x3& m);

	/** @brief Adds the target matrix to this one, element-wise
	*  @param m Matrix to be added
	* Equivalent to this = this + m */
	btMatrix3x3& operator+=(const btMatrix3x3& m);

	/** @brief Subtracts the target matrix from this one, element-wise
	*  @param m Matrix to be subtracted
	* Equivalent to this = this - m */
	btMatrix3x3& operator-=(const btMatrix3x3& m);

	/** @brief Set from the rotational part of a 4x4 OpenGL matrix
	*  @param m A pointer to the beginning of the array of scalars (column-major;
	*  only indices 0..10 are read) */
	void setFromOpenGLSubMatrix(const btScalar *m)
	{
		m_el[0].setValue(m[0],m[4],m[8]);
		m_el[1].setValue(m[1],m[5],m[9]);
		m_el[2].setValue(m[2],m[6],m[10]);

	}
	/** @brief Set the values of the matrix explicitly (row major)
	*  @param xx Top left
	*  @param xy Top middle
	*  @param xz Top right
	*  @param yx Middle left
	*  @param yy Middle middle
	*  @param yz Middle right
	*  @param zx Bottom left
	*  @param zy Bottom middle
	*  @param zz Bottom right */
	void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
		const btScalar& yx, const btScalar& yy, const btScalar& yz,
		const btScalar& zx, const btScalar& zy, const btScalar& zz)
	{
		m_el[0].setValue(xx,xy,xz);
		m_el[1].setValue(yx,yy,yz);
		m_el[2].setValue(zx,zy,zz);
	}

	/** @brief Set the matrix from a quaternion
	*  @param q The quaternion (need not be normalized; the scale factor s
	*  compensates for its squared length) */
	void setRotation(const btQuaternion& q)
	{
		btScalar d = q.length2();
		btFullAssert(d != btScalar(0.0));
		btScalar s = btScalar(2.0) / d;

#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// Vectorized form of the scalar quaternion-to-matrix expansion below:
		// builds the three rows as (cross-term products) * s + identity row.
		// The shuffle/xor sequences arrange the quaternion components and sign
		// flips so that each row is produced with two multiply-adds.
		__m128 vs, Q = q.get128();
		__m128i Qi = btCastfTo128i(Q);
		__m128 Y, Z;
		__m128 V1, V2, V3;
		__m128 V11, V21, V31;
		__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);	// NQ = -Q (all lanes negated)
		__m128i NQi = btCastfTo128i(NQ);

		V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3)));
		V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3));
		V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3)));
		V1 = _mm_xor_ps(V1, vMPPP);	// flip sign of lane 0 only

		V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3)));
		V21 = _mm_unpackhi_ps(Q, Q);
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3));

		V2 = V2 * V1;
		V1 = V1 * V11;
		V3 = V3 * V31;

		V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3));
		V11 = V11 * V21;
		V21 = _mm_xor_ps(V21, vMPPP);
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3));
		V31 = _mm_xor_ps(V31, vMPPP);
		Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3)));
		Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3)));

		vs = _mm_load_ss(&s);
		V21 = V21 * Y;
		V31 = V31 * Z;

		V1 = V1 + V11;
		V2 = V2 + V21;
		V3 = V3 + V31;

		vs = bt_splat3_ps(vs, 0);	// broadcast s to lanes x,y,z

		// scale the cross terms by s and add the identity rows
		V1 = V1 * vs;
		V2 = V2 * vs;
		V3 = V3 * vs;

		V1 = V1 + v1000;
		V2 = V2 + v0100;
		V3 = V3 + v0010;

		m_el[0] = V1;
		m_el[1] = V2;
		m_el[2] = V3;
#else
		// Standard quaternion-to-rotation-matrix expansion.
		btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
		btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
		btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
		btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
		setValue(
			btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
			xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
			xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
#endif
	}


	/** @brief Set the matrix from euler angles using YPR around YXZ respectively
	*  @param yaw Yaw about Y axis
	*  @param pitch Pitch about X axis
	*  @param roll Roll about Z axis */
	void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		setEulerZYX(roll, pitch, yaw);	// same rotation, opposite argument order
	}

	/** @brief Set the matrix from euler angles YPR around ZYX axes
	* @param eulerX Roll about X axis
	* @param eulerY Pitch around Y axis
	* @param eulerZ Yaw about Z axis
	*
	* These angles are used to produce a rotation matrix. The euler
	* angles are applied in ZYX order. */
	void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
		///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
		btScalar ci ( btCos(eulerX));
		btScalar cj ( btCos(eulerY));
		btScalar ch ( btCos(eulerZ));
		btScalar si ( btSin(eulerX));
		btScalar sj ( btSin(eulerY));
		btScalar sh ( btSin(eulerZ));
		btScalar cc = ci * ch;
		btScalar cs = ci * sh;
		btScalar sc = si * ch;
		btScalar ss = si * sh;

		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
			cj * sh, sj * ss + cc, sj * cs - sc,
			-sj, cj * si, cj * ci);
	}

	/**@brief Set the matrix to the identity */
	void setIdentity()
	{
#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
		m_el[0] = v1000;
		m_el[1] = v0100;
		m_el[2] = v0010;
#else
		setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
			btScalar(0.0), btScalar(1.0), btScalar(0.0),
			btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
	}

	/**@brief Return a const reference to a shared identity matrix */
	static const btMatrix3x3& getIdentity()
	{
#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
		static const btMatrix3x3
			identityMatrix(v1000, v0100, v0010);
#else
		static const btMatrix3x3
			identityMatrix(
			btScalar(1.0), btScalar(0.0), btScalar(0.0),
			btScalar(0.0), btScalar(1.0), btScalar(0.0),
			btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
		return identityMatrix;
	}

	/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
	* @param m The array to be filled (column-major; indices 0..11 are written) */
	void getOpenGLSubMatrix(btScalar *m) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// Transpose the three rows into three columns and store them, zeroing
		// the fourth lane of each output vector.
		__m128 v0 = m_el[0].mVec128;
		__m128 v1 = m_el[1].mVec128;
		__m128 v2 = m_el[2].mVec128;
		__m128 *vm = (__m128 *)m;	// NOTE(review): assumes m is 16-byte aligned
		__m128 vT;

		v2 = _mm_and_ps(v2, btvFFF0fMask);	// x2 y2 z2 0

		vT = _mm_unpackhi_ps(v0, v1);	// z0 z1 * *
		v0 = _mm_unpacklo_ps(v0, v1);	// x0 x1 y0 y1

		v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0
		v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
		v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));	// z0 z1 z2 0

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#elif defined(BT_USE_NEON)
		// NEON transpose of the 3x3 block, zero-padding the fourth lane.
		static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
		float32x4_t *vm = (float32x4_t *)m;	// NOTE(review): assumes m is 16-byte aligned
		float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );	// {x0 x1 z0 z1}, {y0 y1 w0 w1}
		float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );	// {x2 0}, {y2 0}
		float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
		float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
		float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );	// {z2 0}
		float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#else
		// Scalar path: write rows of this matrix into columns of m.
		m[0] = btScalar(m_el[0].x());
		m[1] = btScalar(m_el[1].x());
		m[2] = btScalar(m_el[2].x());
		m[3] = btScalar(0.0);
		m[4] = btScalar(m_el[0].y());
		m[5] = btScalar(m_el[1].y());
		m[6] = btScalar(m_el[2].y());
		m[7] = btScalar(0.0);
		m[8] = btScalar(m_el[0].z());
		m[9] = btScalar(m_el[1].z());
		m[10] = btScalar(m_el[2].z());
		m[11] = btScalar(0.0);
#endif
	}

	/**@brief Get the matrix represented as a quaternion
	* @param q The quaternion which will be set
	* Uses the standard Shepperd/Shoemake branch selection: take the square
	* root of the largest of (trace, diagonal elements) for numerical stability. */
	void getRotation(btQuaternion& q) const
	{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
		btScalar s, x;

		// Assemble the unscaled quaternion in a SIMD-compatible union, then
		// scale all four components at once with q *= s.
		union {
			btSimdFloat4 vec;
			btScalar f[4];
		} temp;

		if (trace > btScalar(0.0))
		{
			x = trace + btScalar(1.0);

			temp.f[0]=m_el[2].y() - m_el[1].z();
			temp.f[1]=m_el[0].z() - m_el[2].x();
			temp.f[2]=m_el[1].x() - m_el[0].y();
			temp.f[3]=x;

		}
		else
		{
			// pick i as the index of the largest diagonal element
			int i, j, k;
			if(m_el[0].x() < m_el[1].y())
			{
				if( m_el[1].y() < m_el[2].z() )
				{ i = 2; j = 0; k = 1; }
				else
				{ i = 1; j = 2; k = 0; }
			}
			else
			{
				if( m_el[0].x() < m_el[2].z())
				{ i = 2; j = 0; k = 1; }
				else
				{ i = 0; j = 1; k = 2; }
			}

			x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);

			temp.f[3] = (m_el[k][j] - m_el[j][k]);
			temp.f[j] = (m_el[j][i] + m_el[i][j]);
			temp.f[k] = (m_el[k][i] + m_el[i][k]);
			temp.f[i] = x;

		}

		s = btSqrt(x);
		q.set128(temp.vec);
		s = btScalar(0.5) / s;

		q *= s;
#else
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

		btScalar temp[4];

		if (trace > btScalar(0.0))
		{
			btScalar s = btSqrt(trace + btScalar(1.0));
			temp[3]=(s * btScalar(0.5));
			s = btScalar(0.5) / s;

			temp[0]=((m_el[2].y() - m_el[1].z()) * s);
			temp[1]=((m_el[0].z() - m_el[2].x()) * s);
			temp[2]=((m_el[1].x() - m_el[0].y()) * s);
		}
		else
		{
			// i = index of the largest diagonal element; j, k follow cyclically
			int i = m_el[0].x() < m_el[1].y() ?
				(m_el[1].y() < m_el[2].z() ? 2 : 1) :
				(m_el[0].x() < m_el[2].z() ? 2 : 0);
			int j = (i + 1) % 3;
			int k = (i + 2) % 3;

			btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
			temp[i] = s * btScalar(0.5);
			s = btScalar(0.5) / s;

			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
		}
		q.setValue(temp[0],temp[1],temp[2],temp[3]);
#endif
	}

	/**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
	* @param yaw Yaw around Y axis
	* @param pitch Pitch around X axis
	* @param roll around Z axis */
	void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
	{

		// first use the normal calculus
		yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
		pitch = btScalar(btAsin(-m_el[2].x()));
		roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));

		// on pitch = +/-HalfPI
		// NOTE(review): exact float equality — only catches the precise gimbal
		// lock value, not near-lock configurations; matches upstream behavior.
		if (btFabs(pitch)==SIMD_HALF_PI)
		{
			if (yaw>0)
				yaw-=SIMD_PI;
			else
				yaw+=SIMD_PI;

			if (roll>0)
				roll-=SIMD_PI;
			else
				roll+=SIMD_PI;
		}
	};	// NOTE(review): stray ';' after the body is a harmless empty declaration

	/**@brief Get the matrix represented as euler angles around ZYX
	* @param yaw Yaw around Z axis
	* @param pitch Pitch around Y axis
	* @param roll around X axis
	* @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values */
	void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
	{
		struct Euler
		{
			btScalar yaw;
			btScalar pitch;
			btScalar roll;
		};

		Euler euler_out;
		Euler euler_out2; //second solution

		// Check that pitch is not at a singularity (gimbal lock)
		if (btFabs(m_el[2].x()) >= 1)
		{
			euler_out.yaw = 0;
			euler_out2.yaw = 0;

			// From difference of angles formula
			btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
			if (m_el[2].x() > 0)  //gimbal locked up
			{
				euler_out.pitch = SIMD_PI / btScalar(2.0);
				euler_out2.pitch = SIMD_PI / btScalar(2.0);
				euler_out.roll = euler_out.pitch + delta;
				euler_out2.roll = euler_out.pitch + delta;
			}
			else // gimbal locked down
			{
				euler_out.pitch = -SIMD_PI / btScalar(2.0);
				euler_out2.pitch = -SIMD_PI / btScalar(2.0);
				euler_out.roll = -euler_out.pitch + delta;
				euler_out2.roll = -euler_out.pitch + delta;
			}
		}
		else
		{
			euler_out.pitch = - btAsin(m_el[2].x());
			euler_out2.pitch = SIMD_PI - euler_out.pitch;

			euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
				m_el[2].z()/btCos(euler_out.pitch));
			euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
				m_el[2].z()/btCos(euler_out2.pitch));

			euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
				m_el[0].x()/btCos(euler_out.pitch));
			euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
				m_el[0].x()/btCos(euler_out2.pitch));
		}

		if (solution_number == 1)
		{
			yaw = euler_out.yaw;
			pitch = euler_out.pitch;
			roll = euler_out.roll;
		}
		else
		{
			yaw = euler_out2.yaw;
			pitch = euler_out2.pitch;
			roll = euler_out2.roll;
		}
	}

	/**@brief Create a scaled copy of the matrix
	* @param s Scaling vector The elements of the vector will scale each column */
	btMatrix3x3 scaled(const btVector3& s) const
	{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
		return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
		return btMatrix3x3(
			m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
			m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
			m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
#endif
	}

	/**@brief Return the determinant of the matrix */
	btScalar determinant() const;
	/**@brief Return the adjoint of the matrix */
	btMatrix3x3 adjoint() const;
	/**@brief Return the matrix with all values non negative */
	btMatrix3x3 absolute() const;
	/**@brief Return the transpose of the matrix */
	btMatrix3x3 transpose() const;
	/**@brief Return the inverse of the matrix */
	btMatrix3x3 inverse() const;

	/**@brief Return transpose(this) * m */
	btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
	/**@brief Return this * transpose(m) */
	btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;

	/**@brief Dot product of column 0 with v (i.e. row 0 of the transpose). */
	SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const
	{
		return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
	}
	/**@brief Dot product of column 1 with v. */
	SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const
	{
		return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
	}
	/**@brief Dot product of column 2 with v. */
	SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const
	{
		return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
	}


	/**@brief diagonalizes this matrix by the Jacobi method.
	* @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
	* coordinate system, i.e., old_this = rot * new_this * rot^T.
	* @param threshold See iteration
	* @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied
	* by the sum of the absolute values of the diagonal, or when maxSteps have been executed.
	*
	* Note that this matrix is assumed to be symmetric. */
	void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
	{
		rot.setIdentity();
		for (int step = maxSteps; step > 0; step--)
		{
			// find off-diagonal element [p][q] with largest magnitude
			int p = 0;
			int q = 1;
			int r = 2;
			btScalar max = btFabs(m_el[0][1]);
			btScalar v = btFabs(m_el[0][2]);
			if (v > max)
			{
				q = 2;
				r = 1;
				max = v;
			}
			v = btFabs(m_el[1][2]);
			if (v > max)
			{
				p = 1;
				q = 2;
				r = 0;
				max = v;
			}

			btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
			if (max <= t)
			{
				if (max <= SIMD_EPSILON * t)
				{
					return;	// converged
				}
				step = 1;	// one final sweep at this precision
			}

			// compute Jacobi rotation J which leads to a zero for element [p][q]
			btScalar mpq = m_el[p][q];
			btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
			btScalar theta2 = theta * theta;
			btScalar cos;	// NOTE(review): shadows ::cos/::sin — intentional locals
			btScalar sin;
			if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
			{
				t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
					: 1 / (theta - btSqrt(1 + theta2));
				cos = 1 / btSqrt(1 + t * t);
				sin = cos * t;
			}
			else
			{
				// approximation for large theta: t ~ 1/(2*theta), avoids overflow
				t = 1 / (theta * (2 + btScalar(0.5) / theta2));
				cos = 1 - btScalar(0.5) * t * t;
				sin = cos * t;
			}

			// apply rotation to matrix (this = J^T * this * J)
			m_el[p][q] = m_el[q][p] = 0;
			m_el[p][p] -= t * mpq;
			m_el[q][q] += t * mpq;
			btScalar mrp = m_el[r][p];
			btScalar mrq = m_el[r][q];
			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;

			// apply rotation to rot (rot = rot * J)
			for (int i = 0; i < 3; i++)
			{
				btVector3& row = rot[i];
				mrp = row[p];
				mrq = row[q];
				row[p] = cos * mrp - sin * mrq;
				row[q] = cos * mrq + sin * mrp;
			}
		}
	}



	/**@brief Calculate the matrix cofactor
	* @param r1 The first row to use for calculating the cofactor
	* @param c1 The first column to use for calculating the cofactor
	* @param r2 The second row to use for calculating the cofactor
	* @param c2 The second column to use for calculating the cofactor
	* See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details */
	btScalar cofac(int r1, int c1, int r2, int c2) const
	{
		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
	}

	// Serialization helpers; definitions live in the corresponding .cpp / inline section.
	void serialize(struct btMatrix3x3Data& dataOut) const;

	void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;

	void deSerialize(const struct btMatrix3x3Data& dataIn);

	void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);

	void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);

};
00738
00739
// this = this * m.  Each result row i is the linear combination
// this[i].x * m[0] + this[i].y * m[1] + this[i].z * m[2].
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator*=(const btMatrix3x3& m)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	// mask off the unused w lane of m's rows so it cannot pollute the result
	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	// rv0: row 0 — broadcast each component of this[0], multiply by m's rows
	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	// rv1: row 1
	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	// rv2: row 2
	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);

#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	// mask off the unused w lane of m's rows
	mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	// multiply-accumulate: row_i = v_i.x*mv0 + v_i.y*mv1 + v_i.z*mv2
	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
#else
	// Scalar path: element (i,j) of the product is dot(row i of this, column j of m).
	setValue(
		m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
#endif
	return *this;
}
00829
// Element-wise addition: this = this + m.
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator+=(const btMatrix3x3& m)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0]+m.m_el[0][0],
		m_el[0][1]+m.m_el[0][1],
		m_el[0][2]+m.m_el[0][2],
		m_el[1][0]+m.m_el[1][0],
		m_el[1][1]+m.m_el[1][1],
		m_el[1][2]+m.m_el[1][2],
		m_el[2][0]+m.m_el[2][0],
		m_el[2][1]+m.m_el[2][1],
		m_el[2][2]+m.m_el[2][2]);
#endif
	return *this;
}
00851
// Uniform scaling: every element of m multiplied by k.
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m, const btScalar & k)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	// broadcast k across the vector lanes (see bt_splat_ps for the 0x80 mask)
	__m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x()*k,m[0].y()*k,m[0].z()*k,
		m[1].x()*k,m[1].y()*k,m[1].z()*k,
		m[2].x()*k,m[2].y()*k,m[2].z()*k);
#endif
}
00873
// Element-wise matrix addition.
SIMD_FORCE_INLINE btMatrix3x3
operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0]+m2[0][0],
		m1[0][1]+m2[0][1],
		m1[0][2]+m2[0][2],

		m1[1][0]+m2[1][0],
		m1[1][1]+m2[1][1],
		m1[1][2]+m2[1][2],

		m1[2][0]+m2[2][0],
		m1[2][1]+m2[2][1],
		m1[2][2]+m2[2][2]);
#endif
}
00897
// Element-wise matrix subtraction.
SIMD_FORCE_INLINE btMatrix3x3
operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0]-m2[0][0],
		m1[0][1]-m2[0][1],
		m1[0][2]-m2[0][2],

		m1[1][0]-m2[1][0],
		m1[1][1]-m2[1][1],
		m1[1][2]-m2[1][2],

		m1[2][0]-m2[2][0],
		m1[2][1]-m2[2][1],
		m1[2][2]-m2[2][2]);
#endif
}
00921
00922
// Element-wise subtraction: this = this - m.
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator-=(const btMatrix3x3& m)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0]-m.m_el[0][0],
		m_el[0][1]-m.m_el[0][1],
		m_el[0][2]-m.m_el[0][2],
		m_el[1][0]-m.m_el[1][0],
		m_el[1][1]-m.m_el[1][1],
		m_el[1][2]-m.m_el[1][2],
		m_el[2][0]-m.m_el[2][0],
		m_el[2][1]-m.m_el[2][1],
		m_el[2][2]-m.m_el[2][2]);
#endif
	return *this;
}
00944
00945
// Determinant as the scalar triple product of the three rows:
// row0 . (row1 x row2).
SIMD_FORCE_INLINE btScalar
btMatrix3x3::determinant() const
{
	return btTriple((*this)[0], (*this)[1], (*this)[2]);
}
00951
00952
// Element-wise absolute value of the matrix.
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::absolute() const
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	// clear the sign bit of every lane
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#else
	return btMatrix3x3(
		btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
		btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
		btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
#endif
}
00973
// Matrix transpose.  The SIMD paths perform a 3x3 transpose with the fourth
// lane of each output row forced to zero.
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transpose() const
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);	// x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);	// z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);	// x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));	// z0 z1 z2 0


	return btMatrix3x3( v0, v1, v2 );
#elif defined(BT_USE_NEON)
	// transpose via vector/element transposes, zero-padding lane 3
	static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
	float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );	// {x0 x1 z0 z1}, {y0 y1 w0 w1}
	float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );	// {x2 0}, {y2 0}
	float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
	float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
	float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );	// {z2 0}
	float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );
	return btMatrix3x3( v0, v1, v2 );
#else
	return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
		m_el[0].y(), m_el[1].y(), m_el[2].y(),
		m_el[0].z(), m_el[1].z(), m_el[2].z());
#endif
}
01010
01011 SIMD_FORCE_INLINE btMatrix3x3
01012 btMatrix3x3::adjoint() const
01013 {
01014 return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
01015 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
01016 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
01017 }
01018
01019 SIMD_FORCE_INLINE btMatrix3x3
01020 btMatrix3x3::inverse() const
01021 {
01022 btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
01023 btScalar det = (*this)[0].dot(co);
01024 btFullAssert(det != btScalar(0.0));
01025 btScalar s = btScalar(1.0) / det;
01026 return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
01027 co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
01028 co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
01029 }
01030
// Returns transpose(this) * m.  SIMD paths compute each result row as a
// linear combination of m's rows weighted by a column of this.
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	// mask off m's unused w lanes, then accumulate row-by-row
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
	__m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));	// broadcast x
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));	// broadcast y
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));	// broadcast z
	row = m_el[1].mVec128;
	r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
	return btMatrix3x3( r0, r1, r2 );

#elif defined BT_USE_NEON

	// same accumulation scheme using lane-broadcast multiply-accumulates
	static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
	float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
	float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
	float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
	return btMatrix3x3( r0, r1, r2 );
#else
	// Scalar path: element (i,j) = column i of this  dot  column j of m.
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
#endif
}
01086
// Returns this * transpose(m).  SIMD paths first transpose m, then compute
// each result row as a linear combination of the transposed rows.
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose(); // transpose() zeroes the w lanes
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	// row_i = a_i.x*mx + a_i.y*my + a_i.z*mz  (component broadcasts via shuffle)
	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3( r0, r1, r2);

#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose(); // transpose() zeroes the w lanes
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	// row_i = a_i.x*mx + a_i.y*my + a_i.z*mz via lane multiply-accumulate
	float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3( r0, r1, r2 );

#else
	// Scalar path: element (i,j) = row i of this  dot  row j of m.
	return btMatrix3x3(
		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
#endif
}
01139
/** @brief Multiply a matrix by a column vector: returns m * v. */
SIMD_FORCE_INLINE btVector3
operator*(const btMatrix3x3& m, const btVector3& v)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
	// dot3 evaluates the three row.v dot products in a single SIMD pass.
	return v.dot3(m[0], m[1], m[2]);
#else
	// Each result component is the dot of one row of m with v.
	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
#endif
}
01149
01150
/** @brief Multiply a row vector by a matrix: returns v * m (i.e. m^T * v). */
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btMatrix3x3& m)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	const __m128 vv = v.mVec128;

	// Broadcast each component of v across a register.
	__m128 c0 = bt_splat_ps( vv, 0);
	__m128 c1 = bt_splat_ps( vv, 1);
	__m128 c2 = bt_splat_ps( vv, 2);

	// Accumulate v.x*row0 + v.y*row1 + v.z*row2.  btvFFF0fMask zeroes the
	// unused w padding lane of each row so it cannot pollute the sum.
	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	// Mask off the w padding lane of each row before accumulating.
	c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	// v.x*row0 + v.y*row1 + v.z*row2 via lane-broadcast multiplies.
	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#else
	// Scalar fallback: tdotx/tdoty/tdotz dot v with the columns of m.
	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
01190
/** @brief Matrix product: returns m1 * m2. */
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	// Rows of m1.
	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	// Each result row i is m1[i].x*m2row0 + m1[i].y*m2row1 + m1[i].z*m2row2.
	// m2 rows are masked with btvFFF0fMask to clear the w padding lane.
	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	// x components of m1 rows, broadcast.
	__m128 c0 = bt_splat_ps( m10, 0);
	__m128 c1 = bt_splat_ps( m11, 0);
	__m128 c2 = bt_splat_ps( m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	// y components of m1 rows, broadcast.
	__m128 c0_1 = bt_splat_ps( m10, 1);
	__m128 c1_1 = bt_splat_ps( m11, 1);
	__m128 c2_1 = bt_splat_ps( m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	// z components: the m1 row registers are reused as scratch here.
	m10 = bt_splat_ps( m10, 2);
	m11 = bt_splat_ps( m11, 2);
	m12 = bt_splat_ps( m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);

#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	// Rows of m1.
	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	// Rows of m2 with the w padding lane cleared.
	mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	// Result row i = m1[i].x*mv0 + m1[i].y*mv1 + m1[i].z*mv2.
	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);

#else
	// Scalar fallback: element (i,j) = row_i(m1) . column_j(m2),
	// computed via m2's column-dot helpers.
	return btMatrix3x3(
		m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
		m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
		m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
#endif
}
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01293 SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
01294 {
01295 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
01296
01297 __m128 c0, c1, c2;
01298
01299 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
01300 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
01301 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
01302
01303 c0 = _mm_and_ps(c0, c1);
01304 c0 = _mm_and_ps(c0, c2);
01305
01306 return (0x7 == _mm_movemask_ps((__m128)c0));
01307 #else
01308 return
01309 ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
01310 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
01311 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
01312 #endif
01313 }
01314
///for serialization: plain-data mirror of btMatrix3x3 with single-precision rows
struct btMatrix3x3FloatData
{
	btVector3FloatData m_el[3];
};
01320
///for serialization: plain-data mirror of btMatrix3x3 with double-precision rows
struct btMatrix3x3DoubleData
{
	btVector3DoubleData m_el[3];
};
01326
01327
01328
01329
01330 SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
01331 {
01332 for (int i=0;i<3;i++)
01333 m_el[i].serialize(dataOut.m_el[i]);
01334 }
01335
01336 SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
01337 {
01338 for (int i=0;i<3;i++)
01339 m_el[i].serializeFloat(dataOut.m_el[i]);
01340 }
01341
01342
01343 SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
01344 {
01345 for (int i=0;i<3;i++)
01346 m_el[i].deSerialize(dataIn.m_el[i]);
01347 }
01348
01349 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
01350 {
01351 for (int i=0;i<3;i++)
01352 m_el[i].deSerializeFloat(dataIn.m_el[i]);
01353 }
01354
01355 SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
01356 {
01357 for (int i=0;i<3;i++)
01358 m_el[i].deSerializeDouble(dataIn.m_el[i]);
01359 }
01360
01361 #endif //BT_MATRIX3x3_H
01362