btQuaternion.h — quaternion type for Bullet's linear math library (source listing).
00001 /*
00002 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
00003 
00004 This software is provided 'as-is', without any express or implied warranty.
00005 In no event will the authors be held liable for any damages arising from the use of this software.
00006 Permission is granted to anyone to use this software for any purpose, 
00007 including commercial applications, and to alter it and redistribute it freely, 
00008 subject to the following restrictions:
00009 
00010 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
00011 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
00012 3. This notice may not be removed or altered from any source distribution.
00013 */
00014 
00015 
00016 
00017 #ifndef BT_SIMD__QUATERNION_H_
00018 #define BT_SIMD__QUATERNION_H_
00019 
00020 
00021 #include "btVector3.h"
00022 #include "btQuadWord.h"
00023 
00024 
00025 
00026 
00027 
00028 #ifdef BT_USE_SSE
00029 
00030 const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
00031 
00032 #endif
00033 
00034 #if defined(BT_USE_SSE) || defined(BT_USE_NEON)
00035 
00036 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
00037 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
00038 
00039 #endif
00040 
/**@brief The btQuaternion implements quaternion to perform linear algebra rotations
 * in combination with btMatrix3x3, btVector3 and btTransform.
 * Storage order is (x, y, z, w) with w in the last component (m_floats[3]). */
class btQuaternion : public btQuadWord {
public:
	/**@brief No initialization constructor */
	btQuaternion() {}

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON) 
	// Set Vector 
	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
	{
		mVec128 = vec;
	}

	// Copy constructor
	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	// Assignment Operator
	SIMD_FORCE_INLINE btQuaternion& 
	operator=(const btQuaternion& v) 
	{
		mVec128 = v.mVec128;
		
		return *this;
	}
	
#endif

	//		template <typename btScalar>
	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
	/**@brief Constructor from scalars (x, y, z, w) */
	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w) 
		: btQuadWord(_x, _y, _z, _w) 
	{}
	/**@brief Axis angle constructor
	 * @param _axis The axis which the rotation is around (need not be normalized; see setRotation)
	 * @param _angle The magnitude of the rotation around the axis (radians) */
	btQuaternion(const btVector3& _axis, const btScalar& _angle) 
	{ 
		setRotation(_axis, _angle); 
	}
	/**@brief Constructor from Euler angles
	 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
	 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
	 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{ 
#ifndef BT_EULER_DEFAULT_ZYX
		setEuler(yaw, pitch, roll); 
#else
		setEulerZYX(yaw, pitch, roll); 
#endif 
	}
	/**@brief Set the rotation using axis angle notation 
	 * @param axis The axis around which to rotate (must have non-zero length; asserted below)
	 * @param _angle The magnitude of the rotation in radians */
	void setRotation(const btVector3& axis, const btScalar& _angle)
	{
		btScalar d = axis.length();
		btAssert(d != btScalar(0.0));
		// Normalize the axis and scale by sin(angle/2) in a single division.
		btScalar s = btSin(_angle * btScalar(0.5)) / d;
		setValue(axis.x() * s, axis.y() * s, axis.z() * s, 
			btCos(_angle * btScalar(0.5)));
	}
	/**@brief Set the quaternion using Euler angles
	 * @param yaw Angle around Y
	 * @param pitch Angle around X
	 * @param roll Angle around Z */
	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);  
		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);  
		btScalar halfRoll = btScalar(roll) * btScalar(0.5);  
		btScalar cosYaw = btCos(halfYaw);
		btScalar sinYaw = btSin(halfYaw);
		btScalar cosPitch = btCos(halfPitch);
		btScalar sinPitch = btSin(halfPitch);
		btScalar cosRoll = btCos(halfRoll);
		btScalar sinRoll = btSin(halfRoll);
		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
	}
	/**@brief Set the quaternion using euler angles (ZYX convention)
	 * @param yaw Angle around Z
	 * @param pitch Angle around Y
	 * @param roll Angle around X */
	void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);  
		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);  
		btScalar halfRoll = btScalar(roll) * btScalar(0.5);  
		btScalar cosYaw = btCos(halfYaw);
		btScalar sinYaw = btSin(halfYaw);
		btScalar cosPitch = btCos(halfPitch);
		btScalar sinPitch = btSin(halfPitch);
		btScalar cosRoll = btCos(halfRoll);
		btScalar sinRoll = btSin(halfRoll);
		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
			 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
			 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
			 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
	}
	/**@brief Add a quaternion to this one, component-wise
	 * @param q The quaternion to add to this one */
	SIMD_FORCE_INLINE	btQuaternion& operator+=(const btQuaternion& q)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_add_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vaddq_f32(mVec128, q.mVec128);
#else	
		m_floats[0] += q.x(); 
		m_floats[1] += q.y(); 
		m_floats[2] += q.z(); 
		m_floats[3] += q.m_floats[3];
#endif
		return *this;
	}

	/**@brief Subtract out a quaternion, component-wise
	 * @param q The quaternion to subtract from this one */
	btQuaternion& operator-=(const btQuaternion& q) 
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vsubq_f32(mVec128, q.mVec128);
#else	
		m_floats[0] -= q.x(); 
		m_floats[1] -= q.y(); 
		m_floats[2] -= q.z(); 
		m_floats[3] -= q.m_floats[3];
#endif
		return *this;
	}

	/**@brief Scale this quaternion
	 * @param s The scalar to scale by */
	btQuaternion& operator*=(const btScalar& s)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0);	//	(S S S S)
		mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_n_f32(mVec128, s);
#else
		m_floats[0] *= s; 
		m_floats[1] *= s; 
		m_floats[2] *= s; 
		m_floats[3] *= s;
#endif
		return *this;
	}

	/**@brief Multiply this quaternion by q on the right: this = this * q
	 * (Hamilton product; the scalar fallback below spells out the formula.)
	 * @param q The other quaternion */
	btQuaternion& operator*=(const btQuaternion& q)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vQ2 = q.get128();
		
		// Product assembled as AB0 - AB3 + (AB1 + AB2 with the last lane's
		// sign flipped by vPPPM); lanes are permuted with bt_pshufd_ps.
		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
		
		A1 = A1 * B1;
		
		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
		
		A2 = A2 * B2;
		
		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
		
		B1 = B1 * B2;	//	A3 *= B3
		
		mVec128 = bt_splat_ps(mVec128, 3);	//	A0
		mVec128 = mVec128 * vQ2;	//	A0 * B0
		
		A1 = A1 + A2;	//	AB12
		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3 
		A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
		mVec128 = mVec128+ A1;	//	AB03 + AB12

#elif defined(BT_USE_NEON)     

	float32x4_t vQ1 = mVec128;
	float32x4_t vQ2 = q.get128();
	float32x4_t A0, A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
	
	{
	float32x2x2_t tmp;
	tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
	vQ1zx = tmp.val[0];

	tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
	vQ2zx = tmp.val[0];
	}
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0

	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
	
	//	change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
	
	mVec128 = A0;
#else
		// Scalar Hamilton product; w is kept in m_floats[3].
		setValue(
			m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
#endif
		return *this;
	}
	/**@brief Return the 4-D dot product between this quaternion and another
	 * @param q The other quaternion */
	btScalar dot(const btQuaternion& q) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128	vd;
		
		vd = _mm_mul_ps(mVec128, q.mVec128);
		
		// Horizontal add of the four products.
		__m128 t = _mm_movehl_ps(vd, vd);
		vd = _mm_add_ps(vd, t);
		t = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, t);
		
		return _mm_cvtss_f32(vd);
#elif defined(BT_USE_NEON)
		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));  
		x = vpadd_f32(x, x);
		return vget_lane_f32(x, 0);
#else    
		return	m_floats[0] * q.x() + 
		m_floats[1] * q.y() + 
		m_floats[2] * q.z() + 
		m_floats[3] * q.m_floats[3];
#endif
	}

	/**@brief Return the length squared of the quaternion */
	btScalar length2() const
	{
		return dot(*this);
	}

	/**@brief Return the length of the quaternion */
	btScalar length() const
	{
		return btSqrt(length2());
	}

	/**@brief Normalize the quaternion in place 
	 * such that x^2 + y^2 + z^2 + w^2 = 1 */
	btQuaternion& normalize() 
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128	vd;
		
		vd = _mm_mul_ps(mVec128, mVec128);
		
		// Horizontal add -> squared length in lane 0.
		__m128 t = _mm_movehl_ps(vd, vd);
		vd = _mm_add_ps(vd, t);
		t = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, t);

		// 1/length computed via full-precision sqrt + div (not rsqrt).
		vd = _mm_sqrt_ss(vd);
		vd = _mm_div_ss(vOnes, vd);
		vd = bt_pshufd_ps(vd, 0); // splat
		mVec128 = _mm_mul_ps(mVec128, vd);
    
		return *this;
#else    
		return *this /= length();
#endif
	}

	/**@brief Return a scaled version of this quaternion
	 * @param s The scale factor */
	SIMD_FORCE_INLINE btQuaternion
	operator*(const btScalar& s) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
		
		return btQuaternion(_mm_mul_ps(mVec128, vs));
#elif defined(BT_USE_NEON)
		return btQuaternion(vmulq_n_f32(mVec128, s));
#else
		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
#endif
	}

	/**@brief Return an inversely scaled version of this quaternion
	 * @param s The inverse scale factor (must be non-zero; asserted) */
	btQuaternion operator/(const btScalar& s) const
	{
		btAssert(s != btScalar(0.0));
		return *this * (btScalar(1.0) / s);
	}

	/**@brief Inversely scale this quaternion in place
	 * @param s The inverse scale factor (must be non-zero; asserted) */
	btQuaternion& operator/=(const btScalar& s) 
	{
		btAssert(s != btScalar(0.0));
		return *this *= btScalar(1.0) / s;
	}

	/**@brief Return a normalized copy of this quaternion */
	btQuaternion normalized() const 
	{
		return *this / length();
	} 
	/**@brief Return the angle between this quaternion and the other,
	 * computed as acos(dot / (|this| * |q|))
	 * @param q The other quaternion */
	btScalar angle(const btQuaternion& q) const 
	{
		btScalar s = btSqrt(length2() * q.length2());
		btAssert(s != btScalar(0.0));
		return btAcos(dot(q) / s);
	}
	/**@brief Return the angle of rotation represented by this quaternion: 2*acos(w).
	 * NOTE(review): assumes the quaternion is normalized so |w| <= 1; a slightly
	 * denormalized quaternion (w just outside [-1,1]) yields NaN — confirm callers
	 * normalize first. */
	btScalar getAngle() const 
	{
		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
		return s;
	}

	/**@brief Return the axis of the rotation represented by this quaternion */
	btVector3 getAxis() const
	{
		btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
		
		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
			return btVector3(1.0, 0.0, 0.0);  // Arbitrary
		btScalar s = 1.f/btSqrt(s_squared);
		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
	}

	/**@brief Return the conjugate of this quaternion (x,y,z negated, w kept).
	 * Equals the inverse only for unit quaternions. */
	btQuaternion inverse() const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		// vQInv flips the sign bits of x, y, z only.
		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
#elif defined(BT_USE_NEON)
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
#else	
		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
	}

	/**@brief Return the component-wise sum of this quaternion and the other 
	 * @param q2 The other quaternion */
	SIMD_FORCE_INLINE btQuaternion
	operator+(const btQuaternion& q2) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
		return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
#else	
		const btQuaternion& q1 = *this;
		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
#endif
	}

	/**@brief Return the component-wise difference between this quaternion and the other 
	 * @param q2 The other quaternion */
	SIMD_FORCE_INLINE btQuaternion
	operator-(const btQuaternion& q2) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
		return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
#else	
		const btQuaternion& q1 = *this;
		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
#endif
	}

	/**@brief Return the negative of this quaternion 
	 * This simply negates each element */
	SIMD_FORCE_INLINE btQuaternion operator-() const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
#elif defined(BT_USE_NEON)
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
#else	
		const btQuaternion& q2 = *this;
		return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
#endif
	}
	/**@brief Of the two equivalent representations qd and -qd, return the one
	 * farther from this quaternion in 4-D distance. */
	SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const 
	{
		btQuaternion diff,sum;
		diff = *this - qd;
		sum = *this + qd;
		if( diff.dot(diff) > sum.dot(sum) )
			return qd;
		return (-qd);
	}

	/**@brief Of the two equivalent representations qd and -qd, return the one
	 * nearer to this quaternion in 4-D distance. */
	SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const 
	{
		btQuaternion diff,sum;
		diff = *this - qd;
		sum = *this + qd;
		if( diff.dot(diff) < sum.dot(sum) )
			return qd;
		return (-qd);
	}


	/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
	 * @param q The other quaternion to interpolate with 
	 * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t = 1 the result is q.
	 * Slerp interpolates assuming constant velocity. */
	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
	{
	  btScalar magnitude = btSqrt(length2() * q.length2()); 
	  btAssert(magnitude > btScalar(0));

	  btScalar product = dot(q) / magnitude;
	  if (btFabs(product) < btScalar(1))
		{
	  // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
	  const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);

	  const btScalar theta = btAcos(sign * product);
	  const btScalar s1 = btSin(sign * t * theta);   
	  const btScalar d = btScalar(1.0) / btSin(theta);
	  const btScalar s0 = btSin((btScalar(1.0) - t) * theta);

	  return btQuaternion(
	      (m_floats[0] * s0 + q.x() * s1) * d,
	      (m_floats[1] * s0 + q.y() * s1) * d,
	      (m_floats[2] * s0 + q.z() * s1) * d,
	      (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
		}
		else
		{
			// Quaternions are (anti-)parallel; interpolation degenerates.
			return *this;
		}
	}

	/**@brief Return the identity quaternion (0, 0, 0, 1) */
	static const btQuaternion&	getIdentity()
	{
		static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
		return identityQuat;
	}

	/**@brief Return the w (scalar) component */
	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }

	
};
00533 
00534 
00535 
00536 
00537 
/**@brief Return the product of two quaternions (Hamilton product; see the
 * scalar fallback at the bottom for the exact formula). */
SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q1, const btQuaternion& q2) 
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = q1.get128();
	__m128 vQ2 = q2.get128();
	__m128 A0, A1, B1, A2, B2;
    
	// Product assembled as AB0 - AB3 + (AB1 + AB2 with the last lane's sign
	// flipped by vPPPM); lane comments show the shuffled component order.
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y  z x     //	vtrn
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext

	A1 = A1 * B1;
	
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext 
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup

	A2 = A2 * B2;

	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
	
	B1 = B1 * B2;	//	A3 *= B3

	A0 = bt_splat_ps(vQ1, 3);	//	A0
	A0 = A0 * vQ2;	//	A0 * B0

	A1 = A1 + A2;	//	AB12
	A0 =  A0 - B1;	//	AB03 = AB0 - AB3 
	
	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
	A0 = A0 + A1;	//	AB03 + AB12
	
	return btQuaternion(A0);

#elif defined(BT_USE_NEON)     

	float32x4_t vQ1 = q1.get128();
	float32x4_t vQ2 = q2.get128();
	float32x4_t A0, A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
	
	{
	float32x2x2_t tmp;
	tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
	vQ1zx = tmp.val[0];

	tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
	vQ2zx = tmp.val[0];
	}
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0

	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
	
	//	change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
	
	return btQuaternion(A0);

#else
	// Scalar Hamilton product (w in the last component).
	return btQuaternion(
		q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z()); 
#endif
}
00626 
/**@brief Return the product of a quaternion and a vector: q * (x, y, z, 0),
 * i.e. the vector is treated as a pure quaternion with zero scalar part
 * (see the scalar fallback for the formula). */
SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q, const btVector3& w)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = q.get128();
	__m128 vQ2 = w.get128();
	__m128 A1, B1, A2, B2, A3, B3;
	
	// Same shuffle scheme as quaternion*quaternion, but the B0 term drops
	// out because the vector's w component is zero.
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));

	A1 = A1 * B1;
	
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));

	A2 = A2 * B2;

	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
	
	A3 = A3 * B3;	//	A3 *= B3

	A1 = A1 + A2;	//	AB12
	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
	A1 = A1 - A3;	//	AB123 = AB12 - AB3 
	
	return btQuaternion(A1);
    
#elif defined(BT_USE_NEON)     

	float32x4_t vQ1 = q.get128();
	float32x4_t vQ2 = w.get128();
	float32x4_t A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
	
	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); 
	{
	float32x2x2_t tmp;

	tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
	vQ2zx = tmp.val[0];

	tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
	vQ1zx = tmp.val[0];
	}

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X 
	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x 

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3);	//	A3 *= B3

	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
	
	//	change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
	
	A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
	
	return btQuaternion(A1);
    
#else
	// Scalar form: Hamilton product with the vector's scalar part = 0.
	return btQuaternion( 
		 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
		 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
		 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z()); 
#endif
}
00709 
/**@brief Return the product of a vector and a quaternion: (x, y, z, 0) * q,
 * i.e. the vector is treated as a pure quaternion with zero scalar part
 * (see the scalar fallback for the formula). */
SIMD_FORCE_INLINE btQuaternion
operator*(const btVector3& w, const btQuaternion& q)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = w.get128();
	__m128 vQ2 = q.get128();
	__m128 A1, B1, A2, B2, A3, B3;
	
	// Same shuffle scheme as quaternion*quaternion; the B0 term drops out
	// because the vector's w component is zero.
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));  // X Y  z x
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));  // W W  W X 

	A1 = A1 * B1;
	
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));

	A2 = A2 *B2;

	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
	
	A3 = A3 * B3;	//	A3 *= B3

	A1 = A1 + A2;	//	AB12
	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
	A1 = A1 - A3;	//	AB123 = AB12 - AB3 
	
	return btQuaternion(A1);

#elif defined(BT_USE_NEON)     

	float32x4_t vQ1 = w.get128();
	float32x4_t vQ2 = q.get128();
	float32x4_t  A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
	
	{
	float32x2x2_t tmp;
   
	tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
	vQ1zx = tmp.val[0];

	tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
	vQ2zx = tmp.val[0];
	}
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3);	//	A3 *= B3

	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
	
	//	change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
	
	A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
	
	return btQuaternion(A1);
    
#else
	// Scalar form: Hamilton product with the vector's scalar part = 0.
	return btQuaternion( 
		+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z()); 
#endif
}
00792 
00794 SIMD_FORCE_INLINE btScalar 
00795 dot(const btQuaternion& q1, const btQuaternion& q2) 
00796 { 
00797         return q1.dot(q2); 
00798 }
00799 
00800 
00802 SIMD_FORCE_INLINE btScalar
00803 length(const btQuaternion& q) 
00804 { 
00805         return q.length(); 
00806 }
00807 
00809 SIMD_FORCE_INLINE btScalar
00810 btAngle(const btQuaternion& q1, const btQuaternion& q2) 
00811 { 
00812         return q1.angle(q2); 
00813 }
00814 
00816 SIMD_FORCE_INLINE btQuaternion
00817 inverse(const btQuaternion& q) 
00818 {
00819         return q.inverse();
00820 }
00821 
00827 SIMD_FORCE_INLINE btQuaternion
00828 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t) 
00829 {
00830         return q1.slerp(q2, t);
00831 }
00832 
/**@brief Rotate a vector by a quaternion: computes rotation * v * rotation^-1
 * and returns the vector part of the result. */
SIMD_FORCE_INLINE btVector3 
quatRotate(const btQuaternion& rotation, const btVector3& v) 
{
	btQuaternion q = rotation * v;
	q *= rotation.inverse();
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	// Mask off the w lane so the returned vector has a clean last component.
	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
#elif defined(BT_USE_NEON)
	return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
#else	
	return btVector3(q.getX(),q.getY(),q.getZ());
#endif
}
00846 
00847 SIMD_FORCE_INLINE btQuaternion 
00848 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
00849 {
00850         btVector3 c = v0.cross(v1);
00851         btScalar  d = v0.dot(v1);
00852 
00853         if (d < -1.0 + SIMD_EPSILON)
00854         {
00855                 btVector3 n,unused;
00856                 btPlaneSpace1(v0,n,unused);
00857                 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
00858         }
00859 
00860         btScalar  s = btSqrt((1.0f + d) * 2.0f);
00861         btScalar rs = 1.0f / s;
00862 
00863         return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
00864 }
00865 
00866 SIMD_FORCE_INLINE btQuaternion 
00867 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
00868 {
00869         v0.normalize();
00870         v1.normalize();
00871         return shortestArcQuat(v0,v1);
00872 }
00873 
00874 #endif //BT_SIMD__QUATERNION_H_
00875 
00876 
00877