#ifndef BT_SIMD__QUATERNION_H_
#define BT_SIMD__QUATERNION_H_

#include "btVector3.h"
#include "btQuadWord.h"

#ifdef BT_USE_SSE

const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};

#endif

#if defined(BT_USE_SSE) || defined(BT_USE_NEON)

// Sign masks: XOR-ing a quaternion with these flips the sign bit of selected
// lanes. vQInv negates x, y, z (conjugation); vPPPM negates only w.
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};

#endif
/**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
class btQuaternion : public btQuadWord {
public:
    /**@brief No initialization constructor */
    btQuaternion() {}

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
    /**@brief Construct from a 128-bit SIMD vector */
    SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
    {
        mVec128 = vec;
    }

    /**@brief Copy constructor */
    SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
    {
        mVec128 = rhs.mVec128;
    }

    /**@brief Copy assignment */
    SIMD_FORCE_INLINE btQuaternion&
    operator=(const btQuaternion& v)
    {
        mVec128 = v.mVec128;

        return *this;
    }
#endif

    /**@brief Constructor from scalars */
    btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
        : btQuadWord(_x, _y, _z, _w)
    {}

    /**@brief Axis-angle constructor
     * @param _axis The axis around which the rotation occurs
     * @param _angle The magnitude of the rotation around the axis (radians) */
    btQuaternion(const btVector3& _axis, const btScalar& _angle)
    {
        setRotation(_axis, _angle);
    }

    /**@brief Constructor from Euler angles
     * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX is defined, then Z
     * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX is defined, then Y
     * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX is defined, then X */
    btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
    {
#ifndef BT_EULER_DEFAULT_ZYX
        setEuler(yaw, pitch, roll);
#else
        setEulerZYX(yaw, pitch, roll);
#endif
    }

    /**@brief Set the rotation using axis-angle notation
     * @param axis The axis around which to rotate (need not be normalized)
     * @param _angle The magnitude of the rotation in radians */
    void setRotation(const btVector3& axis, const btScalar& _angle)
    {
        btScalar d = axis.length();
        btAssert(d != btScalar(0.0));
        btScalar s = btSin(_angle * btScalar(0.5)) / d;
        setValue(axis.x() * s, axis.y() * s, axis.z() * s,
                 btCos(_angle * btScalar(0.5)));
    }

    /**@brief Set the quaternion using Euler angles
     * @param yaw Angle around Y
     * @param pitch Angle around X
     * @param roll Angle around Z */
    void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
    {
        btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
        btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
        btScalar halfRoll = btScalar(roll) * btScalar(0.5);
        btScalar cosYaw = btCos(halfYaw);
        btScalar sinYaw = btSin(halfYaw);
        btScalar cosPitch = btCos(halfPitch);
        btScalar sinPitch = btSin(halfPitch);
        btScalar cosRoll = btCos(halfRoll);
        btScalar sinRoll = btSin(halfRoll);
        setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
                 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
                 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
                 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
    }

    /**@brief Set the quaternion using Euler angles in ZYX order
     * @param yaw Angle around Z
     * @param pitch Angle around Y
     * @param roll Angle around X */
    void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
    {
        btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
        btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
        btScalar halfRoll = btScalar(roll) * btScalar(0.5);
        btScalar cosYaw = btCos(halfYaw);
        btScalar sinYaw = btSin(halfYaw);
        btScalar cosPitch = btCos(halfPitch);
        btScalar sinPitch = btSin(halfPitch);
        btScalar cosRoll = btCos(halfRoll);
        btScalar sinRoll = btSin(halfRoll);
        setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
                 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
                 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
                 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
    }
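
    /* Convention note (illustrative sketch, not part of the original header):
       the two Euler setters differ only in which axis each argument maps to.

           btQuaternion q;
           q.setEuler(yaw, pitch, roll);     // yaw about Y, pitch about X, roll about Z
           q.setEulerZYX(yaw, pitch, roll);  // yaw about Z, pitch about Y, roll about X
    */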

    /**@brief Add two quaternions
     * @param q The quaternion to add to this one */
    SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        mVec128 = _mm_add_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
        mVec128 = vaddq_f32(mVec128, q.mVec128);
#else
        m_floats[0] += q.x();
        m_floats[1] += q.y();
        m_floats[2] += q.z();
        m_floats[3] += q.m_floats[3];
#endif
        return *this;
    }

    /**@brief Subtract out a quaternion
     * @param q The quaternion to subtract from this one */
    btQuaternion& operator-=(const btQuaternion& q)
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        mVec128 = _mm_sub_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
        mVec128 = vsubq_f32(mVec128, q.mVec128);
#else
        m_floats[0] -= q.x();
        m_floats[1] -= q.y();
        m_floats[2] -= q.z();
        m_floats[3] -= q.m_floats[3];
#endif
        return *this;
    }

    /**@brief Scale this quaternion
     * @param s The scalar to scale by */
    btQuaternion& operator*=(const btScalar& s)
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vs = _mm_load_ss(&s);    // (S 0 0 0)
        vs = bt_pshufd_ps(vs, 0);       // (S S S S)
        mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
        mVec128 = vmulq_n_f32(mVec128, s);
#else
        m_floats[0] *= s;
        m_floats[1] *= s;
        m_floats[2] *= s;
        m_floats[3] *= s;
#endif
        return *this;
    }

    /**@brief Multiply this quaternion by q on the right; equivalent to this = this * q
     * @param q The other quaternion */
    btQuaternion& operator*=(const btQuaternion& q)
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vQ2 = q.get128();

        __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0, 1, 2, 0));  // this.xyzx
        __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));      // q.wwwx

        A1 = A1 * B1;

        __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 1));  // this.yzxy
        __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));      // q.zxyy

        A2 = A2 * B2;

        B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2, 0, 1, 2));         // this.zxyz
        B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));             // q.yzxz

        B1 = B1 * B2;

        mVec128 = bt_splat_ps(mVec128, 3);  // (W W W W)
        mVec128 = mVec128 * vQ2;            // this.w * q

        A1 = A1 + A2;
        mVec128 = mVec128 - B1;
        A1 = _mm_xor_ps(A1, vPPPM);         // change the sign of the last element
        mVec128 = mVec128 + A1;

#elif defined(BT_USE_NEON)
        float32x4_t vQ1 = mVec128;
        float32x4_t vQ2 = q.get128();
        float32x4_t A0, A1, B1, A2, B2, A3, B3;
        float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

        {
            float32x2x2_t tmp;
            tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
            vQ1zx = tmp.val[0];

            tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
            vQ2zx = tmp.val[0];
        }
        vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);  // {w x}

        vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);  // {y z}

        vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);  // {y z}
        vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);                           // {x z}

        A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // this.xyzx
        B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // q.wwwx

        A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                     // this.yzxy
        B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));   // q.zxyy

        A3 = vcombine_f32(vQ1zx, vQ1yz);                                 // this.zxyz
        B3 = vcombine_f32(vQ2yz, vQ2xz);                                 // q.yzxz

        A1 = vmulq_f32(A1, B1);
        A2 = vmulq_f32(A2, B2);
        A3 = vmulq_f32(A3, B3);
        A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);                 // this.w * q

        A1 = vaddq_f32(A1, A2);
        A0 = vsubq_f32(A0, A3);

        // change the sign of the last element
        A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
        A0 = vaddq_f32(A0, A1);

        mVec128 = A0;
#else
        setValue(
            m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
            m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
            m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
            m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
#endif
        return *this;
    }

    /**@brief Return the dot product between this quaternion and another
     * @param q The other quaternion */
    btScalar dot(const btQuaternion& q) const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vd;

        vd = _mm_mul_ps(mVec128, q.mVec128);

        __m128 t = _mm_movehl_ps(vd, vd);
        vd = _mm_add_ps(vd, t);
        t = _mm_shuffle_ps(vd, vd, 0x55);
        vd = _mm_add_ss(vd, t);

        return _mm_cvtss_f32(vd);
#elif defined(BT_USE_NEON)
        float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
        float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
        x = vpadd_f32(x, x);
        return vget_lane_f32(x, 0);
#else
        return m_floats[0] * q.x() +
               m_floats[1] * q.y() +
               m_floats[2] * q.z() +
               m_floats[3] * q.m_floats[3];
#endif
    }

    /**@brief Return the length squared of the quaternion */
    btScalar length2() const
    {
        return dot(*this);
    }

    /**@brief Return the length of the quaternion */
    btScalar length() const
    {
        return btSqrt(length2());
    }

    /**@brief Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1 */
    btQuaternion& normalize()
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vd;

        vd = _mm_mul_ps(mVec128, mVec128);

        __m128 t = _mm_movehl_ps(vd, vd);
        vd = _mm_add_ps(vd, t);
        t = _mm_shuffle_ps(vd, vd, 0x55);
        vd = _mm_add_ss(vd, t);

        vd = _mm_sqrt_ss(vd);
        vd = _mm_div_ss(vOnes, vd);
        vd = bt_pshufd_ps(vd, 0);  // splat the reciprocal length
        mVec128 = _mm_mul_ps(mVec128, vd);

        return *this;
#else
        return *this /= length();
#endif
    }
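
    /* Note the mutating/non-mutating pair: normalize() scales this quaternion in
       place and returns *this, while normalized() (below) returns a unit-length
       copy. A minimal sketch:

           btQuaternion q(1, 2, 3, 4);
           btQuaternion u = q.normalized();  // q unchanged, u has unit length
           q.normalize();                    // q itself now has unit length
    */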

    /**@brief Return a scaled version of this quaternion
     * @param s The scale factor */
    SIMD_FORCE_INLINE btQuaternion
    operator*(const btScalar& s) const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        __m128 vs = _mm_load_ss(&s);  // (S 0 0 0)
        vs = bt_pshufd_ps(vs, 0x00);  // (S S S S)

        return btQuaternion(_mm_mul_ps(mVec128, vs));
#elif defined(BT_USE_NEON)
        return btQuaternion(vmulq_n_f32(mVec128, s));
#else
        return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
#endif
    }

    /**@brief Return an inversely scaled version of this quaternion
     * @param s The inverse scale factor */
    btQuaternion operator/(const btScalar& s) const
    {
        btAssert(s != btScalar(0.0));
        return *this * (btScalar(1.0) / s);
    }

    /**@brief Inversely scale this quaternion
     * @param s The scale factor */
    btQuaternion& operator/=(const btScalar& s)
    {
        btAssert(s != btScalar(0.0));
        return *this *= btScalar(1.0) / s;
    }

    /**@brief Return a normalized version of this quaternion */
    btQuaternion normalized() const
    {
        return *this / length();
    }

    /**@brief Return the half angle between this quaternion and the other
     * @param q The other quaternion */
    btScalar angle(const btQuaternion& q) const
    {
        btScalar s = btSqrt(length2() * q.length2());
        btAssert(s != btScalar(0.0));
        return btAcos(dot(q) / s);
    }

    /**@brief Return the angle of rotation represented by this quaternion */
    btScalar getAngle() const
    {
        btScalar s = btScalar(2.) * btAcos(m_floats[3]);
        return s;
    }

    /**@brief Return the axis of the rotation represented by this quaternion */
    btVector3 getAxis() const
    {
        btScalar s_squared = 1.f - m_floats[3] * m_floats[3];

        if (s_squared < btScalar(10.) * SIMD_EPSILON)  // guard against divide by zero
            return btVector3(1.0, 0.0, 0.0);           // arbitrary axis for a near-identity rotation
        btScalar s = 1.f / btSqrt(s_squared);
        return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
    }
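
    /* getAngle()/getAxis() invert the axis-angle constructor (up to axis
       normalization and the q/-q ambiguity). Illustrative round trip:

           btQuaternion q(btVector3(0, 0, 1), btScalar(0.5));  // 0.5 rad about Z
           btScalar a = q.getAngle();   // ~0.5
           btVector3 ax = q.getAxis();  // ~(0, 0, 1)
    */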

    /**@brief Return the inverse of this quaternion */
    btQuaternion inverse() const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        return btQuaternion(_mm_xor_ps(mVec128, vQInv));
#elif defined(BT_USE_NEON)
        return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
#else
        return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
    }

    /**@brief Return the sum of this quaternion and the other
     * @param q2 The other quaternion */
    SIMD_FORCE_INLINE btQuaternion
    operator+(const btQuaternion& q2) const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
        return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
#else
        const btQuaternion& q1 = *this;
        return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
#endif
    }

    /**@brief Return the difference between this quaternion and the other
     * @param q2 The other quaternion */
    SIMD_FORCE_INLINE btQuaternion
    operator-(const btQuaternion& q2) const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
        return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
#else
        const btQuaternion& q1 = *this;
        return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
#endif
    }

    /**@brief Return the negative of this quaternion; this simply negates each element */
    SIMD_FORCE_INLINE btQuaternion operator-() const
    {
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
        return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
#elif defined(BT_USE_NEON)
        return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
#else
        const btQuaternion& q2 = *this;
        return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
#endif
    }

    /**@brief Return whichever of qd and -qd is farther from this quaternion
     * (qd and -qd represent the same rotation) */
    SIMD_FORCE_INLINE btQuaternion farthest(const btQuaternion& qd) const
    {
        btQuaternion diff, sum;
        diff = *this - qd;
        sum = *this + qd;
        if (diff.dot(diff) > sum.dot(sum))
            return qd;
        return (-qd);
    }

    /**@brief Return whichever of qd and -qd is nearer to this quaternion
     * (qd and -qd represent the same rotation) */
    SIMD_FORCE_INLINE btQuaternion nearest(const btQuaternion& qd) const
    {
        btQuaternion diff, sum;
        diff = *this - qd;
        sum = *this + qd;
        if (diff.dot(diff) < sum.dot(sum))
            return qd;
        return (-qd);
    }

    /**@brief Return the quaternion which is the result of spherical linear interpolation between this and the other quaternion
     * @param q The other quaternion to interpolate with
     * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t = 1 the result is q.
     * Slerp interpolates assuming constant velocity. */
    btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
    {
        btScalar magnitude = btSqrt(length2() * q.length2());
        btAssert(magnitude > btScalar(0));

        btScalar product = dot(q) / magnitude;
        if (btFabs(product) < btScalar(1))
        {
            // the sign flip keeps interpolation on the shorter arc
            const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);

            const btScalar theta = btAcos(sign * product);
            const btScalar s1 = btSin(sign * t * theta);
            const btScalar d = btScalar(1.0) / btSin(theta);
            const btScalar s0 = btSin((btScalar(1.0) - t) * theta);

            return btQuaternion(
                (m_floats[0] * s0 + q.x() * s1) * d,
                (m_floats[1] * s0 + q.y() * s1) * d,
                (m_floats[2] * s0 + q.z() * s1) * d,
                (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
        }
        else
        {
            return *this;
        }
    }
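
    /* Slerp usage sketch (illustrative): halfway between two orientations.
       Both quaternions should be unit length.

           btQuaternion a(btVector3(0, 1, 0), btScalar(0));   // identity
           btQuaternion b(btVector3(0, 1, 0), SIMD_HALF_PI);  // 90 degrees about Y
           btQuaternion mid = a.slerp(b, btScalar(0.5));      // ~45 degrees about Y
    */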

    /**@brief Return the identity quaternion (zero rotation) */
    static const btQuaternion& getIdentity()
    {
        static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
        return identityQuat;
    }

    /**@brief Return the w component */
    SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
};
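
/* A minimal end-to-end sketch (illustrative only, not part of the original API):

       btQuaternion q = btQuaternion::getIdentity();
       q *= btQuaternion(btVector3(1, 0, 0), SIMD_HALF_PI);  // compose a 90 degree X turn
       btScalar w = q.getW();                                // ~cos(pi/4) = 0.707
*/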

/**@brief Return the product of two quaternions */
SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q1, const btQuaternion& q2)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
    __m128 vQ1 = q1.get128();
    __m128 vQ2 = q2.get128();
    __m128 A0, A1, B1, A2, B2;

    A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // q1.xyzx
    B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // q2.wwwx

    A1 = A1 * B1;

    A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));  // q1.yzxy
    B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));  // q2.zxyy

    A2 = A2 * B2;

    B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));  // q1.zxyz
    B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));  // q2.yzxz

    B1 = B1 * B2;

    A0 = bt_splat_ps(vQ1, 3);  // (W W W W)
    A0 = A0 * vQ2;             // q1.w * q2

    A1 = A1 + A2;
    A0 = A0 - B1;

    A1 = _mm_xor_ps(A1, vPPPM);  // change the sign of the last element
    A0 = A0 + A1;

    return btQuaternion(A0);

#elif defined(BT_USE_NEON)
    float32x4_t vQ1 = q1.get128();
    float32x4_t vQ2 = q2.get128();
    float32x4_t A0, A1, B1, A2, B2, A3, B3;
    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

    {
        float32x2x2_t tmp;
        tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
        vQ1zx = tmp.val[0];

        tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
        vQ2zx = tmp.val[0];
    }
    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);  // {w x}

    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);  // {y z}

    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);  // {y z}
    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);                           // {x z}

    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // q1.xyzx
    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // q2.wwwx

    A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                     // q1.yzxy
    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));   // q2.zxyy

    A3 = vcombine_f32(vQ1zx, vQ1yz);                                 // q1.zxyz
    B3 = vcombine_f32(vQ2yz, vQ2xz);                                 // q2.yzxz

    A1 = vmulq_f32(A1, B1);
    A2 = vmulq_f32(A2, B2);
    A3 = vmulq_f32(A3, B3);
    A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);                 // q1.w * q2

    A1 = vaddq_f32(A1, A2);
    A0 = vsubq_f32(A0, A3);

    // change the sign of the last element
    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
    A0 = vaddq_f32(A0, A1);

    return btQuaternion(A0);

#else
    return btQuaternion(
        q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
        q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
        q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
        q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
#endif
}
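
/* Note on composition order (illustrative): products compose right to left, as
   with matrices. To apply q1 first and then q2, multiply in the order

       btQuaternion combined = q2 * q1;  // rotates by q1, then by q2
*/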

/**@brief Return the product of a quaternion and a vector
 * The vector is treated as a pure quaternion (zero w component) */
SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q, const btVector3& w)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
    __m128 vQ1 = q.get128();
    __m128 vQ2 = w.get128();
    __m128 A1, B1, A2, B2, A3, B3;

    A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3, 3, 3, 0));  // q.wwwx
    B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0, 1, 2, 0));  // w.xyzx

    A1 = A1 * B1;

    A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));  // q.yzxy
    B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));  // w.zxyy

    A2 = A2 * B2;

    A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));  // q.zxyz
    B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));  // w.yzxz

    A3 = A3 * B3;

    A1 = A1 + A2;
    A1 = _mm_xor_ps(A1, vPPPM);  // change the sign of the last element
    A1 = A1 - A3;

    return btQuaternion(A1);

#elif defined(BT_USE_NEON)
    float32x4_t vQ1 = q.get128();
    float32x4_t vQ2 = w.get128();
    float32x4_t A1, B1, A2, B2, A3, B3;
    float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;

    vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);  // {w x}
    {
        float32x2x2_t tmp;

        tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
        vQ2zx = tmp.val[0];

        tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
        vQ1zx = tmp.val[0];
    }

    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);  // {y z}

    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);  // {y z}
    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);                           // {x z}

    A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // q.wwwx
    B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // w.xyzx

    A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                     // q.yzxy
    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));   // w.zxyy

    A3 = vcombine_f32(vQ1zx, vQ1yz);                                 // q.zxyz
    B3 = vcombine_f32(vQ2yz, vQ2xz);                                 // w.yzxz

    A1 = vmulq_f32(A1, B1);
    A2 = vmulq_f32(A2, B2);
    A3 = vmulq_f32(A3, B3);

    A1 = vaddq_f32(A1, A2);

    // change the sign of the last element
    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);

    A1 = vsubq_f32(A1, A3);

    return btQuaternion(A1);

#else
    return btQuaternion(
        q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
        q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
        q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
        -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
#endif
}

/**@brief Return the product of a vector and a quaternion
 * The vector is treated as a pure quaternion (zero w component) */
SIMD_FORCE_INLINE btQuaternion
operator*(const btVector3& w, const btQuaternion& q)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
    __m128 vQ1 = w.get128();
    __m128 vQ2 = q.get128();
    __m128 A1, B1, A2, B2, A3, B3;

    A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // w.xyzx
    B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // q.wwwx

    A1 = A1 * B1;

    A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));  // w.yzxy
    B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));  // q.zxyy

    A2 = A2 * B2;

    A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));  // w.zxyz
    B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));  // q.yzxz

    A3 = A3 * B3;

    A1 = A1 + A2;
    A1 = _mm_xor_ps(A1, vPPPM);  // change the sign of the last element
    A1 = A1 - A3;

    return btQuaternion(A1);

#elif defined(BT_USE_NEON)
    float32x4_t vQ1 = w.get128();
    float32x4_t vQ2 = q.get128();
    float32x4_t A1, B1, A2, B2, A3, B3;
    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

    {
        float32x2x2_t tmp;

        tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
        vQ1zx = tmp.val[0];

        tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
        vQ2zx = tmp.val[0];
    }
    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);  // {w x}

    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);  // {y z}

    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);  // {y z}
    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);                           // {x z}

    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // w.xyzx
    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // q.wwwx

    A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                     // w.yzxy
    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));   // q.zxyy

    A3 = vcombine_f32(vQ1zx, vQ1yz);                                 // w.zxyz
    B3 = vcombine_f32(vQ2yz, vQ2xz);                                 // q.yzxz

    A1 = vmulq_f32(A1, B1);
    A2 = vmulq_f32(A2, B2);
    A3 = vmulq_f32(A3, B3);

    A1 = vaddq_f32(A1, A2);

    // change the sign of the last element
    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);

    A1 = vsubq_f32(A1, A3);

    return btQuaternion(A1);

#else
    return btQuaternion(
        +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
        +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
        +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
        -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
#endif
}

/**@brief Calculate the dot product between two quaternions */
SIMD_FORCE_INLINE btScalar
dot(const btQuaternion& q1, const btQuaternion& q2)
{
    return q1.dot(q2);
}

/**@brief Return the length of a quaternion */
SIMD_FORCE_INLINE btScalar
length(const btQuaternion& q)
{
    return q.length();
}

/**@brief Return the angle between two quaternions */
SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion& q1, const btQuaternion& q2)
{
    return q1.angle(q2);
}

/**@brief Return the inverse of a quaternion */
SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion& q)
{
    return q.inverse();
}

/**@brief Return the result of spherical linear interpolation between two quaternions
 * @param q1 The first quaternion
 * @param q2 The second quaternion
 * @param t The ratio between q1 and q2. t = 0 returns q1, t = 1 returns q2.
 * Slerp assumes constant velocity between positions. */
SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
{
    return q1.slerp(q2, t);
}

/**@brief Rotate a vector by a quaternion: computes q * v * q^-1 and returns the vector part */
SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion& rotation, const btVector3& v)
{
    btQuaternion q = rotation * v;
    q *= rotation.inverse();
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
    return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));  // zero out the w lane
#elif defined(BT_USE_NEON)
    return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));  // zero out the w lane
#else
    return btVector3(q.getX(), q.getY(), q.getZ());
#endif
}
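
/* quatRotate usage sketch (illustrative): rotating the X axis 90 degrees about Y.

       btQuaternion q(btVector3(0, 1, 0), SIMD_HALF_PI);
       btVector3 v = quatRotate(q, btVector3(1, 0, 0));  // ~(0, 0, -1)
*/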

/**@brief Return the shortest-arc quaternion that rotates v0 onto v1;
 * v0 and v1 must be normalized */
SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3& v0, const btVector3& v1)
{
    btVector3 c = v0.cross(v1);
    btScalar d = v0.dot(v1);

    if (d < -1.0 + SIMD_EPSILON)
    {
        // vectors are nearly opposite: pick any axis perpendicular to v0
        btVector3 n, unused;
        btPlaneSpace1(v0, n, unused);
        return btQuaternion(n.x(), n.y(), n.z(), 0.0f);  // 180 degree rotation about n
    }

    btScalar s = btSqrt((1.0f + d) * 2.0f);
    btScalar rs = 1.0f / s;

    return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
}
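
/* shortestArcQuat usage sketch (illustrative): the rotation carrying one unit
   vector onto another.

       btQuaternion q = shortestArcQuat(btVector3(1, 0, 0), btVector3(0, 1, 0));
       // quatRotate(q, btVector3(1, 0, 0)) is ~(0, 1, 0): a 90 degree turn about Z
*/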

/**@brief Normalize both vectors in place, then return the shortest-arc quaternion between them */
SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3& v0, btVector3& v1)
{
    v0.normalize();
    v1.normalize();
    return shortestArcQuat(v0, v1);
}

#endif //BT_SIMD__QUATERNION_H_