sse/vec_aos.h

Go to the documentation of this file.
00001 /*
00002    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
00003    All rights reserved.
00004 
00005    Redistribution and use in source and binary forms,
00006    with or without modification, are permitted provided that the
00007    following conditions are met:
00008     * Redistributions of source code must retain the above copyright
00009       notice, this list of conditions and the following disclaimer.
00010     * Redistributions in binary form must reproduce the above copyright
00011       notice, this list of conditions and the following disclaimer in the
00012       documentation and/or other materials provided with the distribution.
00013     * Neither the name of the Sony Computer Entertainment Inc nor the names
00014       of its contributors may be used to endorse or promote products derived
00015       from this software without specific prior written permission.
00016 
00017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00018    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00021    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00022    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00023    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00024    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00025    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00026    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00027    POSSIBILITY OF SUCH DAMAGE.
00028 */
00029 
00030 #ifndef _VECTORMATH_VEC_AOS_CPP_H
00031 #define _VECTORMATH_VEC_AOS_CPP_H
00032 
00033 //-----------------------------------------------------------------------------
00034 // Constants
00035 // for permutes words are labeled [x,y,z,w] [a,b,c,d]
00036 
// Byte-index words used to assemble permute control vectors.
#define _VECTORMATH_PERM_X 0x00010203
#define _VECTORMATH_PERM_Y 0x04050607
#define _VECTORMATH_PERM_Z 0x08090a0b
#define _VECTORMATH_PERM_W 0x0c0d0e0f
#define _VECTORMATH_PERM_A 0x10111213
#define _VECTORMATH_PERM_B 0x14151617
#define _VECTORMATH_PERM_C 0x18191a1b
#define _VECTORMATH_PERM_D 0x1c1d1e1f

// Permute control vectors; each name lists the source word chosen per lane.
// NOTE(review): vec_uchar16 / vec_uint4 are not defined in this file.
#define _VECTORMATH_PERM_XYZA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A })
#define _VECTORMATH_PERM_ZXYW ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W })
#define _VECTORMATH_PERM_YZXW ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W })
#define _VECTORMATH_PERM_YZAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
#define _VECTORMATH_PERM_ZABC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C })
#define _VECTORMATH_PERM_XYAW ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W })
#define _VECTORMATH_PERM_XAZW ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W })

// Single-lane select masks: all bits set in exactly one 32-bit word.
#define _VECTORMATH_MASK_0xF000 ((vec_uint4){ 0xffffffff, 0, 0, 0 })
#define _VECTORMATH_MASK_0x0F00 ((vec_uint4){ 0, 0xffffffff, 0, 0 })
#define _VECTORMATH_MASK_0x00F0 ((vec_uint4){ 0, 0, 0xffffffff, 0 })
#define _VECTORMATH_MASK_0x000F ((vec_uint4){ 0, 0, 0, 0xffffffff })

// Unit vectors with 1.0f in a single lane (lane 0 is listed first).
#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }

// slerp() switches to linear weights once the cosine of the angle reaches this value.
#define _VECTORMATH_SLERP_TOL 0.999f
00061 //_VECTORMATH_SLERP_TOLF
00062 
00063 //-----------------------------------------------------------------------------
00064 // Definitions
00065 
00066 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
00067 #define _VECTORMATH_INTERNAL_FUNCTIONS
00068 
// _mm_shuffle_ps wrapper taking the lane selectors in x,y,z,w order
// (_MM_SHUFFLE itself expects them in w,z,y,x order).
#define _vmath_shufps(a, b, immx, immy, immz, immw) \
    _mm_shuffle_ps((a), (b), _MM_SHUFFLE((immw), (immz), (immy), (immx)))
00070 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
00071 {
00072         __m128 result = _mm_mul_ps( vec0, vec1);
00073     return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
00074 }
00075 
00076 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
00077 {
00078     __m128 result = _mm_mul_ps(vec0, vec1);
00079         return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)),
00080                         _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)),
00081                         _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3)))));
00082 }
00083 
00084 static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
00085 {
00086     __m128 tmp0, tmp1, tmp2, tmp3, result;
00087     tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
00088     tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
00089     tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
00090     tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
00091     result = vec_mul( tmp0, tmp1 );
00092     result = vec_nmsub( tmp2, tmp3, result );
00093     return result;
00094 }
00095 /*
00096 static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
00097 {
00098 #if 0
00099     vec_int4 bexp;
00100     vec_uint4 mant, sign, hfloat;
00101     vec_uint4 notZero, isInf;
00102     const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
00103     const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
00104     const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
00105 
00106     sign = vec_sr((vec_uint4)v, (vec_uint4)16);
00107     mant = vec_sr((vec_uint4)v, (vec_uint4)13);
00108     bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
00109 
00110     notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
00111     isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
00112 
00113     bexp = _mm_add_ps(bexp, (vec_int4)-112);
00114     bexp = vec_sl(bexp, (vec_uint4)10);
00115 
00116     hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
00117     hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
00118     hfloat = vec_sel(hfloat, hfloatInf, isInf);
00119     hfloat = vec_sel(hfloat, sign, mergeSign);
00120 
00121     return hfloat;
00122 #else
00123         assert(0);
00124         return _mm_setzero_ps();
00125 #endif
00126 }
00127 
00128 static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
00129 {
00130 #if 0
00131     vec_uint4 hfloat_u, hfloat_v;
00132     const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
00133     hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
00134     hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
00135     return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
00136 #else
00137         assert(0);
00138         return _mm_setzero_si128();
00139 #endif
00140 }
00141 */
00142 
00143 static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
00144 {
00145         SSEFloat s;
00146         s.m128 = src;
00147         SSEFloat d;
00148         d.m128 = dst;
00149         d.f[slot] = s.f[slot];
00150         return d.m128;
00151 }
00152 
// Writes `scalar` into lane `slot` of the __m128 lvalue `vec`.
// Arguments and the whole expansion are parenthesized so the macro behaves
// as a single expression wherever it is used.
#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar))
00154 
00155 static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar)
00156 {
00157         return _mm_set1_ps(scalar);
00158 }
00159 
00160 #endif
00161 
00162 namespace Vectormath {
00163 namespace Aos {
00164 
00165         
00166 #ifdef _VECTORMATH_NO_SCALAR_CAST
00167 VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const
00168 {
00169     return floatInVec(ref, i);
00170 }
00171 
00172 VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const
00173 #else
00174 VECTORMATH_FORCE_INLINE VecIdx::operator float() const
00175 #endif
00176 {
00177     return ((float *)&ref)[i];
00178 }
00179 
00180 VECTORMATH_FORCE_INLINE float VecIdx::operator =( float scalar )
00181 {
00182     _vmathVfSetElement(ref, scalar, i);
00183     return scalar;
00184 }
00185 
00186 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const floatInVec &scalar )
00187 {
00188     ref = _vmathVfInsert(ref, scalar.get128(), i);
00189     return scalar;
00190 }
00191 
00192 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const VecIdx& scalar )
00193 {
00194     return *this = floatInVec(scalar.ref, scalar.i);
00195 }
00196 
00197 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( float scalar )
00198 {
00199     return *this *= floatInVec(scalar);
00200 }
00201 
00202 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( const floatInVec &scalar )
00203 {
00204     return *this = floatInVec(ref, i) * scalar;
00205 }
00206 
00207 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( float scalar )
00208 {
00209     return *this /= floatInVec(scalar);
00210 }
00211 
00212 inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
00213 {
00214     return *this = floatInVec(ref, i) / scalar;
00215 }
00216 
00217 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( float scalar )
00218 {
00219     return *this += floatInVec(scalar);
00220 }
00221 
00222 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( const floatInVec &scalar )
00223 {
00224     return *this = floatInVec(ref, i) + scalar;
00225 }
00226 
00227 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( float scalar )
00228 {
00229     return *this -= floatInVec(scalar);
00230 }
00231 
00232 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( const floatInVec &scalar )
00233 {
00234     return *this = floatInVec(ref, i) - scalar;
00235 }
00236 
00237 VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3& vec)
00238 {
00239     set128(vec.get128());
00240 }
00241 
00242 VECTORMATH_FORCE_INLINE void Vector3::set128(vec_float4 vec)
00243 {
00244     mVec128 = vec;
00245 }
00246 
00247 
00248 VECTORMATH_FORCE_INLINE Vector3::Vector3( float _x, float _y, float _z )
00249 {
00250     mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
00251 }
00252 
00253 VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
00254 {
00255         __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
00256         mVec128 = _mm_unpacklo_ps( xz, _y.get128() );
00257 }
00258 
00259 VECTORMATH_FORCE_INLINE Vector3::Vector3( const Point3 &pnt )
00260 {
00261     mVec128 = pnt.get128();
00262 }
00263 
00264 VECTORMATH_FORCE_INLINE Vector3::Vector3( float scalar )
00265 {
00266     mVec128 = floatInVec(scalar).get128();
00267 }
00268 
00269 VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &scalar )
00270 {
00271     mVec128 = scalar.get128();
00272 }
00273 
00274 VECTORMATH_FORCE_INLINE Vector3::Vector3( __m128 vf4 )
00275 {
00276     mVec128 = vf4;
00277 }
00278 
00279 VECTORMATH_FORCE_INLINE const Vector3 Vector3::xAxis( )
00280 {
00281     return Vector3( _VECTORMATH_UNIT_1000 );
00282 }
00283 
00284 VECTORMATH_FORCE_INLINE const Vector3 Vector3::yAxis( )
00285 {
00286     return Vector3( _VECTORMATH_UNIT_0100 );
00287 }
00288 
00289 VECTORMATH_FORCE_INLINE const Vector3 Vector3::zAxis( )
00290 {
00291     return Vector3( _VECTORMATH_UNIT_0010 );
00292 }
00293 
00294 VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
00295 {
00296     return lerp( floatInVec(t), vec0, vec1 );
00297 }
00298 
00299 VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
00300 {
00301     return ( vec0 + ( ( vec1 - vec0 ) * t ) );
00302 }
00303 
00304 VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
00305 {
00306     return slerp( floatInVec(t), unitVec0, unitVec1 );
00307 }
00308 
00309 VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
00310 {
00311     __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
00312     cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
00313     __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
00314     angle = acosf4( cosAngle );
00315     tttt = t.get128();
00316     oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
00317     angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
00318     angles = _mm_unpacklo_ps( angles, oneMinusT );              // angles = 1, 1-t, t, 1-t
00319     angles = _mm_mul_ps( angles, angle );
00320     sines = sinf4( angles );
00321     scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
00322     scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
00323     scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
00324     return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
00325 }
00326 
00327 VECTORMATH_FORCE_INLINE __m128 Vector3::get128( ) const
00328 {
00329     return mVec128;
00330 }
00331 
00332 VECTORMATH_FORCE_INLINE void loadXYZ(Point3& vec, const float* fptr)
00333 {
00334 #ifdef USE_SSE3_LDDQU
00335         vec = Point3(   SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
00336 #else
00337         SSEFloat fl;
00338         fl.f[0] = fptr[0];
00339         fl.f[1] = fptr[1];
00340         fl.f[2] = fptr[2];
00341         fl.f[3] = fptr[3];
00342     vec = Point3(       fl.m128);
00343 #endif //USE_SSE3_LDDQU
00344         
00345 }
00346 
00347 
00348 
00349 VECTORMATH_FORCE_INLINE void loadXYZ(Vector3& vec, const float* fptr)
00350 {
00351 #ifdef USE_SSE3_LDDQU
00352         vec = Vector3(  SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
00353 #else
00354         SSEFloat fl;
00355         fl.f[0] = fptr[0];
00356         fl.f[1] = fptr[1];
00357         fl.f[2] = fptr[2];
00358         fl.f[3] = fptr[3];
00359     vec = Vector3(      fl.m128);
00360 #endif //USE_SSE3_LDDQU
00361         
00362 }
00363 
00364 VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad )
00365 {
00366         __m128 dstVec = *quad;
00367         VM_ATTRIBUTE_ALIGN16  unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
00368         dstVec = vec_sel(vec.get128(), dstVec, sw);
00369         *quad = dstVec;
00370 }
00371 
00372 VECTORMATH_FORCE_INLINE void storeXYZ(const Point3& vec, float* fptr)
00373 {
00374         fptr[0] = vec.getX();
00375         fptr[1] = vec.getY();
00376         fptr[2] = vec.getZ();
00377 }
00378 
00379 VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3& vec, float* fptr)
00380 {
00381         fptr[0] = vec.getX();
00382         fptr[1] = vec.getY();
00383         fptr[2] = vec.getZ();
00384 }
00385 
00386 
00387 VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
00388 {
00389         const float *quads = (float *)threeQuads;
00390     vec0 = Vector3(  _mm_load_ps(quads) );
00391     vec1 = Vector3( _mm_loadu_ps(quads + 3) );
00392     vec2 = Vector3( _mm_loadu_ps(quads + 6) );
00393     vec3 = Vector3( _mm_loadu_ps(quads + 9) );
00394 }
00395 
00396 VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
00397 {
00398         __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
00399         __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
00400         VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
00401         VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
00402         threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw );
00403     threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
00404     threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
00405 }
00406 /*
00407 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
00408 {
00409         assert(0);
00410 #if 0
00411     __m128 xyz0[3];
00412     __m128 xyz1[3];
00413     storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
00414     storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
00415     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
00416     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
00417     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
00418 #endif
00419 }
00420 */
00421 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator =( const Vector3 &vec )
00422 {
00423     mVec128 = vec.mVec128;
00424     return *this;
00425 }
00426 
00427 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( float _x )
00428 {
00429     _vmathVfSetElement(mVec128, _x, 0);
00430     return *this;
00431 }
00432 
00433 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( const floatInVec &_x )
00434 {
00435     mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
00436     return *this;
00437 }
00438 
00439 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getX( ) const
00440 {
00441     return floatInVec( mVec128, 0 );
00442 }
00443 
00444 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( float _y )
00445 {
00446     _vmathVfSetElement(mVec128, _y, 1);
00447     return *this;
00448 }
00449 
00450 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( const floatInVec &_y )
00451 {
00452     mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
00453     return *this;
00454 }
00455 
00456 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getY( ) const
00457 {
00458     return floatInVec( mVec128, 1 );
00459 }
00460 
00461 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( float _z )
00462 {
00463     _vmathVfSetElement(mVec128, _z, 2);
00464     return *this;
00465 }
00466 
00467 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( const floatInVec &_z )
00468 {
00469     mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
00470     return *this;
00471 }
00472 
00473 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getZ( ) const
00474 {
00475     return floatInVec( mVec128, 2 );
00476 }
00477 
00478 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, float value )
00479 {
00480     _vmathVfSetElement(mVec128, value, idx);
00481     return *this;
00482 }
00483 
00484 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, const floatInVec &value )
00485 {
00486     mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
00487     return *this;
00488 }
00489 
00490 VECTORMATH_FORCE_INLINE const floatInVec Vector3::getElem( int idx ) const
00491 {
00492     return floatInVec( mVec128, idx );
00493 }
00494 
00495 VECTORMATH_FORCE_INLINE VecIdx Vector3::operator []( int idx )
00496 {
00497     return VecIdx( mVec128, idx );
00498 }
00499 
00500 VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator []( int idx ) const
00501 {
00502     return floatInVec( mVec128, idx );
00503 }
00504 
00505 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator +( const Vector3 &vec ) const
00506 {
00507     return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) );
00508 }
00509 
00510 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( const Vector3 &vec ) const
00511 {
00512     return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) );
00513 }
00514 
00515 VECTORMATH_FORCE_INLINE const Point3 Vector3::operator +( const Point3 &pnt ) const
00516 {
00517     return Point3( _mm_add_ps( mVec128, pnt.get128() ) );
00518 }
00519 
00520 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( float scalar ) const
00521 {
00522     return *this * floatInVec(scalar);
00523 }
00524 
00525 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( const floatInVec &scalar ) const
00526 {
00527     return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) );
00528 }
00529 
00530 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator +=( const Vector3 &vec )
00531 {
00532     *this = *this + vec;
00533     return *this;
00534 }
00535 
00536 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator -=( const Vector3 &vec )
00537 {
00538     *this = *this - vec;
00539     return *this;
00540 }
00541 
00542 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( float scalar )
00543 {
00544     *this = *this * scalar;
00545     return *this;
00546 }
00547 
00548 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( const floatInVec &scalar )
00549 {
00550     *this = *this * scalar;
00551     return *this;
00552 }
00553 
00554 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( float scalar ) const
00555 {
00556     return *this / floatInVec(scalar);
00557 }
00558 
00559 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( const floatInVec &scalar ) const
00560 {
00561     return Vector3( _mm_div_ps( mVec128, scalar.get128() ) );
00562 }
00563 
00564 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( float scalar )
00565 {
00566     *this = *this / scalar;
00567     return *this;
00568 }
00569 
00570 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( const floatInVec &scalar )
00571 {
00572     *this = *this / scalar;
00573     return *this;
00574 }
00575 
00576 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( ) const
00577 {
00578         //return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
00579 
00580         VM_ATTRIBUTE_ALIGN16 static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
00581         __m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf;
00582         return Vector3(_mm_xor_ps(get128(),NEG_MASK));
00583 }
00584 
00585 VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec )
00586 {
00587     return floatInVec(scalar) * vec;
00588 }
00589 
00590 VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
00591 {
00592     return vec * scalar;
00593 }
00594 
00595 VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
00596 {
00597     return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
00598 }
00599 
00600 VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
00601 {
00602     return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
00603 }
00604 
00605 VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec )
00606 {
00607     return Vector3( _mm_rcp_ps( vec.get128() ) );
00608 }
00609 
00610 VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec )
00611 {
00612     return Vector3( fabsf4( vec.get128() ) );
00613 }
00614 
00615 VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
00616 {
00617         __m128 vmask = toM128(0x7fffffff);
00618         return Vector3( _mm_or_ps(
00619                 _mm_and_ps   ( vmask, vec0.get128() ),                  // Value
00620                 _mm_andnot_ps( vmask, vec1.get128() ) ) );              // Signs
00621 }
00622 
00623 VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
00624 {
00625     return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
00626 }
00627 
00628 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec )
00629 {
00630     return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
00631 }
00632 
00633 VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
00634 {
00635     return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
00636 }
00637 
00638 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec )
00639 {
00640     return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
00641 }
00642 
00643 VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec )
00644 {
00645     return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
00646 }
00647 
00648 VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
00649 {
00650     return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
00651 }
00652 
00653 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec )
00654 {
00655     return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
00656 }
00657 
00658 VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec )
00659 {
00660     return floatInVec(  _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
00661 }
00662 
00663 
00664 VECTORMATH_FORCE_INLINE const Vector3 normalizeApprox( const Vector3 &vec )
00665 {
00666     return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
00667 }
00668 
00669 VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec )
00670 {
00671         return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
00672 }
00673 
00674 VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
00675 {
00676     return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
00677 }
00678 
00679 VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
00680 {
00681     return select( vec0, vec1, boolInVec(select1) );
00682 }
00683 
00684 
00685 VECTORMATH_FORCE_INLINE  const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1)
00686 {
00687     return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128()));
00688 }
00689 
00690 #ifdef _VECTORMATH_DEBUG
00691 
00692 VECTORMATH_FORCE_INLINE void print( const Vector3 &vec )
00693 {
00694     union { __m128 v; float s[4]; } tmp;
00695     tmp.v = vec.get128();
00696     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
00697 }
00698 
00699 VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name )
00700 {
00701     union { __m128 v; float s[4]; } tmp;
00702     tmp.v = vec.get128();
00703     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
00704 }
00705 
00706 #endif
00707 
00708 VECTORMATH_FORCE_INLINE Vector4::Vector4( float _x, float _y, float _z, float _w )
00709 {
00710     mVec128 = _mm_setr_ps(_x, _y, _z, _w); 
00711  }
00712 
00713 VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
00714 {
00715         mVec128 = _mm_unpacklo_ps(
00716                 _mm_unpacklo_ps( _x.get128(), _z.get128() ),
00717                 _mm_unpacklo_ps( _y.get128(), _w.get128() ) );
00718 }
00719 
00720 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, float _w )
00721 {
00722     mVec128 = xyz.get128();
00723     _vmathVfSetElement(mVec128, _w, 3);
00724 }
00725 
00726 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w )
00727 {
00728     mVec128 = xyz.get128();
00729     mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
00730 }
00731 
00732 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &vec )
00733 {
00734     mVec128 = vec.get128();
00735     mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
00736 }
00737 
00738 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Point3 &pnt )
00739 {
00740     mVec128 = pnt.get128();
00741     mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
00742 }
00743 
00744 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Quat &quat )
00745 {
00746     mVec128 = quat.get128();
00747 }
00748 
00749 VECTORMATH_FORCE_INLINE Vector4::Vector4( float scalar )
00750 {
00751     mVec128 = floatInVec(scalar).get128();
00752 }
00753 
00754 VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &scalar )
00755 {
00756     mVec128 = scalar.get128();
00757 }
00758 
00759 VECTORMATH_FORCE_INLINE Vector4::Vector4( __m128 vf4 )
00760 {
00761     mVec128 = vf4;
00762 }
00763 
00764 VECTORMATH_FORCE_INLINE const Vector4 Vector4::xAxis( )
00765 {
00766     return Vector4( _VECTORMATH_UNIT_1000 );
00767 }
00768 
00769 VECTORMATH_FORCE_INLINE const Vector4 Vector4::yAxis( )
00770 {
00771     return Vector4( _VECTORMATH_UNIT_0100 );
00772 }
00773 
00774 VECTORMATH_FORCE_INLINE const Vector4 Vector4::zAxis( )
00775 {
00776     return Vector4( _VECTORMATH_UNIT_0010 );
00777 }
00778 
00779 VECTORMATH_FORCE_INLINE const Vector4 Vector4::wAxis( )
00780 {
00781     return Vector4( _VECTORMATH_UNIT_0001 );
00782 }
00783 
00784 VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
00785 {
00786     return lerp( floatInVec(t), vec0, vec1 );
00787 }
00788 
00789 VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
00790 {
00791     return ( vec0 + ( ( vec1 - vec0 ) * t ) );
00792 }
00793 
00794 VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
00795 {
00796     return slerp( floatInVec(t), unitVec0, unitVec1 );
00797 }
00798 
00799 VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
00800 {
00801     __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
00802     cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
00803     __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
00804     angle = acosf4( cosAngle );
00805     tttt = t.get128();
00806     oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
00807     angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
00808     angles = _mm_unpacklo_ps( angles, oneMinusT );              // angles = 1, 1-t, t, 1-t
00809     angles = _mm_mul_ps( angles, angle );
00810     sines = sinf4( angles );
00811     scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
00812     scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
00813     scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
00814     return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
00815 }
00816 
00817 VECTORMATH_FORCE_INLINE __m128 Vector4::get128( ) const
00818 {
00819     return mVec128;
00820 }
00821 /*
00822 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
00823 {
00824     twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
00825     twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
00826 }
00827 */
00828 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator =( const Vector4 &vec )
00829 {
00830     mVec128 = vec.mVec128;
00831     return *this;
00832 }
00833 
00834 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setXYZ( const Vector3 &vec )
00835 {
00836         VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
00837         mVec128 = vec_sel( vec.get128(), mVec128, sw );
00838     return *this;
00839 }
00840 
00841 VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ( ) const
00842 {
00843     return Vector3( mVec128 );
00844 }
00845 
00846 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( float _x )
00847 {
00848     _vmathVfSetElement(mVec128, _x, 0);
00849     return *this;
00850 }
00851 
00852 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( const floatInVec &_x )
00853 {
00854     mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
00855     return *this;
00856 }
00857 
00858 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getX( ) const
00859 {
00860     return floatInVec( mVec128, 0 );
00861 }
00862 
00863 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( float _y )
00864 {
00865     _vmathVfSetElement(mVec128, _y, 1);
00866     return *this;
00867 }
00868 
00869 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( const floatInVec &_y )
00870 {
00871     mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
00872     return *this;
00873 }
00874 
00875 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getY( ) const
00876 {
00877     return floatInVec( mVec128, 1 );
00878 }
00879 
00880 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( float _z )
00881 {
00882     _vmathVfSetElement(mVec128, _z, 2);
00883     return *this;
00884 }
00885 
00886 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( const floatInVec &_z )
00887 {
00888     mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
00889     return *this;
00890 }
00891 
00892 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getZ( ) const
00893 {
00894     return floatInVec( mVec128, 2 );
00895 }
00896 
00897 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( float _w )
00898 {
00899     _vmathVfSetElement(mVec128, _w, 3);
00900     return *this;
00901 }
00902 
00903 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( const floatInVec &_w )
00904 {
00905     mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
00906     return *this;
00907 }
00908 
00909 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getW( ) const
00910 {
00911     return floatInVec( mVec128, 3 );
00912 }
00913 
00914 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, float value )
00915 {
00916     _vmathVfSetElement(mVec128, value, idx);
00917     return *this;
00918 }
00919 
00920 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, const floatInVec &value )
00921 {
00922     mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
00923     return *this;
00924 }
00925 
00926 VECTORMATH_FORCE_INLINE const floatInVec Vector4::getElem( int idx ) const
00927 {
00928     return floatInVec( mVec128, idx );
00929 }
00930 
00931 VECTORMATH_FORCE_INLINE VecIdx Vector4::operator []( int idx )
00932 {
00933     return VecIdx( mVec128, idx );
00934 }
00935 
00936 VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator []( int idx ) const
00937 {
00938     return floatInVec( mVec128, idx );
00939 }
00940 
00941 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator +( const Vector4 &vec ) const
00942 {
00943     return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) );
00944 }
00945 
00946 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( const Vector4 &vec ) const
00947 {
00948     return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) );
00949 }
00950 
00951 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( float scalar ) const
00952 {
00953     return *this * floatInVec(scalar);
00954 }
00955 
00956 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( const floatInVec &scalar ) const
00957 {
00958     return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) );
00959 }
00960 
00961 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator +=( const Vector4 &vec )
00962 {
00963     *this = *this + vec;
00964     return *this;
00965 }
00966 
00967 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator -=( const Vector4 &vec )
00968 {
00969     *this = *this - vec;
00970     return *this;
00971 }
00972 
00973 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( float scalar )
00974 {
00975     *this = *this * scalar;
00976     return *this;
00977 }
00978 
00979 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( const floatInVec &scalar )
00980 {
00981     *this = *this * scalar;
00982     return *this;
00983 }
00984 
00985 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( float scalar ) const
00986 {
00987     return *this / floatInVec(scalar);
00988 }
00989 
00990 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( const floatInVec &scalar ) const
00991 {
00992     return Vector4( _mm_div_ps( mVec128, scalar.get128() ) );
00993 }
00994 
00995 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( float scalar )
00996 {
00997     *this = *this / scalar;
00998     return *this;
00999 }
01000 
01001 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( const floatInVec &scalar )
01002 {
01003     *this = *this / scalar;
01004     return *this;
01005 }
01006 
01007 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( ) const
01008 {
01009         return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
01010 }
01011 
01012 VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec )
01013 {
01014     return floatInVec(scalar) * vec;
01015 }
01016 
01017 VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec )
01018 {
01019     return vec * scalar;
01020 }
01021 
01022 VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
01023 {
01024     return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
01025 }
01026 
01027 VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
01028 {
01029     return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) );
01030 }
01031 
01032 VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec )
01033 {
01034     return Vector4( _mm_rcp_ps( vec.get128() ) );
01035 }
01036 
01037 VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec )
01038 {
01039     return Vector4( fabsf4( vec.get128() ) );
01040 }
01041 
01042 VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
01043 {
01044         __m128 vmask = toM128(0x7fffffff);
01045         return Vector4( _mm_or_ps(
01046                 _mm_and_ps   ( vmask, vec0.get128() ),                  // Value
01047                 _mm_andnot_ps( vmask, vec1.get128() ) ) );              // Signs
01048 }
01049 
01050 VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
01051 {
01052     return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) );
01053 }
01054 
01055 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec )
01056 {
01057     return floatInVec( _mm_max_ps(
01058                 _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
01059                 _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
01060 }
01061 
01062 VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
01063 {
01064     return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) );
01065 }
01066 
01067 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec )
01068 {
01069     return floatInVec( _mm_min_ps(
01070                 _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
01071                 _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
01072 }
01073 
01074 VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec )
01075 {
01076     return floatInVec( _mm_add_ps(
01077                 _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
01078                 _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
01079 }
01080 
01081 VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
01082 {
01083     return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
01084 }
01085 
01086 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec )
01087 {
01088     return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
01089 }
01090 
01091 VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec )
01092 {
01093     return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
01094 }
01095 
01096 VECTORMATH_FORCE_INLINE const Vector4 normalizeApprox( const Vector4 &vec )
01097 {
01098     return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
01099 }
01100 
01101 VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec )
01102 {
01103     return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
01104 }
01105 
01106 VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
01107 {
01108     return select( vec0, vec1, boolInVec(select1) );
01109 }
01110 
01111 
01112 #ifdef _VECTORMATH_DEBUG
01113 
01114 VECTORMATH_FORCE_INLINE void print( const Vector4 &vec )
01115 {
01116     union { __m128 v; float s[4]; } tmp;
01117     tmp.v = vec.get128();
01118     printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
01119 }
01120 
01121 VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name )
01122 {
01123     union { __m128 v; float s[4]; } tmp;
01124     tmp.v = vec.get128();
01125     printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
01126 }
01127 
01128 #endif
01129 
01130 VECTORMATH_FORCE_INLINE Point3::Point3( float _x, float _y, float _z )
01131 {
01132     mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
01133 }
01134 
01135 VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
01136 {
01137         mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() );
01138 }
01139 
01140 VECTORMATH_FORCE_INLINE Point3::Point3( const Vector3 &vec )
01141 {
01142     mVec128 = vec.get128();
01143 }
01144 
01145 VECTORMATH_FORCE_INLINE Point3::Point3( float scalar )
01146 {
01147     mVec128 = floatInVec(scalar).get128();
01148 }
01149 
01150 VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &scalar )
01151 {
01152     mVec128 = scalar.get128();
01153 }
01154 
01155 VECTORMATH_FORCE_INLINE Point3::Point3( __m128 vf4 )
01156 {
01157     mVec128 = vf4;
01158 }
01159 
01160 VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
01161 {
01162     return lerp( floatInVec(t), pnt0, pnt1 );
01163 }
01164 
01165 VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
01166 {
01167     return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
01168 }
01169 
01170 VECTORMATH_FORCE_INLINE __m128 Point3::get128( ) const
01171 {
01172     return mVec128;
01173 }
01174 
01175 VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad )
01176 {
01177     __m128 dstVec = *quad;
01178         VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
01179     dstVec = vec_sel(pnt.get128(), dstVec, sw);
01180     *quad = dstVec;
01181 }
01182 
01183 VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
01184 {
01185         const float *quads = (float *)threeQuads;
01186     pnt0 = Point3(  _mm_load_ps(quads) );
01187     pnt1 = Point3( _mm_loadu_ps(quads + 3) );
01188     pnt2 = Point3( _mm_loadu_ps(quads + 6) );
01189     pnt3 = Point3( _mm_loadu_ps(quads + 9) );
01190 }
01191 
01192 VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
01193 {
01194         __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
01195         __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
01196         VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
01197         VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
01198         threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw );
01199     threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
01200     threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
01201 }
01202 /*
01203 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
01204 {
01205 #if 0
01206     __m128 xyz0[3];
01207     __m128 xyz1[3];
01208     storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
01209     storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
01210     threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
01211     threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
01212     threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
01213 #else
01214         assert(0);
01215 #endif
01216 }
01217 */
01218 VECTORMATH_FORCE_INLINE Point3 & Point3::operator =( const Point3 &pnt )
01219 {
01220     mVec128 = pnt.mVec128;
01221     return *this;
01222 }
01223 
01224 VECTORMATH_FORCE_INLINE Point3 & Point3::setX( float _x )
01225 {
01226     _vmathVfSetElement(mVec128, _x, 0);
01227     return *this;
01228 }
01229 
01230 VECTORMATH_FORCE_INLINE Point3 & Point3::setX( const floatInVec &_x )
01231 {
01232     mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
01233     return *this;
01234 }
01235 
01236 VECTORMATH_FORCE_INLINE const floatInVec Point3::getX( ) const
01237 {
01238     return floatInVec( mVec128, 0 );
01239 }
01240 
01241 VECTORMATH_FORCE_INLINE Point3 & Point3::setY( float _y )
01242 {
01243     _vmathVfSetElement(mVec128, _y, 1);
01244     return *this;
01245 }
01246 
01247 VECTORMATH_FORCE_INLINE Point3 & Point3::setY( const floatInVec &_y )
01248 {
01249     mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
01250     return *this;
01251 }
01252 
01253 VECTORMATH_FORCE_INLINE const floatInVec Point3::getY( ) const
01254 {
01255     return floatInVec( mVec128, 1 );
01256 }
01257 
01258 VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( float _z )
01259 {
01260     _vmathVfSetElement(mVec128, _z, 2);
01261     return *this;
01262 }
01263 
01264 VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( const floatInVec &_z )
01265 {
01266     mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
01267     return *this;
01268 }
01269 
01270 VECTORMATH_FORCE_INLINE const floatInVec Point3::getZ( ) const
01271 {
01272     return floatInVec( mVec128, 2 );
01273 }
01274 
01275 VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, float value )
01276 {
01277     _vmathVfSetElement(mVec128, value, idx);
01278     return *this;
01279 }
01280 
01281 VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, const floatInVec &value )
01282 {
01283     mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
01284     return *this;
01285 }
01286 
01287 VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem( int idx ) const
01288 {
01289     return floatInVec( mVec128, idx );
01290 }
01291 
01292 VECTORMATH_FORCE_INLINE VecIdx Point3::operator []( int idx )
01293 {
01294     return VecIdx( mVec128, idx );
01295 }
01296 
01297 VECTORMATH_FORCE_INLINE const floatInVec Point3::operator []( int idx ) const
01298 {
01299     return floatInVec( mVec128, idx );
01300 }
01301 
01302 VECTORMATH_FORCE_INLINE const Vector3 Point3::operator -( const Point3 &pnt ) const
01303 {
01304     return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) );
01305 }
01306 
01307 VECTORMATH_FORCE_INLINE const Point3 Point3::operator +( const Vector3 &vec ) const
01308 {
01309     return Point3( _mm_add_ps( mVec128, vec.get128() ) );
01310 }
01311 
01312 VECTORMATH_FORCE_INLINE const Point3 Point3::operator -( const Vector3 &vec ) const
01313 {
01314     return Point3( _mm_sub_ps( mVec128, vec.get128() ) );
01315 }
01316 
01317 VECTORMATH_FORCE_INLINE Point3 & Point3::operator +=( const Vector3 &vec )
01318 {
01319     *this = *this + vec;
01320     return *this;
01321 }
01322 
01323 VECTORMATH_FORCE_INLINE Point3 & Point3::operator -=( const Vector3 &vec )
01324 {
01325     *this = *this - vec;
01326     return *this;
01327 }
01328 
01329 VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
01330 {
01331     return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) );
01332 }
01333 
01334 VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
01335 {
01336     return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) );
01337 }
01338 
01339 VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt )
01340 {
01341     return Point3( _mm_rcp_ps( pnt.get128() ) );
01342 }
01343 
01344 VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt )
01345 {
01346     return Point3( fabsf4( pnt.get128() ) );
01347 }
01348 
01349 VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
01350 {
01351         __m128 vmask = toM128(0x7fffffff);
01352         return Point3( _mm_or_ps(
01353                 _mm_and_ps   ( vmask, pnt0.get128() ),                  // Value
01354                 _mm_andnot_ps( vmask, pnt1.get128() ) ) );              // Signs
01355 }
01356 
01357 VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
01358 {
01359     return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) );
01360 }
01361 
01362 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt )
01363 {
01364     return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
01365 }
01366 
01367 VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
01368 {
01369     return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) );
01370 }
01371 
01372 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt )
01373 {
01374     return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
01375 }
01376 
01377 VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt )
01378 {
01379     return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
01380 }
01381 
01382 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal )
01383 {
01384     return scale( pnt, floatInVec( scaleVal ) );
01385 }
01386 
01387 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
01388 {
01389     return mulPerElem( pnt, Point3( scaleVal ) );
01390 }
01391 
01392 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
01393 {
01394     return mulPerElem( pnt, Point3( scaleVec ) );
01395 }
01396 
01397 VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
01398 {
01399     return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
01400 }
01401 
01402 VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt )
01403 {
01404     return lengthSqr( Vector3( pnt ) );
01405 }
01406 
01407 VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt )
01408 {
01409     return length( Vector3( pnt ) );
01410 }
01411 
01412 VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
01413 {
01414     return lengthSqr( ( pnt1 - pnt0 ) );
01415 }
01416 
01417 VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
01418 {
01419     return length( ( pnt1 - pnt0 ) );
01420 }
01421 
01422 VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
01423 {
01424     return select( pnt0, pnt1, boolInVec(select1) );
01425 }
01426 
01427 VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
01428 {
01429     return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
01430 }
01431 
01432 
01433 
01434 #ifdef _VECTORMATH_DEBUG
01435 
01436 VECTORMATH_FORCE_INLINE void print( const Point3 &pnt )
01437 {
01438     union { __m128 v; float s[4]; } tmp;
01439     tmp.v = pnt.get128();
01440     printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
01441 }
01442 
01443 VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name )
01444 {
01445     union { __m128 v; float s[4]; } tmp;
01446     tmp.v = pnt.get128();
01447     printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
01448 }
01449 
01450 #endif
01451 
01452 } // namespace Aos
01453 } // namespace Vectormath
01454 
01455 #endif