sse/mat_aos.h

Go to the documentation of this file.
00001 /*
00002    Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
00003    All rights reserved.
00004 
00005    Redistribution and use in source and binary forms,
00006    with or without modification, are permitted provided that the
00007    following conditions are met:
00008     * Redistributions of source code must retain the above copyright
00009       notice, this list of conditions and the following disclaimer.
00010     * Redistributions in binary form must reproduce the above copyright
00011       notice, this list of conditions and the following disclaimer in the
00012       documentation and/or other materials provided with the distribution.
00013     * Neither the name of the Sony Computer Entertainment Inc nor the names
00014       of its contributors may be used to endorse or promote products derived
00015       from this software without specific prior written permission.
00016 
00017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00018    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00021    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00022    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00023    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00024    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00025    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00026    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00027    POSSIBILITY OF SUCH DAMAGE.
00028 */
00029 
00030 
00031 #ifndef _VECTORMATH_MAT_AOS_CPP_H
00032 #define _VECTORMATH_MAT_AOS_CPP_H
00033 
00034 namespace Vectormath {
00035 namespace Aos {
00036 
00037 //-----------------------------------------------------------------------------
00038 // Constants
00039 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
00040 
00041 #define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
00042 #define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
00043 #define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
00044 #define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
00045 #define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
00046 #define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
00047 #define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
00048 #define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
00049 #define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
00050 #define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
00051 #define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
00052 #define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
00053 #define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
00054 #define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
00055 #define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
00056 #define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
00057 #define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
00058 #define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
00059 #define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
00060 #define _VECTORMATH_PI_OVER_2 1.570796327f
00061 
00062 //-----------------------------------------------------------------------------
00063 // Definitions
00064 
00065 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat )
00066 {
00067     mCol0 = mat.mCol0;
00068     mCol1 = mat.mCol1;
00069     mCol2 = mat.mCol2;
00070 }
00071 
00072 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( float scalar )
00073 {
00074     mCol0 = Vector3( scalar );
00075     mCol1 = Vector3( scalar );
00076     mCol2 = Vector3( scalar );
00077 }
00078 
00079 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const floatInVec &scalar )
00080 {
00081     mCol0 = Vector3( scalar );
00082     mCol1 = Vector3( scalar );
00083     mCol2 = Vector3( scalar );
00084 }
00085 
00086 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Quat &unitQuat )
00087 {
00088     __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
00089     __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
00090         VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0};
00091         VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0};
00092         __m128 select_x = _mm_load_ps((float *)sx);
00093         __m128 select_z = _mm_load_ps((float *)sz);
00094 
00095     xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
00096     wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
00097         yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
00098         zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
00099     yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
00100     zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
00101 
00102     tmp0 = _mm_mul_ps( yzxw_2, wwww );                                                                  // tmp0 = 2yw, 2zw, 2xw, 2w2
00103         tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );       // tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
00104     tmp2 = _mm_mul_ps( yzxw, xyzw_2 );                                                                  // tmp2 = 2xy, 2yz, 2xz, 2w2
00105     tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );                                // tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
00106     tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );                                // tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
00107     tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );                                // tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
00108 
00109     tmp3 = vec_sel( tmp0, tmp1, select_x );
00110     tmp4 = vec_sel( tmp1, tmp2, select_x );
00111     tmp5 = vec_sel( tmp2, tmp0, select_x );
00112     mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
00113     mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
00114     mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
00115 }
00116 
00117 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
00118 {
00119     mCol0 = _col0;
00120     mCol1 = _col1;
00121     mCol2 = _col2;
00122 }
00123 
00124 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
00125 {
00126     mCol0 = _col0;
00127     return *this;
00128 }
00129 
00130 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
00131 {
00132     mCol1 = _col1;
00133     return *this;
00134 }
00135 
00136 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
00137 {
00138     mCol2 = _col2;
00139     return *this;
00140 }
00141 
00142 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
00143 {
00144     *(&mCol0 + col) = vec;
00145     return *this;
00146 }
00147 
00148 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
00149 {
00150     mCol0.setElem( row, vec.getElem( 0 ) );
00151     mCol1.setElem( row, vec.getElem( 1 ) );
00152     mCol2.setElem( row, vec.getElem( 2 ) );
00153     return *this;
00154 }
00155 
00156 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val )
00157 {
00158     (*this)[col].setElem(row, val);
00159     return *this;
00160 }
00161 
00162 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val )
00163 {
00164     Vector3 tmpV3_0;
00165     tmpV3_0 = this->getCol( col );
00166     tmpV3_0.setElem( row, val );
00167     this->setCol( col, tmpV3_0 );
00168     return *this;
00169 }
00170 
00171 VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const
00172 {
00173     return this->getCol( col ).getElem( row );
00174 }
00175 
00176 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const
00177 {
00178     return mCol0;
00179 }
00180 
00181 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const
00182 {
00183     return mCol1;
00184 }
00185 
00186 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const
00187 {
00188     return mCol2;
00189 }
00190 
00191 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const
00192 {
00193     return *(&mCol0 + col);
00194 }
00195 
00196 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const
00197 {
00198     return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
00199 }
00200 
00201 VECTORMATH_FORCE_INLINE Vector3 & Matrix3::operator []( int col )
00202 {
00203     return *(&mCol0 + col);
00204 }
00205 
00206 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const
00207 {
00208     return *(&mCol0 + col);
00209 }
00210 
00211 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat )
00212 {
00213     mCol0 = mat.mCol0;
00214     mCol1 = mat.mCol1;
00215     mCol2 = mat.mCol2;
00216     return *this;
00217 }
00218 
00219 VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat )
00220 {
00221     __m128 tmp0, tmp1, res0, res1, res2;
00222     tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
00223     tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
00224     res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
00225     //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
00226         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00227         res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
00228         res1 = vec_sel(res1, mat.getCol1().get128(), select_y);
00229     //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
00230         res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
00231         res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);
00232     return Matrix3(
00233         Vector3( res0 ),
00234         Vector3( res1 ),
00235         Vector3( res2 )
00236     );
00237 }
00238 
00239 VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat )
00240 {
00241     __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
00242     tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
00243     tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
00244     tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
00245     dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
00246     dot = vec_splat( dot, 0 );
00247     invdet = recipf4( dot );
00248     tmp3 = vec_mergeh( tmp0, tmp2 );
00249     tmp4 = vec_mergel( tmp0, tmp2 );
00250     inv0 = vec_mergeh( tmp3, tmp1 );
00251     //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
00252         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00253         inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
00254         inv1 = vec_sel(inv1, tmp1, select_y);
00255     //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
00256         inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
00257         inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
00258     inv0 = vec_mul( inv0, invdet );
00259     inv1 = vec_mul( inv1, invdet );
00260         inv2 = vec_mul( inv2, invdet );
00261     return Matrix3(
00262         Vector3( inv0 ),
00263         Vector3( inv1 ),
00264         Vector3( inv2 )
00265     );
00266 }
00267 
00268 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat )
00269 {
00270     return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
00271 }
00272 
00273 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
00274 {
00275     return Matrix3(
00276         ( mCol0 + mat.mCol0 ),
00277         ( mCol1 + mat.mCol1 ),
00278         ( mCol2 + mat.mCol2 )
00279     );
00280 }
00281 
00282 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
00283 {
00284     return Matrix3(
00285         ( mCol0 - mat.mCol0 ),
00286         ( mCol1 - mat.mCol1 ),
00287         ( mCol2 - mat.mCol2 )
00288     );
00289 }
00290 
00291 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
00292 {
00293     *this = *this + mat;
00294     return *this;
00295 }
00296 
00297 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
00298 {
00299     *this = *this - mat;
00300     return *this;
00301 }
00302 
00303 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const
00304 {
00305     return Matrix3(
00306         ( -mCol0 ),
00307         ( -mCol1 ),
00308         ( -mCol2 )
00309     );
00310 }
00311 
00312 VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat )
00313 {
00314     return Matrix3(
00315         absPerElem( mat.getCol0() ),
00316         absPerElem( mat.getCol1() ),
00317         absPerElem( mat.getCol2() )
00318     );
00319 }
00320 
00321 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const
00322 {
00323     return *this * floatInVec(scalar);
00324 }
00325 
00326 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const
00327 {
00328     return Matrix3(
00329         ( mCol0 * scalar ),
00330         ( mCol1 * scalar ),
00331         ( mCol2 * scalar )
00332     );
00333 }
00334 
00335 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( float scalar )
00336 {
00337     return *this *= floatInVec(scalar);
00338 }
00339 
00340 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const floatInVec &scalar )
00341 {
00342     *this = *this * scalar;
00343     return *this;
00344 }
00345 
00346 VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat )
00347 {
00348     return floatInVec(scalar) * mat;
00349 }
00350 
00351 VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
00352 {
00353     return mat * scalar;
00354 }
00355 
00356 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const
00357 {
00358     __m128 res;
00359     __m128 xxxx, yyyy, zzzz;
00360     xxxx = vec_splat( vec.get128(), 0 );
00361     yyyy = vec_splat( vec.get128(), 1 );
00362     zzzz = vec_splat( vec.get128(), 2 );
00363     res = vec_mul( mCol0.get128(), xxxx );
00364     res = vec_madd( mCol1.get128(), yyyy, res );
00365     res = vec_madd( mCol2.get128(), zzzz, res );
00366     return Vector3( res );
00367 }
00368 
00369 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
00370 {
00371     return Matrix3(
00372         ( *this * mat.mCol0 ),
00373         ( *this * mat.mCol1 ),
00374         ( *this * mat.mCol2 )
00375     );
00376 }
00377 
00378 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
00379 {
00380     *this = *this * mat;
00381     return *this;
00382 }
00383 
00384 VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
00385 {
00386     return Matrix3(
00387         mulPerElem( mat0.getCol0(), mat1.getCol0() ),
00388         mulPerElem( mat0.getCol1(), mat1.getCol1() ),
00389         mulPerElem( mat0.getCol2(), mat1.getCol2() )
00390     );
00391 }
00392 
00393 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::identity( )
00394 {
00395     return Matrix3(
00396         Vector3::xAxis( ),
00397         Vector3::yAxis( ),
00398         Vector3::zAxis( )
00399     );
00400 }
00401 
00402 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians )
00403 {
00404     return rotationX( floatInVec(radians) );
00405 }
00406 
00407 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( const floatInVec &radians )
00408 {
00409     __m128 s, c, res1, res2;
00410     __m128 zero;
00411         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00412         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
00413     zero = _mm_setzero_ps();
00414     sincosf4( radians.get128(), &s, &c );
00415     res1 = vec_sel( zero, c, select_y );
00416     res1 = vec_sel( res1, s, select_z );
00417     res2 = vec_sel( zero, negatef4(s), select_y );
00418     res2 = vec_sel( res2, c, select_z );
00419     return Matrix3(
00420         Vector3::xAxis( ),
00421         Vector3( res1 ),
00422         Vector3( res2 )
00423     );
00424 }
00425 
00426 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( float radians )
00427 {
00428     return rotationY( floatInVec(radians) );
00429 }
00430 
00431 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( const floatInVec &radians )
00432 {
00433     __m128 s, c, res0, res2;
00434     __m128 zero;
00435         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
00436         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
00437     zero = _mm_setzero_ps();
00438     sincosf4( radians.get128(), &s, &c );
00439     res0 = vec_sel( zero, c, select_x );
00440     res0 = vec_sel( res0, negatef4(s), select_z );
00441     res2 = vec_sel( zero, s, select_x );
00442     res2 = vec_sel( res2, c, select_z );
00443     return Matrix3(
00444         Vector3( res0 ),
00445         Vector3::yAxis( ),
00446         Vector3( res2 )
00447         );
00448 }
00449 
00450 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( float radians )
00451 {
00452     return rotationZ( floatInVec(radians) );
00453 }
00454 
00455 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( const floatInVec &radians )
00456 {
00457     __m128 s, c, res0, res1;
00458     __m128 zero;
00459         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
00460         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00461     zero = _mm_setzero_ps();
00462     sincosf4( radians.get128(), &s, &c );
00463     res0 = vec_sel( zero, c, select_x );
00464     res0 = vec_sel( res0, s, select_y );
00465     res1 = vec_sel( zero, negatef4(s), select_x );
00466     res1 = vec_sel( res1, c, select_y );
00467     return Matrix3(
00468         Vector3( res0 ),
00469         Vector3( res1 ),
00470         Vector3::zAxis( )
00471         );
00472 }
00473 
00474 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
00475 {
00476     __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
00477     angles = Vector4( radiansXYZ, 0.0f ).get128();
00478     sincosf4( angles, &s, &c );
00479     negS = negatef4( s );
00480     Z0 = vec_mergel( c, s );
00481     Z1 = vec_mergel( negS, c );
00482         VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
00483     Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
00484         Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
00485         Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
00486     X0 = vec_splat( s, 0 );
00487     X1 = vec_splat( c, 0 );
00488     tmp = vec_mul( Z0, Y1 );
00489     return Matrix3(
00490         Vector3( vec_mul( Z0, Y0 ) ),
00491         Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
00492         Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
00493     );
00494 }
00495 
00496 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
00497 {
00498     return rotation( floatInVec(radians), unitVec );
00499 }
00500 
00501 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
00502 {
00503     __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
00504     axis = unitVec.get128();
00505     sincosf4( radians.get128(), &s, &c );
00506     xxxx = vec_splat( axis, 0 );
00507     yyyy = vec_splat( axis, 1 );
00508     zzzz = vec_splat( axis, 2 );
00509     oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
00510     axisS = vec_mul( axis, s );
00511     negAxisS = negatef4( axisS );
00512         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
00513         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00514         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
00515     //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
00516         tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
00517         tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
00518     //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
00519         tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
00520     //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
00521         tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
00522         tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
00523     tmp0 = vec_sel( tmp0, c, select_x );
00524     tmp1 = vec_sel( tmp1, c, select_y );
00525     tmp2 = vec_sel( tmp2, c, select_z );
00526     return Matrix3(
00527         Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
00528         Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
00529         Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
00530     );
00531 }
00532 
00533 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat )
00534 {
00535     return Matrix3( unitQuat );
00536 }
00537 
00538 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
00539 {
00540     __m128 zero = _mm_setzero_ps();
00541         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
00542         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
00543         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
00544     return Matrix3(
00545         Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
00546         Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
00547         Vector3( vec_sel( zero, scaleVec.get128(), select_z ) )
00548     );
00549 }
00550 
00551 VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
00552 {
00553     return Matrix3(
00554         ( mat.getCol0() * scaleVec.getX( ) ),
00555         ( mat.getCol1() * scaleVec.getY( ) ),
00556         ( mat.getCol2() * scaleVec.getZ( ) )
00557     );
00558 }
00559 
00560 VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
00561 {
00562     return Matrix3(
00563         mulPerElem( mat.getCol0(), scaleVec ),
00564         mulPerElem( mat.getCol1(), scaleVec ),
00565         mulPerElem( mat.getCol2(), scaleVec )
00566     );
00567 }
00568 
00569 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
00570 {
00571     return Matrix3(
00572         select( mat0.getCol0(), mat1.getCol0(), select1 ),
00573         select( mat0.getCol1(), mat1.getCol1(), select1 ),
00574         select( mat0.getCol2(), mat1.getCol2(), select1 )
00575     );
00576 }
00577 
00578 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
00579 {
00580     return Matrix3(
00581         select( mat0.getCol0(), mat1.getCol0(), select1 ),
00582         select( mat0.getCol1(), mat1.getCol1(), select1 ),
00583         select( mat0.getCol2(), mat1.getCol2(), select1 )
00584     );
00585 }
00586 
00587 #ifdef _VECTORMATH_DEBUG
00588 
00589 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat )
00590 {
00591     print( mat.getRow( 0 ) );
00592     print( mat.getRow( 1 ) );
00593     print( mat.getRow( 2 ) );
00594 }
00595 
00596 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name )
00597 {
00598     printf("%s:\n", name);
00599     print( mat );
00600 }
00601 
00602 #endif
00603 
00604 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat )
00605 {
00606     mCol0 = mat.mCol0;
00607     mCol1 = mat.mCol1;
00608     mCol2 = mat.mCol2;
00609     mCol3 = mat.mCol3;
00610 }
00611 
00612 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( float scalar )
00613 {
00614     mCol0 = Vector4( scalar );
00615     mCol1 = Vector4( scalar );
00616     mCol2 = Vector4( scalar );
00617     mCol3 = Vector4( scalar );
00618 }
00619 
00620 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const floatInVec &scalar )
00621 {
00622     mCol0 = Vector4( scalar );
00623     mCol1 = Vector4( scalar );
00624     mCol2 = Vector4( scalar );
00625     mCol3 = Vector4( scalar );
00626 }
00627 
00628 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Transform3 & mat )
00629 {
00630     mCol0 = Vector4( mat.getCol0(), 0.0f );
00631     mCol1 = Vector4( mat.getCol1(), 0.0f );
00632     mCol2 = Vector4( mat.getCol2(), 0.0f );
00633     mCol3 = Vector4( mat.getCol3(), 1.0f );
00634 }
00635 
00636 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
00637 {
00638     mCol0 = _col0;
00639     mCol1 = _col1;
00640     mCol2 = _col2;
00641     mCol3 = _col3;
00642 }
00643 
00644 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
00645 {
00646     mCol0 = Vector4( mat.getCol0(), 0.0f );
00647     mCol1 = Vector4( mat.getCol1(), 0.0f );
00648     mCol2 = Vector4( mat.getCol2(), 0.0f );
00649     mCol3 = Vector4( translateVec, 1.0f );
00650 }
00651 
00652 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
00653 {
00654     Matrix3 mat;
00655     mat = Matrix3( unitQuat );
00656     mCol0 = Vector4( mat.getCol0(), 0.0f );
00657     mCol1 = Vector4( mat.getCol1(), 0.0f );
00658     mCol2 = Vector4( mat.getCol2(), 0.0f );
00659     mCol3 = Vector4( translateVec, 1.0f );
00660 }
00661 
00662 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
00663 {
00664     mCol0 = _col0;
00665     return *this;
00666 }
00667 
00668 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
00669 {
00670     mCol1 = _col1;
00671     return *this;
00672 }
00673 
00674 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
00675 {
00676     mCol2 = _col2;
00677     return *this;
00678 }
00679 
00680 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
00681 {
00682     mCol3 = _col3;
00683     return *this;
00684 }
00685 
00686 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
00687 {
00688     *(&mCol0 + col) = vec;
00689     return *this;
00690 }
00691 
00692 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
00693 {
00694     mCol0.setElem( row, vec.getElem( 0 ) );
00695     mCol1.setElem( row, vec.getElem( 1 ) );
00696     mCol2.setElem( row, vec.getElem( 2 ) );
00697     mCol3.setElem( row, vec.getElem( 3 ) );
00698     return *this;
00699 }
00700 
00701 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val )
00702 {
00703     (*this)[col].setElem(row, val);
00704     return *this;
00705 }
00706 
00707 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val )
00708 {
00709     Vector4 tmpV3_0;
00710     tmpV3_0 = this->getCol( col );
00711     tmpV3_0.setElem( row, val );
00712     this->setCol( col, tmpV3_0 );
00713     return *this;
00714 }
00715 
00716 VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const
00717 {
00718     return this->getCol( col ).getElem( row );
00719 }
00720 
00721 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const
00722 {
00723     return mCol0;
00724 }
00725 
00726 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const
00727 {
00728     return mCol1;
00729 }
00730 
00731 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const
00732 {
00733     return mCol2;
00734 }
00735 
00736 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const
00737 {
00738     return mCol3;
00739 }
00740 
00741 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const
00742 {
00743     return *(&mCol0 + col);
00744 }
00745 
00746 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const
00747 {
00748     return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
00749 }
00750 
00751 VECTORMATH_FORCE_INLINE Vector4 & Matrix4::operator []( int col )
00752 {
00753     return *(&mCol0 + col);
00754 }
00755 
00756 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const
00757 {
00758     return *(&mCol0 + col);
00759 }
00760 
00761 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat )
00762 {
00763     mCol0 = mat.mCol0;
00764     mCol1 = mat.mCol1;
00765     mCol2 = mat.mCol2;
00766     mCol3 = mat.mCol3;
00767     return *this;
00768 }
00769 
00770 VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat )
00771 {
00772     __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
00773     tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
00774     tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
00775     tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
00776     tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
00777     res0 = vec_mergeh( tmp0, tmp1 );
00778     res1 = vec_mergel( tmp0, tmp1 );
00779     res2 = vec_mergeh( tmp2, tmp3 );
00780     res3 = vec_mergel( tmp2, tmp3 );
00781     return Matrix4(
00782         Vector4( res0 ),
00783         Vector4( res1 ),
00784         Vector4( res2 ),
00785         Vector4( res3 )
00786     );
00787 }
00788 
00789 // TODO: Tidy
00790 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
00791 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};
00792 static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};
00793 
00794 VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat )
00795 {
00796         __m128 Va,Vb,Vc;
00797         __m128 r1,r2,r3,tt,tt2;
00798         __m128 sum,Det,RDet;
00799         __m128 trns0,trns1,trns2,trns3;
00800 
00801         __m128 _L1 = mat.getCol0().get128();
00802         __m128 _L2 = mat.getCol1().get128();
00803         __m128 _L3 = mat.getCol2().get128();
00804         __m128 _L4 = mat.getCol3().get128();
00805         // Calculating the minterms for the first line.
00806 
00807         // _mm_ror_ps is just a macro using _mm_shuffle_ps().
00808         tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
00809         Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));                                  // V3'dot V4
00810         Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));                                  // V3'dot V4"
00811         Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));                                  // V3' dot V4^
00812 
00813         r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));             // V3" dot V4^ - V3^ dot V4"
00814         r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));             // V3^ dot V4' - V3' dot V4^
00815         r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));             // V3' dot V4" - V3" dot V4'
00816 
00817         tt = _L2;
00818         Va = _mm_ror_ps(tt,1);          sum = _mm_mul_ps(Va,r1);
00819         Vb = _mm_ror_ps(tt,2);          sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
00820         Vc = _mm_ror_ps(tt,3);          sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
00821 
00822         // Calculating the determinant.
00823         Det = _mm_mul_ps(sum,_L1);
00824         Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
00825 
00826         const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
00827         const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
00828 
00829         __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
00830 
00831         // Calculating the minterms of the second line (using previous results).
00832         tt = _mm_ror_ps(_L1,1);         sum = _mm_mul_ps(tt,r1);
00833         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
00834         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
00835         __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
00836 
00837         // Testing the determinant.
00838         Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
00839 
00840         // Calculating the minterms of the third line.
00841         tt = _mm_ror_ps(_L1,1);
00842         Va = _mm_mul_ps(tt,Vb);                                                                 // V1' dot V2"
00843         Vb = _mm_mul_ps(tt,Vc);                                                                 // V1' dot V2^
00844         Vc = _mm_mul_ps(tt,_L2);                                                                // V1' dot V2
00845 
00846         r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));             // V1" dot V2^ - V1^ dot V2"
00847         r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));             // V1^ dot V2' - V1' dot V2^
00848         r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));             // V1' dot V2" - V1" dot V2'
00849 
00850         tt = _mm_ror_ps(_L4,1);         sum = _mm_mul_ps(tt,r1);
00851         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
00852         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
00853         __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
00854 
00855         // Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
00856         RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
00857         RDet = _mm_shuffle_ps(RDet,RDet,0x00);
00858 
00859         // Devide the first 12 minterms with the determinant.
00860         mtL1 = _mm_mul_ps(mtL1, RDet);
00861         mtL2 = _mm_mul_ps(mtL2, RDet);
00862         mtL3 = _mm_mul_ps(mtL3, RDet);
00863 
00864         // Calculate the minterms of the forth line and devide by the determinant.
00865         tt = _mm_ror_ps(_L3,1);         sum = _mm_mul_ps(tt,r1);
00866         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
00867         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
00868         __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
00869         mtL4 = _mm_mul_ps(mtL4, RDet);
00870 
00871         // Now we just have to transpose the minterms matrix.
00872         trns0 = _mm_unpacklo_ps(mtL1,mtL2);
00873         trns1 = _mm_unpacklo_ps(mtL3,mtL4);
00874         trns2 = _mm_unpackhi_ps(mtL1,mtL2);
00875         trns3 = _mm_unpackhi_ps(mtL3,mtL4);
00876         _L1 = _mm_movelh_ps(trns0,trns1);
00877         _L2 = _mm_movehl_ps(trns1,trns0);
00878         _L3 = _mm_movelh_ps(trns2,trns3);
00879         _L4 = _mm_movehl_ps(trns3,trns2);
00880 
00881     return Matrix4(
00882         Vector4( _L1 ),
00883         Vector4( _L2 ),
00884         Vector4( _L3 ),
00885         Vector4( _L4 )
00886     );
00887 }
00888 
00889 VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat )
00890 {
00891     Transform3 affineMat;
00892     affineMat.setCol0( mat.getCol0().getXYZ( ) );
00893     affineMat.setCol1( mat.getCol1().getXYZ( ) );
00894     affineMat.setCol2( mat.getCol2().getXYZ( ) );
00895     affineMat.setCol3( mat.getCol3().getXYZ( ) );
00896     return Matrix4( inverse( affineMat ) );
00897 }
00898 
00899 VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat )
00900 {
00901     Transform3 affineMat;
00902     affineMat.setCol0( mat.getCol0().getXYZ( ) );
00903     affineMat.setCol1( mat.getCol1().getXYZ( ) );
00904     affineMat.setCol2( mat.getCol2().getXYZ( ) );
00905     affineMat.setCol3( mat.getCol3().getXYZ( ) );
00906     return Matrix4( orthoInverse( affineMat ) );
00907 }
00908 
00909 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat )
00910 {
00911         __m128 Va,Vb,Vc;
00912         __m128 r1,r2,r3,tt,tt2;
00913         __m128 sum,Det;
00914 
00915         __m128 _L1 = mat.getCol0().get128();
00916         __m128 _L2 = mat.getCol1().get128();
00917         __m128 _L3 = mat.getCol2().get128();
00918         __m128 _L4 = mat.getCol3().get128();
00919         // Calculating the minterms for the first line.
00920 
00921         // _mm_ror_ps is just a macro using _mm_shuffle_ps().
00922         tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
00923         Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));                                  // V3' dot V4
00924         Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));                                  // V3' dot V4"
00925         Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));                                  // V3' dot V4^
00926 
00927         r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));             // V3" dot V4^ - V3^ dot V4"
00928         r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));             // V3^ dot V4' - V3' dot V4^
00929         r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));             // V3' dot V4" - V3" dot V4'
00930 
00931         tt = _L2;
00932         Va = _mm_ror_ps(tt,1);          sum = _mm_mul_ps(Va,r1);
00933         Vb = _mm_ror_ps(tt,2);          sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
00934         Vc = _mm_ror_ps(tt,3);          sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
00935 
00936         // Calculating the determinant.
00937         Det = _mm_mul_ps(sum,_L1);
00938         Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
00939 
00940         // Calculating the minterms of the second line (using previous results).
00941         tt = _mm_ror_ps(_L1,1);         sum = _mm_mul_ps(tt,r1);
00942         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
00943         tt = _mm_ror_ps(tt,1);          sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
00944 
00945         // Testing the determinant.
00946         Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
00947         return floatInVec(Det, 0);
00948 }
00949 
00950 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
00951 {
00952     return Matrix4(
00953         ( mCol0 + mat.mCol0 ),
00954         ( mCol1 + mat.mCol1 ),
00955         ( mCol2 + mat.mCol2 ),
00956         ( mCol3 + mat.mCol3 )
00957     );
00958 }
00959 
00960 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
00961 {
00962     return Matrix4(
00963         ( mCol0 - mat.mCol0 ),
00964         ( mCol1 - mat.mCol1 ),
00965         ( mCol2 - mat.mCol2 ),
00966         ( mCol3 - mat.mCol3 )
00967     );
00968 }
00969 
00970 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
00971 {
00972     *this = *this + mat;
00973     return *this;
00974 }
00975 
00976 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
00977 {
00978     *this = *this - mat;
00979     return *this;
00980 }
00981 
00982 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const
00983 {
00984     return Matrix4(
00985         ( -mCol0 ),
00986         ( -mCol1 ),
00987         ( -mCol2 ),
00988         ( -mCol3 )
00989     );
00990 }
00991 
00992 VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat )
00993 {
00994     return Matrix4(
00995         absPerElem( mat.getCol0() ),
00996         absPerElem( mat.getCol1() ),
00997         absPerElem( mat.getCol2() ),
00998         absPerElem( mat.getCol3() )
00999     );
01000 }
01001 
01002 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const
01003 {
01004     return *this * floatInVec(scalar);
01005 }
01006 
01007 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
01008 {
01009     return Matrix4(
01010         ( mCol0 * scalar ),
01011         ( mCol1 * scalar ),
01012         ( mCol2 * scalar ),
01013         ( mCol3 * scalar )
01014     );
01015 }
01016 
01017 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( float scalar )
01018 {
01019     return *this *= floatInVec(scalar);
01020 }
01021 
01022 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
01023 {
01024     *this = *this * scalar;
01025     return *this;
01026 }
01027 
01028 VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat )
01029 {
01030     return floatInVec(scalar) * mat;
01031 }
01032 
01033 VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
01034 {
01035     return mat * scalar;
01036 }
01037 
01038 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const
01039 {
01040     return Vector4(
01041                 _mm_add_ps(
01042                         _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
01043                         _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
01044                 );
01045 }
01046 
01047 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const
01048 {
01049     return Vector4(
01050                 _mm_add_ps(
01051                         _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
01052                         _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
01053                 );
01054 }
01055 
01056 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const
01057 {
01058     return Vector4(
01059                 _mm_add_ps(
01060                         _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
01061                         _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128()))
01062                 );
01063 }
01064 
01065 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
01066 {
01067     return Matrix4(
01068         ( *this * mat.mCol0 ),
01069         ( *this * mat.mCol1 ),
01070         ( *this * mat.mCol2 ),
01071         ( *this * mat.mCol3 )
01072     );
01073 }
01074 
01075 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
01076 {
01077     *this = *this * mat;
01078     return *this;
01079 }
01080 
01081 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
01082 {
01083     return Matrix4(
01084         ( *this * tfrm.getCol0() ),
01085         ( *this * tfrm.getCol1() ),
01086         ( *this * tfrm.getCol2() ),
01087         ( *this * Point3( tfrm.getCol3() ) )
01088     );
01089 }
01090 
01091 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
01092 {
01093     *this = *this * tfrm;
01094     return *this;
01095 }
01096 
01097 VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
01098 {
01099     return Matrix4(
01100         mulPerElem( mat0.getCol0(), mat1.getCol0() ),
01101         mulPerElem( mat0.getCol1(), mat1.getCol1() ),
01102         mulPerElem( mat0.getCol2(), mat1.getCol2() ),
01103         mulPerElem( mat0.getCol3(), mat1.getCol3() )
01104     );
01105 }
01106 
01107 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::identity( )
01108 {
01109     return Matrix4(
01110         Vector4::xAxis( ),
01111         Vector4::yAxis( ),
01112         Vector4::zAxis( ),
01113         Vector4::wAxis( )
01114     );
01115 }
01116 
01117 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
01118 {
01119     mCol0.setXYZ( mat3.getCol0() );
01120     mCol1.setXYZ( mat3.getCol1() );
01121     mCol2.setXYZ( mat3.getCol2() );
01122     return *this;
01123 }
01124 
01125 VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const
01126 {
01127     return Matrix3(
01128         mCol0.getXYZ( ),
01129         mCol1.getXYZ( ),
01130         mCol2.getXYZ( )
01131     );
01132 }
01133 
01134 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
01135 {
01136     mCol3.setXYZ( translateVec );
01137     return *this;
01138 }
01139 
01140 VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const
01141 {
01142     return mCol3.getXYZ( );
01143 }
01144 
01145 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians )
01146 {
01147     return rotationX( floatInVec(radians) );
01148 }
01149 
01150 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( const floatInVec &radians )
01151 {
01152     __m128 s, c, res1, res2;
01153     __m128 zero;
01154         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01155         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01156     zero = _mm_setzero_ps();
01157     sincosf4( radians.get128(), &s, &c );
01158     res1 = vec_sel( zero, c, select_y );
01159     res1 = vec_sel( res1, s, select_z );
01160     res2 = vec_sel( zero, negatef4(s), select_y );
01161     res2 = vec_sel( res2, c, select_z );
01162     return Matrix4(
01163         Vector4::xAxis( ),
01164         Vector4( res1 ),
01165         Vector4( res2 ),
01166         Vector4::wAxis( )
01167     );
01168 }
01169 
01170 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians )
01171 {
01172     return rotationY( floatInVec(radians) );
01173 }
01174 
01175 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( const floatInVec &radians )
01176 {
01177     __m128 s, c, res0, res2;
01178     __m128 zero;
01179         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01180         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01181     zero = _mm_setzero_ps();
01182     sincosf4( radians.get128(), &s, &c );
01183     res0 = vec_sel( zero, c, select_x );
01184     res0 = vec_sel( res0, negatef4(s), select_z );
01185     res2 = vec_sel( zero, s, select_x );
01186     res2 = vec_sel( res2, c, select_z );
01187     return Matrix4(
01188         Vector4( res0 ),
01189         Vector4::yAxis( ),
01190         Vector4( res2 ),
01191         Vector4::wAxis( )
01192     );
01193 }
01194 
01195 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians )
01196 {
01197     return rotationZ( floatInVec(radians) );
01198 }
01199 
01200 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
01201 {
01202     __m128 s, c, res0, res1;
01203     __m128 zero;
01204         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01205         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01206     zero = _mm_setzero_ps();
01207     sincosf4( radians.get128(), &s, &c );
01208     res0 = vec_sel( zero, c, select_x );
01209     res0 = vec_sel( res0, s, select_y );
01210     res1 = vec_sel( zero, negatef4(s), select_x );
01211     res1 = vec_sel( res1, c, select_y );
01212     return Matrix4(
01213         Vector4( res0 ),
01214         Vector4( res1 ),
01215         Vector4::zAxis( ),
01216         Vector4::wAxis( )
01217     );
01218 }
01219 
01220 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
01221 {
01222     __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
01223     angles = Vector4( radiansXYZ, 0.0f ).get128();
01224     sincosf4( angles, &s, &c );
01225     negS = negatef4( s );
01226     Z0 = vec_mergel( c, s );
01227     Z1 = vec_mergel( negS, c );
01228         VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
01229     Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
01230         Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
01231         Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
01232     X0 = vec_splat( s, 0 );
01233     X1 = vec_splat( c, 0 );
01234     tmp = vec_mul( Z0, Y1 );
01235     return Matrix4(
01236         Vector4( vec_mul( Z0, Y0 ) ),
01237         Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
01238         Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
01239         Vector4::wAxis( )
01240     );
01241 }
01242 
01243 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
01244 {
01245     return rotation( floatInVec(radians), unitVec );
01246 }
01247 
01248 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
01249 {
01250     __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
01251     axis = unitVec.get128();
01252     sincosf4( radians.get128(), &s, &c );
01253     xxxx = vec_splat( axis, 0 );
01254     yyyy = vec_splat( axis, 1 );
01255     zzzz = vec_splat( axis, 2 );
01256     oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
01257     axisS = vec_mul( axis, s );
01258     negAxisS = negatef4( axisS );
01259         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01260         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01261         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01262     //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
01263         tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
01264         tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
01265     //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
01266         tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
01267     //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
01268         tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
01269         tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
01270     tmp0 = vec_sel( tmp0, c, select_x );
01271     tmp1 = vec_sel( tmp1, c, select_y );
01272     tmp2 = vec_sel( tmp2, c, select_z );
01273         VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
01274     axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
01275     tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
01276     tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
01277     tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
01278     return Matrix4(
01279         Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
01280         Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
01281         Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
01282         Vector4::wAxis( )
01283     );
01284 }
01285 
01286 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat )
01287 {
01288     return Matrix4( Transform3::rotation( unitQuat ) );
01289 }
01290 
01291 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
01292 {
01293     __m128 zero = _mm_setzero_ps();
01294         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01295         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01296         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01297     return Matrix4(
01298         Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ),
01299         Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ),
01300         Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ),
01301         Vector4::wAxis( )
01302     );
01303 }
01304 
01305 VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
01306 {
01307     return Matrix4(
01308         ( mat.getCol0() * scaleVec.getX( ) ),
01309         ( mat.getCol1() * scaleVec.getY( ) ),
01310         ( mat.getCol2() * scaleVec.getZ( ) ),
01311         mat.getCol3()
01312     );
01313 }
01314 
01315 VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
01316 {
01317     Vector4 scale4;
01318     scale4 = Vector4( scaleVec, 1.0f );
01319     return Matrix4(
01320         mulPerElem( mat.getCol0(), scale4 ),
01321         mulPerElem( mat.getCol1(), scale4 ),
01322         mulPerElem( mat.getCol2(), scale4 ),
01323         mulPerElem( mat.getCol3(), scale4 )
01324     );
01325 }
01326 
01327 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec )
01328 {
01329     return Matrix4(
01330         Vector4::xAxis( ),
01331         Vector4::yAxis( ),
01332         Vector4::zAxis( ),
01333         Vector4( translateVec, 1.0f )
01334     );
01335 }
01336 
01337 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
01338 {
01339     Matrix4 m4EyeFrame;
01340     Vector3 v3X, v3Y, v3Z;
01341     v3Y = normalize( upVec );
01342     v3Z = normalize( ( eyePos - lookAtPos ) );
01343     v3X = normalize( cross( v3Y, v3Z ) );
01344     v3Y = cross( v3Z, v3X );
01345     m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
01346     return orthoInverse( m4EyeFrame );
01347 }
01348 
01349 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
01350 {
01351     float f, rangeInv;
01352     __m128 zero, col0, col1, col2, col3;
01353     union { __m128 v; float s[4]; } tmp;
01354     f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
01355     rangeInv = 1.0f / ( zNear - zFar );
01356     zero = _mm_setzero_ps();
01357     tmp.v = zero;
01358     tmp.s[0] = f / aspect;
01359     col0 = tmp.v;
01360     tmp.v = zero;
01361     tmp.s[1] = f;
01362     col1 = tmp.v;
01363     tmp.v = zero;
01364     tmp.s[2] = ( zNear + zFar ) * rangeInv;
01365     tmp.s[3] = -1.0f;
01366     col2 = tmp.v;
01367     tmp.v = zero;
01368     tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
01369     col3 = tmp.v;
01370     return Matrix4(
01371         Vector4( col0 ),
01372         Vector4( col1 ),
01373         Vector4( col2 ),
01374         Vector4( col3 )
01375     );
01376 }
01377 
01378 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
01379 {
01380     /* function implementation based on code from STIDC SDK:           */
01381     /* --------------------------------------------------------------  */
01382     /* PLEASE DO NOT MODIFY THIS SECTION                               */
01383     /* This prolog section is automatically generated.                 */
01384     /*                                                                 */
01385     /* (C)Copyright                                                    */
01386     /* Sony Computer Entertainment, Inc.,                              */
01387     /* Toshiba Corporation,                                            */
01388     /* International Business Machines Corporation,                    */
01389     /* 2001,2002.                                                      */
01390     /* S/T/I Confidential Information                                  */
01391     /* --------------------------------------------------------------  */
01392     __m128 lbf, rtn;
01393     __m128 diff, sum, inv_diff;
01394     __m128 diagonal, column, near2;
01395     __m128 zero = _mm_setzero_ps();
01396     union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union?
01397     l.s[0] = left;
01398     f.s[0] = zFar;
01399     r.s[0] = right;
01400     n.s[0] = zNear;
01401     b.s[0] = bottom;
01402     t.s[0] = top;
01403     lbf = vec_mergeh( l.v, f.v );
01404     rtn = vec_mergeh( r.v, n.v );
01405     lbf = vec_mergeh( lbf, b.v );
01406     rtn = vec_mergeh( rtn, t.v );
01407     diff = vec_sub( rtn, lbf );
01408     sum  = vec_add( rtn, lbf );
01409     inv_diff = recipf4( diff );
01410     near2 = vec_splat( n.v, 0 );
01411     near2 = vec_add( near2, near2 );
01412     diagonal = vec_mul( near2, inv_diff );
01413     column = vec_mul( sum, inv_diff );
01414         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01415         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01416         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01417         VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
01418     return Matrix4(
01419         Vector4( vec_sel( zero, diagonal, select_x ) ),
01420         Vector4( vec_sel( zero, diagonal, select_y ) ),
01421         Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
01422         Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) )
01423         );
01424 }
01425 
01426 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
01427 {
01428     /* function implementation based on code from STIDC SDK:           */
01429     /* --------------------------------------------------------------  */
01430     /* PLEASE DO NOT MODIFY THIS SECTION                               */
01431     /* This prolog section is automatically generated.                 */
01432     /*                                                                 */
01433     /* (C)Copyright                                                    */
01434     /* Sony Computer Entertainment, Inc.,                              */
01435     /* Toshiba Corporation,                                            */
01436     /* International Business Machines Corporation,                    */
01437     /* 2001,2002.                                                      */
01438     /* S/T/I Confidential Information                                  */
01439     /* --------------------------------------------------------------  */
01440     __m128 lbf, rtn;
01441     __m128 diff, sum, inv_diff, neg_inv_diff;
01442     __m128 diagonal, column;
01443     __m128 zero = _mm_setzero_ps();
01444     union { __m128 v; float s[4]; } l, f, r, n, b, t;
01445     l.s[0] = left;
01446     f.s[0] = zFar;
01447     r.s[0] = right;
01448     n.s[0] = zNear;
01449     b.s[0] = bottom;
01450     t.s[0] = top;
01451     lbf = vec_mergeh( l.v, f.v );
01452     rtn = vec_mergeh( r.v, n.v );
01453     lbf = vec_mergeh( lbf, b.v );
01454     rtn = vec_mergeh( rtn, t.v );
01455     diff = vec_sub( rtn, lbf );
01456     sum  = vec_add( rtn, lbf );
01457     inv_diff = recipf4( diff );
01458     neg_inv_diff = negatef4( inv_diff );
01459     diagonal = vec_add( inv_diff, inv_diff );
01460         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01461         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01462         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01463         VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
01464     column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
01465     return Matrix4(
01466         Vector4( vec_sel( zero, diagonal, select_x ) ),
01467         Vector4( vec_sel( zero, diagonal, select_y ) ),
01468         Vector4( vec_sel( zero, diagonal, select_z ) ),
01469         Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
01470     );
01471 }
01472 
01473 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
01474 {
01475     return Matrix4(
01476         select( mat0.getCol0(), mat1.getCol0(), select1 ),
01477         select( mat0.getCol1(), mat1.getCol1(), select1 ),
01478         select( mat0.getCol2(), mat1.getCol2(), select1 ),
01479         select( mat0.getCol3(), mat1.getCol3(), select1 )
01480     );
01481 }
01482 
01483 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
01484 {
01485     return Matrix4(
01486         select( mat0.getCol0(), mat1.getCol0(), select1 ),
01487         select( mat0.getCol1(), mat1.getCol1(), select1 ),
01488         select( mat0.getCol2(), mat1.getCol2(), select1 ),
01489         select( mat0.getCol3(), mat1.getCol3(), select1 )
01490     );
01491 }
01492 
01493 #ifdef _VECTORMATH_DEBUG
01494 
01495 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat )
01496 {
01497     print( mat.getRow( 0 ) );
01498     print( mat.getRow( 1 ) );
01499     print( mat.getRow( 2 ) );
01500     print( mat.getRow( 3 ) );
01501 }
01502 
01503 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name )
01504 {
01505     printf("%s:\n", name);
01506     print( mat );
01507 }
01508 
01509 #endif
01510 
01511 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm )
01512 {
01513     mCol0 = tfrm.mCol0;
01514     mCol1 = tfrm.mCol1;
01515     mCol2 = tfrm.mCol2;
01516     mCol3 = tfrm.mCol3;
01517 }
01518 
01519 VECTORMATH_FORCE_INLINE Transform3::Transform3( float scalar )
01520 {
01521     mCol0 = Vector3( scalar );
01522     mCol1 = Vector3( scalar );
01523     mCol2 = Vector3( scalar );
01524     mCol3 = Vector3( scalar );
01525 }
01526 
01527 VECTORMATH_FORCE_INLINE Transform3::Transform3( const floatInVec &scalar )
01528 {
01529     mCol0 = Vector3( scalar );
01530     mCol1 = Vector3( scalar );
01531     mCol2 = Vector3( scalar );
01532     mCol3 = Vector3( scalar );
01533 }
01534 
01535 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
01536 {
01537     mCol0 = _col0;
01538     mCol1 = _col1;
01539     mCol2 = _col2;
01540     mCol3 = _col3;
01541 }
01542 
01543 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
01544 {
01545     this->setUpper3x3( tfrm );
01546     this->setTranslation( translateVec );
01547 }
01548 
01549 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
01550 {
01551     this->setUpper3x3( Matrix3( unitQuat ) );
01552     this->setTranslation( translateVec );
01553 }
01554 
01555 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 )
01556 {
01557     mCol0 = _col0;
01558     return *this;
01559 }
01560 
01561 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 )
01562 {
01563     mCol1 = _col1;
01564     return *this;
01565 }
01566 
01567 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 )
01568 {
01569     mCol2 = _col2;
01570     return *this;
01571 }
01572 
01573 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 )
01574 {
01575     mCol3 = _col3;
01576     return *this;
01577 }
01578 
01579 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec )
01580 {
01581     *(&mCol0 + col) = vec;
01582     return *this;
01583 }
01584 
01585 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec )
01586 {
01587     mCol0.setElem( row, vec.getElem( 0 ) );
01588     mCol1.setElem( row, vec.getElem( 1 ) );
01589     mCol2.setElem( row, vec.getElem( 2 ) );
01590     mCol3.setElem( row, vec.getElem( 3 ) );
01591     return *this;
01592 }
01593 
01594 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val )
01595 {
01596     (*this)[col].setElem(row, val);
01597     return *this;
01598 }
01599 
01600 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
01601 {
01602     Vector3 tmpV3_0;
01603     tmpV3_0 = this->getCol( col );
01604     tmpV3_0.setElem( row, val );
01605     this->setCol( col, tmpV3_0 );
01606     return *this;
01607 }
01608 
01609 VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const
01610 {
01611     return this->getCol( col ).getElem( row );
01612 }
01613 
01614 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const
01615 {
01616     return mCol0;
01617 }
01618 
01619 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const
01620 {
01621     return mCol1;
01622 }
01623 
01624 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const
01625 {
01626     return mCol2;
01627 }
01628 
01629 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const
01630 {
01631     return mCol3;
01632 }
01633 
01634 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const
01635 {
01636     return *(&mCol0 + col);
01637 }
01638 
01639 VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const
01640 {
01641     return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
01642 }
01643 
01644 VECTORMATH_FORCE_INLINE Vector3 & Transform3::operator []( int col )
01645 {
01646     return *(&mCol0 + col);
01647 }
01648 
01649 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const
01650 {
01651     return *(&mCol0 + col);
01652 }
01653 
01654 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm )
01655 {
01656     mCol0 = tfrm.mCol0;
01657     mCol1 = tfrm.mCol1;
01658     mCol2 = tfrm.mCol2;
01659     mCol3 = tfrm.mCol3;
01660     return *this;
01661 }
01662 
01663 VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm )
01664 {
01665     __m128 inv0, inv1, inv2, inv3;
01666     __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
01667     __m128 xxxx, yyyy, zzzz;
01668     tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
01669     tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
01670     tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
01671     inv3 = negatef4( tfrm.getCol3().get128() );
01672     dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
01673     dot = vec_splat( dot, 0 );
01674     invdet = recipf4( dot );
01675     tmp3 = vec_mergeh( tmp0, tmp2 );
01676     tmp4 = vec_mergel( tmp0, tmp2 );
01677     inv0 = vec_mergeh( tmp3, tmp1 );
01678     xxxx = vec_splat( inv3, 0 );
01679     //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
01680         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01681         inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
01682         inv1 = vec_sel(inv1, tmp1, select_y);
01683     //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
01684         inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
01685         inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
01686     yyyy = vec_splat( inv3, 1 );
01687     zzzz = vec_splat( inv3, 2 );
01688     inv3 = vec_mul( inv0, xxxx );
01689     inv3 = vec_madd( inv1, yyyy, inv3 );
01690     inv3 = vec_madd( inv2, zzzz, inv3 );
01691     inv0 = vec_mul( inv0, invdet );
01692     inv1 = vec_mul( inv1, invdet );
01693     inv2 = vec_mul( inv2, invdet );
01694     inv3 = vec_mul( inv3, invdet );
01695     return Transform3(
01696         Vector3( inv0 ),
01697         Vector3( inv1 ),
01698         Vector3( inv2 ),
01699         Vector3( inv3 )
01700     );
01701 }
01702 
01703 VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm )
01704 {
01705     __m128 inv0, inv1, inv2, inv3;
01706     __m128 tmp0, tmp1;
01707     __m128 xxxx, yyyy, zzzz;
01708     tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
01709     tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
01710     inv3 = negatef4( tfrm.getCol3().get128() );
01711     inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
01712     xxxx = vec_splat( inv3, 0 );
01713     //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
01714         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01715         inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
01716         inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
01717     //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
01718         inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
01719         inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
01720     yyyy = vec_splat( inv3, 1 );
01721     zzzz = vec_splat( inv3, 2 );
01722     inv3 = vec_mul( inv0, xxxx );
01723     inv3 = vec_madd( inv1, yyyy, inv3 );
01724     inv3 = vec_madd( inv2, zzzz, inv3 );
01725     return Transform3(
01726         Vector3( inv0 ),
01727         Vector3( inv1 ),
01728         Vector3( inv2 ),
01729         Vector3( inv3 )
01730     );
01731 }
01732 
01733 VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm )
01734 {
01735     return Transform3(
01736         absPerElem( tfrm.getCol0() ),
01737         absPerElem( tfrm.getCol1() ),
01738         absPerElem( tfrm.getCol2() ),
01739         absPerElem( tfrm.getCol3() )
01740     );
01741 }
01742 
01743 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const
01744 {
01745     __m128 res;
01746     __m128 xxxx, yyyy, zzzz;
01747     xxxx = vec_splat( vec.get128(), 0 );
01748     yyyy = vec_splat( vec.get128(), 1 );
01749     zzzz = vec_splat( vec.get128(), 2 );
01750     res = vec_mul( mCol0.get128(), xxxx );
01751     res = vec_madd( mCol1.get128(), yyyy, res );
01752     res = vec_madd( mCol2.get128(), zzzz, res );
01753     return Vector3( res );
01754 }
01755 
01756 VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const
01757 {
01758     __m128 tmp0, tmp1, res;
01759     __m128 xxxx, yyyy, zzzz;
01760     xxxx = vec_splat( pnt.get128(), 0 );
01761     yyyy = vec_splat( pnt.get128(), 1 );
01762     zzzz = vec_splat( pnt.get128(), 2 );
01763     tmp0 = vec_mul( mCol0.get128(), xxxx );
01764     tmp1 = vec_mul( mCol1.get128(), yyyy );
01765     tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
01766     tmp1 = vec_add( mCol3.get128(), tmp1 );
01767     res = vec_add( tmp0, tmp1 );
01768     return Point3( res );
01769 }
01770 
01771 VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
01772 {
01773     return Transform3(
01774         ( *this * tfrm.mCol0 ),
01775         ( *this * tfrm.mCol1 ),
01776         ( *this * tfrm.mCol2 ),
01777         Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
01778     );
01779 }
01780 
01781 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm )
01782 {
01783     *this = *this * tfrm;
01784     return *this;
01785 }
01786 
01787 VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
01788 {
01789     return Transform3(
01790         mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
01791         mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
01792         mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
01793         mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
01794     );
01795 }
01796 
01797 VECTORMATH_FORCE_INLINE const Transform3 Transform3::identity( )
01798 {
01799     return Transform3(
01800         Vector3::xAxis( ),
01801         Vector3::yAxis( ),
01802         Vector3::zAxis( ),
01803         Vector3( 0.0f )
01804     );
01805 }
01806 
01807 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
01808 {
01809     mCol0 = tfrm.getCol0();
01810     mCol1 = tfrm.getCol1();
01811     mCol2 = tfrm.getCol2();
01812     return *this;
01813 }
01814 
01815 VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const
01816 {
01817     return Matrix3( mCol0, mCol1, mCol2 );
01818 }
01819 
01820 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
01821 {
01822     mCol3 = translateVec;
01823     return *this;
01824 }
01825 
01826 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getTranslation( ) const
01827 {
01828     return mCol3;
01829 }
01830 
01831 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians )
01832 {
01833     return rotationX( floatInVec(radians) );
01834 }
01835 
01836 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( const floatInVec &radians )
01837 {
01838     __m128 s, c, res1, res2;
01839     __m128 zero;
01840         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01841         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01842     zero = _mm_setzero_ps();
01843     sincosf4( radians.get128(), &s, &c );
01844     res1 = vec_sel( zero, c, select_y );
01845     res1 = vec_sel( res1, s, select_z );
01846     res2 = vec_sel( zero, negatef4(s), select_y );
01847     res2 = vec_sel( res2, c, select_z );
01848     return Transform3(
01849         Vector3::xAxis( ),
01850         Vector3( res1 ),
01851         Vector3( res2 ),
01852         Vector3( _mm_setzero_ps() )
01853     );
01854 }
01855 
01856 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( float radians )
01857 {
01858     return rotationY( floatInVec(radians) );
01859 }
01860 
01861 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( const floatInVec &radians )
01862 {
01863     __m128 s, c, res0, res2;
01864     __m128 zero;
01865         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01866         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01867     zero = _mm_setzero_ps();
01868     sincosf4( radians.get128(), &s, &c );
01869     res0 = vec_sel( zero, c, select_x );
01870     res0 = vec_sel( res0, negatef4(s), select_z );
01871     res2 = vec_sel( zero, s, select_x );
01872     res2 = vec_sel( res2, c, select_z );
01873     return Transform3(
01874         Vector3( res0 ),
01875         Vector3::yAxis( ),
01876         Vector3( res2 ),
01877         Vector3( 0.0f )
01878     );
01879 }
01880 
01881 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( float radians )
01882 {
01883     return rotationZ( floatInVec(radians) );
01884 }
01885 
01886 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( const floatInVec &radians )
01887 {
01888     __m128 s, c, res0, res1;
01889         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01890         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01891     __m128 zero = _mm_setzero_ps();
01892     sincosf4( radians.get128(), &s, &c );
01893     res0 = vec_sel( zero, c, select_x );
01894     res0 = vec_sel( res0, s, select_y );
01895     res1 = vec_sel( zero, negatef4(s), select_x );
01896     res1 = vec_sel( res1, c, select_y );
01897     return Transform3(
01898         Vector3( res0 ),
01899         Vector3( res1 ),
01900         Vector3::zAxis( ),
01901         Vector3( 0.0f )
01902     );
01903 }
01904 
01905 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
01906 {
01907     __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
01908     angles = Vector4( radiansXYZ, 0.0f ).get128();
01909     sincosf4( angles, &s, &c );
01910     negS = negatef4( s );
01911     Z0 = vec_mergel( c, s );
01912     Z1 = vec_mergel( negS, c );
01913         VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
01914     Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
01915         Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
01916         Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
01917     X0 = vec_splat( s, 0 );
01918     X1 = vec_splat( c, 0 );
01919     tmp = vec_mul( Z0, Y1 );
01920     return Transform3(
01921         Vector3( vec_mul( Z0, Y0 ) ),
01922         Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
01923         Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
01924         Vector3( 0.0f )
01925     );
01926 }
01927 
01928 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
01929 {
01930     return rotation( floatInVec(radians), unitVec );
01931 }
01932 
01933 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
01934 {
01935     return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
01936 }
01937 
01938 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const Quat &unitQuat )
01939 {
01940     return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
01941 }
01942 
01943 VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec )
01944 {
01945     __m128 zero = _mm_setzero_ps();
01946         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
01947         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
01948         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
01949     return Transform3(
01950         Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
01951         Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
01952         Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
01953         Vector3( 0.0f )
01954     );
01955 }
01956 
01957 VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
01958 {
01959     return Transform3(
01960         ( tfrm.getCol0() * scaleVec.getX( ) ),
01961         ( tfrm.getCol1() * scaleVec.getY( ) ),
01962         ( tfrm.getCol2() * scaleVec.getZ( ) ),
01963         tfrm.getCol3()
01964     );
01965 }
01966 
01967 VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
01968 {
01969     return Transform3(
01970         mulPerElem( tfrm.getCol0(), scaleVec ),
01971         mulPerElem( tfrm.getCol1(), scaleVec ),
01972         mulPerElem( tfrm.getCol2(), scaleVec ),
01973         mulPerElem( tfrm.getCol3(), scaleVec )
01974     );
01975 }
01976 
01977 VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec )
01978 {
01979     return Transform3(
01980         Vector3::xAxis( ),
01981         Vector3::yAxis( ),
01982         Vector3::zAxis( ),
01983         translateVec
01984     );
01985 }
01986 
01987 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
01988 {
01989     return Transform3(
01990         select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
01991         select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
01992         select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
01993         select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
01994     );
01995 }
01996 
01997 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
01998 {
01999     return Transform3(
02000         select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
02001         select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
02002         select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
02003         select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
02004     );
02005 }
02006 
02007 #ifdef _VECTORMATH_DEBUG
02008 
02009 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm )
02010 {
02011     print( tfrm.getRow( 0 ) );
02012     print( tfrm.getRow( 1 ) );
02013     print( tfrm.getRow( 2 ) );
02014 }
02015 
02016 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name )
02017 {
02018     printf("%s:\n", name);
02019     print( tfrm );
02020 }
02021 
02022 #endif
02023 
02024 VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm )
02025 {
02026     __m128 res;
02027     __m128 col0, col1, col2;
02028     __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
02029     __m128 zy_xz_yx, yz_zx_xy, sum, diff;
02030     __m128 radicand, invSqrt, scale;
02031     __m128 res0, res1, res2, res3;
02032     __m128 xx, yy, zz;
02033         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
02034         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
02035         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
02036         VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
02037 
02038     col0 = tfrm.getCol0().get128();
02039     col1 = tfrm.getCol1().get128();
02040     col2 = tfrm.getCol2().get128();
02041 
02042     /* four cases: */
02043     /* trace > 0 */
02044     /* else */
02045     /*    xx largest diagonal element */
02046     /*    yy largest diagonal element */
02047     /*    zz largest diagonal element */
02048 
02049     /* compute quaternion for each case */
02050 
02051     xx_yy = vec_sel( col0, col1, select_y );
02052     //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
02053     //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
02054     //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
02055     xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
02056     xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
02057     yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
02058     zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
02059 
02060     diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
02061     diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
02062     radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
02063  //   invSqrt = rsqrtf4( radicand );
02064         invSqrt = newtonrapson_rsqrt4( radicand );
02065 
02066         
02067 
02068     zy_xz_yx = vec_sel( col0, col1, select_z );                                                                 // zy_xz_yx = 00 01 12 03
02069     //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
02070         zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );          // zy_xz_yx = 12 12 01 00
02071     zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y );                               // zy_xz_yx = 12 20 01 00
02072     yz_zx_xy = vec_sel( col0, col1, select_x );                                                                 // yz_zx_xy = 10 01 02 03
02073     //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
02074         yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );          // yz_zx_xy = 10 02 10 10
02075         yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x );                           // yz_zx_xy = 21 02 10 10
02076 
02077     sum = vec_add( zy_xz_yx, yz_zx_xy );
02078     diff = vec_sub( zy_xz_yx, yz_zx_xy );
02079 
02080     scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );
02081 
02082     //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
02083         res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
02084         res0 = vec_sel( res0, vec_splat(diff, 0), select_w );  // TODO: Ck
02085     //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
02086         res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
02087         res1 = vec_sel( res1, vec_splat(diff, 1), select_w );  // TODO: Ck
02088     //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
02089         res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
02090         res2 = vec_sel( res2, vec_splat(diff, 2), select_w );  // TODO: Ck
02091     res3 = diff;
02092     res0 = vec_sel( res0, radicand, select_x );
02093     res1 = vec_sel( res1, radicand, select_y );
02094     res2 = vec_sel( res2, radicand, select_z );
02095     res3 = vec_sel( res3, radicand, select_w );
02096     res0 = vec_mul( res0, vec_splat( scale, 0 ) );
02097     res1 = vec_mul( res1, vec_splat( scale, 1 ) );
02098     res2 = vec_mul( res2, vec_splat( scale, 2 ) );
02099     res3 = vec_mul( res3, vec_splat( scale, 3 ) );
02100 
02101     /* determine case and select answer */
02102 
02103     xx = vec_splat( col0, 0 );
02104     yy = vec_splat( col1, 1 );
02105     zz = vec_splat( col2, 2 );
02106     res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
02107     res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
02108     res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );
02109     mVec128 = res;
02110 }
02111 
02112 VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
02113 {
02114     return Matrix3(
02115         ( tfrm0 * tfrm1.getX( ) ),
02116         ( tfrm0 * tfrm1.getY( ) ),
02117         ( tfrm0 * tfrm1.getZ( ) )
02118     );
02119 }
02120 
02121 VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
02122 {
02123     return Matrix4(
02124         ( tfrm0 * tfrm1.getX( ) ),
02125         ( tfrm0 * tfrm1.getY( ) ),
02126         ( tfrm0 * tfrm1.getZ( ) ),
02127         ( tfrm0 * tfrm1.getW( ) )
02128     );
02129 }
02130 
02131 VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
02132 {
02133     __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
02134     __m128 xxxx, yyyy, zzzz;
02135     tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
02136     tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
02137     xxxx = vec_splat( vec.get128(), 0 );
02138     mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
02139     //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
02140         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
02141         mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
02142         mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
02143     //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
02144         mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
02145         mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
02146     yyyy = vec_splat( vec.get128(), 1 );
02147     res = vec_mul( mcol0, xxxx );
02148     zzzz = vec_splat( vec.get128(), 2 );
02149     res = vec_madd( mcol1, yyyy, res );
02150     res = vec_madd( mcol2, zzzz, res );
02151     return Vector3( res );
02152 }
02153 
02154 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec )
02155 {
02156     __m128 neg, res0, res1, res2;
02157     neg = negatef4( vec.get128() );
02158         VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
02159         VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
02160         VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
02161     //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
02162         res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
02163         res0 = vec_sel(res0, vec_splat(neg, 1), select_z);
02164     //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
02165         res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);
02166     //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
02167         res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
02168         res2 = vec_sel(res2, vec_splat(neg, 0), select_y);
02169         VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
02170         VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
02171         VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
02172     res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );
02173     res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
02174     res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
02175     return Matrix3(
02176         Vector3( res0 ),
02177         Vector3( res1 ),
02178         Vector3( res2 )
02179     );
02180 }
02181 
02182 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
02183 {
02184     return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
02185 }
02186 
02187 } // namespace Aos
02188 } // namespace Vectormath
02189 
02190 #endif