news | people | for developers | documentation | downloads

_matrix44_sse.h

Go to the documentation of this file.
00001 #ifndef _MATRIX44_SSE_H
00002 #define _MATRIX44_SSE_H
00003 //------------------------------------------------------------------------------
00012 #include <xmmintrin.h>
00013 #include <memory.h>
00014 #include "mathlib/_vector3_sse.h"
00015 #include "mathlib/_vector4_sse.h"
00016 #include "mathlib/quaternion.h"
00017 #include "mathlib/euler.h"
00018 #include "mathlib/matrixdefs.h"
00019 
// 4x4 identity matrix as 16 consecutive floats. The default constructor and
// ident() memcpy this table straight into the matrix storage.
static float _matrix44_sse_ident[16] =
{
    1.0f, 0.0f, 0.0f, 0.0f,     // row 1
    0.0f, 1.0f, 0.0f, 0.0f,     // row 2
    0.0f, 0.0f, 1.0f, 0.0f,     // row 3
    0.0f, 0.0f, 0.0f, 1.0f      // row 4
};
00027 
00028 //------------------------------------------------------------------------------
00029 class _matrix44_sse
00030 {
00031 public:
00032     static const _matrix44_sse identity;
00033     static const _matrix44_sse ortho;
00034 
00035 public:
00037     _matrix44_sse();
00039     _matrix44_sse(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4);
00041     _matrix44_sse(const _matrix44_sse& m1);
00043     _matrix44_sse(float _m11, float _m12, float _m13, float _m14,
00044                   float _m21, float _m22, float _m23, float _m24,
00045                   float _m31, float _m32, float _m33, float _m34,
00046                   float _m41, float _m42, float _m43, float _m44);
00048     _matrix44_sse(const quaternion& q);
00050     _matrix44_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4);
00052     quaternion get_quaternion() const;
00054     void set(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4);
00056     void set(const _matrix44_sse& m1);
00058     void set(float _m11, float _m12, float _m13, float _m14,
00059              float _m21, float _m22, float _m23, float _m24,
00060              float _m31, float _m32, float _m33, float _m34,
00061              float _m41, float _m42, float _m43, float _m44);
00063     void set(const quaternion& q);
00065     void ident();
00067     void transpose();
00069     float det();
00071     void invert();
00073     void invert_simple();
00075     void mult_simple(const _matrix44_sse& m1);
00077     _vector3_sse transform_coord(const _vector3_sse& v) const;
00079     _vector3_sse x_component() const;
00081     _vector3_sse y_component() const;
00083     _vector3_sse z_component() const;
00085     _vector3_sse pos_component() const;
00087     void rotate_x(const float a);
00089     void rotate_y(const float a);
00091     void rotate_z(const float a);
00093     void rotate(const _vector3_sse& vec, float a);
00095     void translate(const _vector3_sse& t);
00097     void set_translation(const _vector3_sse& t);
00099     void scale(const _vector3_sse& s);
00101     void lookat(const _vector3_sse& to, const _vector3_sse& up);
00103     void billboard(const _vector3_sse& to, const _vector3_sse& up);
00105     void operator *= (const _matrix44_sse& m1);
00107     void mult(const _vector4_sse& src, _vector4_sse& dst) const;
00109     void mult(const _vector3_sse& src, _vector3_sse& dst) const;
00110 
00111     union
00112     {
00113         struct
00114         {
00115             __m128 m1;
00116             __m128 m2;
00117             __m128 m3;
00118             __m128 m4;
00119         };
00120         struct
00121         {
00122             float m[4][4];
00123         };
00124     };
00125 };
00126 
00127 //------------------------------------------------------------------------------
00130 inline
00131 _matrix44_sse::_matrix44_sse()
00132 {
00133     memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00134 }
00135 
00136 //------------------------------------------------------------------------------
00139 inline
00140 _matrix44_sse::_matrix44_sse(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4) :
00141     m1(v1.m128), m2(v2.m128), m3(v3.m128), m4(v4.m128)
00142 {
00143     // empty
00144 }
00145 
00146 //------------------------------------------------------------------------------
00149 inline
00150 _matrix44_sse::_matrix44_sse(const _matrix44_sse& mx) :
00151     m1(mx.m1), m2(mx.m2), m3(mx.m3), m4(mx.m4)
00152 {
00153     // empty
00154 }
00155 
00156 //------------------------------------------------------------------------------
00159 inline
00160 _matrix44_sse::_matrix44_sse(float _m11, float _m12, float _m13, float _m14,
00161                              float _m21, float _m22, float _m23, float _m24,
00162                              float _m31, float _m32, float _m33, float _m34,
00163                              float _m41, float _m42, float _m43, float _m44)
00164 {
00165     m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00166     m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00167     m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00168     m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00169 }
00170 
00171 //------------------------------------------------------------------------------
00175 inline
00176 _matrix44_sse::_matrix44_sse(const quaternion& q)
00177 {
00178     float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00179     x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00180     xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
00181     yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
00182     wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
00183 
00184     m[0][0] = 1.0f - (yy + zz);
00185     m[1][0] = xy - wz;
00186     m[2][0] = xz + wy;
00187 
00188     m[0][1] = xy + wz;
00189     m[1][1] = 1.0f - (xx + zz);
00190     m[2][1] = yz - wx;
00191 
00192     m[0][2] = xz - wy;
00193     m[1][2] = yz + wx;
00194     m[2][2] = 1.0f - (xx + yy);
00195 
00196     m[3][0] = m[3][1] = m[3][2] = 0.0f;
00197     m[0][3] = m[1][3] = m[2][3] = 0.0f;
00198     m[3][3] = 1.0f;
00199 }
00200 
00201 //------------------------------------------------------------------------------
00204 inline
00205 _matrix44_sse::_matrix44_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4) :
00206     m1(_m1), m2(_m2), m3(_m3), m4(_m4)
00207 {
00208     // empty
00209 }
00210 
00211 //------------------------------------------------------------------------------
00218 inline
00219 quaternion
00220 _matrix44_sse::get_quaternion() const
00221 {
00222     float qa[4];
00223     float tr = m[0][0] + m[1][1] + m[2][2];
00224     if (tr > 0.0f)
00225     {
00226         float s = n_sqrt (tr + 1.0f);
00227         qa[3] = s * 0.5f;
00228         s = 0.5f / s;
00229         qa[0] = (m[1][2] - m[2][1]) * s;
00230         qa[1] = (m[2][0] - m[0][2]) * s;
00231         qa[2] = (m[0][1] - m[1][0]) * s;
00232     }
00233     else
00234     {
00235         int i, j, k, nxt[3] = {1,2,0};
00236         i = 0;
00237         if (m[1][1] > m[0][0]) i=1;
00238         if (m[2][2] > m[i][i]) i=2;
00239         j = nxt[i];
00240         k = nxt[j];
00241         float s = n_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00242         qa[i] = s * 0.5f;
00243         s = 0.5f / s;
00244         qa[3] = (m[j][k] - m[k][j])* s;
00245         qa[j] = (m[i][j] + m[j][i]) * s;
00246         qa[k] = (m[i][k] + m[k][i]) * s;
00247     }
00248     quaternion q(qa[0],qa[1],qa[2],qa[3]);
00249     return q;
00250 }
00251 
00252 //------------------------------------------------------------------------------
00255 inline
00256 void
00257 _matrix44_sse::set(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4)
00258 {
00259     m1 = v1.m128;
00260     m2 = v2.m128;
00261     m3 = v3.m128;
00262     m4 = v4.m128;
00263 }
00264 
00265 //------------------------------------------------------------------------------
00268 inline
00269 void
00270 _matrix44_sse::set(const _matrix44_sse& mx)
00271 {
00272     m1 = mx.m1;
00273     m2 = mx.m2;
00274     m3 = mx.m3;
00275     m4 = mx.m4;
00276 }
00277 
00278 //------------------------------------------------------------------------------
00281 inline
00282 void
00283 _matrix44_sse::set(float _m11, float _m12, float _m13, float _m14,
00284                    float _m21, float _m22, float _m23, float _m24,
00285                    float _m31, float _m32, float _m33, float _m34,
00286                    float _m41, float _m42, float _m43, float _m44)
00287 {
00288     m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00289     m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00290     m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00291     m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00292 }
00293 
00294 //------------------------------------------------------------------------------
00298 inline
00299 void
00300 _matrix44_sse::set(const quaternion& q)
00301 {
00302     float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00303     x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00304     xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
00305     yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
00306     wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
00307 
00308     m[0][0] = 1.0f - (yy + zz);
00309     m[1][0] = xy - wz;
00310     m[2][0] = xz + wy;
00311 
00312     m[0][1] = xy + wz;
00313     m[1][1] = 1.0f - (xx + zz);
00314     m[2][1] = yz - wx;
00315 
00316     m[0][2] = xz - wy;
00317     m[1][2] = yz + wx;
00318     m[2][2] = 1.0f - (xx + yy);
00319 
00320     m[3][0] = m[3][1] = m[3][2] = 0.0f;
00321     m[0][3] = m[1][3] = m[2][3] = 0.0f;
00322     m[3][3] = 1.0f;
00323 }
00324 
00325 //------------------------------------------------------------------------------
00328 inline
00329 void
00330 _matrix44_sse::ident()
00331 {
00332     memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00333 }
00334 
00335 //------------------------------------------------------------------------------
00338 inline
00339 void
00340 _matrix44_sse::transpose()
00341 {
00342     _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
00343 }
00344 
00345 //------------------------------------------------------------------------------
00349 inline
00350 float
00351 _matrix44_sse::det()
00352 {
00353     return
00354         (M11 * M22 - M12 * M21) * (M33 * M44 - M34 * M43)
00355        -(M11 * M23 - M13 * M21) * (M32 * M44 - M34 * M42)
00356        +(M11 * M24 - M14 * M21) * (M32 * M43 - M33 * M42)
00357        +(M12 * M23 - M13 * M22) * (M31 * M44 - M34 * M41)
00358        -(M12 * M24 - M14 * M22) * (M31 * M43 - M33 * M41)
00359        +(M13 * M24 - M14 * M23) * (M31 * M42 - M32 * M41);
00360 }
00361 
00362 //------------------------------------------------------------------------------
00366 inline
00367 void
00368 _matrix44_sse::invert()
00369 {
00370     float* src = &(m[0][0]);
00371 
00372     __m128 minor0, minor1, minor2, minor3;
00373     __m128 row0, row1, row2, row3;
00374     __m128 det, tmp1;
00375 
00376     tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4));
00377     row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12));
00378 
00379     row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
00380     row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
00381 
00382     tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
00383     row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14));
00384 
00385     row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
00386     row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
00387 
00388     tmp1 = _mm_mul_ps(row2, row3);
00389     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00390 
00391     minor0 = _mm_mul_ps(row1, tmp1);
00392     minor1 = _mm_mul_ps(row0, tmp1);
00393 
00394     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00395 
00396     minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
00397     minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
00398     minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E);
00399 
00400     tmp1 = _mm_mul_ps(row1, row2);
00401     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00402 
00403     minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
00404     minor3 = _mm_mul_ps(row0, tmp1);
00405 
00406     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00407 
00408     minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
00409     minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
00410     minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E);
00411 
00412     tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
00413     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00414     row2 = _mm_shuffle_ps(row2, row2, 0x4E);
00415 
00416     minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
00417     minor2 = _mm_mul_ps(row0, tmp1);
00418 
00419     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00420 
00421     minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
00422     minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
00423     minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E);
00424 
00425     tmp1 = _mm_mul_ps(row0, row1);
00426     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00427 
00428     minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
00429     minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
00430 
00431     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00432 
00433     minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
00434     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
00435 
00436     tmp1 = _mm_mul_ps(row0, row3);
00437     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00438 
00439     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
00440     minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
00441 
00442     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00443 
00444     minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
00445     minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
00446 
00447     tmp1 = _mm_mul_ps(row0, row2);
00448     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00449 
00450     minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
00451     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
00452 
00453     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00454 
00455     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
00456     minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
00457 
00458     det = _mm_mul_ps(row0, minor0);
00459     det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
00460     det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
00461     tmp1 = _mm_rcp_ss(det);
00462 
00463     det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
00464     det = _mm_shuffle_ps(det, det, 0x00);
00465 
00466     minor0 = _mm_mul_ps(det, minor0);
00467     _mm_storel_pi((__m64*)(src), minor0);
00468     _mm_storeh_pi((__m64*)(src+2), minor0);
00469 
00470     minor1 = _mm_mul_ps(det, minor1);
00471     _mm_storel_pi((__m64*)(src+4), minor1);
00472     _mm_storeh_pi((__m64*)(src+6), minor1);
00473 
00474     minor2 = _mm_mul_ps(det, minor2);
00475     _mm_storel_pi((__m64*)(src+ 8), minor2);
00476     _mm_storeh_pi((__m64*)(src+10), minor2);
00477 
00478     minor3 = _mm_mul_ps(det, minor3);
00479     _mm_storel_pi((__m64*)(src+12), minor3);
00480     _mm_storeh_pi((__m64*)(src+14), minor3);
00481 }
00482 
00483 //------------------------------------------------------------------------------
00491 inline
00492 void
00493 _matrix44_sse::invert_simple()
00494 {
00495     float s = det();
00496     if (s == 0.0f) return;
00497     s = 1.0f/s;
00498     this->set(
00499         s * ((M22 * M33) - (M23 * M32)),
00500         s * ((M32 * M13) - (M33 * M12)),
00501         s * ((M12 * M23) - (M13 * M22)),
00502         0.0f,
00503         s * ((M23 * M31) - (M21 * M33)),
00504         s * ((M33 * M11) - (M31 * M13)),
00505         s * ((M13 * M21) - (M11 * M23)),
00506         0.0f,
00507         s * ((M21 * M32) - (M22 * M31)),
00508         s * ((M31 * M12) - (M32 * M11)),
00509         s * ((M11 * M22) - (M12 * M21)),
00510         0.0f,
00511         s * (M21*(M33*M42 - M32*M43) + M22*(M31*M43 - M33*M41) + M23*(M32*M41 - M31*M42)),
00512         s * (M31*(M13*M42 - M12*M43) + M32*(M11*M43 - M13*M41) + M33*(M12*M41 - M11*M42)),
00513         s * (M41*(M13*M22 - M12*M23) + M42*(M11*M23 - M13*M21) + M43*(M12*M21 - M11*M22)),
00514         1.0f);
00515 }
00516 
00517 //------------------------------------------------------------------------------
00525 inline
00526 void
00527 _matrix44_sse::mult_simple(const _matrix44_sse& mx)
00528 {
00529     m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00530     m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00531     m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00532     m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00533 }
00534 
00535 //------------------------------------------------------------------------------
00541 inline
00542 _vector3_sse
00543 _matrix44_sse::transform_coord(const _vector3_sse& v) const
00544 {
00545     float d = 1.0f / (M14*v.x + M24*v.y + M34*v.z + M44);
00546     return _vector3_sse(
00547         (M11*v.x + M21*v.y + M31*v.z + M41) * d,
00548         (M12*v.x + M22*v.y + M32*v.z + M42) * d,
00549         (M13*v.x + M23*v.y + M33*v.z + M43) * d);
00550 }
00551 
00552 //------------------------------------------------------------------------------
00555 inline
00556 _vector3_sse
00557 _matrix44_sse::x_component() const
00558 {
00559     _vector3_sse v(m1);
00560     return v;
00561 }
00562 
00563 //------------------------------------------------------------------------------
00566 inline
00567 _vector3_sse
00568 _matrix44_sse::y_component() const
00569 {
00570     _vector3_sse v(m2);
00571     return v;
00572 }
00573 
00574 //------------------------------------------------------------------------------
00577 inline
00578 _vector3_sse
00579 _matrix44_sse::z_component() const
00580 {
00581     _vector3_sse v(m3);
00582     return v;
00583 }
00584 
00585 //------------------------------------------------------------------------------
00588 inline
00589 _vector3_sse
00590 _matrix44_sse::pos_component() const
00591 {
00592     _vector3_sse v(M41, M42, M43);
00593     return v;
00594 }
00595 
00596 //------------------------------------------------------------------------------
00600 inline
00601 void
00602 _matrix44_sse::rotate_x(const float a)
00603 {
00604     float c = n_cos(a);
00605     float s = n_sin(a);
00606     int i;
00607     for (i=0; i<4; i++) {
00608         float mi1 = m[i][1];
00609         float mi2 = m[i][2];
00610         m[i][1] = mi1*c + mi2*-s;
00611         m[i][2] = mi1*s + mi2*c;
00612     }
00613 }
00614 
00615 //------------------------------------------------------------------------------
00619 inline
00620 void
00621 _matrix44_sse::rotate_y(const float a)
00622 {
00623     float c = n_cos(a);
00624     float s = n_sin(a);
00625     int i;
00626     for (i=0; i<4; i++) {
00627         float mi0 = m[i][0];
00628         float mi2 = m[i][2];
00629         m[i][0] = mi0*c + mi2*s;
00630         m[i][2] = mi0*-s + mi2*c;
00631     }
00632 }
00633 
00634 //------------------------------------------------------------------------------
00638 inline
00639 void
00640 _matrix44_sse::rotate_z(const float a)
00641 {
00642     float c = n_cos(a);
00643     float s = n_sin(a);
00644     int i;
00645     for (i=0; i<4; i++) {
00646         float mi0 = m[i][0];
00647         float mi1 = m[i][1];
00648         m[i][0] = mi0*c + mi1*-s;
00649         m[i][1] = mi0*s + mi1*c;
00650     }
00651 }
00652 
00653 //------------------------------------------------------------------------------
00656 inline
00657 void
00658 _matrix44_sse::translate(const _vector3_sse& t)
00659 {
00660     m4 = _mm_add_ps(m4, t.m128);
00661 }
00662 
00663 //------------------------------------------------------------------------------
00667 inline
00668 void
00669 _matrix44_sse::set_translation(const _vector3_sse& t)
00670 {
00671     m4 = t.m128;
00672 };
00673 
00674 //------------------------------------------------------------------------------
00677 inline
00678 void
00679 _matrix44_sse::scale(const _vector3_sse& s)
00680 {
00681     // _vector3_sse have the w element set to zero, we need it at 1...
00682     __m128 scale = _mm_add_ps(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f), s.m128);
00683     m1 = _mm_mul_ps(m1, scale);
00684     m2 = _mm_mul_ps(m2, scale);
00685     m3 = _mm_mul_ps(m3, scale);
00686     m4 = _mm_mul_ps(m4, scale);
00687 }
00688 
00689 //------------------------------------------------------------------------------
00692 inline
00693 void
00694 _matrix44_sse::lookat(const _vector3_sse& to, const _vector3_sse& up)
00695 {
00696     _vector3_sse from(M41, M42, M43);
00697     _vector3_sse z(from - to);
00698     z.norm();
00699     _vector3_sse y(up);
00700     _vector3_sse x(y * z);  // x = y cross z
00701     y = z * x;              // y = z cross x
00702     x.norm();
00703     y.norm();
00704 
00705     m1 = x.m128;
00706     m2 = y.m128;
00707     m3 = z.m128;
00708 }
00709 
00710 //------------------------------------------------------------------------------
00713 inline
00714 void
00715 _matrix44_sse::billboard(const _vector3_sse& to, const _vector3_sse& up)
00716 {
00717     _vector3_sse from(M41, M42, M43);
00718     _vector3_sse z(from - to);
00719     z.norm();
00720     _vector3_sse y(up);
00721     _vector3_sse x(y * z);
00722     z = x * y;
00723     x.norm();
00724     y.norm();
00725     z.norm();
00726 
00727     m1 = x.m128;
00728     m2 = y.m128;
00729     m3 = z.m128;
00730 }
00731 
00732 //------------------------------------------------------------------------------
00738 inline
00739 void
00740 _matrix44_sse::operator *= (const _matrix44_sse& mx)
00741 {
00742     m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00743     m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00744     m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00745     m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00746 }
00747 
00748 //------------------------------------------------------------------------------
00752 inline
00753 void
00754 _matrix44_sse::rotate(const _vector3_sse& vec, float a)
00755 {
00756     _vector3_sse v(vec);
00757     v.norm();
00758     float sa = (float) n_sin(a);
00759     float ca = (float) n_cos(a);
00760 
00761     _matrix44_sse rotM;
00762     rotM.M11 = ca + (1.0f - ca) * v.x * v.x;
00763     rotM.M12 = (1.0f - ca) * v.x * v.y - sa * v.z;
00764     rotM.M13 = (1.0f - ca) * v.z * v.x + sa * v.y;
00765     rotM.M21 = (1.0f - ca) * v.x * v.y + sa * v.z;
00766     rotM.M22 = ca + (1.0f - ca) * v.y * v.y;
00767     rotM.M23 = (1.0f - ca) * v.y * v.z - sa * v.x;
00768     rotM.M31 = (1.0f - ca) * v.z * v.x - sa * v.y;
00769     rotM.M32 = (1.0f - ca) * v.y * v.z + sa * v.x;
00770     rotM.M33 = ca + (1.0f - ca) * v.z * v.z;
00771 
00772     (*this) *= rotM;
00773 }
00774 
00775 //------------------------------------------------------------------------------
00780 inline
00781 void
00782 _matrix44_sse::mult(const _vector4_sse& src, _vector4_sse& dst) const
00783 {
00784     dst.m128 = _mm_add_ps(
00785                _mm_add_ps(
00786                _mm_add_ps(
00787                     _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00788                     _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00789                     _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00790                     _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00791 }
00792 
00793 //------------------------------------------------------------------------------
00798 inline
00799 void
00800 _matrix44_sse::mult(const _vector3_sse& src, _vector3_sse& dst) const
00801 {
00802     dst.m128 = _mm_add_ps(
00803                _mm_add_ps(
00804                _mm_add_ps(
00805                     _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00806                     _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00807                     _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00808                     _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00809 }
00810 
00811 //------------------------------------------------------------------------------
00814 static
00815 inline
00816 _matrix44_sse
00817 operator * (const _matrix44_sse& ma, const _matrix44_sse& mb)
00818 {
00819     return _matrix44_sse(
00820         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00821         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00822         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00823         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(3,3,3,3)), mb.m4))
00824     );
00825 }
00826 
00827 //------------------------------------------------------------------------------
00830 static
00831 inline
00832 _vector3_sse operator * (const _matrix44_sse& m, const _vector3_sse& v)
00833 {
00834     return _vector3_sse(
00835         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f)))
00836     );
00837 }
00838 
00839 //------------------------------------------------------------------------------
00842 static
00843 inline
00844 _vector4_sse operator * (const _matrix44_sse& m, const _vector4_sse& v)
00845 {
00846     return _vector4_sse(
00847         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(3,3,3,3))))
00848     );
00849 }
00850 
00851 //------------------------------------------------------------------------------
00852 #endif

Copyright © 1999-2005 by the contributing authors. Ideas, requests, problems: Send feedback.