00001 #ifndef _MATRIX44_SSE_H
00002 #define _MATRIX44_SSE_H
00003
00012 #include <xmmintrin.h>
00013 #include <memory.h>
00014 #include "mathlib/_vector3_sse.h"
00015 #include "mathlib/_vector4_sse.h"
00016 #include "mathlib/quaternion.h"
00017 #include "mathlib/euler.h"
00018 #include "mathlib/matrixdefs.h"
00019
/// Row-major 4x4 identity table; copied into a matrix by the default
/// constructor and by ident().
static float _matrix44_sse_ident[16] =
{
    1.0f, 0.0f, 0.0f, 0.0f,     // row 1
    0.0f, 1.0f, 0.0f, 0.0f,     // row 2
    0.0f, 0.0f, 1.0f, 0.0f,     // row 3
    0.0f, 0.0f, 0.0f, 1.0f      // row 4
};
00027
00028
00029 class _matrix44_sse
00030 {
00031 public:
00032 static const _matrix44_sse identity;
00033 static const _matrix44_sse ortho;
00034
00035 public:
00037 _matrix44_sse();
00039 _matrix44_sse(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4);
00041 _matrix44_sse(const _matrix44_sse& m1);
00043 _matrix44_sse(float _m11, float _m12, float _m13, float _m14,
00044 float _m21, float _m22, float _m23, float _m24,
00045 float _m31, float _m32, float _m33, float _m34,
00046 float _m41, float _m42, float _m43, float _m44);
00048 _matrix44_sse(const quaternion& q);
00050 _matrix44_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4);
00052 quaternion get_quaternion() const;
00054 void set(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4);
00056 void set(const _matrix44_sse& m1);
00058 void set(float _m11, float _m12, float _m13, float _m14,
00059 float _m21, float _m22, float _m23, float _m24,
00060 float _m31, float _m32, float _m33, float _m34,
00061 float _m41, float _m42, float _m43, float _m44);
00063 void set(const quaternion& q);
00065 void ident();
00067 void transpose();
00069 float det();
00071 void invert();
00073 void invert_simple();
00075 void mult_simple(const _matrix44_sse& m1);
00077 _vector3_sse transform_coord(const _vector3_sse& v) const;
00079 _vector3_sse x_component() const;
00081 _vector3_sse y_component() const;
00083 _vector3_sse z_component() const;
00085 _vector3_sse pos_component() const;
00087 void rotate_x(const float a);
00089 void rotate_y(const float a);
00091 void rotate_z(const float a);
00093 void rotate(const _vector3_sse& vec, float a);
00095 void translate(const _vector3_sse& t);
00097 void set_translation(const _vector3_sse& t);
00099 void scale(const _vector3_sse& s);
00101 void lookat(const _vector3_sse& to, const _vector3_sse& up);
00103 void billboard(const _vector3_sse& to, const _vector3_sse& up);
00105 void operator *= (const _matrix44_sse& m1);
00107 void mult(const _vector4_sse& src, _vector4_sse& dst) const;
00109 void mult(const _vector3_sse& src, _vector3_sse& dst) const;
00110
00111 union
00112 {
00113 struct
00114 {
00115 __m128 m1;
00116 __m128 m2;
00117 __m128 m3;
00118 __m128 m4;
00119 };
00120 struct
00121 {
00122 float m[4][4];
00123 };
00124 };
00125 };
00126
00127
00130 inline
00131 _matrix44_sse::_matrix44_sse()
00132 {
00133 memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00134 }
00135
00136
00139 inline
00140 _matrix44_sse::_matrix44_sse(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4) :
00141 m1(v1.m128), m2(v2.m128), m3(v3.m128), m4(v4.m128)
00142 {
00143
00144 }
00145
00146
00149 inline
00150 _matrix44_sse::_matrix44_sse(const _matrix44_sse& mx) :
00151 m1(mx.m1), m2(mx.m2), m3(mx.m3), m4(mx.m4)
00152 {
00153
00154 }
00155
00156
00159 inline
00160 _matrix44_sse::_matrix44_sse(float _m11, float _m12, float _m13, float _m14,
00161 float _m21, float _m22, float _m23, float _m24,
00162 float _m31, float _m32, float _m33, float _m34,
00163 float _m41, float _m42, float _m43, float _m44)
00164 {
00165 m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00166 m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00167 m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00168 m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00169 }
00170
00171
00175 inline
00176 _matrix44_sse::_matrix44_sse(const quaternion& q)
00177 {
00178 float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00179 x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00180 xx = q.x * x2; xy = q.x * y2; xz = q.x * z2;
00181 yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
00182 wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;
00183
00184 m[0][0] = 1.0f - (yy + zz);
00185 m[1][0] = xy - wz;
00186 m[2][0] = xz + wy;
00187
00188 m[0][1] = xy + wz;
00189 m[1][1] = 1.0f - (xx + zz);
00190 m[2][1] = yz - wx;
00191
00192 m[0][2] = xz - wy;
00193 m[1][2] = yz + wx;
00194 m[2][2] = 1.0f - (xx + yy);
00195
00196 m[3][0] = m[3][1] = m[3][2] = 0.0f;
00197 m[0][3] = m[1][3] = m[2][3] = 0.0f;
00198 m[3][3] = 1.0f;
00199 }
00200
00201
00204 inline
00205 _matrix44_sse::_matrix44_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4) :
00206 m1(_m1), m2(_m2), m3(_m3), m4(_m4)
00207 {
00208
00209 }
00210
00211
00218 inline
00219 quaternion
00220 _matrix44_sse::get_quaternion() const
00221 {
00222 float qa[4];
00223 float tr = m[0][0] + m[1][1] + m[2][2];
00224 if (tr > 0.0f)
00225 {
00226 float s = n_sqrt (tr + 1.0f);
00227 qa[3] = s * 0.5f;
00228 s = 0.5f / s;
00229 qa[0] = (m[1][2] - m[2][1]) * s;
00230 qa[1] = (m[2][0] - m[0][2]) * s;
00231 qa[2] = (m[0][1] - m[1][0]) * s;
00232 }
00233 else
00234 {
00235 int i, j, k, nxt[3] = {1,2,0};
00236 i = 0;
00237 if (m[1][1] > m[0][0]) i=1;
00238 if (m[2][2] > m[i][i]) i=2;
00239 j = nxt[i];
00240 k = nxt[j];
00241 float s = n_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00242 qa[i] = s * 0.5f;
00243 s = 0.5f / s;
00244 qa[3] = (m[j][k] - m[k][j])* s;
00245 qa[j] = (m[i][j] + m[j][i]) * s;
00246 qa[k] = (m[i][k] + m[k][i]) * s;
00247 }
00248 quaternion q(qa[0],qa[1],qa[2],qa[3]);
00249 return q;
00250 }
00251
00252
00255 inline
00256 void
00257 _matrix44_sse::set(const _vector4_sse& v1, const _vector4_sse& v2, const _vector4_sse& v3, const _vector4_sse& v4)
00258 {
00259 m1 = v1.m128;
00260 m2 = v2.m128;
00261 m3 = v3.m128;
00262 m4 = v4.m128;
00263 }
00264
00265
00268 inline
00269 void
00270 _matrix44_sse::set(const _matrix44_sse& mx)
00271 {
00272 m1 = mx.m1;
00273 m2 = mx.m2;
00274 m3 = mx.m3;
00275 m4 = mx.m4;
00276 }
00277
00278
00281 inline
00282 void
00283 _matrix44_sse::set(float _m11, float _m12, float _m13, float _m14,
00284 float _m21, float _m22, float _m23, float _m24,
00285 float _m31, float _m32, float _m33, float _m34,
00286 float _m41, float _m42, float _m43, float _m44)
00287 {
00288 m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00289 m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00290 m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00291 m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00292 }
00293
00294
00298 inline
00299 void
00300 _matrix44_sse::set(const quaternion& q)
00301 {
00302 float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00303 x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00304 xx = q.x * x2; xy = q.x * y2; xz = q.x * z2;
00305 yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
00306 wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;
00307
00308 m[0][0] = 1.0f - (yy + zz);
00309 m[1][0] = xy - wz;
00310 m[2][0] = xz + wy;
00311
00312 m[0][1] = xy + wz;
00313 m[1][1] = 1.0f - (xx + zz);
00314 m[2][1] = yz - wx;
00315
00316 m[0][2] = xz - wy;
00317 m[1][2] = yz + wx;
00318 m[2][2] = 1.0f - (xx + yy);
00319
00320 m[3][0] = m[3][1] = m[3][2] = 0.0f;
00321 m[0][3] = m[1][3] = m[2][3] = 0.0f;
00322 m[3][3] = 1.0f;
00323 }
00324
00325
00328 inline
00329 void
00330 _matrix44_sse::ident()
00331 {
00332 memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00333 }
00334
00335
00338 inline
00339 void
00340 _matrix44_sse::transpose()
00341 {
00342 _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
00343 }
00344
00345
00349 inline
00350 float
00351 _matrix44_sse::det()
00352 {
00353 return
00354 (M11 * M22 - M12 * M21) * (M33 * M44 - M34 * M43)
00355 -(M11 * M23 - M13 * M21) * (M32 * M44 - M34 * M42)
00356 +(M11 * M24 - M14 * M21) * (M32 * M43 - M33 * M42)
00357 +(M12 * M23 - M13 * M22) * (M31 * M44 - M34 * M41)
00358 -(M12 * M24 - M14 * M22) * (M31 * M43 - M33 * M41)
00359 +(M13 * M24 - M14 * M23) * (M31 * M42 - M32 * M41);
00360 }
00361
00362
00366 inline
00367 void
00368 _matrix44_sse::invert()
00369 {
00370 float* src = &(m[0][0]);
00371
00372 __m128 minor0, minor1, minor2, minor3;
00373 __m128 row0, row1, row2, row3;
00374 __m128 det, tmp1;
00375
00376 tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4));
00377 row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12));
00378
00379 row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
00380 row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
00381
00382 tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
00383 row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14));
00384
00385 row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
00386 row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
00387
00388 tmp1 = _mm_mul_ps(row2, row3);
00389 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00390
00391 minor0 = _mm_mul_ps(row1, tmp1);
00392 minor1 = _mm_mul_ps(row0, tmp1);
00393
00394 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00395
00396 minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
00397 minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
00398 minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E);
00399
00400 tmp1 = _mm_mul_ps(row1, row2);
00401 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00402
00403 minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
00404 minor3 = _mm_mul_ps(row0, tmp1);
00405
00406 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00407
00408 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
00409 minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
00410 minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E);
00411
00412 tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
00413 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00414 row2 = _mm_shuffle_ps(row2, row2, 0x4E);
00415
00416 minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
00417 minor2 = _mm_mul_ps(row0, tmp1);
00418
00419 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00420
00421 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
00422 minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
00423 minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E);
00424
00425 tmp1 = _mm_mul_ps(row0, row1);
00426 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00427
00428 minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
00429 minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
00430
00431 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00432
00433 minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
00434 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
00435
00436 tmp1 = _mm_mul_ps(row0, row3);
00437 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00438
00439 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
00440 minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
00441
00442 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00443
00444 minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
00445 minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
00446
00447 tmp1 = _mm_mul_ps(row0, row2);
00448 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00449
00450 minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
00451 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
00452
00453 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00454
00455 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
00456 minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
00457
00458 det = _mm_mul_ps(row0, minor0);
00459 det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
00460 det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
00461 tmp1 = _mm_rcp_ss(det);
00462
00463 det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
00464 det = _mm_shuffle_ps(det, det, 0x00);
00465
00466 minor0 = _mm_mul_ps(det, minor0);
00467 _mm_storel_pi((__m64*)(src), minor0);
00468 _mm_storeh_pi((__m64*)(src+2), minor0);
00469
00470 minor1 = _mm_mul_ps(det, minor1);
00471 _mm_storel_pi((__m64*)(src+4), minor1);
00472 _mm_storeh_pi((__m64*)(src+6), minor1);
00473
00474 minor2 = _mm_mul_ps(det, minor2);
00475 _mm_storel_pi((__m64*)(src+ 8), minor2);
00476 _mm_storeh_pi((__m64*)(src+10), minor2);
00477
00478 minor3 = _mm_mul_ps(det, minor3);
00479 _mm_storel_pi((__m64*)(src+12), minor3);
00480 _mm_storeh_pi((__m64*)(src+14), minor3);
00481 }
00482
00483
00491 inline
00492 void
00493 _matrix44_sse::invert_simple()
00494 {
00495 float s = det();
00496 if (s == 0.0f) return;
00497 s = 1.0f/s;
00498 this->set(
00499 s * ((M22 * M33) - (M23 * M32)),
00500 s * ((M32 * M13) - (M33 * M12)),
00501 s * ((M12 * M23) - (M13 * M22)),
00502 0.0f,
00503 s * ((M23 * M31) - (M21 * M33)),
00504 s * ((M33 * M11) - (M31 * M13)),
00505 s * ((M13 * M21) - (M11 * M23)),
00506 0.0f,
00507 s * ((M21 * M32) - (M22 * M31)),
00508 s * ((M31 * M12) - (M32 * M11)),
00509 s * ((M11 * M22) - (M12 * M21)),
00510 0.0f,
00511 s * (M21*(M33*M42 - M32*M43) + M22*(M31*M43 - M33*M41) + M23*(M32*M41 - M31*M42)),
00512 s * (M31*(M13*M42 - M12*M43) + M32*(M11*M43 - M13*M41) + M33*(M12*M41 - M11*M42)),
00513 s * (M41*(M13*M22 - M12*M23) + M42*(M11*M23 - M13*M21) + M43*(M12*M21 - M11*M22)),
00514 1.0f);
00515 }
00516
00517
00525 inline
00526 void
00527 _matrix44_sse::mult_simple(const _matrix44_sse& mx)
00528 {
00529 m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00530 m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00531 m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00532 m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00533 }
00534
00535
00541 inline
00542 _vector3_sse
00543 _matrix44_sse::transform_coord(const _vector3_sse& v) const
00544 {
00545 float d = 1.0f / (M14*v.x + M24*v.y + M34*v.z + M44);
00546 return _vector3_sse(
00547 (M11*v.x + M21*v.y + M31*v.z + M41) * d,
00548 (M12*v.x + M22*v.y + M32*v.z + M42) * d,
00549 (M13*v.x + M23*v.y + M33*v.z + M43) * d);
00550 }
00551
00552
00555 inline
00556 _vector3_sse
00557 _matrix44_sse::x_component() const
00558 {
00559 _vector3_sse v(m1);
00560 return v;
00561 }
00562
00563
00566 inline
00567 _vector3_sse
00568 _matrix44_sse::y_component() const
00569 {
00570 _vector3_sse v(m2);
00571 return v;
00572 }
00573
00574
00577 inline
00578 _vector3_sse
00579 _matrix44_sse::z_component() const
00580 {
00581 _vector3_sse v(m3);
00582 return v;
00583 }
00584
00585
00588 inline
00589 _vector3_sse
00590 _matrix44_sse::pos_component() const
00591 {
00592 _vector3_sse v(M41, M42, M43);
00593 return v;
00594 }
00595
00596
00600 inline
00601 void
00602 _matrix44_sse::rotate_x(const float a)
00603 {
00604 float c = n_cos(a);
00605 float s = n_sin(a);
00606 int i;
00607 for (i=0; i<4; i++) {
00608 float mi1 = m[i][1];
00609 float mi2 = m[i][2];
00610 m[i][1] = mi1*c + mi2*-s;
00611 m[i][2] = mi1*s + mi2*c;
00612 }
00613 }
00614
00615
00619 inline
00620 void
00621 _matrix44_sse::rotate_y(const float a)
00622 {
00623 float c = n_cos(a);
00624 float s = n_sin(a);
00625 int i;
00626 for (i=0; i<4; i++) {
00627 float mi0 = m[i][0];
00628 float mi2 = m[i][2];
00629 m[i][0] = mi0*c + mi2*s;
00630 m[i][2] = mi0*-s + mi2*c;
00631 }
00632 }
00633
00634
00638 inline
00639 void
00640 _matrix44_sse::rotate_z(const float a)
00641 {
00642 float c = n_cos(a);
00643 float s = n_sin(a);
00644 int i;
00645 for (i=0; i<4; i++) {
00646 float mi0 = m[i][0];
00647 float mi1 = m[i][1];
00648 m[i][0] = mi0*c + mi1*-s;
00649 m[i][1] = mi0*s + mi1*c;
00650 }
00651 }
00652
00653
00656 inline
00657 void
00658 _matrix44_sse::translate(const _vector3_sse& t)
00659 {
00660 m4 = _mm_add_ps(m4, t.m128);
00661 }
00662
00663
00667 inline
00668 void
00669 _matrix44_sse::set_translation(const _vector3_sse& t)
00670 {
00671 m4 = t.m128;
00672 };
00673
00674
00677 inline
00678 void
00679 _matrix44_sse::scale(const _vector3_sse& s)
00680 {
00681
00682 __m128 scale = _mm_add_ps(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f), s.m128);
00683 m1 = _mm_mul_ps(m1, scale);
00684 m2 = _mm_mul_ps(m2, scale);
00685 m3 = _mm_mul_ps(m3, scale);
00686 m4 = _mm_mul_ps(m4, scale);
00687 }
00688
00689
00692 inline
00693 void
00694 _matrix44_sse::lookat(const _vector3_sse& to, const _vector3_sse& up)
00695 {
00696 _vector3_sse from(M41, M42, M43);
00697 _vector3_sse z(from - to);
00698 z.norm();
00699 _vector3_sse y(up);
00700 _vector3_sse x(y * z);
00701 y = z * x;
00702 x.norm();
00703 y.norm();
00704
00705 m1 = x.m128;
00706 m2 = y.m128;
00707 m3 = z.m128;
00708 }
00709
00710
00713 inline
00714 void
00715 _matrix44_sse::billboard(const _vector3_sse& to, const _vector3_sse& up)
00716 {
00717 _vector3_sse from(M41, M42, M43);
00718 _vector3_sse z(from - to);
00719 z.norm();
00720 _vector3_sse y(up);
00721 _vector3_sse x(y * z);
00722 z = x * y;
00723 x.norm();
00724 y.norm();
00725 z.norm();
00726
00727 m1 = x.m128;
00728 m2 = y.m128;
00729 m3 = z.m128;
00730 }
00731
00732
00738 inline
00739 void
00740 _matrix44_sse::operator *= (const _matrix44_sse& mx)
00741 {
00742 m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00743 m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00744 m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00745 m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00746 }
00747
00748
00752 inline
00753 void
00754 _matrix44_sse::rotate(const _vector3_sse& vec, float a)
00755 {
00756 _vector3_sse v(vec);
00757 v.norm();
00758 float sa = (float) n_sin(a);
00759 float ca = (float) n_cos(a);
00760
00761 _matrix44_sse rotM;
00762 rotM.M11 = ca + (1.0f - ca) * v.x * v.x;
00763 rotM.M12 = (1.0f - ca) * v.x * v.y - sa * v.z;
00764 rotM.M13 = (1.0f - ca) * v.z * v.x + sa * v.y;
00765 rotM.M21 = (1.0f - ca) * v.x * v.y + sa * v.z;
00766 rotM.M22 = ca + (1.0f - ca) * v.y * v.y;
00767 rotM.M23 = (1.0f - ca) * v.y * v.z - sa * v.x;
00768 rotM.M31 = (1.0f - ca) * v.z * v.x - sa * v.y;
00769 rotM.M32 = (1.0f - ca) * v.y * v.z + sa * v.x;
00770 rotM.M33 = ca + (1.0f - ca) * v.z * v.z;
00771
00772 (*this) *= rotM;
00773 }
00774
00775
00780 inline
00781 void
00782 _matrix44_sse::mult(const _vector4_sse& src, _vector4_sse& dst) const
00783 {
00784 dst.m128 = _mm_add_ps(
00785 _mm_add_ps(
00786 _mm_add_ps(
00787 _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00788 _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00789 _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00790 _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00791 }
00792
00793
00798 inline
00799 void
00800 _matrix44_sse::mult(const _vector3_sse& src, _vector3_sse& dst) const
00801 {
00802 dst.m128 = _mm_add_ps(
00803 _mm_add_ps(
00804 _mm_add_ps(
00805 _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00806 _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00807 _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00808 _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00809 }
00810
00811
00814 static
00815 inline
00816 _matrix44_sse
00817 operator * (const _matrix44_sse& ma, const _matrix44_sse& mb)
00818 {
00819 return _matrix44_sse(
00820 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00821 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00822 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00823 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(3,3,3,3)), mb.m4))
00824 );
00825 }
00826
00827
00830 static
00831 inline
00832 _vector3_sse operator * (const _matrix44_sse& m, const _vector3_sse& v)
00833 {
00834 return _vector3_sse(
00835 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f)))
00836 );
00837 }
00838
00839
00842 static
00843 inline
00844 _vector4_sse operator * (const _matrix44_sse& m, const _vector4_sse& v)
00845 {
00846 return _vector4_sse(
00847 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(3,3,3,3))))
00848 );
00849 }
00850
00851
00852 #endif