00001 #ifndef _VECTOR3_SSE_H
00002 #define _VECTOR3_SSE_H
00003
00012 #include <xmmintrin.h>
00013 #include <math.h>
00014
00015
//------------------------------------------------------------------------------
/**
    @class _vector3_sse

    A 3-component float vector whose storage is a single SSE register.
    The register is overlaid with the named scalars x, y, z plus one padding
    float, so the object occupies four floats. The constructors and setters
    taking scalars put x in the lowest lane and clear the padding lane.
*/
class _vector3_sse
{
public:
    /// shared constant; its definition is not part of this header
    /// (presumably the null vector -- confirm at the definition site)
    static const _vector3_sse zero;

public:
    // --- construction -------------------------------------------------------

    /// default constructor, clears all four lanes
    _vector3_sse();
    /// construct from three scalar components, padding lane is set to 0
    _vector3_sse(const float _x, const float _y, const float _z);
    /// copy constructor, copies the whole register including the padding lane
    _vector3_sse(const _vector3_sse& vec);
    /// construct from a float pointer; not defined in this header
    /// (presumably reads p[0..2] -- confirm at the definition site)
    _vector3_sse(const float* p);
    /// construct from a raw SSE register, all four lanes are taken as-is
    _vector3_sse(const __m128& m);

    // --- assignment ---------------------------------------------------------

    /// set the three components, padding lane is set to 0
    void set(const float _x, const float _y, const float _z);
    /// copy the whole register of another vector
    void set(const _vector3_sse& vec);
    /// set from a float pointer; not defined in this header
    /// (presumably reads p[0..2] -- confirm at the definition site)
    void set(const float* p);

    // --- length -------------------------------------------------------------

    /// length of the (x, y, z) part
    float len() const;
    /// squared length of the (x, y, z) part
    float lensquared() const;
    /// scale the vector in place by the reciprocal of its length
    void norm();

    // --- in-place arithmetic ------------------------------------------------

    /// lane-wise in-place addition
    void operator +=(const _vector3_sse& v0);
    /// lane-wise in-place subtraction
    void operator -=(const _vector3_sse& v0);
    /// in-place multiplication of every lane by a scalar
    void operator *=(float s);

    // --- comparison ---------------------------------------------------------

    /// true if no component differs from v by more than tol
    bool isequal(const _vector3_sse& v, float tol) const;
    /// three-way comparison against v with tolerance (x first, then y, then z)
    int compare(const _vector3_sse& v, float tol) const;

    // --- geometry -----------------------------------------------------------

    /// rotate in place around an axis by an angle
    void rotate(const _vector3_sse& axis, float angle);
    /// in-place interpolation: result is v0 + (this - v0) * lerpVal
    void lerp(const _vector3_sse& v0, float lerpVal);
    /// in-place interpolation: result is v0 + (v1 - v0) * lerpVal
    void lerp(const _vector3_sse& v0, const _vector3_sse& v1, float lerpVal);
    /// return a vector orthogonal to this one
    _vector3_sse findortho() const;

    // --- storage ------------------------------------------------------------

    /// the same 16 bytes viewed either as one register or as four floats
    union
    {
        __m128 m128;
        struct
        {
            float x;
            float y;
            float z;
            float pad;
        };
    };
};
00072
00073
00076 inline
00077 _vector3_sse::_vector3_sse()
00078 {
00079 m128 = _mm_setzero_ps();
00080 }
00081
00082
00085 inline
00086 _vector3_sse::_vector3_sse(const float _x, const float _y, const float _z)
00087 {
00088 m128 = _mm_set_ps(0.0f, _z, _y, _x);
00089 }
00090
00091
00094 inline
00095 _vector3_sse::_vector3_sse(const _vector3_sse& vec)
00096 {
00097 m128 = vec.m128;
00098 }
00099
00100
00103 inline
00104 _vector3_sse::_vector3_sse(const __m128& m)
00105 {
00106 m128 = m;
00107 }
00108
00109
00112 inline
00113 void
00114 _vector3_sse::set(const float _x, const float _y, const float _z)
00115 {
00116 m128 = _mm_set_ps(0.0f, _z, _y, _x);
00117 }
00118
00119
00122 inline
00123 void
00124 _vector3_sse::set(const _vector3_sse& vec)
00125 {
00126 m128 = vec.m128;
00127 }
00128
00129
00132 inline
00133 float
00134 _vector3_sse::len() const
00135 {
00136 static const int X = 0;
00137 static const int Y = 1;
00138 static const int Z = 2;
00139 static const int W = 3;
00140
00141 __m128 a = _mm_mul_ps(m128, m128);
00142
00143
00144 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00145 __m128 l = _mm_sqrt_ss(b);
00146 return l.m128_f32[X];
00147 }
00148
00149
00152 inline
00153 float
00154 _vector3_sse::lensquared() const
00155 {
00156 static const int X = 0;
00157 static const int Y = 1;
00158 static const int Z = 2;
00159 static const int W = 3;
00160
00161 __m128 a = _mm_mul_ps(m128, m128);
00162 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00163 return b.m128_f32[X];
00164 }
00165
00166
00169 inline
00170 void
00171 _vector3_sse::norm()
00172 {
00173 static const int X = 0;
00174 static const int Y = 1;
00175 static const int Z = 2;
00176 static const int W = 3;
00177
00178 __m128 a = _mm_mul_ps(m128, m128);
00179
00180
00181 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00182
00183
00184 __m128 f = _mm_rsqrt_ss(b);
00185 __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X,X,X,X));
00186
00187 m128 = _mm_mul_ps(m128, oneDivLen);
00188 }
00189
00190
00193 inline
00194 void
00195 _vector3_sse::operator +=(const _vector3_sse& v)
00196 {
00197 m128 = _mm_add_ps(m128, v.m128);
00198 }
00199
00200
00203 inline
00204 void
00205 _vector3_sse::operator -=(const _vector3_sse& v)
00206 {
00207 m128 = _mm_sub_ps(m128, v.m128);
00208 }
00209
00210
00213 inline
00214 void
00215 _vector3_sse::operator *=(float s)
00216 {
00217 __m128 packed = _mm_set1_ps(s);
00218 m128 = _mm_mul_ps(m128, packed);
00219 }
00220
00221
00224 inline
00225 bool
00226 _vector3_sse::isequal(const _vector3_sse& v, float tol) const
00227 {
00228 if (fabs(v.x - x) > tol) return false;
00229 else if (fabs(v.y - y) > tol) return false;
00230 else if (fabs(v.z - z) > tol) return false;
00231 return true;
00232 }
00233
00234
00237 inline
00238 int
00239 _vector3_sse::compare(const _vector3_sse& v, float tol) const
00240 {
00241 if (fabs(v.x - x) > tol) return (v.x > x) ? +1 : -1;
00242 else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00243 else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00244 else return 0;
00245 }
00246
00247
00250 inline
00251 void
00252 _vector3_sse::rotate(const _vector3_sse& axis, float angle)
00253 {
00254
00255
00256 float rotM[9];
00257 float sa, ca;
00258
00259 sa = (float) sin(angle);
00260 ca = (float) cos(angle);
00261
00262
00263 rotM[0] = ca + (1 - ca) * axis.x * axis.x;
00264 rotM[1] = (1 - ca) * axis.x * axis.y - sa * axis.z;
00265 rotM[2] = (1 - ca) * axis.z * axis.x + sa * axis.y;
00266 rotM[3] = (1 - ca) * axis.x * axis.y + sa * axis.z;
00267 rotM[4] = ca + (1 - ca) * axis.y * axis.y;
00268 rotM[5] = (1 - ca) * axis.y * axis.z - sa * axis.x;
00269 rotM[6] = (1 - ca) * axis.z * axis.x - sa * axis.y;
00270 rotM[7] = (1 - ca) * axis.y * axis.z + sa * axis.x;
00271 rotM[8] = ca + (1 - ca) * axis.z * axis.z;
00272
00273
00274 _vector3_sse help(rotM[0] * this->x + rotM[1] * this->y + rotM[2] * this->z,
00275 rotM[3] * this->x + rotM[4] * this->y + rotM[5] * this->z,
00276 rotM[6] * this->x + rotM[7] * this->y + rotM[8] * this->z);
00277 *this = help;
00278 }
00279
00280
00283 static
00284 inline
00285 _vector3_sse operator +(const _vector3_sse& v0, const _vector3_sse& v1)
00286 {
00287 return _vector3_sse(_mm_add_ps(v0.m128, v1.m128));
00288 }
00289
00290
00293 static
00294 inline
00295 _vector3_sse operator -(const _vector3_sse& v0, const _vector3_sse& v1)
00296 {
00297 return _vector3_sse(_mm_sub_ps(v0.m128, v1.m128));
00298 }
00299
00300
00303 static
00304 inline
00305 _vector3_sse operator *(const _vector3_sse& v0, const float s)
00306 {
00307 __m128 packed = _mm_set1_ps(s);
00308 return _vector3_sse(_mm_mul_ps(v0.m128, packed));
00309 }
00310
00311
00314 static
00315 inline
00316 _vector3_sse operator -(const _vector3_sse& v)
00317 {
00318 __m128 zero = _mm_setzero_ps();
00319 return _vector3_sse(_mm_sub_ps(zero, v.m128));
00320 }
00321
00322
00326 static
00327 inline
00328 float operator %(const _vector3_sse& v0, const _vector3_sse& v1)
00329 {
00330 __m128 a = _mm_mul_ps(v0.m128, v1.m128);
00331 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,0,0)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2,2,2,2))));
00332 return b.m128_f32[0];
00333 }
00334
00335
00339 static
00340 inline
00341 _vector3_sse operator *(const _vector3_sse& v0, const _vector3_sse& v1)
00342 {
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353 static const int X = 0;
00354 static const int Y = 1;
00355 static const int Z = 2;
00356 static const int W = 3;
00357
00358 __m128 a = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, X, Z, Y));
00359 __m128 b = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, Y, X, Z));
00360 __m128 c = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, Y, X, Z));
00361 __m128 d = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, X, Z, Y));
00362
00363 __m128 e = _mm_mul_ps(a, b);
00364 __m128 f = _mm_mul_ps(c, d);
00365
00366 return _vector3_sse(_mm_sub_ps(e, f));
00367 }
00368
00369
00372 inline
00373 void
00374 _vector3_sse::lerp(const _vector3_sse& v0, float lerpVal)
00375 {
00376 x = v0.x + ((x - v0.x) * lerpVal);
00377 y = v0.y + ((y - v0.y) * lerpVal);
00378 z = v0.z + ((z - v0.z) * lerpVal);
00379 }
00380
00381
00384 inline
00385 void
00386 _vector3_sse::lerp(const _vector3_sse& v0, const _vector3_sse& v1, float lerpVal)
00387 {
00388 x = v0.x + ((v1.x - v0.x) * lerpVal);
00389 y = v0.y + ((v1.y - v0.y) * lerpVal);
00390 z = v0.z + ((v1.z - v0.z) * lerpVal);
00391 }
00392
00393
00398 inline
00399 _vector3_sse
00400 _vector3_sse::findortho() const
00401 {
00402 if (0.0 != x)
00403 {
00404 return _vector3_sse((-y - z) / x, 1.0, 1.0);
00405 } else
00406 if (0.0 != y)
00407 {
00408 return _vector3_sse(1.0, (-x - z) / y, 1.0);
00409 } else
00410 if (0.0 != z)
00411 {
00412 return _vector3_sse(1.0, 1.0, (-x - y) / z);
00413 } else
00414 {
00415 return _vector3_sse(0.0, 0.0, 0.0);
00416 }
00417 }
00418
00419
00420 #endif