news | people | for developers | documentation | downloads

_vector3_sse.h

Go to the documentation of this file.
00001 #ifndef _VECTOR3_SSE_H
00002 #define _VECTOR3_SSE_H
00003 //------------------------------------------------------------------------------
00012 #include <xmmintrin.h>
00013 #include <math.h>
00014 
00015 //------------------------------------------------------------------------------
00016 class _vector3_sse
00017 {
00018 public:
00019     static const _vector3_sse zero;
00020 
00021 public:
00023     _vector3_sse();
00025     _vector3_sse(const float _x, const float _y, const float _z);
00027     _vector3_sse(const _vector3_sse& vec);
00029     _vector3_sse(const float* p);
00031     _vector3_sse(const __m128& m);
00033     void set(const float _x, const float _y, const float _z);
00035     void set(const _vector3_sse& vec);
00037     void set(const float* p);
00039     float len() const;
00041     float lensquared() const;
00043     void norm();
00045     void operator +=(const _vector3_sse& v0);
00047     void operator -=(const _vector3_sse& v0);
00049     void operator *=(float s);
00051     bool isequal(const _vector3_sse& v, float tol) const;
00053     int compare(const _vector3_sse& v, float tol) const;
00055     void rotate(const _vector3_sse& axis, float angle);
00057     void lerp(const _vector3_sse& v0, float lerpVal);
00059     void lerp(const _vector3_sse& v0, const _vector3_sse& v1, float lerpVal);
00061     _vector3_sse findortho() const;
00062 
00063     union
00064     {
00065         __m128 m128;
00066         struct
00067         {
00068             float x, y, z, pad;
00069         };
00070     };
00071 };
00072 
00073 //------------------------------------------------------------------------------
00076 inline
00077 _vector3_sse::_vector3_sse()
00078 {
00079     m128 = _mm_setzero_ps();
00080 }
00081 
00082 //------------------------------------------------------------------------------
00085 inline
00086 _vector3_sse::_vector3_sse(const float _x, const float _y, const float _z)
00087 {
00088     m128 = _mm_set_ps(0.0f, _z, _y, _x);
00089 }
00090 
00091 //------------------------------------------------------------------------------
00094 inline
00095 _vector3_sse::_vector3_sse(const _vector3_sse& vec)
00096 {
00097     m128 = vec.m128;
00098 }
00099 
00100 //------------------------------------------------------------------------------
00103 inline
00104 _vector3_sse::_vector3_sse(const __m128& m)
00105 {
00106     m128 = m;
00107 }
00108 
00109 //------------------------------------------------------------------------------
00112 inline
00113 void
00114 _vector3_sse::set(const float _x, const float _y, const float _z)
00115 {
00116     m128 = _mm_set_ps(0.0f, _z, _y, _x);
00117 }
00118 
00119 //------------------------------------------------------------------------------
00122 inline
00123 void
00124 _vector3_sse::set(const _vector3_sse& vec)
00125 {
00126     m128 = vec.m128;
00127 }
00128 
00129 //------------------------------------------------------------------------------
00132 inline
00133 float
00134 _vector3_sse::len() const
00135 {
00136     static const int X = 0;
00137     static const int Y = 1;
00138     static const int Z = 2;
00139     static const int W = 3;
00140 
00141     __m128 a = _mm_mul_ps(m128, m128);
00142 
00143     // horizontal add
00144     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00145     __m128 l = _mm_sqrt_ss(b);
00146     return l.m128_f32[X];
00147 }
00148 
00149 //------------------------------------------------------------------------------
00152 inline
00153 float
00154 _vector3_sse::lensquared() const
00155 {
00156     static const int X = 0;
00157     static const int Y = 1;
00158     static const int Z = 2;
00159     static const int W = 3;
00160 
00161     __m128 a = _mm_mul_ps(m128, m128);
00162     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00163     return b.m128_f32[X];
00164 }
00165 
00166 //------------------------------------------------------------------------------
00169 inline
00170 void
00171 _vector3_sse::norm()
00172 {
00173     static const int X = 0;
00174     static const int Y = 1;
00175     static const int Z = 2;
00176     static const int W = 3;
00177 
00178     __m128 a = _mm_mul_ps(m128, m128);
00179 
00180     // horizontal add
00181     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00182 
00183     // get reciprocal of square root of squared length
00184     __m128 f = _mm_rsqrt_ss(b);
00185     __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X,X,X,X));
00186 
00187     m128 = _mm_mul_ps(m128, oneDivLen);
00188 }
00189 
00190 //------------------------------------------------------------------------------
00193 inline
00194 void
00195 _vector3_sse::operator +=(const _vector3_sse& v)
00196 {
00197     m128 = _mm_add_ps(m128, v.m128);
00198 }
00199 
00200 //------------------------------------------------------------------------------
00203 inline
00204 void
00205 _vector3_sse::operator -=(const _vector3_sse& v)
00206 {
00207     m128 = _mm_sub_ps(m128, v.m128);
00208 }
00209 
00210 //------------------------------------------------------------------------------
00213 inline
00214 void
00215 _vector3_sse::operator *=(float s)
00216 {
00217     __m128 packed = _mm_set1_ps(s);
00218     m128 = _mm_mul_ps(m128, packed);
00219 }
00220 
00221 //------------------------------------------------------------------------------
00224 inline
00225 bool
00226 _vector3_sse::isequal(const _vector3_sse& v, float tol) const
00227 {
00228     if (fabs(v.x - x) > tol)      return false;
00229     else if (fabs(v.y - y) > tol) return false;
00230     else if (fabs(v.z - z) > tol) return false;
00231     return true;
00232 }
00233 
00234 //------------------------------------------------------------------------------
00237 inline
00238 int
00239 _vector3_sse::compare(const _vector3_sse& v, float tol) const
00240 {
00241     if (fabs(v.x - x) > tol)      return (v.x > x) ? +1 : -1;
00242     else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00243     else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00244     else                          return 0;
00245 }
00246 
00247 //------------------------------------------------------------------------------
00250 inline
00251 void
00252 _vector3_sse::rotate(const _vector3_sse& axis, float angle)
00253 {
00254     // rotates this one around given vector. We do
00255     // rotation with matrices, but these aren't defined yet!
00256     float rotM[9];
00257     float sa, ca;
00258 
00259     sa = (float) sin(angle);
00260     ca = (float) cos(angle);
00261 
00262     // build a rotation matrix
00263     rotM[0] = ca + (1 - ca) * axis.x * axis.x;
00264     rotM[1] = (1 - ca) * axis.x * axis.y - sa * axis.z;
00265     rotM[2] = (1 - ca) * axis.z * axis.x + sa * axis.y;
00266     rotM[3] = (1 - ca) * axis.x * axis.y + sa * axis.z;
00267     rotM[4] = ca + (1 - ca) * axis.y * axis.y;
00268     rotM[5] = (1 - ca) * axis.y * axis.z - sa * axis.x;
00269     rotM[6] = (1 - ca) * axis.z * axis.x - sa * axis.y;
00270     rotM[7] = (1 - ca) * axis.y * axis.z + sa * axis.x;
00271     rotM[8] = ca + (1 - ca) * axis.z * axis.z;
00272 
00273     // "handmade" multiplication
00274     _vector3_sse help(rotM[0] * this->x + rotM[1] * this->y + rotM[2] * this->z,
00275                       rotM[3] * this->x + rotM[4] * this->y + rotM[5] * this->z,
00276                       rotM[6] * this->x + rotM[7] * this->y + rotM[8] * this->z);
00277     *this = help;
00278 }
00279 
00280 //------------------------------------------------------------------------------
00283 static
00284 inline
00285 _vector3_sse operator +(const _vector3_sse& v0, const _vector3_sse& v1)
00286 {
00287     return _vector3_sse(_mm_add_ps(v0.m128, v1.m128));
00288 }
00289 
00290 //------------------------------------------------------------------------------
00293 static
00294 inline
00295 _vector3_sse operator -(const _vector3_sse& v0, const _vector3_sse& v1)
00296 {
00297     return _vector3_sse(_mm_sub_ps(v0.m128, v1.m128));
00298 }
00299 
00300 //------------------------------------------------------------------------------
00303 static
00304 inline
00305 _vector3_sse operator *(const _vector3_sse& v0, const float s)
00306 {
00307     __m128 packed = _mm_set1_ps(s);
00308     return _vector3_sse(_mm_mul_ps(v0.m128, packed));
00309 }
00310 
00311 //------------------------------------------------------------------------------
00314 static
00315 inline
00316 _vector3_sse operator -(const _vector3_sse& v)
00317 {
00318     __m128 zero = _mm_setzero_ps();
00319     return _vector3_sse(_mm_sub_ps(zero, v.m128));
00320 }
00321 
00322 //------------------------------------------------------------------------------
00326 static
00327 inline
00328 float operator %(const _vector3_sse& v0, const _vector3_sse& v1)
00329 {
00330     __m128 a = _mm_mul_ps(v0.m128, v1.m128);
00331     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,0,0)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2,2,2,2))));
00332     return b.m128_f32[0];
00333 }
00334 
00335 //------------------------------------------------------------------------------
00339 static
00340 inline
00341 _vector3_sse operator *(const _vector3_sse& v0, const _vector3_sse& v1)
00342 {
00343     // x = v0.y * v1.z - v0.z * v1.y
00344     // y = v0.z * v1.x - v0.x * v1.z
00345     // z = v0.x * v1.y - v0.y * v1.x
00346     //
00347     // a = v0.y | v0.z | v0.x | xxx
00348     // b = v1.z | v1.x | v1.y | xxx
00349     // c = v0.z | v0.x | v0.y | xxx
00350     // d = v1.y | v1.z | v1.x | xxx
00351     //
00352 
00353     static const int X = 0;
00354     static const int Y = 1;
00355     static const int Z = 2;
00356     static const int W = 3;
00357 
00358     __m128 a = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, X, Z, Y));
00359     __m128 b = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, Y, X, Z));
00360     __m128 c = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, Y, X, Z));
00361     __m128 d = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, X, Z, Y));
00362 
00363     __m128 e = _mm_mul_ps(a, b);
00364     __m128 f = _mm_mul_ps(c, d);
00365 
00366     return _vector3_sse(_mm_sub_ps(e, f));
00367 }
00368 
00369 //------------------------------------------------------------------------------
00372 inline
00373 void
00374 _vector3_sse::lerp(const _vector3_sse& v0, float lerpVal)
00375 {
00376     x = v0.x + ((x - v0.x) * lerpVal);
00377     y = v0.y + ((y - v0.y) * lerpVal);
00378     z = v0.z + ((z - v0.z) * lerpVal);
00379 }
00380 
00381 //------------------------------------------------------------------------------
00384 inline
00385 void
00386 _vector3_sse::lerp(const _vector3_sse& v0, const _vector3_sse& v1, float lerpVal)
00387 {
00388     x = v0.x + ((v1.x - v0.x) * lerpVal);
00389     y = v0.y + ((v1.y - v0.y) * lerpVal);
00390     z = v0.z + ((v1.z - v0.z) * lerpVal);
00391 }
00392 
00393 //------------------------------------------------------------------------------
00398 inline
00399 _vector3_sse
00400 _vector3_sse::findortho() const
00401 {
00402     if (0.0 != x)
00403     {
00404         return _vector3_sse((-y - z) / x, 1.0, 1.0);
00405     } else
00406     if (0.0 != y)
00407     {
00408         return _vector3_sse(1.0, (-x - z) / y, 1.0);
00409     } else
00410     if (0.0 != z)
00411     {
00412         return _vector3_sse(1.0, 1.0, (-x - y) / z);
00413     } else
00414     {
00415         return _vector3_sse(0.0, 0.0, 0.0);
00416     }
00417 }
00418 
00419 //------------------------------------------------------------------------------
00420 #endif

Copyright © 1999-2005 by the contributing authors. Ideas, requests, problems: Send feedback.