Merge pull request #7976 from samuele3hu/v3_check_mat

Added Math SSE implementation on x86 and updated the bindings-generator submodule
This commit is contained in:
minggo 2014-09-11 14:47:39 +08:00
commit a5855baac4
18 changed files with 248 additions and 22 deletions

View File

@ -414,8 +414,11 @@ void Mat4::add(float scalar)
// Adds `scalar` to every component of this matrix and stores the result in
// `dst`. `dst` must not be null (checked with GP_ASSERT).
void Mat4::add(float scalar, Mat4* dst)
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::addMatrix(col, scalar, dst->col);
#else
    // Other builds: work on the float array view.
    MathUtil::addMatrix(m, scalar, dst->m);
#endif
}
void Mat4::add(const Mat4& mat)
@ -426,8 +429,11 @@ void Mat4::add(const Mat4& mat)
// Computes the component-wise sum m1 + m2 and stores it in `dst`.
// `dst` must not be null (checked with GP_ASSERT).
void Mat4::add(const Mat4& m1, const Mat4& m2, Mat4* dst)
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::addMatrix(m1.col, m2.col, dst->col);
#else
    // Other builds: work on the float array view.
    MathUtil::addMatrix(m1.m, m2.m, dst->m);
#endif
}
bool Mat4::decompose(Vec3* scale, Quaternion* rotation, Vec3* translation) const
@ -700,8 +706,11 @@ void Mat4::multiply(float scalar, Mat4* dst) const
// Multiplies every component of `m` by `scalar` and stores the result in
// `dst`. `dst` must not be null (checked with GP_ASSERT).
void Mat4::multiply(const Mat4& m, float scalar, Mat4* dst)
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::multiplyMatrix(m.col, scalar, dst->col);
#else
    // Other builds: work on the float array view.
    MathUtil::multiplyMatrix(m.m, scalar, dst->m);
#endif
}
void Mat4::multiply(const Mat4& mat)
@ -712,13 +721,20 @@ void Mat4::multiply(const Mat4& mat)
// Computes the matrix product of `m1` and `m2` and stores it in `dst`.
// `dst` must not be null (checked with GP_ASSERT).
void Mat4::multiply(const Mat4& m1, const Mat4& m2, Mat4* dst)
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::multiplyMatrix(m1.col, m2.col, dst->col);
#else
    // Other builds: work on the float array view.
    MathUtil::multiplyMatrix(m1.m, m2.m, dst->m);
#endif
}
// Negates this matrix in place (source and destination are the same storage).
void Mat4::negate()
{
#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::negateMatrix(col, col);
#else
    // Other builds: work on the float array view.
    MathUtil::negateMatrix(m, m);
#endif
}
Mat4 Mat4::getNegated() const
@ -870,8 +886,11 @@ void Mat4::subtract(const Mat4& mat)
// Computes the component-wise difference m1 - m2 and stores it in `dst`.
// `dst` must not be null (checked with GP_ASSERT).
void Mat4::subtract(const Mat4& m1, const Mat4& m2, Mat4* dst)
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::subtractMatrix(m1.col, m2.col, dst->col);
#else
    // Other builds: work on the float array view.
    MathUtil::subtractMatrix(m1.m, m2.m, dst->m);
#endif
}
void Mat4::transformPoint(Vec3* point) const
@ -912,8 +931,11 @@ void Mat4::transformVector(Vec4* vector) const
// Transforms `vector` by this matrix and stores the result in `dst`.
// `dst` must not be null (checked with GP_ASSERT).
void Mat4::transformVector(const Vec4& vector, Vec4* dst) const
{
    GP_ASSERT(dst);

#if defined(__SSE__)
    // SSE build: pass the __m128 views of the matrix and both vectors.
    MathUtil::transformVec4(col, vector.v, dst->v);
#else
    // Other builds: reinterpret each Vec4 as a pointer to its float components.
    MathUtil::transformVec4(m,
                            reinterpret_cast<const float*>(&vector),
                            reinterpret_cast<float*>(dst));
#endif
}
void Mat4::translate(float x, float y, float z)
@ -940,7 +962,11 @@ void Mat4::translate(const Vec3& t, Mat4* dst) const
// Transposes this matrix in place (source and destination are the same storage).
void Mat4::transpose()
{
#if defined(__SSE__)
    // SSE build: work on the __m128 view of the matrix storage.
    MathUtil::transposeMatrix(col, col);
#else
    // Other builds: work on the float array view.
    MathUtil::transposeMatrix(m, m);
#endif
}
Mat4 Mat4::getTransposed() const

View File

@ -24,6 +24,10 @@
#include "math/Vec3.h"
#include "math/Vec4.h"
#ifdef __SSE__
#include <xmmintrin.h>
#endif
NS_CC_MATH_BEGIN
//class Plane;
@ -77,7 +81,14 @@ public:
/**
* Stores the columns of this 4x4 matrix.
* */
#ifdef __SSE__
// SSE builds: `col` and `m` are two views of the same storage (anonymous
// union), so the matrix can be read either as four __m128 values or as
// sixteen floats.
// NOTE(review): __m128 normally requires 16-byte alignment - confirm that every
// Mat4 instance (including heap-allocated ones) satisfies it.
union {
__m128 col[4];
float m[16];
};
#else
// Builds without __SSE__: only the flat array of sixteen floats exists.
float m[16];
#endif
/**
* Constructs a matrix initialized to the identity matrix:

View File

@ -21,6 +21,10 @@
#ifndef MATHUTIL_H_
#define MATHUTIL_H_
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#include "CCMathBase.h"
NS_CC_MATH_BEGIN
@ -67,7 +71,23 @@ public:
static void smooth(float* x, float target, float elapsedTime, float riseTime, float fallTime);
private:
#ifdef __SSE__
// SSE overloads, declared only when __SSE__ is defined. Each matrix is passed
// as an array of four __m128 values; transformVec4 takes the input and output
// vectors as single __m128 values.
inline static void addMatrix(const __m128 m[4], float scalar, __m128 dst[4]);
inline static void addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
inline static void subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
inline static void multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4]);
inline static void multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
inline static void negateMatrix(const __m128 m[4], __m128 dst[4]);
inline static void transposeMatrix(const __m128 m[4], __m128 dst[4]);
inline static void transformVec4(const __m128 m[4], const __m128& v, __m128& dst);
#endif
inline static void addMatrix(const float* m, float scalar, float* dst);
inline static void addMatrix(const float* m1, const float* m2, float* dst);
@ -99,6 +119,9 @@ NS_CC_MATH_END
#include "MathUtilNeon.inl"
#else
#include "MathUtil.inl"
#if defined(__SSE__)
#include "MathUtilSSE.inl"
#endif
#endif
#endif

152
cocos/math/MathUtilSSE.inl Normal file
View File

@ -0,0 +1,152 @@
NS_CC_MATH_BEGIN
// Adds `scalar` to all sixteen components of `m`, writing the result to `dst`.
inline void MathUtil::addMatrix(const __m128 m[4], float scalar, __m128 dst[4])
{
    // Broadcast the scalar into all four lanes once, then reuse it per column.
    const __m128 addend = _mm_set1_ps(scalar);

    for (int i = 0; i < 4; ++i)
    {
        dst[i] = _mm_add_ps(m[i], addend);
    }
}
// Component-wise sum: dst = m1 + m2, four lanes at a time.
inline void MathUtil::addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
    for (int i = 0; i < 4; ++i)
    {
        dst[i] = _mm_add_ps(m1[i], m2[i]);
    }
}
// Component-wise difference: dst = m1 - m2, four lanes at a time.
inline void MathUtil::subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
    for (int i = 0; i < 4; ++i)
    {
        dst[i] = _mm_sub_ps(m1[i], m2[i]);
    }
}
// Scales all sixteen components of `m` by `scalar`, writing the result to `dst`.
inline void MathUtil::multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4])
{
    // Broadcast the scalar into all four lanes once, then reuse it per column.
    const __m128 factor = _mm_set1_ps(scalar);

    for (int i = 0; i < 4; ++i)
    {
        dst[i] = _mm_mul_ps(m[i], factor);
    }
}
// Matrix product: for each index c,
//   dst[c] = m1[0]*m2[c].x + m1[1]*m2[c].y + m1[2]*m2[c].z + m1[3]*m2[c].w
// where .x/.y/.z/.w denote lanes 0..3 of m2[c].
inline void MathUtil::multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
    // Results are collected in locals and stored only after the loop, so every
    // read of m1/m2 happens before the first write to dst.
    __m128 product[4];

    for (int c = 0; c < 4; ++c)
    {
        const __m128 rhs = m2[c];

        // Broadcast each lane of the right-hand value across a full register.
        const __m128 xxxx = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(0, 0, 0, 0));
        const __m128 yyyy = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(1, 1, 1, 1));
        const __m128 zzzz = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(2, 2, 2, 2));
        const __m128 wwww = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(3, 3, 3, 3));

        // Pairwise summation: (t0 + t1) + (t2 + t3).
        const __m128 firstPair  = _mm_add_ps(_mm_mul_ps(m1[0], xxxx), _mm_mul_ps(m1[1], yyyy));
        const __m128 secondPair = _mm_add_ps(_mm_mul_ps(m1[2], zzzz), _mm_mul_ps(m1[3], wwww));
        product[c] = _mm_add_ps(firstPair, secondPair);
    }

    for (int c = 0; c < 4; ++c)
    {
        dst[c] = product[c];
    }
}
// Negates all sixteen components of `m`, writing the result to `dst`.
//
// The sign bit of every lane is flipped with an XOR rather than computing
// `0 - x`: under IEEE 754, `0 - x` yields +0 when x is +0 or -0, whereas a true
// negation maps +0 to -0 and -0 to +0. Flipping the sign bit gives the same
// result as unary minus on a float for every input.
inline void MathUtil::negateMatrix(const __m128 m[4], __m128 dst[4])
{
    // -0.0f has only the sign bit set, so this is a per-lane sign mask.
    const __m128 signMask = _mm_set1_ps(-0.0f);

    for (int i = 0; i < 4; ++i)
    {
        dst[i] = _mm_xor_ps(m[i], signMask);
    }
}
// Transposes the 4x4 matrix held in `m` into `dst`.
// All four inputs are consumed into temporaries before anything is stored to dst.
inline void MathUtil::transposeMatrix(const __m128 m[4], __m128 dst[4])
{
    // Stage 1: gather the low halves (lanes 0,1) and high halves (lanes 2,3)
    // of each pair of inputs.
    const __m128 lo01 = _mm_shuffle_ps(m[0], m[1], _MM_SHUFFLE(1, 0, 1, 0));
    const __m128 hi01 = _mm_shuffle_ps(m[0], m[1], _MM_SHUFFLE(3, 2, 3, 2));
    const __m128 lo23 = _mm_shuffle_ps(m[2], m[3], _MM_SHUFFLE(1, 0, 1, 0));
    const __m128 hi23 = _mm_shuffle_ps(m[2], m[3], _MM_SHUFFLE(3, 2, 3, 2));

    // Stage 2: pick the even lanes (0,2) or odd lanes (1,3) of the staged pairs.
    dst[0] = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(2, 0, 2, 0));
    dst[1] = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(3, 1, 3, 1));
    dst[2] = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(2, 0, 2, 0));
    dst[3] = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(3, 1, 3, 1));
}
// Computes dst = m[0]*v.x + m[1]*v.y + m[2]*v.z + m[3]*v.w, where .x/.y/.z/.w
// denote lanes 0..3 of `v`. `v` is fully read before `dst` is written.
inline void MathUtil::transformVec4(const __m128 m[4], const __m128& v, __m128& dst)
{
    // Broadcast each lane of the input vector across a full register.
    const __m128 xxxx = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
    const __m128 yyyy = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 zzzz = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 wwww = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));

    // Pairwise summation: (t0 + t1) + (t2 + t3).
    const __m128 firstPair  = _mm_add_ps(_mm_mul_ps(m[0], xxxx), _mm_mul_ps(m[1], yyyy));
    const __m128 secondPair = _mm_add_ps(_mm_mul_ps(m[2], zzzz), _mm_mul_ps(m[3], wwww));

    dst = _mm_add_ps(firstPair, secondPair);
}
NS_CC_MATH_END

View File

@ -21,6 +21,10 @@
#ifndef MATH_VEC4_H
#define MATH_VEC4_H
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#include "math/CCMathBase.h"
NS_CC_MATH_BEGIN
@ -33,7 +37,17 @@ class Mat4;
class CC_DLL Vec4
{
public:
#ifdef __SSE__
// Anonymous union: the four named float components and the __m128 `v` are two
// views of the same storage.
union {
struct {
// The x-, y-, z- and w-coordinates.
float x;
float y;
float z;
float w;
};
// All four coordinates as a single SSE value.
__m128 v;
};
#else
/**
* The x-coordinate.
*/
@ -53,7 +67,7 @@ public:
* The w-coordinate.
*/
float w;
#endif
/**
* Constructs a new vector initialized to all zeros.
*/

@ -1 +1 @@
Subproject commit c1db553615789a1545d495d0d0fd6f620547f99d
Subproject commit a8496d540c174236bec0d5a33457340571699f19

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/platform/android
cocos_flags = -DANDROID

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/platform/android
cocos_flags = -DANDROID

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/platform/android

View File

@ -13,7 +13,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/base -I%(cocosdir)s/cocos/platform/android
cocos_flags = -DANDROID

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android

View File

@ -13,7 +13,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/platform/android

View File

@ -13,7 +13,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/platform/android

View File

@ -11,7 +11,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android

View File

@ -14,7 +14,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/external -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android

View File

@ -14,7 +14,7 @@ android_headers = -I%(androidndkdir)s/platforms/android-14/arch-arm/usr/include
android_flags = -D_SIZE_T_DEFINED_
clang_headers = -I%(clangllvmdir)s/lib/clang/3.3/include
clang_flags = -nostdinc -x c++ -std=c++11
clang_flags = -nostdinc -x c++ -std=c++11 -U __SSE__
cocos_headers = -I%(cocosdir)s/cocos -I%(cocosdir)s/cocos/editor-support -I%(cocosdir)s/cocos/platform/android