mirror of https://github.com/axmolengine/axmol.git
Refactor math simd (#2070)
This commit is contained in:
parent
695ccc0357
commit
8fd2a551e8
|
@ -895,6 +895,7 @@ function setup_cmake($skipOS = $false, $scope = 'local') {
|
|||
else {
|
||||
& "$cmake_pkg_path" '--skip-license' '--prefix=/usr/local' 1>$null 2>$null
|
||||
}
|
||||
if (!$?) { Remove-Item $cmake_pkg_path -Force }
|
||||
}
|
||||
|
||||
$cmake_prog, $_ = find_prog -name 'cmake' -path $cmake_bin -silent $true
|
||||
|
|
|
@ -20,10 +20,16 @@ function(_1kfetch_init)
|
|||
set(_1kfetch_manifest "${_1kfetch_manifest}" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
|
||||
if(NOT EXISTS ${PWSH_PROG}) # try again
|
||||
unset(PWSH_PROG CACHE)
|
||||
find_program(PWSH_PROG NAMES pwsh powershell NO_PACKAGE_ROOT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH NO_CMAKE_FIND_ROOT_PATH)
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND ${PWSH_PROG} ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/resolv-uri.ps1
|
||||
-name "1kdist"
|
||||
-manifest ${_1kfetch_manifest}
|
||||
OUTPUT_VARIABLE _1kdist_url
|
||||
RESULT_VARIABLE _1kdist_error
|
||||
)
|
||||
|
||||
if(_1kdist_url)
|
||||
|
@ -33,7 +39,7 @@ function(_1kfetch_init)
|
|||
set(_1kdist_base_url "${_1kdist_base_url}/${_1kdist_ver}" PARENT_SCOPE)
|
||||
set(_1kdist_ver ${_1kdist_ver} PARENT_SCOPE)
|
||||
else()
|
||||
message(WARNING "Resolve 1kdist uri fail, the _1kfetch_dist will not work")
|
||||
message(WARNING "Resolve 1kdist uri fail, ${_1kdist_error}, the _1kfetch_dist will not work")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@
|
|||
|
||||
## yasio
|
||||
- [![Upstream](https://img.shields.io/github/v/release/yasio/yasio?label=Upstream)](https://github.com/yasio/yasio)
|
||||
- Version: 4.2.3
|
||||
- Version: 4.2.4
|
||||
- License: MIT WITH Anti-996
|
||||
|
||||
## zlib
|
||||
|
|
|
@ -60,14 +60,14 @@ YASIO_NI_API void yasio_init_globals(void(YASIO_INTEROP_DECL* pfn)(int level, co
|
|||
YASIO_NI_API void yasio_cleanup_globals() { io_service::cleanup_globals(); }
|
||||
|
||||
struct yasio_io_event {
|
||||
int kind; //
|
||||
int channel;
|
||||
void* thandle;
|
||||
int kind; // event kind
|
||||
int channel; // channel index
|
||||
void* thandle; // transport
|
||||
union {
|
||||
void* msg;
|
||||
int status; //
|
||||
void* hmsg; // io_packet*
|
||||
int ec; // error code
|
||||
};
|
||||
void* user;
|
||||
void* user; // user data
|
||||
};
|
||||
|
||||
YASIO_NI_API void* yasio_create_service(int channel_count, void(YASIO_INTEROP_DECL* event_cb)(yasio_io_event* event), void* user)
|
||||
|
@ -82,9 +82,9 @@ YASIO_NI_API void* yasio_create_service(int channel_count, void(YASIO_INTEROP_DE
|
|||
event.thandle = e->transport();
|
||||
event.user = user;
|
||||
if (event.kind == yasio::YEK_ON_PACKET)
|
||||
event.msg = !is_packet_empty(pkt) ? &pkt : nullptr;
|
||||
event.hmsg = !is_packet_empty(pkt) ? &pkt : nullptr;
|
||||
else
|
||||
event.status = e->status();
|
||||
event.ec = e->status();
|
||||
event_cb(&event);
|
||||
});
|
||||
return service;
|
||||
|
@ -157,8 +157,12 @@ YASIO_NI_API void yasio_set_option(void* service_ptr, int opt, const char* pszAr
|
|||
std::array<cxx17::string_view, YASIO_MAX_OPTION_ARGC> args;
|
||||
int argc = 0;
|
||||
yasio::split_if(&strArgs.front(), ';', [&](char* s, char* e) {
|
||||
*e = '\0'; // to c style string
|
||||
args[argc++] = cxx17::string_view(s, e - s);
|
||||
if (e) {
|
||||
*e = '\0'; // to c style string
|
||||
args[argc++] = cxx17::string_view(s, e - s);
|
||||
} else {
|
||||
args[argc++] = cxx17::string_view{s};
|
||||
}
|
||||
return (argc < YASIO_MAX_OPTION_ARGC);
|
||||
});
|
||||
|
||||
|
|
|
@ -205,7 +205,7 @@ SOFTWARE.
|
|||
/*
|
||||
** The yasio version macros
|
||||
*/
|
||||
#define YASIO_VERSION_NUM 0x040203
|
||||
#define YASIO_VERSION_NUM 0x040204
|
||||
|
||||
/*
|
||||
** The macros used by io_service.
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#if defined(__GLIBC__) && (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8)
|
||||
# include <asm/unistd.h>
|
||||
#else // __GLIBC__ == 2 && __GLIBC_MINOR__ < 8
|
||||
#if defined(__GLIBC__) && (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8) && !defined(__UCLIBC__)
|
||||
# include <asm/unistd.h> // for syscall without API: eventfd
|
||||
#else
|
||||
# include <sys/eventfd.h>
|
||||
#endif // __GLIBC__ == 2 && __GLIBC_MINOR__ < 8
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
|
@ -105,7 +105,7 @@ private:
|
|||
// Open the descriptors. Throws on error.
|
||||
inline void open_descriptors()
|
||||
{
|
||||
#if defined(__GLIBC__) && (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8)
|
||||
#if defined(__GLIBC__) && (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8) && !defined(__UCLIBC__)
|
||||
write_descriptor_ = read_descriptor_ = syscall(__NR_eventfd, 0);
|
||||
if (read_descriptor_ != -1)
|
||||
{
|
||||
|
|
|
@ -40,6 +40,9 @@ inline void yasio__print(std::string&& message) { ::write(::fileno(stdout), mess
|
|||
# include <android/log.h>
|
||||
# include <jni.h>
|
||||
# define YASIO_LOG_TAG(tag, format, ...) __android_log_print(ANDROID_LOG_INFO, "yasio", (tag format), ##__VA_ARGS__)
|
||||
#elif defined(__OHOS__)
|
||||
# include <hilog/log.h>
|
||||
# define YASIO_LOG_TAG(tag, format, ...) OH_LOG_INFO(LOG_APP, (tag format "\n"), ##__VA_ARGS__)
|
||||
#else
|
||||
# define YASIO_LOG_TAG(tag, format, ...) printf((tag format "\n"), ##__VA_ARGS__)
|
||||
#endif
|
||||
|
|
|
@ -209,7 +209,7 @@ int xxsocket::pserve(const endpoint& ep)
|
|||
if (!this->reopen(ep.af()))
|
||||
return -1;
|
||||
|
||||
set_optval(SOL_SOCKET, SO_REUSEADDR, 1);
|
||||
this->reuse_address(true);
|
||||
|
||||
int n = this->bind(ep);
|
||||
if (n != 0)
|
||||
|
|
|
@ -52,6 +52,7 @@ default is: `navigator.hardwareConcurrency`
|
|||
- AX_WASM_SHELL_FILE: specify the wasm shell file, by default use `${_AX_ROOT}/core/platform/wasm/shell_minimal.html`
|
||||
- AX_WASM_ENABLE_DEVTOOLS: whether enable web devtools aka `pause`, `resume`, `step` buttons in webpage, default: `TRUE`
|
||||
- AX_WASM_INITIAL_MEMORY: set the wasm initial memory size, default `1024MB`
|
||||
- AX_WASM_ISA_SIMD: specify the wasm simd intrinsics type, default `none`, supports `sse`, `neon`, note the `wasm-simd` not support by axmol yet
|
||||
|
||||
## The options for axmol apps
|
||||
|
||||
|
|
|
@ -186,22 +186,21 @@ endfunction()
|
|||
|
||||
if(EMSCRIPTEN)
|
||||
set(AX_WASM_THREADS "4" CACHE STRING "Wasm threads count")
|
||||
|
||||
set(_AX_WASM_THREADS_INT 0)
|
||||
set(_threads_hint "")
|
||||
if (AX_WASM_THREADS STREQUAL "auto") # not empty string or not 0
|
||||
# Enable pthread support globally
|
||||
set(_threads_hint "(auto)")
|
||||
include(ProcessorCount)
|
||||
set(_AX_WASM_THREADS_INT 0)
|
||||
ProcessorCount(_AX_WASM_THREADS_INT)
|
||||
elseif(AX_WASM_THREADS MATCHES "^([0-9]+)$" OR AX_WASM_THREADS STREQUAL "navigator.hardwareConcurrency")
|
||||
set(_AX_WASM_THREADS_INT ${AX_WASM_THREADS})
|
||||
set(AX_WASM_THREADS "${_AX_WASM_THREADS_INT}" CACHE STRING "Wasm threads count" FORCE)
|
||||
endif()
|
||||
|
||||
message(STATUS "AX_WASM_THREADS=${AX_WASM_THREADS}")
|
||||
message(STATUS "_AX_WASM_THREADS_INT=${_AX_WASM_THREADS_INT}")
|
||||
message(STATUS "AX_WASM_THREADS=${AX_WASM_THREADS}${_threads_hint}")
|
||||
|
||||
if (_AX_WASM_THREADS_INT)
|
||||
if(AX_WASM_THREADS MATCHES "^([0-9]+)$" OR AX_WASM_THREADS STREQUAL "navigator.hardwareConcurrency")
|
||||
list(APPEND _ax_compile_options -pthread)
|
||||
add_link_options(-pthread -sPTHREAD_POOL_SIZE=${_AX_WASM_THREADS_INT})
|
||||
add_link_options(-pthread -sPTHREAD_POOL_SIZE=${AX_WASM_THREADS})
|
||||
endif()
|
||||
|
||||
set(AX_WASM_INITIAL_MEMORY "1024MB" CACHE STRING "")
|
||||
|
|
|
@ -400,9 +400,43 @@ if(WINDOWS)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# AX_USE_SSE
|
||||
if (AX_ISA_SIMD MATCHES "sse")
|
||||
target_compile_definitions(${_AX_CORE_LIB} PUBLIC AX_USE_SSE=1)
|
||||
# axmol math simd intrinsics support
|
||||
set(_simdc_defines)
|
||||
set(_simdc_options)
|
||||
if (NOT WASM) # native platforms auto detect from cmake or preprocessor check
|
||||
if (AX_ISA_SIMD MATCHES "sse")
|
||||
list(APPEND _simdc_defines AX_SSE_INTRINSICS=1)
|
||||
if (AX_ISA_SIMD MATCHES "sse4")
|
||||
list(APPEND _simdc_defines __SSE4_1__=1)
|
||||
if (LINUX)
|
||||
list(APPEND _simdc_options -msse4.1)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
else() # wasm requires user specify SIMD intrinsics manually
|
||||
set(AX_WASM_ISA_SIMD "none" CACHE STRING "")
|
||||
string(TOLOWER ${AX_WASM_ISA_SIMD} AX_WASM_ISA_SIMD)
|
||||
if(AX_WASM_ISA_SIMD MATCHES "sse")
|
||||
message(AUTHOR_WARNING "Using SSE intrinsics for WASM ...")
|
||||
list(APPEND _simdc_defines AX_SSE_INTRINSICS=1 __SSE__=1 __SSE2__=1)
|
||||
list(APPEND _simdc_options -msse -msse2)
|
||||
if(AX_ISA_LEVEL GREATER_EQUAL 2)
|
||||
list(APPEND _simdc_defines __SSE4_1__=1)
|
||||
list(APPEND _simdc_options -msse4.1)
|
||||
endif()
|
||||
list(APPEND _simdc_options -msimd128)
|
||||
elseif(AX_WASM_ISA_SIMD MATCHES "neon")
|
||||
message(AUTHOR_WARNING "Using NEON intrinsics for WASM ...")
|
||||
list(APPEND _simdc_defines AX_NEON_INTRINSICS=1)
|
||||
list(APPEND _simdc_options -mfpu=neon -msimd128)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(_simdc_defines)
|
||||
target_compile_definitions(${_AX_CORE_LIB} PUBLIC ${_simdc_defines})
|
||||
if(_simdc_options)
|
||||
target_compile_options(${_AX_CORE_LIB} PUBLIC ${_simdc_options})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# engine extensions
|
||||
|
|
|
@ -71,7 +71,7 @@ bool Configuration::init()
|
|||
#if AX_ENABLE_PROFILERS
|
||||
_valueDict["axmol.compiled_with_profiler"] = Value(true);
|
||||
#else
|
||||
_valueDict["axmol.compiled_with_profiler"] = Value(false);
|
||||
_valueDict["axmol.compiled_with_profiler"] = Value(false);
|
||||
#endif
|
||||
|
||||
#if AX_ENABLE_GL_STATE_CACHE == 0
|
||||
|
@ -83,7 +83,17 @@ bool Configuration::init()
|
|||
#if _AX_DEBUG
|
||||
_valueDict["axmol.build_type"] = Value("DEBUG");
|
||||
#else
|
||||
_valueDict["axmol.build_type"] = Value("RELEASE");
|
||||
_valueDict["axmol.build_type"] = Value("RELEASE");
|
||||
#endif
|
||||
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
# if defined(__SSE4_1__)
|
||||
_valueDict["axmol.simd"] = Value("SSE41");
|
||||
# else
|
||||
_valueDict["axmol.simd"] = Value("SSE2");
|
||||
# endif
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
_valueDict["axmol.simd"] = Value("NEON");
|
||||
#endif
|
||||
|
||||
return true;
|
||||
|
|
|
@ -398,7 +398,7 @@ bool Console::listenOnTCP(int port)
|
|||
if (sock.pserve(ep) != 0)
|
||||
{
|
||||
int ec = xxsocket::get_last_errno();
|
||||
AXLOGW("Console: open server failed, ec:{}", ec);
|
||||
AXLOGW("Console: open server failed, ec:{}, {}", ec, xxsocket::strerror(ec));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#include "math/Mat4.h"
|
||||
|
@ -459,11 +459,7 @@ void Mat4::add(float scalar)
|
|||
void Mat4::add(float scalar, Mat4* dst)
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::addMatrix(col, scalar, dst->col);
|
||||
#else
|
||||
MathUtil::addMatrix(m, scalar, dst->m);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Mat4::add(const Mat4& mat)
|
||||
|
@ -474,11 +470,7 @@ void Mat4::add(const Mat4& mat)
|
|||
void Mat4::add(const Mat4& m1, const Mat4& m2, Mat4* dst)
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::addMatrix(m1.col, m2.col, dst->col);
|
||||
#else
|
||||
MathUtil::addMatrix(m1.m, m2.m, dst->m);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool Mat4::decompose(Vec3* scale, Quaternion* rotation, Vec3* translation) const
|
||||
|
@ -751,11 +743,7 @@ void Mat4::multiply(float scalar, Mat4* dst) const
|
|||
void Mat4::multiply(const Mat4& m, float scalar, Mat4* dst)
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::multiplyMatrix(m.col, scalar, dst->col);
|
||||
#else
|
||||
MathUtil::multiplyMatrix(m.m, scalar, dst->m);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Mat4::multiply(const Mat4& mat)
|
||||
|
@ -766,20 +754,12 @@ void Mat4::multiply(const Mat4& mat)
|
|||
void Mat4::multiply(const Mat4& m1, const Mat4& m2, Mat4* dst)
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::multiplyMatrix(m1.col, m2.col, dst->col);
|
||||
#else
|
||||
MathUtil::multiplyMatrix(m1.m, m2.m, dst->m);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Mat4::negate()
|
||||
{
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::negateMatrix(col, col);
|
||||
#else
|
||||
MathUtil::negateMatrix(m, m);
|
||||
#endif
|
||||
}
|
||||
|
||||
Mat4 Mat4::getNegated() const
|
||||
|
@ -945,11 +925,7 @@ void Mat4::subtract(const Mat4& mat)
|
|||
void Mat4::subtract(const Mat4& m1, const Mat4& m2, Mat4* dst)
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::subtractMatrix(m1.col, m2.col, dst->col);
|
||||
#else
|
||||
MathUtil::subtractMatrix(m1.m, m2.m, dst->m);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Mat4::transformVector(Vec3* vector) const
|
||||
|
@ -967,7 +943,7 @@ void Mat4::transformVector(float x, float y, float z, float w, Vec3* dst) const
|
|||
{
|
||||
GP_ASSERT(dst);
|
||||
|
||||
MathUtil::transformVec4(m, x, y, z, w, (float*)dst);
|
||||
MathUtil::transformVec4(m, x, y, z, w, reinterpret_cast<float*>(dst));
|
||||
}
|
||||
|
||||
void Mat4::transformVector(Vec4* vector) const
|
||||
|
@ -979,14 +955,7 @@ void Mat4::transformVector(Vec4* vector) const
|
|||
void Mat4::transformVector(const Vec4& vector, Vec4* dst) const
|
||||
{
|
||||
GP_ASSERT(dst);
|
||||
#ifdef AX_USE_SSE
|
||||
alignas(16) Vec4 inVal{vector};
|
||||
alignas(16) Vec4 outVal;
|
||||
MathUtil::transformVec4(col, reinterpret_cast<const __m128&>(inVal), reinterpret_cast<__m128&>(outVal));
|
||||
*dst = outVal;
|
||||
#else
|
||||
MathUtil::transformVec4(m, (const float*)&vector, (float*)dst);
|
||||
#endif
|
||||
MathUtil::transformVec4(m, reinterpret_cast<const float*>(&vector), reinterpret_cast<float*>(dst));
|
||||
}
|
||||
|
||||
void Mat4::translate(float x, float y, float z)
|
||||
|
@ -1013,11 +982,7 @@ void Mat4::translate(const Vec3& t, Mat4* dst) const
|
|||
|
||||
void Mat4::transpose()
|
||||
{
|
||||
#ifdef AX_USE_SSE
|
||||
MathUtil::transposeMatrix(col, col);
|
||||
#else
|
||||
MathUtil::transposeMatrix(m, m);
|
||||
#endif
|
||||
}
|
||||
|
||||
Mat4 Mat4::getTransposed() const
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#ifndef MATH_MAT4_H
|
||||
|
@ -29,10 +29,6 @@
|
|||
#include "math/Vec3.h"
|
||||
#include "math/Vec4.h"
|
||||
|
||||
#ifdef AX_USE_SSE
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @addtogroup base
|
||||
* @{
|
||||
|
@ -73,7 +69,7 @@ NS_AX_MATH_BEGIN
|
|||
*
|
||||
* @see Transform
|
||||
*/
|
||||
#ifdef AX_USE_SSE
|
||||
#if defined(AX_SSE_INTRINSICS) || defined(AX_NEON_INTRINSICS)
|
||||
class AX_DLL alignas(16) Mat4
|
||||
#else
|
||||
class AX_DLL Mat4
|
||||
|
@ -95,10 +91,10 @@ public:
|
|||
/**
|
||||
* Stores the columns of this 4x4 matrix.
|
||||
* */
|
||||
#ifdef AX_USE_SSE
|
||||
#if defined(AX_SSE_INTRINSICS) || defined(AX_NEON_INTRINSICS)
|
||||
union
|
||||
{
|
||||
__m128 col[4];
|
||||
_xm128_t col[4];
|
||||
float m[16];
|
||||
};
|
||||
#else
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/**
|
||||
Copyright 2013 BlackBerry Inc.
|
||||
Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
@ -15,7 +16,7 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#include "math/Mat4.h"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/****************************************************************************
|
||||
Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd.
|
||||
Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
||||
|
||||
https://axmol.dev/
|
||||
|
||||
|
@ -22,46 +23,47 @@
|
|||
THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __CCMATHBASE_H__
|
||||
#define __CCMATHBASE_H__
|
||||
#ifndef __AXMATHBASE_H__
|
||||
#define __AXMATHBASE_H__
|
||||
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include "platform/PlatformMacros.h"
|
||||
|
||||
/**
|
||||
* @addtogroup base
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**Util macro for conversion from degrees to radians.*/
|
||||
#define MATH_DEG_TO_RAD(x) ((x)*0.0174532925f)
|
||||
#define MATH_DEG_TO_RAD(x) ((x) * 0.0174532925f)
|
||||
/**Util macro for conversion from radians to degrees.*/
|
||||
#define MATH_RAD_TO_DEG(x) ((x)*57.29577951f)
|
||||
#define MATH_RAD_TO_DEG(x) ((x) * 57.29577951f)
|
||||
/**
|
||||
@{ Util macro for const float such as epsilon, small float and float precision tolerance.
|
||||
*/
|
||||
#define MATH_FLOAT_SMALL 1.0e-37f
|
||||
#define MATH_TOLERANCE 2e-37f
|
||||
#define MATH_PIOVER2 1.57079632679489661923f
|
||||
#define MATH_EPSILON 0.000001f
|
||||
#define MATH_TOLERANCE 2e-37f
|
||||
#define MATH_PIOVER2 1.57079632679489661923f
|
||||
#define MATH_EPSILON 0.000001f
|
||||
/**@}*/
|
||||
|
||||
//#define MATH_PIOVER4 0.785398163397448309616f
|
||||
//#define MATH_PIX2 6.28318530717958647693f
|
||||
//#define MATH_E 2.71828182845904523536f
|
||||
//#define MATH_LOG10E 0.4342944819032518f
|
||||
//#define MATH_LOG2E 1.442695040888963387f
|
||||
//#define MATH_PI 3.14159265358979323846f
|
||||
//#define MATH_RANDOM_MINUS1_1() ((2.0f*((float)rand()/RAND_MAX))-1.0f) // Returns a random float between -1
|
||||
// and 1. #define MATH_RANDOM_0_1() ((float)rand()/RAND_MAX) // Returns a random float
|
||||
// between 0 and 1. #define MATH_CLAMP(x, lo, hi) ((x < lo) ? lo : ((x > hi) ? hi : x)) #ifndef M_1_PI #define
|
||||
// M_1_PI 0.31830988618379067154
|
||||
// #define MATH_PIOVER4 0.785398163397448309616f
|
||||
// #define MATH_PIX2 6.28318530717958647693f
|
||||
// #define MATH_E 2.71828182845904523536f
|
||||
// #define MATH_LOG10E 0.4342944819032518f
|
||||
// #define MATH_LOG2E 1.442695040888963387f
|
||||
// #define MATH_PI 3.14159265358979323846f
|
||||
// #define MATH_RANDOM_MINUS1_1() ((2.0f*((float)rand()/RAND_MAX))-1.0f) // Returns a random float between -1
|
||||
// and 1. #define MATH_RANDOM_0_1() ((float)rand()/RAND_MAX) // Returns a random float
|
||||
// between 0 and 1. #define MATH_CLAMP(x, lo, hi) ((x < lo) ? lo : ((x > hi) ? hi : x)) #ifndef M_1_PI #define
|
||||
// M_1_PI 0.31830988618379067154
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define NS_AX_MATH_BEGIN \
|
||||
namespace ax \
|
||||
namespace ax \
|
||||
{
|
||||
# define NS_AX_MATH_END }
|
||||
# define NS_AX_MATH_END }
|
||||
# define USING_NS_AX_MATH using namespace ax
|
||||
#else
|
||||
# define NS_AX_MATH_BEGIN
|
||||
|
|
|
@ -17,7 +17,7 @@ limitations under the License.
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#include "math/MathUtil.h"
|
||||
|
@ -28,50 +28,10 @@ This file was modified to fit the cocos2d-x project
|
|||
# include <cpu-features.h>
|
||||
#endif
|
||||
|
||||
//#define USE_NEON32 : neon 32 code will be used
|
||||
//#define USE_NEON64 : neon 64 code will be used
|
||||
//#define INCLUDE_NEON32 : neon 32 code included
|
||||
//#define INCLUDE_NEON64 : neon 64 code included
|
||||
//#define USE_SSE : SSE code used
|
||||
//#define INCLUDE_SSE : SSE code included
|
||||
|
||||
#if (AX_TARGET_PLATFORM == AX_PLATFORM_IOS)
|
||||
# if defined(__arm64__)
|
||||
# define USE_NEON64 1
|
||||
# define INCLUDE_NEON64 1
|
||||
# elif defined(__ARM_NEON__)
|
||||
# define USE_NEON32 1
|
||||
# define INCLUDE_NEON32 1
|
||||
# endif
|
||||
#elif (AX_TARGET_PLATFORM == AX_PLATFORM_OSX)
|
||||
# if defined(__arm64__) || defined(__aarch64__)
|
||||
# define USE_NEON64 1
|
||||
# define INCLUDE_NEON64 1
|
||||
# endif
|
||||
#elif (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID)
|
||||
# if defined(__arm64__) || defined(__aarch64__)
|
||||
# define USE_NEON64 1
|
||||
# define INCLUDE_NEON64 1
|
||||
# elif defined(__ARM_NEON__)
|
||||
# define INCLUDE_NEON32 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(AX_USE_SSE)
|
||||
# define USE_SSE 1
|
||||
# define INCLUDE_SSE 1
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
# include "math/MathUtilNeon.inl"
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
# include "math/MathUtilNeon64.inl"
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
# include "math/MathUtilSSE.inl"
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# include "math/MathUtilNeon.inl"
|
||||
#endif
|
||||
|
||||
#include "math/MathUtil.inl"
|
||||
|
@ -106,9 +66,8 @@ float MathUtil::lerp(float from, float to, float alpha)
|
|||
|
||||
bool MathUtil::isNeon32Enabled()
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
return true;
|
||||
#elif (defined(INCLUDE_NEON32) && (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID))
|
||||
#if defined(AX_NEON_INTRINSICS) && !AX_64BITS
|
||||
# if AX_NEON_INTRINSICS == 1 && AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID
|
||||
class AnrdoidNeonChecker
|
||||
{
|
||||
public:
|
||||
|
@ -127,15 +86,9 @@ bool MathUtil::isNeon32Enabled()
|
|||
};
|
||||
static AnrdoidNeonChecker checker;
|
||||
return checker.isNeonEnabled();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool MathUtil::isNeon64Enabled()
|
||||
{
|
||||
#ifdef USE_NEON64
|
||||
# else
|
||||
return true;
|
||||
# endif
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
|
@ -143,15 +96,17 @@ bool MathUtil::isNeon64Enabled()
|
|||
|
||||
void MathUtil::addMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::addMatrix(m, scalar, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::addMatrix(m, scalar, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::addMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::addMatrix(m, scalar, dst);
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::addMatrix(m, scalar, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::addMatrix(m, scalar, dst);
|
||||
#endif
|
||||
|
@ -159,15 +114,20 @@ void MathUtil::addMatrix(const float* m, float scalar, float* dst)
|
|||
|
||||
void MathUtil::addMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::addMatrix(m1, m2, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::addMatrix(m1, m2, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::addMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::addMatrix(m1, m2, dst);
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::addMatrix(m1, m2, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::addMatrix(m1, m2, dst);
|
||||
#endif
|
||||
|
@ -175,15 +135,20 @@ void MathUtil::addMatrix(const float* m1, const float* m2, float* dst)
|
|||
|
||||
void MathUtil::subtractMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::subtractMatrix(m1, m2, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::subtractMatrix(m1, m2, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::subtractMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::subtractMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::subtractMatrix(m1, m2, dst);
|
||||
MathUtilNeon::subtractMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::subtractMatrix(m1, m2, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::subtractMatrix(m1, m2, dst);
|
||||
#endif
|
||||
|
@ -191,15 +156,17 @@ void MathUtil::subtractMatrix(const float* m1, const float* m2, float* dst)
|
|||
|
||||
void MathUtil::multiplyMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::multiplyMatrix(m, scalar, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::multiplyMatrix(m, scalar, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::multiplyMatrix(m, scalar, dst);
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m), scalar, reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::multiplyMatrix(m, scalar, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::multiplyMatrix(m, scalar, dst);
|
||||
#endif
|
||||
|
@ -207,15 +174,20 @@ void MathUtil::multiplyMatrix(const float* m, float scalar, float* dst)
|
|||
|
||||
void MathUtil::multiplyMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::multiplyMatrix(m1, m2, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::multiplyMatrix(m1, m2, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::multiplyMatrix(m1, m2, dst);
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(m1), reinterpret_cast<const _xm128_t*>(m2),
|
||||
reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::multiplyMatrix(m1, m2, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::multiplyMatrix(m1, m2, dst);
|
||||
#endif
|
||||
|
@ -223,15 +195,17 @@ void MathUtil::multiplyMatrix(const float* m1, const float* m2, float* dst)
|
|||
|
||||
void MathUtil::negateMatrix(const float* m, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::negateMatrix(m, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::negateMatrix(m, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::negateMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::negateMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::negateMatrix(m, dst);
|
||||
MathUtilNeon::negateMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::negateMatrix(m, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::negateMatrix(m, dst);
|
||||
#endif
|
||||
|
@ -239,47 +213,53 @@ void MathUtil::negateMatrix(const float* m, float* dst)
|
|||
|
||||
void MathUtil::transposeMatrix(const float* m, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::transposeMatrix(m, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::transposeMatrix(m, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transposeMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::transposeMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::transposeMatrix(m, dst);
|
||||
MathUtilNeon::transposeMatrix(reinterpret_cast<const _xm128_t*>(m), reinterpret_cast<_xm128_t*>(dst));
|
||||
else
|
||||
MathUtilC::transposeMatrix(m, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::transposeMatrix(m, dst);
|
||||
#endif
|
||||
}
|
||||
|
||||
void MathUtil::transformVec4(const float* m, float x, float y, float z, float w, float* dst)
|
||||
void MathUtil::transformVec4(const float* m, float x, float y, float z, float w, float* dst /*vec3*/)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::transformVec4(m, x, y, z, w, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::transformVec4(m, x, y, z, w, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transformVec4(reinterpret_cast<const _xm128_t*>(m), x, y, z, w, dst);
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(m), x, y, z, w, dst);
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::transformVec4(m, x, y, z, w, dst);
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(m), x, y, z, w, dst);
|
||||
else
|
||||
MathUtilC::transformVec4(m, x, y, z, w, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::transformVec4(m, x, y, z, w, dst);
|
||||
#endif
|
||||
}
|
||||
|
||||
void MathUtil::transformVec4(const float* m, const float* v, float* dst)
|
||||
void MathUtil::transformVec4(const float* m, const float* v, float* dst /*vec4*/)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
MathUtilNeon::transformVec4(m, v, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::transformVec4(m, v, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transformVec4(reinterpret_cast<const _xm128_t*>(m), v, dst);
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(m), v, dst);
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::transformVec4(m, v, dst);
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(m), v, dst);
|
||||
else
|
||||
MathUtilC::transformVec4(m, v, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::transformVec4(m, v, dst);
|
||||
#endif
|
||||
|
@ -287,15 +267,17 @@ void MathUtil::transformVec4(const float* m, const float* v, float* dst)
|
|||
|
||||
void MathUtil::crossVec3(const float* v1, const float* v2, float* dst)
|
||||
{
|
||||
#ifdef USE_NEON32
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::crossVec3(v1, v2, dst);
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::crossVec3(v1, v2, dst);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::crossVec3(v1, v2, dst);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::crossVec3(v1, v2, dst);
|
||||
else
|
||||
MathUtilC::crossVec3(v1, v2, dst);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::crossVec3(v1, v2, dst);
|
||||
#endif
|
||||
|
@ -308,24 +290,28 @@ void MathUtil::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_
|
|||
static_assert(offsetof(V3F_C4B_T2F, vertices) == 0);
|
||||
static_assert(offsetof(V3F_C4B_T2F, colors) == 12);
|
||||
static_assert(offsetof(V3F_C4B_T2F, texCoords) == 16);
|
||||
|
||||
#ifdef USE_NEON32
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transformVertices(dst, src, count, transform);
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# if AX_64BITS || AX_NEON_INTRINSICS > 1
|
||||
MathUtilNeon::transformVertices(dst, src, count, transform);
|
||||
#elif defined(USE_NEON64)
|
||||
MathUtilNeon64::transformVertices(dst, src, count, transform);
|
||||
#elif defined(INCLUDE_NEON32)
|
||||
# else
|
||||
if (isNeon32Enabled())
|
||||
MathUtilNeon::transformVertices(dst, src, count, transform);
|
||||
else
|
||||
MathUtilC::transformVertices(dst, src, count, transform);
|
||||
# endif
|
||||
#else
|
||||
MathUtilC::transformVertices(dst, src, count, transform);
|
||||
#endif
|
||||
}
|
||||
|
||||
void MathUtil::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset) {
|
||||
#if defined(USE_NEON64)
|
||||
MathUtilNeon64::transformIndices(dst, src, count, offset);
|
||||
void MathUtil::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
|
||||
{
|
||||
#if defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transformIndices(dst, src, count, offset);
|
||||
#elif defined(AX_NEON_INTRINSICS) && AX_64BITS
|
||||
MathUtilNeon::transformIndices(dst, src, count, offset);
|
||||
#else
|
||||
MathUtilC::transformIndices(dst, src, count, offset);
|
||||
#endif
|
||||
|
|
|
@ -18,16 +18,12 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#ifndef MATHUTIL_H_
|
||||
#define MATHUTIL_H_
|
||||
|
||||
#ifdef AX_USE_SSE
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "math/MathBase.h"
|
||||
|
||||
|
||||
|
@ -42,7 +38,7 @@ NS_AX_END
|
|||
|
||||
NS_AX_MATH_BEGIN
|
||||
|
||||
class Mat4;
|
||||
class Vec4;
|
||||
|
||||
/**
|
||||
* Defines a math utility class.
|
||||
|
@ -100,26 +96,8 @@ public:
|
|||
private:
|
||||
// Indicates that if neon is enabled
|
||||
static bool isNeon32Enabled();
|
||||
static bool isNeon64Enabled();
|
||||
|
||||
private:
|
||||
#ifdef AX_USE_SSE
|
||||
static void addMatrix(const __m128 m[4], float scalar, __m128 dst[4]);
|
||||
|
||||
static void addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
|
||||
|
||||
static void subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
|
||||
|
||||
static void multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4]);
|
||||
|
||||
static void multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4]);
|
||||
|
||||
static void negateMatrix(const __m128 m[4], __m128 dst[4]);
|
||||
|
||||
static void transposeMatrix(const __m128 m[4], __m128 dst[4]);
|
||||
|
||||
static void transformVec4(const __m128 m[4], const __m128& v, __m128& dst);
|
||||
#endif
|
||||
static void addMatrix(const float* m, float scalar, float* dst);
|
||||
|
||||
static void addMatrix(const float* m1, const float* m2, float* dst);
|
||||
|
@ -134,9 +112,9 @@ private:
|
|||
|
||||
static void transposeMatrix(const float* m, float* dst);
|
||||
|
||||
static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);
|
||||
static void transformVec4(const float* m, float x, float y, float z, float w, float* dst/*vec3*/);
|
||||
|
||||
static void transformVec4(const float* m, const float* v, float* dst);
|
||||
static void transformVec4(const float* m, const float* v, float* dst/*vec4*/);
|
||||
|
||||
static void crossVec3(const float* v1, const float* v2, float* dst);
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
NS_AX_MATH_BEGIN
|
||||
|
@ -24,221 +24,201 @@ NS_AX_MATH_BEGIN
|
|||
class MathUtilC
|
||||
{
|
||||
public:
|
||||
inline static void addMatrix(const float* m, float scalar, float* dst);
|
||||
inline static void addMatrix(const float* m1, const float* m2, float* dst);
|
||||
inline static void subtractMatrix(const float* m1, const float* m2, float* dst);
|
||||
inline static void multiplyMatrix(const float* m, float scalar, float* dst);
|
||||
inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);
|
||||
inline static void addMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
dst[0] = m[0] + scalar;
|
||||
dst[1] = m[1] + scalar;
|
||||
dst[2] = m[2] + scalar;
|
||||
dst[3] = m[3] + scalar;
|
||||
dst[4] = m[4] + scalar;
|
||||
dst[5] = m[5] + scalar;
|
||||
dst[6] = m[6] + scalar;
|
||||
dst[7] = m[7] + scalar;
|
||||
dst[8] = m[8] + scalar;
|
||||
dst[9] = m[9] + scalar;
|
||||
dst[10] = m[10] + scalar;
|
||||
dst[11] = m[11] + scalar;
|
||||
dst[12] = m[12] + scalar;
|
||||
dst[13] = m[13] + scalar;
|
||||
dst[14] = m[14] + scalar;
|
||||
dst[15] = m[15] + scalar;
|
||||
}
|
||||
|
||||
inline static void negateMatrix(const float* m, float* dst);
|
||||
inline static void transposeMatrix(const float* m, float* dst);
|
||||
inline static void addMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
dst[0] = m1[0] + m2[0];
|
||||
dst[1] = m1[1] + m2[1];
|
||||
dst[2] = m1[2] + m2[2];
|
||||
dst[3] = m1[3] + m2[3];
|
||||
dst[4] = m1[4] + m2[4];
|
||||
dst[5] = m1[5] + m2[5];
|
||||
dst[6] = m1[6] + m2[6];
|
||||
dst[7] = m1[7] + m2[7];
|
||||
dst[8] = m1[8] + m2[8];
|
||||
dst[9] = m1[9] + m2[9];
|
||||
dst[10] = m1[10] + m2[10];
|
||||
dst[11] = m1[11] + m2[11];
|
||||
dst[12] = m1[12] + m2[12];
|
||||
dst[13] = m1[13] + m2[13];
|
||||
dst[14] = m1[14] + m2[14];
|
||||
dst[15] = m1[15] + m2[15];
|
||||
}
|
||||
|
||||
inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);
|
||||
inline static void transformVec4(const float* m, const float* v, float* dst);
|
||||
inline static void crossVec3(const float* v1, const float* v2, float* dst);
|
||||
inline static void subtractMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
dst[0] = m1[0] - m2[0];
|
||||
dst[1] = m1[1] - m2[1];
|
||||
dst[2] = m1[2] - m2[2];
|
||||
dst[3] = m1[3] - m2[3];
|
||||
dst[4] = m1[4] - m2[4];
|
||||
dst[5] = m1[5] - m2[5];
|
||||
dst[6] = m1[6] - m2[6];
|
||||
dst[7] = m1[7] - m2[7];
|
||||
dst[8] = m1[8] - m2[8];
|
||||
dst[9] = m1[9] - m2[9];
|
||||
dst[10] = m1[10] - m2[10];
|
||||
dst[11] = m1[11] - m2[11];
|
||||
dst[12] = m1[12] - m2[12];
|
||||
dst[13] = m1[13] - m2[13];
|
||||
dst[14] = m1[14] - m2[14];
|
||||
dst[15] = m1[15] - m2[15];
|
||||
}
|
||||
|
||||
inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform);
|
||||
inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset);
|
||||
inline static void multiplyMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
dst[0] = m[0] * scalar;
|
||||
dst[1] = m[1] * scalar;
|
||||
dst[2] = m[2] * scalar;
|
||||
dst[3] = m[3] * scalar;
|
||||
dst[4] = m[4] * scalar;
|
||||
dst[5] = m[5] * scalar;
|
||||
dst[6] = m[6] * scalar;
|
||||
dst[7] = m[7] * scalar;
|
||||
dst[8] = m[8] * scalar;
|
||||
dst[9] = m[9] * scalar;
|
||||
dst[10] = m[10] * scalar;
|
||||
dst[11] = m[11] * scalar;
|
||||
dst[12] = m[12] * scalar;
|
||||
dst[13] = m[13] * scalar;
|
||||
dst[14] = m[14] * scalar;
|
||||
dst[15] = m[15] * scalar;
|
||||
}
|
||||
|
||||
inline static void multiplyMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
// Support the case where m1 or m2 is the same array as dst.
|
||||
float product[16];
|
||||
|
||||
product[0] = m1[0] * m2[0] + m1[4] * m2[1] + m1[8] * m2[2] + m1[12] * m2[3];
|
||||
product[1] = m1[1] * m2[0] + m1[5] * m2[1] + m1[9] * m2[2] + m1[13] * m2[3];
|
||||
product[2] = m1[2] * m2[0] + m1[6] * m2[1] + m1[10] * m2[2] + m1[14] * m2[3];
|
||||
product[3] = m1[3] * m2[0] + m1[7] * m2[1] + m1[11] * m2[2] + m1[15] * m2[3];
|
||||
|
||||
product[4] = m1[0] * m2[4] + m1[4] * m2[5] + m1[8] * m2[6] + m1[12] * m2[7];
|
||||
product[5] = m1[1] * m2[4] + m1[5] * m2[5] + m1[9] * m2[6] + m1[13] * m2[7];
|
||||
product[6] = m1[2] * m2[4] + m1[6] * m2[5] + m1[10] * m2[6] + m1[14] * m2[7];
|
||||
product[7] = m1[3] * m2[4] + m1[7] * m2[5] + m1[11] * m2[6] + m1[15] * m2[7];
|
||||
|
||||
product[8] = m1[0] * m2[8] + m1[4] * m2[9] + m1[8] * m2[10] + m1[12] * m2[11];
|
||||
product[9] = m1[1] * m2[8] + m1[5] * m2[9] + m1[9] * m2[10] + m1[13] * m2[11];
|
||||
product[10] = m1[2] * m2[8] + m1[6] * m2[9] + m1[10] * m2[10] + m1[14] * m2[11];
|
||||
product[11] = m1[3] * m2[8] + m1[7] * m2[9] + m1[11] * m2[10] + m1[15] * m2[11];
|
||||
|
||||
product[12] = m1[0] * m2[12] + m1[4] * m2[13] + m1[8] * m2[14] + m1[12] * m2[15];
|
||||
product[13] = m1[1] * m2[12] + m1[5] * m2[13] + m1[9] * m2[14] + m1[13] * m2[15];
|
||||
product[14] = m1[2] * m2[12] + m1[6] * m2[13] + m1[10] * m2[14] + m1[14] * m2[15];
|
||||
product[15] = m1[3] * m2[12] + m1[7] * m2[13] + m1[11] * m2[14] + m1[15] * m2[15];
|
||||
|
||||
memcpy(dst, product, MATRIX_SIZE);
|
||||
}
|
||||
|
||||
inline static void negateMatrix(const float* m, float* dst)
|
||||
{
|
||||
dst[0] = -m[0];
|
||||
dst[1] = -m[1];
|
||||
dst[2] = -m[2];
|
||||
dst[3] = -m[3];
|
||||
dst[4] = -m[4];
|
||||
dst[5] = -m[5];
|
||||
dst[6] = -m[6];
|
||||
dst[7] = -m[7];
|
||||
dst[8] = -m[8];
|
||||
dst[9] = -m[9];
|
||||
dst[10] = -m[10];
|
||||
dst[11] = -m[11];
|
||||
dst[12] = -m[12];
|
||||
dst[13] = -m[13];
|
||||
dst[14] = -m[14];
|
||||
dst[15] = -m[15];
|
||||
}
|
||||
|
||||
inline static void transposeMatrix(const float* m, float* dst)
|
||||
{
|
||||
float t[16] = {m[0], m[4], m[8], m[12], m[1], m[5], m[9], m[13],
|
||||
m[2], m[6], m[10], m[14], m[3], m[7], m[11], m[15]};
|
||||
memcpy(dst, t, MATRIX_SIZE);
|
||||
}
|
||||
|
||||
inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst)
|
||||
{
|
||||
dst[0] = x * m[0] + y * m[4] + z * m[8] + w * m[12];
|
||||
dst[1] = x * m[1] + y * m[5] + z * m[9] + w * m[13];
|
||||
dst[2] = x * m[2] + y * m[6] + z * m[10] + w * m[14];
|
||||
}
|
||||
|
||||
inline static void transformVec4(const float* m, const float* v, float* dst)
|
||||
{
|
||||
// Handle case where v == dst.
|
||||
float x = v[0] * m[0] + v[1] * m[4] + v[2] * m[8] + v[3] * m[12];
|
||||
float y = v[0] * m[1] + v[1] * m[5] + v[2] * m[9] + v[3] * m[13];
|
||||
float z = v[0] * m[2] + v[1] * m[6] + v[2] * m[10] + v[3] * m[14];
|
||||
float w = v[0] * m[3] + v[1] * m[7] + v[2] * m[11] + v[3] * m[15];
|
||||
|
||||
dst[0] = x;
|
||||
dst[1] = y;
|
||||
dst[2] = z;
|
||||
dst[3] = w;
|
||||
}
|
||||
|
||||
inline static void crossVec3(const float* v1, const float* v2, float* dst)
|
||||
{
|
||||
float x = (v1[1] * v2[2]) - (v1[2] * v2[1]);
|
||||
float y = (v1[2] * v2[0]) - (v1[0] * v2[2]);
|
||||
float z = (v1[0] * v2[1]) - (v1[1] * v2[0]);
|
||||
|
||||
dst[0] = x;
|
||||
dst[1] = y;
|
||||
dst[2] = z;
|
||||
}
|
||||
|
||||
inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform)
|
||||
{
|
||||
auto end = dst + count;
|
||||
auto& t = transform; // Make copy for better aliasing inference
|
||||
auto m = t.m;
|
||||
|
||||
while (dst < end)
|
||||
{
|
||||
auto pos = src->vertices;
|
||||
dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12];
|
||||
dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13];
|
||||
dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14];
|
||||
memcpy(&dst->colors, &src->colors, sizeof(V3F_C4B_T2F::colors) + sizeof(V3F_C4B_T2F::texCoords));
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
|
||||
{
|
||||
auto end = dst + count;
|
||||
while (dst < end)
|
||||
{
|
||||
*dst = *src + offset;
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline void MathUtilC::addMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
dst[0] = m[0] + scalar;
|
||||
dst[1] = m[1] + scalar;
|
||||
dst[2] = m[2] + scalar;
|
||||
dst[3] = m[3] + scalar;
|
||||
dst[4] = m[4] + scalar;
|
||||
dst[5] = m[5] + scalar;
|
||||
dst[6] = m[6] + scalar;
|
||||
dst[7] = m[7] + scalar;
|
||||
dst[8] = m[8] + scalar;
|
||||
dst[9] = m[9] + scalar;
|
||||
dst[10] = m[10] + scalar;
|
||||
dst[11] = m[11] + scalar;
|
||||
dst[12] = m[12] + scalar;
|
||||
dst[13] = m[13] + scalar;
|
||||
dst[14] = m[14] + scalar;
|
||||
dst[15] = m[15] + scalar;
|
||||
}
|
||||
|
||||
inline void MathUtilC::addMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
dst[0] = m1[0] + m2[0];
|
||||
dst[1] = m1[1] + m2[1];
|
||||
dst[2] = m1[2] + m2[2];
|
||||
dst[3] = m1[3] + m2[3];
|
||||
dst[4] = m1[4] + m2[4];
|
||||
dst[5] = m1[5] + m2[5];
|
||||
dst[6] = m1[6] + m2[6];
|
||||
dst[7] = m1[7] + m2[7];
|
||||
dst[8] = m1[8] + m2[8];
|
||||
dst[9] = m1[9] + m2[9];
|
||||
dst[10] = m1[10] + m2[10];
|
||||
dst[11] = m1[11] + m2[11];
|
||||
dst[12] = m1[12] + m2[12];
|
||||
dst[13] = m1[13] + m2[13];
|
||||
dst[14] = m1[14] + m2[14];
|
||||
dst[15] = m1[15] + m2[15];
|
||||
}
|
||||
|
||||
inline void MathUtilC::subtractMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
dst[0] = m1[0] - m2[0];
|
||||
dst[1] = m1[1] - m2[1];
|
||||
dst[2] = m1[2] - m2[2];
|
||||
dst[3] = m1[3] - m2[3];
|
||||
dst[4] = m1[4] - m2[4];
|
||||
dst[5] = m1[5] - m2[5];
|
||||
dst[6] = m1[6] - m2[6];
|
||||
dst[7] = m1[7] - m2[7];
|
||||
dst[8] = m1[8] - m2[8];
|
||||
dst[9] = m1[9] - m2[9];
|
||||
dst[10] = m1[10] - m2[10];
|
||||
dst[11] = m1[11] - m2[11];
|
||||
dst[12] = m1[12] - m2[12];
|
||||
dst[13] = m1[13] - m2[13];
|
||||
dst[14] = m1[14] - m2[14];
|
||||
dst[15] = m1[15] - m2[15];
|
||||
}
|
||||
|
||||
inline void MathUtilC::multiplyMatrix(const float* m, float scalar, float* dst)
|
||||
{
|
||||
dst[0] = m[0] * scalar;
|
||||
dst[1] = m[1] * scalar;
|
||||
dst[2] = m[2] * scalar;
|
||||
dst[3] = m[3] * scalar;
|
||||
dst[4] = m[4] * scalar;
|
||||
dst[5] = m[5] * scalar;
|
||||
dst[6] = m[6] * scalar;
|
||||
dst[7] = m[7] * scalar;
|
||||
dst[8] = m[8] * scalar;
|
||||
dst[9] = m[9] * scalar;
|
||||
dst[10] = m[10] * scalar;
|
||||
dst[11] = m[11] * scalar;
|
||||
dst[12] = m[12] * scalar;
|
||||
dst[13] = m[13] * scalar;
|
||||
dst[14] = m[14] * scalar;
|
||||
dst[15] = m[15] * scalar;
|
||||
}
|
||||
|
||||
inline void MathUtilC::multiplyMatrix(const float* m1, const float* m2, float* dst)
|
||||
{
|
||||
// Support the case where m1 or m2 is the same array as dst.
|
||||
float product[16];
|
||||
|
||||
product[0] = m1[0] * m2[0] + m1[4] * m2[1] + m1[8] * m2[2] + m1[12] * m2[3];
|
||||
product[1] = m1[1] * m2[0] + m1[5] * m2[1] + m1[9] * m2[2] + m1[13] * m2[3];
|
||||
product[2] = m1[2] * m2[0] + m1[6] * m2[1] + m1[10] * m2[2] + m1[14] * m2[3];
|
||||
product[3] = m1[3] * m2[0] + m1[7] * m2[1] + m1[11] * m2[2] + m1[15] * m2[3];
|
||||
|
||||
product[4] = m1[0] * m2[4] + m1[4] * m2[5] + m1[8] * m2[6] + m1[12] * m2[7];
|
||||
product[5] = m1[1] * m2[4] + m1[5] * m2[5] + m1[9] * m2[6] + m1[13] * m2[7];
|
||||
product[6] = m1[2] * m2[4] + m1[6] * m2[5] + m1[10] * m2[6] + m1[14] * m2[7];
|
||||
product[7] = m1[3] * m2[4] + m1[7] * m2[5] + m1[11] * m2[6] + m1[15] * m2[7];
|
||||
|
||||
product[8] = m1[0] * m2[8] + m1[4] * m2[9] + m1[8] * m2[10] + m1[12] * m2[11];
|
||||
product[9] = m1[1] * m2[8] + m1[5] * m2[9] + m1[9] * m2[10] + m1[13] * m2[11];
|
||||
product[10] = m1[2] * m2[8] + m1[6] * m2[9] + m1[10] * m2[10] + m1[14] * m2[11];
|
||||
product[11] = m1[3] * m2[8] + m1[7] * m2[9] + m1[11] * m2[10] + m1[15] * m2[11];
|
||||
|
||||
product[12] = m1[0] * m2[12] + m1[4] * m2[13] + m1[8] * m2[14] + m1[12] * m2[15];
|
||||
product[13] = m1[1] * m2[12] + m1[5] * m2[13] + m1[9] * m2[14] + m1[13] * m2[15];
|
||||
product[14] = m1[2] * m2[12] + m1[6] * m2[13] + m1[10] * m2[14] + m1[14] * m2[15];
|
||||
product[15] = m1[3] * m2[12] + m1[7] * m2[13] + m1[11] * m2[14] + m1[15] * m2[15];
|
||||
|
||||
memcpy(dst, product, MATRIX_SIZE);
|
||||
}
|
||||
|
||||
inline void MathUtilC::negateMatrix(const float* m, float* dst)
|
||||
{
|
||||
dst[0] = -m[0];
|
||||
dst[1] = -m[1];
|
||||
dst[2] = -m[2];
|
||||
dst[3] = -m[3];
|
||||
dst[4] = -m[4];
|
||||
dst[5] = -m[5];
|
||||
dst[6] = -m[6];
|
||||
dst[7] = -m[7];
|
||||
dst[8] = -m[8];
|
||||
dst[9] = -m[9];
|
||||
dst[10] = -m[10];
|
||||
dst[11] = -m[11];
|
||||
dst[12] = -m[12];
|
||||
dst[13] = -m[13];
|
||||
dst[14] = -m[14];
|
||||
dst[15] = -m[15];
|
||||
}
|
||||
|
||||
inline void MathUtilC::transposeMatrix(const float* m, float* dst)
|
||||
{
|
||||
float t[16] = {
|
||||
m[0], m[4], m[8], m[12],
|
||||
m[1], m[5], m[9], m[13],
|
||||
m[2], m[6], m[10], m[14],
|
||||
m[3], m[7], m[11], m[15]
|
||||
};
|
||||
memcpy(dst, t, MATRIX_SIZE);
|
||||
}
|
||||
|
||||
inline void MathUtilC::transformVec4(const float* m, float x, float y, float z, float w, float* dst)
|
||||
{
|
||||
dst[0] = x * m[0] + y * m[4] + z * m[8] + w * m[12];
|
||||
dst[1] = x * m[1] + y * m[5] + z * m[9] + w * m[13];
|
||||
dst[2] = x * m[2] + y * m[6] + z * m[10] + w * m[14];
|
||||
}
|
||||
|
||||
inline void MathUtilC::transformVec4(const float* m, const float* v, float* dst)
|
||||
{
|
||||
// Handle case where v == dst.
|
||||
float x = v[0] * m[0] + v[1] * m[4] + v[2] * m[8] + v[3] * m[12];
|
||||
float y = v[0] * m[1] + v[1] * m[5] + v[2] * m[9] + v[3] * m[13];
|
||||
float z = v[0] * m[2] + v[1] * m[6] + v[2] * m[10] + v[3] * m[14];
|
||||
float w = v[0] * m[3] + v[1] * m[7] + v[2] * m[11] + v[3] * m[15];
|
||||
|
||||
dst[0] = x;
|
||||
dst[1] = y;
|
||||
dst[2] = z;
|
||||
dst[3] = w;
|
||||
}
|
||||
|
||||
inline void MathUtilC::crossVec3(const float* v1, const float* v2, float* dst)
|
||||
{
|
||||
float x = (v1[1] * v2[2]) - (v1[2] * v2[1]);
|
||||
float y = (v1[2] * v2[0]) - (v1[0] * v2[2]);
|
||||
float z = (v1[0] * v2[1]) - (v1[1] * v2[0]);
|
||||
|
||||
dst[0] = x;
|
||||
dst[1] = y;
|
||||
dst[2] = z;
|
||||
}
|
||||
|
||||
inline void MathUtilC::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform)
|
||||
{
|
||||
auto end = dst + count;
|
||||
auto t = transform; // Make copy for better aliasing inference
|
||||
auto m = t.m;
|
||||
|
||||
while (dst < end)
|
||||
{
|
||||
auto pos = src->vertices;
|
||||
dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12];
|
||||
dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13];
|
||||
dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14];
|
||||
memcpy(&dst->colors, &src->colors, sizeof(dst->colors) + sizeof(dst->texCoords));
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
inline void MathUtilC::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
|
||||
{
|
||||
auto end = dst + count;
|
||||
while (dst < end)
|
||||
{
|
||||
*dst = *src + offset;
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
NS_AX_MATH_END
|
||||
|
|
|
@ -16,356 +16,374 @@
|
|||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
This file was modified to fit the axmol project
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
NS_AX_MATH_BEGIN
|
||||
|
||||
class MathUtilNeon
|
||||
struct MathUtilNeon
|
||||
{
|
||||
public:
|
||||
inline static void addMatrix(const float* m, float scalar, float* dst);
|
||||
inline static void addMatrix(const float* m1, const float* m2, float* dst);
|
||||
inline static void subtractMatrix(const float* m1, const float* m2, float* dst);
|
||||
inline static void multiplyMatrix(const float* m, float scalar, float* dst);
|
||||
inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
# define vmlaq_lane_f32(a, b, c, lane) vaddq_f32(a, vmulq_lane_f32(b, c, lane))
|
||||
#endif
|
||||
|
||||
inline static void negateMatrix(const float* m, float* dst);
|
||||
inline static void transposeMatrix(const float* m, float* dst);
|
||||
inline static void addMatrix(const _xm128_t* m, float scalar, _xm128_t* dst)
|
||||
{
|
||||
float32x4_t s = vdupq_n_f32(scalar);
|
||||
dst[0] = vaddq_f32(m[0], s);
|
||||
dst[1] = vaddq_f32(m[1], s);
|
||||
dst[2] = vaddq_f32(m[2], s);
|
||||
dst[3] = vaddq_f32(m[3], s);
|
||||
}
|
||||
|
||||
inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);
|
||||
inline static void transformVec4(const float* m, const float* v, float* dst);
|
||||
inline static void crossVec3(const float* v1, const float* v2, float* dst);
|
||||
inline static void addMatrix(const _xm128_t* m1, const _xm128_t* m2, _xm128_t* dst)
|
||||
{
|
||||
dst[0] = vaddq_f32(m1[0], m2[0]);
|
||||
dst[1] = vaddq_f32(m1[1], m2[1]);
|
||||
dst[2] = vaddq_f32(m1[2], m2[2]);
|
||||
dst[3] = vaddq_f32(m1[3], m2[3]);
|
||||
}
|
||||
|
||||
inline static void transformVertices(ax::V3F_C4B_T2F* dst, const ax::V3F_C4B_T2F* src, size_t count, const ax::Mat4& transform);
|
||||
inline static void subtractMatrix(const _xm128_t* m1, const _xm128_t* m2, _xm128_t* dst)
|
||||
{
|
||||
dst[0] = vsubq_f32(m1[0], m2[0]);
|
||||
dst[1] = vsubq_f32(m1[1], m2[1]);
|
||||
dst[2] = vsubq_f32(m1[2], m2[2]);
|
||||
dst[3] = vsubq_f32(m1[3], m2[3]);
|
||||
}
|
||||
|
||||
inline static void multiplyMatrix(const _xm128_t* m, float scalar, _xm128_t* dst)
|
||||
{
|
||||
_xm128_t s = vdupq_n_f32(scalar);
|
||||
UTILS_UNROLL
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
dst[i] = vmulq_f32(m[i], s);
|
||||
}
|
||||
}
|
||||
|
||||
inline static void multiplyMatrix(const _xm128_t* m1, const _xm128_t* m2, _xm128_t* dst)
|
||||
{
|
||||
float32x4_t product[4];
|
||||
float32x4_t val;
|
||||
UTILS_UNROLL
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
val = vmulq_n_f32(m1[0], vgetq_lane_f32(m2[i], 0));
|
||||
val = vmlaq_n_f32(val, m1[1], vgetq_lane_f32(m2[i], 1));
|
||||
val = vmlaq_n_f32(val, m1[2], vgetq_lane_f32(m2[i], 2));
|
||||
val = vmlaq_n_f32(val, m1[3], vgetq_lane_f32(m2[i], 3));
|
||||
product[i] = val;
|
||||
}
|
||||
memcpy(dst, product, sizeof(product));
|
||||
}
|
||||
|
||||
inline static void negateMatrix(const _xm128_t* m, _xm128_t* dst)
|
||||
{
|
||||
UTILS_UNROLL
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
dst[i] = vnegq_f32(m[i]);
|
||||
}
|
||||
}
|
||||
|
||||
inline static void transposeMatrix(const _xm128_t* m, _xm128_t* dst)
|
||||
{
|
||||
auto tmp0 = vzipq_f32(m[0], m[2]);
|
||||
auto tmp1 = vzipq_f32(m[1], m[3]);
|
||||
auto tmp2 = vzipq_f32(tmp0.val[0], tmp1.val[0]);
|
||||
auto tmp3 = vzipq_f32(tmp0.val[1], tmp1.val[1]);
|
||||
|
||||
dst[0] = tmp2.val[0];
|
||||
dst[1] = tmp2.val[1];
|
||||
dst[2] = tmp3.val[0];
|
||||
dst[3] = tmp3.val[1];
|
||||
}
|
||||
|
||||
inline static void transformVec4(const _xm128_t* m, float x, float y, float z, float w, float* dst/*vec3*/)
|
||||
{
|
||||
auto v0 = vmulq_n_f32(m[0], x);
|
||||
auto v1 = vmulq_n_f32(m[1], y);
|
||||
auto v2 = vmulq_n_f32(m[2], z);
|
||||
auto v3 = vmulq_n_f32(m[3], w);
|
||||
auto prod = vaddq_f32(v0, vaddq_f32(v1, vaddq_f32(v2, v3)));
|
||||
vst1_f32(dst, vget_low_f32(prod));
|
||||
vst1_lane_f32(dst + 2, vget_high_f32(prod), 0);
|
||||
}
|
||||
|
||||
inline static void transformVec4(const _xm128_t* m, const float* v /*vec4*/, float* dst /*vec4*/)
|
||||
{
|
||||
auto v0 = vmulq_n_f32(m[0], v[0]);
|
||||
auto v1 = vmulq_n_f32(m[1], v[1]);
|
||||
auto v2 = vmulq_n_f32(m[2], v[2]);
|
||||
auto v3 = vmulq_n_f32(m[3], v[3]);
|
||||
auto prod = vaddq_f32(v0, vaddq_f32(v1, vaddq_f32(v2, v3)));
|
||||
vst1q_f32(dst, prod);
|
||||
}
|
||||
|
||||
inline static void crossVec3(const float* v1, const float* v2, float* dst)
|
||||
{
|
||||
// refer to:
|
||||
// https://developer.arm.com/documentation/den0018/a/NEON-Code-Examples-with-Mixed-Operations/Cross-product/Single-cross-product
|
||||
// Vector a is stored in memory such that ai is at the lower address and
|
||||
// ak is at the higher address. Vector b is also stored in the same way.
|
||||
|
||||
float32x4_t vec_a = vcombine_f32(vld1_f32(v1 + 1), vld1_f32(v1)); // Q register = [aj, ai, ak, aj]
|
||||
float32x4_t vec_b = vcombine_f32(vld1_f32(v2 + 1), vld1_f32(v2)); // Q register = [bj, bi, bk, bj]
|
||||
float32x4_t vec_a_rot = vextq_f32(vec_a, vec_a, 1);
|
||||
float32x4_t vec_b_rot = vextq_f32(vec_b, vec_b, 1);
|
||||
|
||||
float32x4_t prod = vmulq_f32(vec_a, vec_b_rot);
|
||||
|
||||
// prod = [ ajbj, aibj, akbi, ajbk ]
|
||||
|
||||
prod = vmlsq_f32(prod, vec_a_rot, vec_b);
|
||||
// prod = [ ajbj-ajbj, aibj-ajbi, akbi-aibk, ajbk-akbj ]
|
||||
|
||||
vst1_f32(dst, vget_low_f32(prod)); // Store the lower two elements to address r
|
||||
vst1_lane_f32(dst + 2, vget_high_f32(prod), 0); // Store the 3rd element
|
||||
}
|
||||
|
||||
#if AX_64BITS
|
||||
inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform)
|
||||
{
|
||||
auto end = dst + count;
|
||||
|
||||
// Load matrix
|
||||
float32x4x4_t m = vld1q_f32_x4(transform.m);
|
||||
|
||||
// Process 4 vertices at a time if there's enough data
|
||||
auto end4 = dst + count / 4 * 4;
|
||||
while (dst < end4)
|
||||
{
|
||||
// Do this for each vertex
|
||||
// dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12];
|
||||
// dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13];
|
||||
// dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14];
|
||||
|
||||
// First, load each vertex, multiply x by column 0 and add to column 3
|
||||
// Note: since we're reading 4 floats it will load color bytes into v.w
|
||||
float32x4_t v0 = vld1q_f32(&src[0].vertices.x);
|
||||
float32x4_t r0 = vmlaq_laneq_f32(m.val[3], m.val[0], v0, 0);
|
||||
float32x4_t v1 = vld1q_f32(&src[1].vertices.x);
|
||||
float32x4_t r1 = vmlaq_laneq_f32(m.val[3], m.val[0], v1, 0);
|
||||
float32x4_t v2 = vld1q_f32(&src[2].vertices.x);
|
||||
float32x4_t r2 = vmlaq_laneq_f32(m.val[3], m.val[0], v2, 0);
|
||||
float32x4_t v3 = vld1q_f32(&src[3].vertices.x);
|
||||
float32x4_t r3 = vmlaq_laneq_f32(m.val[3], m.val[0], v3, 0);
|
||||
|
||||
// Load texCoords
|
||||
float32x2_t uv0 = vld1_f32(&src[0].texCoords.u);
|
||||
float32x2_t uv1 = vld1_f32(&src[1].texCoords.u);
|
||||
float32x2_t uv2 = vld1_f32(&src[2].texCoords.u);
|
||||
float32x2_t uv3 = vld1_f32(&src[3].texCoords.u);
|
||||
|
||||
// Multiply y by column 1 and add to result
|
||||
r0 = vmlaq_laneq_f32(r0, m.val[1], v0, 1);
|
||||
r1 = vmlaq_laneq_f32(r1, m.val[1], v1, 1);
|
||||
r2 = vmlaq_laneq_f32(r2, m.val[1], v2, 1);
|
||||
r3 = vmlaq_laneq_f32(r3, m.val[1], v3, 1);
|
||||
|
||||
// Multiply z by column 2 and add to result
|
||||
r0 = vmlaq_laneq_f32(r0, m.val[2], v0, 2);
|
||||
r1 = vmlaq_laneq_f32(r1, m.val[2], v1, 2);
|
||||
r2 = vmlaq_laneq_f32(r2, m.val[2], v2, 2);
|
||||
r3 = vmlaq_laneq_f32(r3, m.val[2], v3, 2);
|
||||
|
||||
// Set w to loaded color
|
||||
r0 = vsetq_lane_f32(vgetq_lane_f32(v0, 3), r0, 3);
|
||||
r1 = vsetq_lane_f32(vgetq_lane_f32(v1, 3), r1, 3);
|
||||
r2 = vsetq_lane_f32(vgetq_lane_f32(v2, 3), r2, 3);
|
||||
r3 = vsetq_lane_f32(vgetq_lane_f32(v3, 3), r3, 3);
|
||||
|
||||
// Store result
|
||||
vst1q_f32(&dst[0].vertices.x, r0);
|
||||
vst1_f32(&dst[0].texCoords.u, uv0);
|
||||
vst1q_f32(&dst[1].vertices.x, r1);
|
||||
vst1_f32(&dst[1].texCoords.u, uv1);
|
||||
vst1q_f32(&dst[2].vertices.x, r2);
|
||||
vst1_f32(&dst[2].texCoords.u, uv2);
|
||||
vst1q_f32(&dst[3].vertices.x, r3);
|
||||
vst1_f32(&dst[3].texCoords.u, uv3);
|
||||
|
||||
dst += 4;
|
||||
src += 4;
|
||||
}
|
||||
|
||||
// Process remaining vertices one by one
|
||||
while (dst < end)
|
||||
{
|
||||
float32x4_t v = vld1q_f32(&src->vertices.x);
|
||||
float32x4_t r = vmlaq_laneq_f32(m.val[3], m.val[0], v, 0);
|
||||
r = vmlaq_laneq_f32(r, m.val[1], v, 1);
|
||||
r = vmlaq_laneq_f32(r, m.val[2], v, 2);
|
||||
r = vsetq_lane_f32(vgetq_lane_f32(v, 3), r, 3);
|
||||
float32x2_t uv = vld1_f32(&src->texCoords.u);
|
||||
vst1q_f32(&dst->vertices.x, r);
|
||||
vst1_f32(&dst->texCoords.u, uv);
|
||||
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
}
|
||||
|
||||
inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
|
||||
{
|
||||
auto end = dst + count;
|
||||
auto off = vdupq_n_u16(offset);
|
||||
|
||||
if (count < 8)
|
||||
goto LEFTOVER;
|
||||
|
||||
// Process 32 indices at a time if there's enough data
|
||||
while (count >= 32)
|
||||
{
|
||||
// Load 32 indices
|
||||
uint16x8x4_t v = vld1q_u16_x4(src);
|
||||
|
||||
// Add offset
|
||||
v.val[0] = vaddq_u16(v.val[0], off);
|
||||
v.val[1] = vaddq_u16(v.val[1], off);
|
||||
v.val[2] = vaddq_u16(v.val[2], off);
|
||||
v.val[3] = vaddq_u16(v.val[3], off);
|
||||
|
||||
// Store result
|
||||
vst1q_u16_x4(dst, v);
|
||||
|
||||
dst += 32;
|
||||
src += 32;
|
||||
count -= 32;
|
||||
}
|
||||
|
||||
// Process 8 indices at a time if there's enough data
|
||||
while (count >= 8)
|
||||
{
|
||||
uint16x8_t v = vld1q_u16(src);
|
||||
v = vaddq_u16(v, off);
|
||||
vst1q_u16(dst, v);
|
||||
|
||||
dst += 8;
|
||||
src += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
LEFTOVER:
|
||||
// Process remaining indices one by one
|
||||
while (count > 0)
|
||||
{
|
||||
*dst = *src + offset;
|
||||
++dst;
|
||||
++src;
|
||||
--count;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Transforms `count` vertices by the 4x4 `transform` matrix using NEON
// intrinsics. Positions are transformed as points (implicit w = 1); the
// packed color bytes (loaded alongside z) and the texture coordinates are
// copied through to `dst` unchanged.
// NOTE(review): assumes V3F_C4B_T2F packs {x,y,z}{color}{u,v} contiguously so
// that the float after `vertices.z` is the 4-byte color — confirm layout.
inline static void transformVertices(ax::V3F_C4B_T2F* dst,
                                     const ax::V3F_C4B_T2F* src,
                                     size_t count,
                                     const ax::Mat4& transform)
{
    auto end = dst + count;

    // Load matrix (four columns of 4 contiguous floats each)
    float32x4_t mc0 = vld1q_f32(transform.m);
    float32x4_t mc1 = vld1q_f32(transform.m + 4);
    float32x4_t mc2 = vld1q_f32(transform.m + 8);
    float32x4_t mc3 = vld1q_f32(transform.m + 12);

    // Process 4 vertices at a time
    auto end4 = dst + count / 4 * 4;
    while (dst < end4)
    {
        // Load 4 vertices. Note that color will also get loaded into w:
        // zwN = {z, 4 color bytes reinterpreted as one float}.
        float32x2_t xy0 = vld1_f32(&src[0].vertices.x);
        float32x2_t zw0 = vld1_f32(&src[0].vertices.z);
        float32x2_t uv0 = vld1_f32(&src[0].texCoords.u);
        float32x2_t xy1 = vld1_f32(&src[1].vertices.x);
        float32x2_t zw1 = vld1_f32(&src[1].vertices.z);
        float32x2_t uv1 = vld1_f32(&src[1].texCoords.u);
        float32x2_t xy2 = vld1_f32(&src[2].vertices.x);
        float32x2_t zw2 = vld1_f32(&src[2].vertices.z);
        float32x2_t uv2 = vld1_f32(&src[2].texCoords.u);
        float32x2_t xy3 = vld1_f32(&src[3].vertices.x);
        float32x2_t zw3 = vld1_f32(&src[3].vertices.z);
        float32x2_t uv3 = vld1_f32(&src[3].texCoords.u);

        // Multiply x by column 0
        float32x4_t r0 = vmulq_lane_f32(mc0, xy0, 0);
        float32x4_t r1 = vmulq_lane_f32(mc0, xy1, 0);
        float32x4_t r2 = vmulq_lane_f32(mc0, xy2, 0);
        float32x4_t r3 = vmulq_lane_f32(mc0, xy3, 0);

        // Multiply y by column 1 and add to result
        r0 = vmlaq_lane_f32(r0, mc1, xy0, 1);
        r1 = vmlaq_lane_f32(r1, mc1, xy1, 1);
        r2 = vmlaq_lane_f32(r2, mc1, xy2, 1);
        r3 = vmlaq_lane_f32(r3, mc1, xy3, 1);

        // Multiply z by column 2 and add to result
        r0 = vmlaq_lane_f32(r0, mc2, zw0, 0);
        r1 = vmlaq_lane_f32(r1, mc2, zw1, 0);
        r2 = vmlaq_lane_f32(r2, mc2, zw2, 0);
        r3 = vmlaq_lane_f32(r3, mc2, zw3, 0);

        // Add column 3 (translation; w is implicitly 1)
        r0 = vaddq_f32(r0, mc3);
        r1 = vaddq_f32(r1, mc3);
        r2 = vaddq_f32(r2, mc3);
        r3 = vaddq_f32(r3, mc3);

        // Set color: restore the untouched color bits into lane 3 so the
        // 16-byte store below also writes the original color.
        r0 = vsetq_lane_f32(vget_lane_f32(zw0, 1), r0, 3);
        r1 = vsetq_lane_f32(vget_lane_f32(zw1, 1), r1, 3);
        r2 = vsetq_lane_f32(vget_lane_f32(zw2, 1), r2, 3);
        r3 = vsetq_lane_f32(vget_lane_f32(zw3, 1), r3, 3);

        // Store result
        vst1q_f32(&dst[0].vertices.x, r0);
        vst1_f32(&dst[0].texCoords.u, uv0);
        vst1q_f32(&dst[1].vertices.x, r1);
        vst1_f32(&dst[1].texCoords.u, uv1);
        vst1q_f32(&dst[2].vertices.x, r2);
        vst1_f32(&dst[2].texCoords.u, uv2);
        vst1q_f32(&dst[3].vertices.x, r3);
        vst1_f32(&dst[3].texCoords.u, uv3);

        dst += 4;
        src += 4;
    }

    // Process remaining vertices
    while (dst < end)
    {
        // Load vertex
        float32x2_t xy = vld1_f32(&src->vertices.x);
        float32x2_t zw = vld1_f32(&src->vertices.z);
        float32x2_t uv = vld1_f32(&src->texCoords.u);

        // Multiply x by column 0
        float32x4_t r = vmulq_lane_f32(mc0, xy, 0);
        // Multiply y by column 1 and add to result
        r = vmlaq_lane_f32(r, mc1, xy, 1);
        // Multiply z by column 2 and add to result
        r = vmlaq_lane_f32(r, mc2, zw, 0);
        // Add column 3
        r = vaddq_f32(r, mc3);

        // Set color (restore original color bits into lane 3)
        r = vsetq_lane_f32(vget_lane_f32(zw, 1), r, 3);

        // Store result
        vst1q_f32(&dst->vertices.x, r);
        vst1_f32(&dst->texCoords.u, uv);

        ++dst;
        ++src;
    }
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// dst = m + scalar: adds `scalar` to each of the 16 elements of the 4x4
// matrix `m` (ARMv7 NEON inline assembly).
inline void MathUtilNeon::addMatrix(const float* m, float scalar, float* dst)
{
    asm volatile(
        "vld1.32 {q0, q1}, [%1]! \n\t"  // M[m0-m7]
        "vld1.32 {q2, q3}, [%1] \n\t"   // M[m8-m15]
        "vld1.32 {d8[0]}, [%2] \n\t"    // s -> lane 0 of q4
        "vmov.f32 s17, s16 \n\t"        // s (splat s across the rest of q4)
        "vmov.f32 s18, s16 \n\t"        // s
        "vmov.f32 s19, s16 \n\t"        // s

        "vadd.f32 q8, q0, q4 \n\t"   // DST->M[m0-m3] = M[m0-m3] + s
        "vadd.f32 q9, q1, q4 \n\t"   // DST->M[m4-m7] = M[m4-m7] + s
        "vadd.f32 q10, q2, q4 \n\t"  // DST->M[m8-m11] = M[m8-m11] + s
        "vadd.f32 q11, q3, q4 \n\t"  // DST->M[m12-m15] = M[m12-m15] + s

        "vst1.32 {q8, q9}, [%0]! \n\t"   // DST->M[m0-m7]
        "vst1.32 {q10, q11}, [%0] \n\t"  // DST->M[m8-m15]
        :
        : "r"(dst), "r"(m), "r"(&scalar)
        : "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "memory"
    );
}
|
||||
|
||||
// dst = m1 + m2: element-wise sum of two 4x4 matrices (ARMv7 NEON asm).
inline void MathUtilNeon::addMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "vld1.32 {q0, q1}, [%1]! \n\t"   // M1[m0-m7]
        "vld1.32 {q2, q3}, [%1] \n\t"    // M1[m8-m15]
        "vld1.32 {q8, q9}, [%2]! \n\t"   // M2[m0-m7]
        "vld1.32 {q10, q11}, [%2] \n\t"  // M2[m8-m15]

        "vadd.f32 q12, q0, q8 \n\t"   // DST->M[m0-m3] = M1[m0-m3] + M2[m0-m3]
        "vadd.f32 q13, q1, q9 \n\t"   // DST->M[m4-m7] = M1[m4-m7] + M2[m4-m7]
        "vadd.f32 q14, q2, q10 \n\t"  // DST->M[m8-m11] = M1[m8-m11] + M2[m8-m11]
        "vadd.f32 q15, q3, q11 \n\t"  // DST->M[m12-m15] = M1[m12-m15] + M2[m12-m15]

        "vst1.32 {q12, q13}, [%0]! \n\t"  // DST->M[m0-m7]
        "vst1.32 {q14, q15}, [%0] \n\t"   // DST->M[m8-m15]
        :
        : "r"(dst), "r"(m1), "r"(m2)
        : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory"
    );
}
|
||||
|
||||
// dst = m1 - m2: element-wise difference of two 4x4 matrices (ARMv7 NEON asm).
inline void MathUtilNeon::subtractMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "vld1.32 {q0, q1}, [%1]! \n\t"   // M1[m0-m7]
        "vld1.32 {q2, q3}, [%1] \n\t"    // M1[m8-m15]
        "vld1.32 {q8, q9}, [%2]! \n\t"   // M2[m0-m7]
        "vld1.32 {q10, q11}, [%2] \n\t"  // M2[m8-m15]

        "vsub.f32 q12, q0, q8 \n\t"   // DST->M[m0-m3] = M1[m0-m3] - M2[m0-m3]
        "vsub.f32 q13, q1, q9 \n\t"   // DST->M[m4-m7] = M1[m4-m7] - M2[m4-m7]
        "vsub.f32 q14, q2, q10 \n\t"  // DST->M[m8-m11] = M1[m8-m11] - M2[m8-m11]
        "vsub.f32 q15, q3, q11 \n\t"  // DST->M[m12-m15] = M1[m12-m15] - M2[m12-m15]

        "vst1.32 {q12, q13}, [%0]! \n\t"  // DST->M[m0-m7]
        "vst1.32 {q14, q15}, [%0] \n\t"   // DST->M[m8-m15]
        :
        : "r"(dst), "r"(m1), "r"(m2)
        : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "memory"
    );
}
|
||||
|
||||
// dst = m * scalar: multiplies each of the 16 elements of `m` by `scalar`
// (ARMv7 NEON asm).
// NOTE: the original comments on the three loads were shifted by one line;
// corrected below — the first load fetches the scalar, the next two the matrix.
inline void MathUtilNeon::multiplyMatrix(const float* m, float scalar, float* dst)
{
    asm volatile(
        "vld1.32 {d0[0]}, [%2] \n\t"   // s
        "vld1.32 {q4-q5}, [%1]! \n\t"  // M[m0-m7]
        "vld1.32 {q6-q7}, [%1] \n\t"   // M[m8-m15]

        "vmul.f32 q8, q4, d0[0] \n\t"   // DST->M[m0-m3] = M[m0-m3] * s
        "vmul.f32 q9, q5, d0[0] \n\t"   // DST->M[m4-m7] = M[m4-m7] * s
        "vmul.f32 q10, q6, d0[0] \n\t"  // DST->M[m8-m11] = M[m8-m11] * s
        "vmul.f32 q11, q7, d0[0] \n\t"  // DST->M[m12-m15] = M[m12-m15] * s

        "vst1.32 {q8-q9}, [%0]! \n\t"   // DST->M[m0-m7]
        "vst1.32 {q10-q11}, [%0] \n\t"  // DST->M[m8-m15]
        :
        : "r"(dst), "r"(m), "r"(&scalar)
        : "q0", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "memory"
    );
}
|
||||
|
||||
// dst = m1 * m2: 4x4 matrix product (ARMv7 NEON asm). Each destination
// column is accumulated as a linear combination of the four columns of m1,
// weighted by the corresponding column of m2.
inline void MathUtilNeon::multiplyMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "vld1.32 {d16 - d19}, [%1]! \n\t"  // M1[m0-m7]
        "vld1.32 {d20 - d23}, [%1] \n\t"   // M1[m8-m15]
        "vld1.32 {d0 - d3}, [%2]! \n\t"    // M2[m0-m7]
        "vld1.32 {d4 - d7}, [%2] \n\t"     // M2[m8-m15]

        "vmul.f32 q12, q8, d0[0] \n\t"  // DST->M[m0-m3] = M1[m0-m3] * M2[m0]
        "vmul.f32 q13, q8, d2[0] \n\t"  // DST->M[m4-m7] = M1[m0-m3] * M2[m4]
        "vmul.f32 q14, q8, d4[0] \n\t"  // DST->M[m8-m11] = M1[m0-m3] * M2[m8]
        "vmul.f32 q15, q8, d6[0] \n\t"  // DST->M[m12-m15] = M1[m0-m3] * M2[m12]

        "vmla.f32 q12, q9, d0[1] \n\t"  // DST->M[m0-m3] += M1[m4-m7] * M2[m1]
        "vmla.f32 q13, q9, d2[1] \n\t"  // DST->M[m4-m7] += M1[m4-m7] * M2[m5]
        "vmla.f32 q14, q9, d4[1] \n\t"  // DST->M[m8-m11] += M1[m4-m7] * M2[m9]
        "vmla.f32 q15, q9, d6[1] \n\t"  // DST->M[m12-m15] += M1[m4-m7] * M2[m13]

        "vmla.f32 q12, q10, d1[0] \n\t"  // DST->M[m0-m3] += M1[m8-m11] * M2[m2]
        "vmla.f32 q13, q10, d3[0] \n\t"  // DST->M[m4-m7] += M1[m8-m11] * M2[m6]
        "vmla.f32 q14, q10, d5[0] \n\t"  // DST->M[m8-m11] += M1[m8-m11] * M2[m10]
        "vmla.f32 q15, q10, d7[0] \n\t"  // DST->M[m12-m15] += M1[m8-m11] * M2[m14]

        "vmla.f32 q12, q11, d1[1] \n\t"  // DST->M[m0-m3] += M1[m12-m15] * M2[m3]
        "vmla.f32 q13, q11, d3[1] \n\t"  // DST->M[m4-m7] += M1[m12-m15] * M2[m7]
        "vmla.f32 q14, q11, d5[1] \n\t"  // DST->M[m8-m11] += M1[m12-m15] * M2[m11]
        "vmla.f32 q15, q11, d7[1] \n\t"  // DST->M[m12-m15] += M1[m12-m15] * M2[m15]

        "vst1.32 {d24 - d27}, [%0]! \n\t"  // DST->M[m0-m7]
        "vst1.32 {d28 - d31}, [%0] \n\t"   // DST->M[m8-m15]

        :                             // output
        : "r"(dst), "r"(m1), "r"(m2)  // input - note *value* of pointer doesn't change.
        : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
    );
}
|
||||
|
||||
// dst = -m: negates each of the 16 elements of the 4x4 matrix (ARMv7 NEON asm).
inline void MathUtilNeon::negateMatrix(const float* m, float* dst)
{
    asm volatile(
        "vld1.32 {q0-q1}, [%1]! \n\t"  // load m0-m7
        "vld1.32 {q2-q3}, [%1] \n\t"   // load m8-m15

        "vneg.f32 q4, q0 \n\t"  // negate m0-m3
        "vneg.f32 q5, q1 \n\t"  // negate m4-m7
        "vneg.f32 q6, q2 \n\t"  // negate m8-m11
        "vneg.f32 q7, q3 \n\t"  // negate m12-m15

        "vst1.32 {q4-q5}, [%0]! \n\t"  // store m0-m7
        "vst1.32 {q6-q7}, [%0] \n\t"   // store m8-m15
        :
        : "r"(dst), "r"(m)
        : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "memory"
    );
}
|
||||
|
||||
// dst = transpose(m): 4x4 transpose via VLD4 lane loads, which de-interleave
// each source row across the four destination registers (ARMv7 NEON asm).
inline void MathUtilNeon::transposeMatrix(const float* m, float* dst)
{
    asm volatile(
        "vld4.32 {d0[0], d2[0], d4[0], d6[0]}, [%1]! \n\t"  // DST->M[m0, m4, m8, m12] = M[m0-m3]
        "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1]! \n\t"  // DST->M[m1, m5, m9, m13] = M[m4-m7]
        "vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%1]! \n\t"  // DST->M[m2, m6, m10, m14] = M[m8-m11]
        "vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%1] \n\t"   // DST->M[m3, m7, m11, m15] = M[m12-m15]

        "vst1.32 {q0-q1}, [%0]! \n\t"  // DST->M[m0-m7]
        "vst1.32 {q2-q3}, [%0] \n\t"   // DST->M[m8-m15]
        :
        : "r"(dst), "r"(m)
        : "q0", "q1", "q2", "q3", "memory"
    );
}
|
||||
|
||||
// dst = m * (x, y, z, w): transforms the 4-component vector by the 4x4
// matrix, but stores only the first THREE components to `dst` (ARMv7 NEON asm).
inline void MathUtilNeon::transformVec4(const float* m, float x, float y, float z, float w, float* dst)
{
    asm volatile(
        "vld1.32 {d0[0]}, [%1] \n\t"       // V[x]
        "vld1.32 {d0[1]}, [%2] \n\t"       // V[y]
        "vld1.32 {d1[0]}, [%3] \n\t"       // V[z]
        "vld1.32 {d1[1]}, [%4] \n\t"       // V[w]
        "vld1.32 {d18 - d21}, [%5]! \n\t"  // M[m0-m7]
        "vld1.32 {d22 - d25}, [%5] \n\t"   // M[m8-m15]

        "vmul.f32 q13, q9, d0[0] \n\t"   // DST->V = M[m0-m3] * V[x]
        "vmla.f32 q13, q10, d0[1] \n\t"  // DST->V += M[m4-m7] * V[y]
        "vmla.f32 q13, q11, d1[0] \n\t"  // DST->V += M[m8-m11] * V[z]
        "vmla.f32 q13, q12, d1[1] \n\t"  // DST->V += M[m12-m15] * V[w]

        "vst1.32 {d26}, [%0]! \n\t"    // DST->V[x, y]
        "vst1.32 {d27[0]}, [%0] \n\t"  // DST->V[z]  (w is computed but not stored)
        :
        : "r"(dst), "r"(&x), "r"(&y), "r"(&z), "r"(&w), "r"(m)
        : "q0", "q9", "q10","q11", "q12", "q13", "memory"
    );
}
|
||||
|
||||
// dst = m * v: transforms the 4-float vector `v` by the 4x4 matrix `m` and
// stores all four result components to `dst` (ARMv7 NEON asm).
inline void MathUtilNeon::transformVec4(const float* m, const float* v, float* dst)
{
    asm volatile
    (
        "vld1.32 {d0, d1}, [%1] \n\t"      // V[x, y, z, w]
        "vld1.32 {d18 - d21}, [%2]! \n\t"  // M[m0-m7]
        "vld1.32 {d22 - d25}, [%2] \n\t"   // M[m8-m15]

        "vmul.f32 q13, q9, d0[0] \n\t"   // DST->V = M[m0-m3] * V[x]
        "vmla.f32 q13, q10, d0[1] \n\t"  // DST->V += M[m4-m7] * V[y]
        "vmla.f32 q13, q11, d1[0] \n\t"  // DST->V += M[m8-m11] * V[z]
        "vmla.f32 q13, q12, d1[1] \n\t"  // DST->V += M[m12-m15] * V[w]

        "vst1.32 {d26, d27}, [%0] \n\t"  // DST->V
        :
        : "r"(dst), "r"(v), "r"(m)
        : "q0", "q9", "q10","q11", "q12", "q13", "memory"
    );
}
|
||||
|
||||
// dst = v1 x v2: 3-component cross product (ARMv7 NEON asm).
// The operands %2 and %4 are v1+1 and v2+1, so the 64-bit loads pick up
// (y, z) pairs while the lane loads pick up the x components.
inline void MathUtilNeon::crossVec3(const float* v1, const float* v2, float* dst)
{
    asm volatile(
        "vld1.32 {d1[1]}, [%1] \n\t"  // s3 = v1x
        "vld1.32 {d0}, [%2] \n\t"     // s0 = v1y, s1 = v1z
        "vmov.f32 s2, s1 \n\t"        // q0 = (v1y, v1z, v1z, v1x)

        "vld1.32 {d2[1]}, [%3] \n\t"  // s5 = v2x
        "vld1.32 {d3}, [%4] \n\t"     // s6 = v2y, s7 = v2z
        "vmov.f32 s4, s7 \n\t"        // q1 = (v2z, v2x, v2y, v2z)

        "vmul.f32 d4, d0, d2 \n\t"  // x = v1y * v2z, y = v1z * v2x
        "vmls.f32 d4, d1, d3 \n\t"  // x -= v1z * v2y, y -= v1x * v2z

        "vmul.f32 d5, d3, d1[1] \n\t"  // z = v1x * v2y
        "vmls.f32 d5, d0, d2[1] \n\t"  // z -= v1y * v2x

        "vst1.32 {d4}, [%0]! \n\t"    // V[x, y]
        "vst1.32 {d5[0]}, [%0] \n\t"  // V[z]
        :
        : "r"(dst), "r"(v1), "r"((v1+1)), "r"(v2), "r"((v2+1))
        : "q0", "q1", "q2", "memory"
    );
}
|
||||
|
||||
// Transforms `count` vertices by the 4x4 `transform` matrix (ARMv7 NEON
// intrinsics). Positions are transformed as points (implicit w = 1); the
// packed color bytes (loaded alongside z) and the texture coordinates are
// copied through unchanged.
// NOTE(review): assumes V3F_C4B_T2F packs {x,y,z}{color}{u,v} contiguously so
// that the float after `vertices.z` is the 4-byte color — confirm layout.
inline void MathUtilNeon::transformVertices(ax::V3F_C4B_T2F* dst, const ax::V3F_C4B_T2F* src, size_t count, const ax::Mat4& transform)
{
    auto end = dst + count;

    // Load matrix (four columns of 4 contiguous floats each)
    float32x4_t mc0 = vld1q_f32(transform.m);
    float32x4_t mc1 = vld1q_f32(transform.m + 4);
    float32x4_t mc2 = vld1q_f32(transform.m + 8);
    float32x4_t mc3 = vld1q_f32(transform.m + 12);

    // Process 4 vertices at a time
    auto end4 = dst + count / 4 * 4;
    while (dst < end4)
    {
        // Load 4 vertices. Note that color will also get loaded into w:
        // zwN = {z, 4 color bytes reinterpreted as one float}.
        float32x2_t xy0 = vld1_f32(&src[0].vertices.x);
        float32x2_t zw0 = vld1_f32(&src[0].vertices.z);
        float32x2_t uv0 = vld1_f32(&src[0].texCoords.u);
        float32x2_t xy1 = vld1_f32(&src[1].vertices.x);
        float32x2_t zw1 = vld1_f32(&src[1].vertices.z);
        float32x2_t uv1 = vld1_f32(&src[1].texCoords.u);
        float32x2_t xy2 = vld1_f32(&src[2].vertices.x);
        float32x2_t zw2 = vld1_f32(&src[2].vertices.z);
        float32x2_t uv2 = vld1_f32(&src[2].texCoords.u);
        float32x2_t xy3 = vld1_f32(&src[3].vertices.x);
        float32x2_t zw3 = vld1_f32(&src[3].vertices.z);
        float32x2_t uv3 = vld1_f32(&src[3].texCoords.u);

        // Multiply x by column 0
        float32x4_t r0 = vmulq_lane_f32(mc0, xy0, 0);
        float32x4_t r1 = vmulq_lane_f32(mc0, xy1, 0);
        float32x4_t r2 = vmulq_lane_f32(mc0, xy2, 0);
        float32x4_t r3 = vmulq_lane_f32(mc0, xy3, 0);

        // Multiply y by column 1 and add to result
        r0 = vmlaq_lane_f32(r0, mc1, xy0, 1);
        r1 = vmlaq_lane_f32(r1, mc1, xy1, 1);
        r2 = vmlaq_lane_f32(r2, mc1, xy2, 1);
        r3 = vmlaq_lane_f32(r3, mc1, xy3, 1);

        // Multiply z by column 2 and add to result
        r0 = vmlaq_lane_f32(r0, mc2, zw0, 0);
        r1 = vmlaq_lane_f32(r1, mc2, zw1, 0);
        r2 = vmlaq_lane_f32(r2, mc2, zw2, 0);
        r3 = vmlaq_lane_f32(r3, mc2, zw3, 0);

        // Add column 3 (translation; w is implicitly 1)
        r0 = vaddq_f32(r0, mc3);
        r1 = vaddq_f32(r1, mc3);
        r2 = vaddq_f32(r2, mc3);
        r3 = vaddq_f32(r3, mc3);

        // Set color: restore the untouched color bits into lane 3 so the
        // 16-byte store below also writes the original color.
        r0 = vsetq_lane_f32(vget_lane_f32(zw0, 1), r0, 3);
        r1 = vsetq_lane_f32(vget_lane_f32(zw1, 1), r1, 3);
        r2 = vsetq_lane_f32(vget_lane_f32(zw2, 1), r2, 3);
        r3 = vsetq_lane_f32(vget_lane_f32(zw3, 1), r3, 3);

        // Store result
        vst1q_f32(&dst[0].vertices.x, r0);
        vst1_f32(&dst[0].texCoords.u, uv0);
        vst1q_f32(&dst[1].vertices.x, r1);
        vst1_f32(&dst[1].texCoords.u, uv1);
        vst1q_f32(&dst[2].vertices.x, r2);
        vst1_f32(&dst[2].texCoords.u, uv2);
        vst1q_f32(&dst[3].vertices.x, r3);
        vst1_f32(&dst[3].texCoords.u, uv3);

        dst += 4;
        src += 4;
    }

    // Process remaining vertices
    while (dst < end)
    {
        // Load vertex
        float32x2_t xy = vld1_f32(&src->vertices.x);
        float32x2_t zw = vld1_f32(&src->vertices.z);
        float32x2_t uv = vld1_f32(&src->texCoords.u);

        // Multiply x by column 0
        float32x4_t r = vmulq_lane_f32(mc0, xy, 0);
        // Multiply y by column 1 and add to result
        r = vmlaq_lane_f32(r, mc1, xy, 1);
        // Multiply z by column 2 and add to result
        r = vmlaq_lane_f32(r, mc2, zw, 0);
        // Add column 3
        r = vaddq_f32(r, mc3);

        // Set color (restore original color bits into lane 3)
        r = vsetq_lane_f32(vget_lane_f32(zw, 1), r, 3);

        // Store result
        vst1q_f32(&dst->vertices.x, r);
        vst1_f32(&dst->texCoords.u, uv);

        ++dst;
        ++src;
    }
}
|
||||
|
||||
NS_AX_MATH_END
|
||||
|
|
|
@ -1,398 +0,0 @@
|
|||
/**
|
||||
Copyright 2013 BlackBerry Inc.
|
||||
Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original file from GamePlay3D: http://gameplay3d.org
|
||||
|
||||
This file was modified to fit the cocos2d-x project
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "base/Types.h"
|
||||
|
||||
NS_AX_MATH_BEGIN
|
||||
|
||||
// AArch64 NEON implementations of the MathUtil hot paths.
// Matrices are 16 contiguous floats; vectors are contiguous floats.
class MathUtilNeon64
{
public:
    // dst = m + scalar (element-wise over all 16 elements).
    inline static void addMatrix(const float* m, float scalar, float* dst);
    // dst = m1 + m2 (element-wise).
    inline static void addMatrix(const float* m1, const float* m2, float* dst);
    // dst = m1 - m2 (element-wise).
    inline static void subtractMatrix(const float* m1, const float* m2, float* dst);
    // dst = m * scalar (element-wise).
    inline static void multiplyMatrix(const float* m, float scalar, float* dst);
    // dst = m1 * m2 (4x4 matrix product).
    inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);

    // dst = -m (element-wise).
    inline static void negateMatrix(const float* m, float* dst);
    // dst = transpose(m).
    inline static void transposeMatrix(const float* m, float* dst);

    // dst (3 floats) = m * (x, y, z, w); only x, y, z are stored.
    inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);
    // dst (4 floats) = m * v, where v is 4 floats.
    inline static void transformVec4(const float* m, const float* v, float* dst);
    // dst (3 floats) = v1 x v2 (3-component cross product).
    inline static void crossVec3(const float* v1, const float* v2, float* dst);

    // Transforms vertex positions by `transform`; colors/texcoords pass through.
    inline static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform);
    // dst[i] = src[i] + offset for `count` 16-bit indices.
    inline static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset);
};
|
||||
|
||||
// dst = m + scalar (AArch64 NEON asm). The matched ld4/st4 pair de-interleaves
// and re-interleaves identically, so the net effect is element-wise.
inline void MathUtilNeon64::addMatrix(const float* m, float scalar, float* dst)
{
    asm volatile(
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t"  // M[m0-m7] M[m8-m15]
        "ld1r {v4.4s}, [%2] \n\t"                      // broadcast s to all 4 lanes

        "fadd v8.4s, v0.4s, v4.4s \n\t"   // DST->M[m0-m3] = M[m0-m3] + s
        "fadd v9.4s, v1.4s, v4.4s \n\t"   // DST->M[m4-m7] = M[m4-m7] + s
        "fadd v10.4s, v2.4s, v4.4s \n\t"  // DST->M[m8-m11] = M[m8-m11] + s
        "fadd v11.4s, v3.4s, v4.4s \n\t"  // DST->M[m12-m15] = M[m12-m15] + s

        "st4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0] \n\t"  // store DST->M[m0-m15]
        :
        : "r"(dst), "r"(m), "r"(&scalar)
        : "v0", "v1", "v2", "v3", "v4", "v8", "v9", "v10", "v11", "memory"
    );
}
|
||||
|
||||
// dst = m1 + m2 (AArch64 NEON asm). Both operands use the same ld4
// de-interleave, so the element-wise sum and the final st4 restore order.
inline void MathUtilNeon64::addMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t"    // M1[m0-m7] M1[m8-m15]
        "ld4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%2] \n\t"  // M2[m0-m7] M2[m8-m15]

        "fadd v12.4s, v0.4s, v8.4s \n\t"   // DST->M[m0-m3] = M1[m0-m3] + M2[m0-m3]
        "fadd v13.4s, v1.4s, v9.4s \n\t"   // DST->M[m4-m7] = M1[m4-m7] + M2[m4-m7]
        "fadd v14.4s, v2.4s, v10.4s \n\t"  // DST->M[m8-m11] = M1[m8-m11] + M2[m8-m11]
        "fadd v15.4s, v3.4s, v11.4s \n\t"  // DST->M[m12-m15] = M1[m12-m15] + M2[m12-m15]

        "st4 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0] \n\t"  // DST->M[m0-m7] DST->M[m8-m15]
        :
        : "r"(dst), "r"(m1), "r"(m2)
        : "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "memory"
    );
}
|
||||
|
||||
// dst = m1 - m2 (AArch64 NEON asm); element-wise difference of 16 floats.
inline void MathUtilNeon64::subtractMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t"    // M1[m0-m7] M1[m8-m15]
        "ld4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%2] \n\t"  // M2[m0-m7] M2[m8-m15]

        "fsub v12.4s, v0.4s, v8.4s \n\t"   // DST->M[m0-m3] = M1[m0-m3] - M2[m0-m3]
        "fsub v13.4s, v1.4s, v9.4s \n\t"   // DST->M[m4-m7] = M1[m4-m7] - M2[m4-m7]
        "fsub v14.4s, v2.4s, v10.4s \n\t"  // DST->M[m8-m11] = M1[m8-m11] - M2[m8-m11]
        "fsub v15.4s, v3.4s, v11.4s \n\t"  // DST->M[m12-m15] = M1[m12-m15] - M2[m12-m15]

        "st4 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0] \n\t"  // DST->M[m0-m7] DST->M[m8-m15]
        :
        : "r"(dst), "r"(m1), "r"(m2)
        : "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "memory"
    );
}
|
||||
|
||||
// dst = m * scalar (AArch64 NEON asm); every element scaled by s.
inline void MathUtilNeon64::multiplyMatrix(const float* m, float scalar, float* dst)
{
    asm volatile(
        "ld1 {v0.s}[0], [%2] \n\t"                     // s
        "ld4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%1] \n\t"  // M[m0-m7] M[m8-m15]

        "fmul v8.4s, v4.4s, v0.s[0] \n\t"   // DST->M[m0-m3] = M[m0-m3] * s
        "fmul v9.4s, v5.4s, v0.s[0] \n\t"   // DST->M[m4-m7] = M[m4-m7] * s
        "fmul v10.4s, v6.4s, v0.s[0] \n\t"  // DST->M[m8-m11] = M[m8-m11] * s
        "fmul v11.4s, v7.4s, v0.s[0] \n\t"  // DST->M[m12-m15] = M[m12-m15] * s

        "st4 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0] \n\t"  // DST->M[m0-m7] DST->M[m8-m15]
        :
        : "r"(dst), "r"(m), "r"(&scalar)
        : "v0", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "memory"
    );
}
|
||||
|
||||
// dst = m1 * m2: 4x4 matrix product (AArch64 NEON asm).
// m1 is loaded linearly (v8..v11 = its four columns); m2 is loaded with ld4,
// which de-interleaves so that v0 = {m0,m4,m8,m12}, v1 = {m1,m5,m9,m13}, etc.
// Each destination column is then accumulated with fmul/fmla by lane.
inline void MathUtilNeon64::multiplyMatrix(const float* m1, const float* m2, float* dst)
{
    asm volatile(
        "ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%1] \n\t"  // M1[m0-m15], linear
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%2] \n\t"    // M2[m0-m15], de-interleaved

        "fmul v12.4s, v8.4s, v0.s[0] \n\t"  // DST->M[m0-m3] = M1[m0-m3] * M2[m0]
        "fmul v13.4s, v8.4s, v0.s[1] \n\t"  // DST->M[m4-m7] = M1[m0-m3] * M2[m4]
        "fmul v14.4s, v8.4s, v0.s[2] \n\t"  // DST->M[m8-m11] = M1[m0-m3] * M2[m8]
        "fmul v15.4s, v8.4s, v0.s[3] \n\t"  // DST->M[m12-m15] = M1[m0-m3] * M2[m12]

        "fmla v12.4s, v9.4s, v1.s[0] \n\t"  // DST->M[m0-m3] += M1[m4-m7] * M2[m1]
        "fmla v13.4s, v9.4s, v1.s[1] \n\t"  // DST->M[m4-m7] += M1[m4-m7] * M2[m5]
        "fmla v14.4s, v9.4s, v1.s[2] \n\t"  // DST->M[m8-m11] += M1[m4-m7] * M2[m9]
        "fmla v15.4s, v9.4s, v1.s[3] \n\t"  // DST->M[m12-m15] += M1[m4-m7] * M2[m13]

        "fmla v12.4s, v10.4s, v2.s[0] \n\t"  // DST->M[m0-m3] += M1[m8-m11] * M2[m2]
        "fmla v13.4s, v10.4s, v2.s[1] \n\t"  // DST->M[m4-m7] += M1[m8-m11] * M2[m6]
        "fmla v14.4s, v10.4s, v2.s[2] \n\t"  // DST->M[m8-m11] += M1[m8-m11] * M2[m10]
        "fmla v15.4s, v10.4s, v2.s[3] \n\t"  // DST->M[m12-m15] += M1[m8-m11] * M2[m14]

        "fmla v12.4s, v11.4s, v3.s[0] \n\t"  // DST->M[m0-m3] += M1[m12-m15] * M2[m3]
        "fmla v13.4s, v11.4s, v3.s[1] \n\t"  // DST->M[m4-m7] += M1[m12-m15] * M2[m7]
        "fmla v14.4s, v11.4s, v3.s[2] \n\t"  // DST->M[m8-m11] += M1[m12-m15] * M2[m11]
        "fmla v15.4s, v11.4s, v3.s[3] \n\t"  // DST->M[m12-m15] += M1[m12-m15] * M2[m15]

        "st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0] \n\t"  // DST->M[m0-m15]

        :                             // output
        : "r"(dst), "r"(m1), "r"(m2)  // input - note *value* of pointer doesn't change.
        : "memory", "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
    );
}
|
||||
|
||||
// dst = -m: negates all 16 elements (AArch64 NEON asm). The matched
// ld4/st4 pair makes the de-interleave transparent.
inline void MathUtilNeon64::negateMatrix(const float* m, float* dst)
{
    asm volatile(
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t"  // load m0-m7 load m8-m15

        "fneg v4.4s, v0.4s \n\t"  // negate m0-m3
        "fneg v5.4s, v1.4s \n\t"  // negate m4-m7
        "fneg v6.4s, v2.4s \n\t"  // negate m8-m11
        "fneg v7.4s, v3.4s \n\t"  // negate m12-m15

        "st4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%0] \n\t"  // store m0-m7 store m8-m15
        :
        : "r"(dst), "r"(m)
        : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory"
    );
}
|
||||
|
||||
// dst = transpose(m) (AArch64 NEON asm). The ld4 de-interleave alone performs
// the transpose: v0 = {m0,m4,m8,m12}, v1 = {m1,m5,m9,m13}, v2 = {m2,m6,m10,m14},
// v3 = {m3,m7,m11,m15}; the linear st1 then writes them as the result rows.
inline void MathUtilNeon64::transposeMatrix(const float* m, float* dst)
{
    asm volatile(
        "ld4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%1] \n\t"  // de-interleaved load = transpose
        "st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [%0] \n\t"  // linear store of DST->M[m0-m15]
        :
        : "r"(dst), "r"(m)
        : "v0", "v1", "v2", "v3", "memory"
    );
}
|
||||
|
||||
// dst = m * (x, y, z, w): transforms the 4-component vector by the 4x4
// matrix, but stores only the first THREE components to `dst`
// (AArch64 NEON asm). `dst` is an output operand because the first store
// post-increments it by 8 bytes.
inline void MathUtilNeon64::transformVec4(const float* m, float x, float y, float z, float w, float* dst)
{
    asm volatile(
        "ld1 {v0.s}[0], [%1] \n\t"                       // V[x]
        "ld1 {v0.s}[1], [%2] \n\t"                       // V[y]
        "ld1 {v0.s}[2], [%3] \n\t"                       // V[z]
        "ld1 {v0.s}[3], [%4] \n\t"                       // V[w]
        "ld1 {v9.4s, v10.4s, v11.4s, v12.4s}, [%5] \n\t" // M[m0-m7] M[m8-m15]

        "fmul v13.4s, v9.4s, v0.s[0] \n\t"   // DST->V = M[m0-m3] * V[x]
        "fmla v13.4s, v10.4s, v0.s[1] \n\t"  // DST->V += M[m4-m7] * V[y]
        "fmla v13.4s, v11.4s, v0.s[2] \n\t"  // DST->V += M[m8-m11] * V[z]
        "fmla v13.4s, v12.4s, v0.s[3] \n\t"  // DST->V += M[m12-m15] * V[w]

        //"st1 {v13.4s}, [%0]               \n\t"    // DST->V[x, y] // DST->V[z]
        "st1 {v13.2s}, [%0], 8 \n\t"   // DST->V[x, y]
        "st1 {v13.s}[2], [%0] \n\t"    // DST->V[z] (w is computed but not stored)
        : "+r"(dst)
        : "r"(&x), "r"(&y), "r"(&z), "r"(&w), "r"(m)
        : "v0", "v9", "v10","v11", "v12", "v13", "memory"
    );
}
|
||||
|
||||
// dst = m * v: transforms the 4-float vector `v` by the 4x4 matrix `m` and
// stores all four result components to `dst` (AArch64 NEON asm).
inline void MathUtilNeon64::transformVec4(const float* m, const float* v, float* dst)
{
    asm volatile
    (
        "ld1 {v0.4s}, [%1] \n\t"                          // V[x, y, z, w]
        "ld1 {v9.4s, v10.4s, v11.4s, v12.4s}, [%2] \n\t"  // M[m0-m7] M[m8-m15]

        "fmul v13.4s, v9.4s, v0.s[0] \n\t"   // DST->V = M[m0-m3] * V[x]
        "fmla v13.4s, v10.4s, v0.s[1] \n\t"  // DST->V += M[m4-m7] * V[y]
        "fmla v13.4s, v11.4s, v0.s[2] \n\t"  // DST->V += M[m8-m11] * V[z]
        "fmla v13.4s, v12.4s, v0.s[3] \n\t"  // DST->V += M[m12-m15] * V[w]

        "st1 {v13.4s}, [%0] \n\t"  // DST->V
        :
        : "r"(dst), "r"(v), "r"(m)
        : "v0", "v9", "v10","v11", "v12", "v13", "memory"
    );
}
|
||||
|
||||
// dst = v1 x v2: 3-component cross product (AArch64 NEON asm).
// Operands %2/%4 are v1+1/v2+1 so the 2-lane loads pick up (y, z) pairs.
// NOTE(review): the `ld1 {v1.4s}` reads FOUR floats starting at v2, i.e. one
// float past the 3-component vector — confirm callers always have a readable
// 4th float after v2.
inline void MathUtilNeon64::crossVec3(const float* v1, const float* v2, float* dst)
{
    asm volatile(
        "ld1 {v0.2s}, [%2] \n\t"      // v0 = (v1y, v1z, ?, ?)
        "ld1 {v0.s}[2], [%1] \n\t"    // lane 2 = v1x
        "mov v0.s[3], v0.s[0] \n\t"   // v0 = (v1y, v1z, v1x, v1y)

        "ld1 {v1.4s}, [%3] \n\t"      // v1 = (v2x, v2y, v2z, ?)
        "mov v1.s[3], v1.s[0] \n\t"   // v1 = (v2x, v2y, v2z, v2x)

        "fmul v2.4s, v0.4s, v1.4s \n\t"  // lane-wise products of the first rotation

        // Rotate v0 left by one lane and align v1 for the second product.
        "mov v0.s[0], v0.s[1] \n\t"
        "mov v0.s[1], v0.s[2] \n\t"
        "mov v0.s[2], v0.s[3] \n\t"

        "mov v1.s[3], v1.s[2] \n\t"

        "fmul v0.4s, v0.4s, v1.4s \n\t"

        // Re-order so the subtraction lines up as (a*b - c*d) per component.
        "mov v0.s[3], v0.s[1] \n\t"
        "mov v0.s[1], v0.s[2] \n\t"
        "mov v0.s[2], v0.s[0] \n\t"

        "fsub v2.4s, v0.4s, v2.4s \n\t"

        // Shift the result lanes down so (x, y, z) occupy lanes 0-2.
        "mov v2.s[0], v2.s[1] \n\t"
        "mov v2.s[1], v2.s[2] \n\t"
        "mov v2.s[2], v2.s[3] \n\t"

        "st1 {v2.2s}, [%0], 8 \n\t"  // V[x, y]
        "st1 {v2.s}[2], [%0] \n\t"   // V[z]
        : "+r"(dst)
        : "r"(v1), "r"((v1+1)), "r"(v2), "r"((v2+1))
        : "v0", "v1", "v2", "memory"
    );
}
|
||||
|
||||
// Transforms `count` vertices by the 4x4 `transform` matrix (AArch64 NEON
// intrinsics). Positions are transformed as points (implicit w = 1); the
// packed color bytes (loaded into lane 3 alongside x, y, z) and the texture
// coordinates are copied through unchanged.
// NOTE(review): assumes V3F_C4B_T2F packs {x,y,z}{color}{u,v} contiguously so
// a 4-float load at `vertices.x` reads x, y, z and the color — confirm layout.
inline void MathUtilNeon64::transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform)
{
    auto end = dst + count;

    // Load matrix (m.val[0..3] are the four columns)
    float32x4x4_t m = vld1q_f32_x4(transform.m);

    // Process 4 vertices at a time if there's enough data
    auto end4 = dst + count / 4 * 4;
    while (dst < end4)
    {
        // Do this for each vertex
        // dst->vertices.x = pos.x * m[0] + pos.y * m[4] + pos.z * m[8] + m[12];
        // dst->vertices.y = pos.x * m[1] + pos.y * m[5] + pos.z * m[9] + m[13];
        // dst->vertices.z = pos.x * m[2] + pos.y * m[6] + pos.z * m[10] + m[14];

        // First, load each vertex, multiply x by column 0 and add to column 3
        // Note: since we're reading 4 floats it will load color bytes into v.w
        float32x4_t v0 = vld1q_f32(&src[0].vertices.x);
        float32x4_t r0 = vmlaq_laneq_f32(m.val[3], m.val[0], v0, 0);
        float32x4_t v1 = vld1q_f32(&src[1].vertices.x);
        float32x4_t r1 = vmlaq_laneq_f32(m.val[3], m.val[0], v1, 0);
        float32x4_t v2 = vld1q_f32(&src[2].vertices.x);
        float32x4_t r2 = vmlaq_laneq_f32(m.val[3], m.val[0], v2, 0);
        float32x4_t v3 = vld1q_f32(&src[3].vertices.x);
        float32x4_t r3 = vmlaq_laneq_f32(m.val[3], m.val[0], v3, 0);

        // Load texCoords
        float32x2_t uv0 = vld1_f32(&src[0].texCoords.u);
        float32x2_t uv1 = vld1_f32(&src[1].texCoords.u);
        float32x2_t uv2 = vld1_f32(&src[2].texCoords.u);
        float32x2_t uv3 = vld1_f32(&src[3].texCoords.u);

        // Multiply y by column 1 and add to result
        r0 = vmlaq_laneq_f32(r0, m.val[1], v0, 1);
        r1 = vmlaq_laneq_f32(r1, m.val[1], v1, 1);
        r2 = vmlaq_laneq_f32(r2, m.val[1], v2, 1);
        r3 = vmlaq_laneq_f32(r3, m.val[1], v3, 1);

        // Multiply z by column 2 and add to result
        r0 = vmlaq_laneq_f32(r0, m.val[2], v0, 2);
        r1 = vmlaq_laneq_f32(r1, m.val[2], v1, 2);
        r2 = vmlaq_laneq_f32(r2, m.val[2], v2, 2);
        r3 = vmlaq_laneq_f32(r3, m.val[2], v3, 2);

        // Set w to loaded color (restores the untouched color bits in lane 3)
        r0 = vsetq_lane_f32(vgetq_lane_f32(v0, 3), r0, 3);
        r1 = vsetq_lane_f32(vgetq_lane_f32(v1, 3), r1, 3);
        r2 = vsetq_lane_f32(vgetq_lane_f32(v2, 3), r2, 3);
        r3 = vsetq_lane_f32(vgetq_lane_f32(v3, 3), r3, 3);

        // Store result
        vst1q_f32(&dst[0].vertices.x, r0);
        vst1_f32(&dst[0].texCoords.u, uv0);
        vst1q_f32(&dst[1].vertices.x, r1);
        vst1_f32(&dst[1].texCoords.u, uv1);
        vst1q_f32(&dst[2].vertices.x, r2);
        vst1_f32(&dst[2].texCoords.u, uv2);
        vst1q_f32(&dst[3].vertices.x, r3);
        vst1_f32(&dst[3].texCoords.u, uv3);

        dst += 4;
        src += 4;
    }

    // Process remaining vertices one by one
    while (dst < end)
    {
        float32x4_t v = vld1q_f32(&src->vertices.x);
        float32x4_t r = vmlaq_laneq_f32(m.val[3], m.val[0], v, 0);
        r = vmlaq_laneq_f32(r, m.val[1], v, 1);
        r = vmlaq_laneq_f32(r, m.val[2], v, 2);
        r = vsetq_lane_f32(vgetq_lane_f32(v, 3), r, 3);
        float32x2_t uv = vld1_f32(&src->texCoords.u);
        vst1q_f32(&dst->vertices.x, r);
        vst1_f32(&dst->texCoords.u, uv);

        ++dst;
        ++src;
    }
}
|
||||
|
||||
// dst[i] = src[i] + offset for `count` 16-bit indices (uint16_t wrap-around
// semantics). Processes 32 indices per iteration, then 8, with NEON; the
// remaining 0-7 indices are handled scalar.
// Cleanup vs. original: removed the unused local `end` and the redundant
// `if (count < 8) goto LEFTOVER;` — the loop guards already skip the vector
// paths for small counts, so behavior is identical.
inline void MathUtilNeon64::transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
{
    // Broadcast the offset across all 8 lanes once.
    const uint16x8_t off = vdupq_n_u16(offset);

    // Process 32 indices at a time while there's enough data
    while (count >= 32)
    {
        // Load 32 indices
        uint16x8x4_t v = vld1q_u16_x4(src);

        // Add offset
        v.val[0] = vaddq_u16(v.val[0], off);
        v.val[1] = vaddq_u16(v.val[1], off);
        v.val[2] = vaddq_u16(v.val[2], off);
        v.val[3] = vaddq_u16(v.val[3], off);

        // Store result
        vst1q_u16_x4(dst, v);

        dst += 32;
        src += 32;
        count -= 32;
    }

    // Process 8 indices at a time while there's enough data
    while (count >= 8)
    {
        uint16x8_t v = vaddq_u16(vld1q_u16(src), off);
        vst1q_u16(dst, v);

        dst += 8;
        src += 8;
        count -= 8;
    }

    // Process remaining indices one by one
    while (count > 0)
    {
        *dst++ = *src++ + offset;
        --count;
    }
}
|
||||
|
||||
NS_AX_MATH_END
|
|
@ -1,157 +1,276 @@
|
|||
/****************************************************************************
|
||||
Copyright (c) 2010-2012 cocos2d-x.org
|
||||
Copyright (c) 2013-2017 Chukong Technologies
|
||||
Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd.
|
||||
Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
||||
|
||||
https://axmol.dev/
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
NS_AX_MATH_BEGIN
|
||||
|
||||
#ifdef AX_USE_SSE
|
||||
#ifdef AX_SSE_INTRINSICS
|
||||
|
||||
void MathUtil::addMatrix(const __m128 m[4], float scalar, __m128 dst[4])
|
||||
struct MathUtilSSE
|
||||
{
|
||||
__m128 s = _mm_set1_ps(scalar);
|
||||
dst[0] = _mm_add_ps(m[0], s);
|
||||
dst[1] = _mm_add_ps(m[1], s);
|
||||
dst[2] = _mm_add_ps(m[2], s);
|
||||
dst[3] = _mm_add_ps(m[3], s);
|
||||
}
|
||||
|
||||
void MathUtil::addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
dst[0] = _mm_add_ps(m1[0], m2[0]);
|
||||
dst[1] = _mm_add_ps(m1[1], m2[1]);
|
||||
dst[2] = _mm_add_ps(m1[2], m2[2]);
|
||||
dst[3] = _mm_add_ps(m1[3], m2[3]);
|
||||
}
|
||||
static void addMatrix(const __m128 m[4], float scalar, __m128 dst[4])
|
||||
{
|
||||
__m128 s = _mm_set1_ps(scalar);
|
||||
dst[0] = _mm_add_ps(m[0], s);
|
||||
dst[1] = _mm_add_ps(m[1], s);
|
||||
dst[2] = _mm_add_ps(m[2], s);
|
||||
dst[3] = _mm_add_ps(m[3], s);
|
||||
}
|
||||
|
||||
void MathUtil::subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
dst[0] = _mm_sub_ps(m1[0], m2[0]);
|
||||
dst[1] = _mm_sub_ps(m1[1], m2[1]);
|
||||
dst[2] = _mm_sub_ps(m1[2], m2[2]);
|
||||
dst[3] = _mm_sub_ps(m1[3], m2[3]);
|
||||
}
|
||||
static void addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
dst[0] = _mm_add_ps(m1[0], m2[0]);
|
||||
dst[1] = _mm_add_ps(m1[1], m2[1]);
|
||||
dst[2] = _mm_add_ps(m1[2], m2[2]);
|
||||
dst[3] = _mm_add_ps(m1[3], m2[3]);
|
||||
}
|
||||
|
||||
void MathUtil::multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4])
|
||||
{
|
||||
__m128 s = _mm_set1_ps(scalar);
|
||||
dst[0] = _mm_mul_ps(m[0], s);
|
||||
dst[1] = _mm_mul_ps(m[1], s);
|
||||
dst[2] = _mm_mul_ps(m[2], s);
|
||||
dst[3] = _mm_mul_ps(m[3], s);
|
||||
}
|
||||
static void subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
dst[0] = _mm_sub_ps(m1[0], m2[0]);
|
||||
dst[1] = _mm_sub_ps(m1[1], m2[1]);
|
||||
dst[2] = _mm_sub_ps(m1[2], m2[2]);
|
||||
dst[3] = _mm_sub_ps(m1[3], m2[3]);
|
||||
}
|
||||
|
||||
void MathUtil::multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
__m128 dst0, dst1, dst2, dst3;
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst0 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst1 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst2 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst3 = a2;
|
||||
}
|
||||
dst[0] = dst0;
|
||||
dst[1] = dst1;
|
||||
dst[2] = dst2;
|
||||
dst[3] = dst3;
|
||||
}
|
||||
static void multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4])
|
||||
{
|
||||
__m128 s = _mm_set1_ps(scalar);
|
||||
dst[0] = _mm_mul_ps(m[0], s);
|
||||
dst[1] = _mm_mul_ps(m[1], s);
|
||||
dst[2] = _mm_mul_ps(m[2], s);
|
||||
dst[3] = _mm_mul_ps(m[3], s);
|
||||
}
|
||||
|
||||
void MathUtil::negateMatrix(const __m128 m[4], __m128 dst[4])
|
||||
{
|
||||
__m128 z = _mm_setzero_ps();
|
||||
dst[0] = _mm_sub_ps(z, m[0]);
|
||||
dst[1] = _mm_sub_ps(z, m[1]);
|
||||
dst[2] = _mm_sub_ps(z, m[2]);
|
||||
dst[3] = _mm_sub_ps(z, m[3]);
|
||||
}
|
||||
static void multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
|
||||
{
|
||||
__m128 dst0, dst1, dst2, dst3;
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
void MathUtil::transposeMatrix(const __m128 m[4], __m128 dst[4])
|
||||
{
|
||||
__m128 tmp0 = _mm_shuffle_ps(m[0], m[1], 0x44);
|
||||
__m128 tmp2 = _mm_shuffle_ps(m[0], m[1], 0xEE);
|
||||
__m128 tmp1 = _mm_shuffle_ps(m[2], m[3], 0x44);
|
||||
__m128 tmp3 = _mm_shuffle_ps(m[2], m[3], 0xEE);
|
||||
|
||||
dst[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
|
||||
dst[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
|
||||
dst[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
|
||||
dst[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
|
||||
}
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
void MathUtil::transformVec4(const __m128 m[4], const __m128& v, __m128& dst)
|
||||
{
|
||||
__m128 col1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 col2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 col3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 col4 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
dst = _mm_add_ps(
|
||||
_mm_add_ps(_mm_mul_ps(m[0], col1), _mm_mul_ps(m[1], col2)),
|
||||
_mm_add_ps(_mm_mul_ps(m[2], col3), _mm_mul_ps(m[3], col4))
|
||||
);
|
||||
}
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst0 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst1 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst2 = a2;
|
||||
}
|
||||
|
||||
{
|
||||
__m128 e0 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 e1 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 e2 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 e3 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 v0 = _mm_mul_ps(m1[0], e0);
|
||||
__m128 v1 = _mm_mul_ps(m1[1], e1);
|
||||
__m128 v2 = _mm_mul_ps(m1[2], e2);
|
||||
__m128 v3 = _mm_mul_ps(m1[3], e3);
|
||||
|
||||
__m128 a0 = _mm_add_ps(v0, v1);
|
||||
__m128 a1 = _mm_add_ps(v2, v3);
|
||||
__m128 a2 = _mm_add_ps(a0, a1);
|
||||
|
||||
dst3 = a2;
|
||||
}
|
||||
dst[0] = dst0;
|
||||
dst[1] = dst1;
|
||||
dst[2] = dst2;
|
||||
dst[3] = dst3;
|
||||
}
|
||||
|
||||
static void negateMatrix(const __m128 m[4], __m128 dst[4])
|
||||
{
|
||||
__m128 z = _mm_setzero_ps();
|
||||
dst[0] = _mm_sub_ps(z, m[0]);
|
||||
dst[1] = _mm_sub_ps(z, m[1]);
|
||||
dst[2] = _mm_sub_ps(z, m[2]);
|
||||
dst[3] = _mm_sub_ps(z, m[3]);
|
||||
}
|
||||
|
||||
static void transposeMatrix(const __m128 m[4], __m128 dst[4])
|
||||
{
|
||||
__m128 tmp0 = _mm_shuffle_ps(m[0], m[1], 0x44);
|
||||
__m128 tmp2 = _mm_shuffle_ps(m[0], m[1], 0xEE);
|
||||
__m128 tmp1 = _mm_shuffle_ps(m[2], m[3], 0x44);
|
||||
__m128 tmp3 = _mm_shuffle_ps(m[2], m[3], 0xEE);
|
||||
|
||||
dst[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
|
||||
dst[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
|
||||
dst[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
|
||||
dst[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
|
||||
}
|
||||
|
||||
static void transformVec4(const __m128 m[4], float x, float y, float z, float w, float* dst /*vec3*/)
|
||||
{
|
||||
//__m128 res = _mm_set_ps(w, z, y, x);
|
||||
//__m128 xx = _mm_shuffle_ps(res, res, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
//__m128 yy = _mm_shuffle_ps(res, res, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
//__m128 zz = _mm_shuffle_ps(res, res, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
//__m128 ww = _mm_shuffle_ps(res, res, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 xx = _mm_set1_ps(x);
|
||||
__m128 yy = _mm_set1_ps(y);
|
||||
__m128 zz = _mm_set1_ps(z);
|
||||
__m128 ww = _mm_set1_ps(w);
|
||||
|
||||
auto res = _mm_add_ps(_mm_add_ps(_mm_mul_ps(m[0], xx), _mm_mul_ps(m[1], yy)),
|
||||
_mm_add_ps(_mm_mul_ps(m[2], zz), _mm_mul_ps(m[3], ww)));
|
||||
|
||||
_mm_storel_pi((__m64*)dst, res);
|
||||
|
||||
# if defined(__SSE4_1__)
|
||||
*reinterpret_cast<int*>(dst + 2) = _mm_extract_ps(res, 2);
|
||||
# else
|
||||
dst[2] = _mm_cvtss_f32(_mm_movehl_ps(res, res));
|
||||
# endif
|
||||
}
|
||||
|
||||
static void transformVec4(const __m128 m[4], const float* v /*vec4*/, float* dst /*vec4*/)
|
||||
{
|
||||
//__m128 res = _mm_loadu_ps(v);
|
||||
//__m128 xx = _mm_shuffle_ps(res, res, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
//__m128 yy = _mm_shuffle_ps(res, res, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
//__m128 zz = _mm_shuffle_ps(res, res, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
//__m128 ww = _mm_shuffle_ps(res, res, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 xx = _mm_set1_ps(v[0]);
|
||||
__m128 yy = _mm_set1_ps(v[1]);
|
||||
__m128 zz = _mm_set1_ps(v[2]);
|
||||
__m128 ww = _mm_set1_ps(v[3]);
|
||||
|
||||
auto res = _mm_add_ps(_mm_add_ps(_mm_mul_ps(m[0], xx), _mm_mul_ps(m[1], yy)),
|
||||
_mm_add_ps(_mm_mul_ps(m[2], zz), _mm_mul_ps(m[3], ww)));
|
||||
_mm_storeu_ps(dst, res);
|
||||
}
|
||||
|
||||
static void crossVec3(const float* v1, const float* v2, float* dst)
|
||||
{
|
||||
__m128 a = _mm_set_ps(0.0f, v1[2], v1[1], v1[0]);
|
||||
__m128 b = _mm_set_ps(0.0f, v2[2], v2[1], v2[0]);
|
||||
|
||||
__m128 a_yzx = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
__m128 b_yzx = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
__m128 res = _mm_sub_ps(_mm_mul_ps(a, b_yzx), _mm_mul_ps(a_yzx, b));
|
||||
|
||||
res = _mm_shuffle_ps(res, res, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
|
||||
_mm_storel_pi((__m64*)dst, res);
|
||||
# if defined(__SSE4_1__)
|
||||
*reinterpret_cast<int*>(dst + 2) = _mm_extract_ps(res, 2);
|
||||
# else
|
||||
dst[2] = _mm_cvtss_f32(_mm_movehl_ps(res, res));
|
||||
# endif
|
||||
}
|
||||
|
||||
static void transformVertices(V3F_C4B_T2F* dst, const V3F_C4B_T2F* src, size_t count, const Mat4& transform)
|
||||
{
|
||||
auto& m = transform.col;
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
auto& vert = src[i].vertices;
|
||||
__m128 v = _mm_set_ps(1.0f, vert.z, vert.y, vert.x);
|
||||
v = _mm_add_ps(
|
||||
_mm_add_ps(_mm_mul_ps(m[0], _mm_shuffle_ps(v, v, 0)), _mm_mul_ps(m[1], _mm_shuffle_ps(v, v, 0x55))),
|
||||
_mm_add_ps(_mm_mul_ps(m[2], _mm_shuffle_ps(v, v, 0xaa)), _mm_mul_ps(m[3], _mm_shuffle_ps(v, v, 0xff))));
|
||||
_mm_storeu_ps((float*)&dst[i].vertices, v);
|
||||
|
||||
// Copy tex coords and colors
|
||||
// dst[i].texCoords = src[i].texCoords;
|
||||
// dst[i].colors = src[i].colors;
|
||||
memcpy(&dst[i].colors, &src[i].colors, sizeof(V3F_C4B_T2F::colors) + sizeof(V3F_C4B_T2F::texCoords));
|
||||
}
|
||||
}
|
||||
|
||||
static void transformIndices(uint16_t* dst, const uint16_t* src, size_t count, uint16_t offset)
|
||||
{
|
||||
__m128i offset_vector = _mm_set1_epi16(offset);
|
||||
size_t remainder = count % 8;
|
||||
size_t rounded_count = count - remainder;
|
||||
|
||||
for (size_t i = 0; i < rounded_count; i += 8)
|
||||
{
|
||||
__m128i current_values = _mm_loadu_si128((__m128i*)(src + i)); // Load 8 values.
|
||||
current_values = _mm_add_epi16(current_values, offset_vector); // Add offset to them.
|
||||
_mm_storeu_si128((__m128i*)(dst + i), current_values); // Store the result.
|
||||
}
|
||||
|
||||
// If count is not divisible by 8, add offset for the remainder elements one by one.
|
||||
for (size_t i = 0; i < remainder; ++i)
|
||||
{
|
||||
dst[rounded_count + i] = src[rounded_count + i] + offset;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
NS_AX_MATH_END
|
||||
|
|
|
@ -163,5 +163,26 @@ Linux: Desktop GL/Vulkan
|
|||
# endif
|
||||
#endif
|
||||
|
||||
// ## SIMD detections
|
||||
#if !defined(AX_NEON_INTRINSICS)
|
||||
# if (AX_TARGET_PLATFORM != AX_PLATFORM_WASM)
|
||||
# if defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM) || defined(__ARM_NEON__)
|
||||
# define AX_NEON_INTRINSICS 1
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef AX_SSE_INTRINSICS
|
||||
// axmol math ISA require SSE2 at latest
|
||||
# include <emmintrin.h>
|
||||
# if defined(__SSE4_1__)
|
||||
# include <smmintrin.h>
|
||||
# endif
|
||||
using _xm128_t = __m128;
|
||||
#elif defined(AX_NEON_INTRINSICS)
|
||||
# include <arm_neon.h>
|
||||
using _xm128_t = float32x4_t;
|
||||
#endif
|
||||
|
||||
/// @endcond
|
||||
#endif // __BASE_AX_PLATFORM_CONFIG_H__
|
||||
|
|
|
@ -89,12 +89,12 @@ Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
|||
* @since v0.99.5
|
||||
*/
|
||||
#if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID)
|
||||
# if !defined(AX_ENABLE_CACHE_TEXTURE_DATA)
|
||||
# define AX_ENABLE_CACHE_TEXTURE_DATA 1
|
||||
# endif
|
||||
# if !defined(AX_ENABLE_CACHE_TEXTURE_DATA)
|
||||
# define AX_ENABLE_CACHE_TEXTURE_DATA 1
|
||||
# endif
|
||||
#else
|
||||
# undef AX_ENABLE_CACHE_TEXTURE_DATA
|
||||
# define AX_ENABLE_CACHE_TEXTURE_DATA 0
|
||||
# undef AX_ENABLE_CACHE_TEXTURE_DATA
|
||||
# define AX_ENABLE_CACHE_TEXTURE_DATA 0
|
||||
#endif
|
||||
|
||||
/** @def AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST
|
||||
|
@ -102,12 +102,12 @@ Copyright (c) 2019-present Axmol Engine contributors (see AUTHORS.md).
|
|||
*
|
||||
*/
|
||||
#if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID && !AX_ENABLE_CACHE_TEXTURE_DATA)
|
||||
# if !defined(AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST)
|
||||
# define AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST 1
|
||||
# endif
|
||||
# if !defined(AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST)
|
||||
# define AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST 1
|
||||
# endif
|
||||
#else
|
||||
# undef AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST
|
||||
# define AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST 0
|
||||
# undef AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST
|
||||
# define AX_ENABLE_RESTART_APPLICATION_ON_CONTEXT_LOST 0
|
||||
#endif
|
||||
|
||||
#if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID) || (AX_TARGET_PLATFORM == AX_PLATFORM_WIN32)
|
||||
|
@ -188,14 +188,20 @@ protected: \
|
|||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline varType get##funName() const { return varName; }
|
||||
virtual inline varType get##funName() const \
|
||||
{ \
|
||||
return varName; \
|
||||
}
|
||||
|
||||
#define AX_SYNTHESIZE_READONLY_PASS_BY_REF(varType, varName, funName) \
|
||||
protected: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline const varType& get##funName() const { return varName; }
|
||||
virtual inline const varType& get##funName() const \
|
||||
{ \
|
||||
return varName; \
|
||||
}
|
||||
|
||||
/** @def AX_SYNTHESIZE
|
||||
* It is used to declare a protected variable.
|
||||
|
@ -209,36 +215,51 @@ public: \
|
|||
* The variables and methods declared after AX_SYNTHESIZE are all public.
|
||||
* If you need protected or private, please declare.
|
||||
*/
|
||||
#define AX_SYNTHESIZE(varType, varName, funName) \
|
||||
protected: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline varType get##funName() const { return varName; } \
|
||||
virtual inline void set##funName(varType var) { varName = var; }
|
||||
#define AX_SYNTHESIZE(varType, varName, funName) \
|
||||
protected: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline varType get##funName() const \
|
||||
{ \
|
||||
return varName; \
|
||||
} \
|
||||
virtual inline void set##funName(varType var) \
|
||||
{ \
|
||||
varName = var; \
|
||||
}
|
||||
|
||||
#define AX_SYNTHESIZE_PASS_BY_REF(varType, varName, funName) \
|
||||
protected: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline const varType& get##funName() const { return varName; } \
|
||||
virtual inline void set##funName(const varType& var) { varName = var; }
|
||||
#define AX_SYNTHESIZE_PASS_BY_REF(varType, varName, funName) \
|
||||
protected: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline const varType& get##funName() const \
|
||||
{ \
|
||||
return varName; \
|
||||
} \
|
||||
virtual inline void set##funName(const varType& var) \
|
||||
{ \
|
||||
varName = var; \
|
||||
}
|
||||
|
||||
#define AX_SYNTHESIZE_RETAIN(varType, varName, funName) \
|
||||
private: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline varType get##funName() const { return varName; } \
|
||||
virtual inline void set##funName(varType var) \
|
||||
{ \
|
||||
if (varName != var) \
|
||||
{ \
|
||||
AX_SAFE_RETAIN(var); \
|
||||
AX_SAFE_RELEASE(varName); \
|
||||
varName = var; \
|
||||
} \
|
||||
#define AX_SYNTHESIZE_RETAIN(varType, varName, funName) \
|
||||
private: \
|
||||
varType varName; \
|
||||
\
|
||||
public: \
|
||||
virtual inline varType get##funName() const \
|
||||
{ \
|
||||
return varName; \
|
||||
} \
|
||||
virtual inline void set##funName(varType var) \
|
||||
{ \
|
||||
if (varName != var) \
|
||||
{ \
|
||||
AX_SAFE_RETAIN(var); \
|
||||
AX_SAFE_RELEASE(varName); \
|
||||
varName = var; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define AX_SAFE_DELETE(p) \
|
||||
|
@ -252,7 +273,7 @@ public: \
|
|||
{ \
|
||||
if (p) \
|
||||
{ \
|
||||
delete[](p); \
|
||||
delete[] (p); \
|
||||
(p) = nullptr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
@ -318,7 +339,7 @@ public: \
|
|||
} while (0)
|
||||
|
||||
#elif _AX_DEBUG == 1
|
||||
# define AXLOG(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOG(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGERROR(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGINFO(format, ...) \
|
||||
do \
|
||||
|
@ -327,10 +348,10 @@ public: \
|
|||
# define AXLOGWARN(...) __AXLOGWITHFUNCTION(__VA_ARGS__)
|
||||
|
||||
#elif _AX_DEBUG > 1
|
||||
# define AXLOG(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOG(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGERROR(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGINFO(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGWARN(...) __AXLOGWITHFUNCTION(__VA_ARGS__)
|
||||
# define AXLOGINFO(format, ...) ax::print(format, ##__VA_ARGS__)
|
||||
# define AXLOGWARN(...) __AXLOGWITHFUNCTION(__VA_ARGS__)
|
||||
#endif // _AX_DEBUG
|
||||
|
||||
/** Lua engine debug */
|
||||
|
@ -349,8 +370,8 @@ public: \
|
|||
*/
|
||||
#if defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUG__ == 4) && (__GNUC_MINOR__ >= 4))) || \
|
||||
(defined(__clang__) && (__clang_major__ >= 3)) || (_MSC_VER >= 1800)
|
||||
# define AX_DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&) = delete; \
|
||||
# define AX_DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&) = delete; \
|
||||
TypeName& operator=(const TypeName&) = delete;
|
||||
#else
|
||||
# define AX_DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
|
@ -444,15 +465,25 @@ public: \
|
|||
*/
|
||||
#if __has_builtin(__builtin_expect)
|
||||
# ifdef __cplusplus
|
||||
# define UTILS_LIKELY(exp) (__builtin_expect(!!(exp), true))
|
||||
# define UTILS_LIKELY(exp) (__builtin_expect(!!(exp), true))
|
||||
# define UTILS_UNLIKELY(exp) (__builtin_expect(!!(exp), false))
|
||||
# else
|
||||
# define UTILS_LIKELY(exp) (__builtin_expect(!!(exp), 1))
|
||||
# define UTILS_LIKELY(exp) (__builtin_expect(!!(exp), 1))
|
||||
# define UTILS_UNLIKELY(exp) (__builtin_expect(!!(exp), 0))
|
||||
# endif
|
||||
#else
|
||||
# define UTILS_LIKELY(exp) (!!(exp))
|
||||
# define UTILS_LIKELY(exp) (!!(exp))
|
||||
# define UTILS_UNLIKELY(exp) (!!(exp))
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// MSVC does not support loop unrolling hints
|
||||
# define UTILS_UNROLL
|
||||
# define UTILS_NOUNROLL
|
||||
#else
|
||||
// C++11 allows pragmas to be specified as part of defines using the _Pragma syntax.
|
||||
# define UTILS_UNROLL _Pragma("unroll")
|
||||
# define UTILS_NOUNROLL _Pragma("nounroll")
|
||||
#endif
|
||||
|
||||
#endif // __AX_PLATFORM_MACROS_H__
|
||||
|
|
|
@ -26,57 +26,33 @@
|
|||
#include <doctest.h>
|
||||
#include "base/Config.h"
|
||||
#include "base/Types.h"
|
||||
#include "math/MathBase.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
#if (AX_TARGET_PLATFORM == AX_PLATFORM_IOS)
|
||||
#if defined(__arm64__)
|
||||
#define USE_NEON64 1
|
||||
#define INCLUDE_NEON64 1
|
||||
#elif defined(__ARM_NEON__)
|
||||
#define USE_NEON32 1
|
||||
#define INCLUDE_NEON32 1
|
||||
#endif
|
||||
#elif (AX_TARGET_PLATFORM == AX_PLATFORM_OSX)
|
||||
#if defined(__arm64__) || defined(__aarch64__)
|
||||
#define USE_NEON64 1
|
||||
#define INCLUDE_NEON64 1
|
||||
#endif
|
||||
#elif (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID)
|
||||
#if defined(__arm64__) || defined(__aarch64__)
|
||||
#define USE_NEON64 1
|
||||
#define INCLUDE_NEON64 1
|
||||
#elif defined(__ARM_NEON__)
|
||||
#define INCLUDE_NEON32 1
|
||||
#endif
|
||||
#endif
|
||||
#define INCLUDE_SSE
|
||||
#define USE_SSE
|
||||
|
||||
#if defined(USE_NEON32) || defined(USE_NEON64) // || defined(USE_SSE)
|
||||
#define SKIP_SIMD_TEST doctest::skip(false)
|
||||
#if defined(AX_SSE_INTRINSICS) || defined(AX_NEON_INTRINSICS)
|
||||
# define SKIP_SIMD_TEST doctest::skip(false)
|
||||
#else
|
||||
#define SKIP_SIMD_TEST doctest::skip(true)
|
||||
# define SKIP_SIMD_TEST doctest::skip(true)
|
||||
#endif
|
||||
|
||||
USING_NS_AX;
|
||||
|
||||
namespace UnitTest {
|
||||
namespace UnitTest
|
||||
{
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
#include "math/MathUtilNeon.inl"
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
#include "math/MathUtilNeon64.inl"
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// #include "math/MathUtilSSE.inl"
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
# include "math/MathUtilNeon.inl"
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
# include "math/MathUtilSSE.inl"
|
||||
#endif
|
||||
|
||||
#include "math/MathUtil.inl"
|
||||
|
||||
} // namespace UnitTest
|
||||
|
||||
|
||||
static void __checkMathUtilResult(std::string_view description, const float* a1, const float* a2, int size)
|
||||
{
|
||||
// Check whether the result of the optimized instruction is the same as which is implemented in C
|
||||
|
@ -87,11 +63,10 @@ static void __checkMathUtilResult(std::string_view description, const float* a1,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_SUITE("math/MathUtil") {
|
||||
TEST_SUITE("math/MathUtil")
|
||||
{
|
||||
using namespace UnitTest::ax;
|
||||
|
||||
|
||||
static void checkVerticesAreEqual(const V3F_C4B_T2F* v1, const V3F_C4B_T2F* v2, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
|
@ -102,84 +77,94 @@ TEST_SUITE("math/MathUtil") {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("transformVertices") {
|
||||
TEST_CASE("transformVertices")
|
||||
{
|
||||
auto count = 5;
|
||||
std::vector<V3F_C4B_T2F> src(count);
|
||||
std::vector<V3F_C4B_T2F> expected(count);
|
||||
std::vector<V3F_C4B_T2F> dst(count);
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
for (int i = 0; i < count; ++i)
|
||||
{
|
||||
src[i].vertices.set(float(i), float(i + 1), float(i + 2));
|
||||
src[i].colors.set(uint8_t(i + 3), uint8_t(i + 4), uint8_t(i + 5), uint8_t(i + 6));
|
||||
src[i].texCoords.set(float(i + 7), float(i + 8));
|
||||
|
||||
expected[i] = src[i];
|
||||
expected[i] = src[i];
|
||||
expected[i].vertices.x = src[i].vertices.y * 4;
|
||||
expected[i].vertices.y = src[i].vertices.x * -5;
|
||||
expected[i].vertices.z = src[i].vertices.z * 6;
|
||||
}
|
||||
|
||||
Mat4 transform(
|
||||
0, 4, 0, 0,
|
||||
-5, 0, 0, 0,
|
||||
0, 0, 6, 0,
|
||||
1, 2, 3, 1
|
||||
);
|
||||
Mat4 transform(0, 4, 0, 0, -5, 0, 0, 0, 0, 0, 6, 0, 1, 2, 3, 1);
|
||||
|
||||
SUBCASE("MathUtilC") {
|
||||
SUBCASE("MathUtilC")
|
||||
{
|
||||
MathUtilC::transformVertices(dst.data(), src.data(), count, transform);
|
||||
checkVerticesAreEqual(expected.data(), dst.data(), count);
|
||||
}
|
||||
|
||||
#if INCLUDE_NEON32
|
||||
SUBCASE("MathUtilNeon") {
|
||||
MathUtilNeon::transformVertices(dst.data(), src.data(), count, transform);
|
||||
checkVerticesAreEqual(expected.data(), dst.data(), count);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if INCLUDE_NEON64
|
||||
SUBCASE("MathUtilNeon64") {
|
||||
MathUtilNeon64::transformVertices(dst.data(), src.data(), count, transform);
|
||||
checkVerticesAreEqual(expected.data(), dst.data(), count);
|
||||
}
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
SUBCASE("MathUtilNeon")
|
||||
{
|
||||
MathUtilNeon::transformVertices(dst.data(), src.data(), count, transform);
|
||||
checkVerticesAreEqual(expected.data(), dst.data(), count);
|
||||
}
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
SUBCASE("MathUtilSSE")
|
||||
{
|
||||
MathUtilSSE::transformVertices(dst.data(), src.data(), count, transform);
|
||||
checkVerticesAreEqual(expected.data(), dst.data(), count);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("transformIndices") {
|
||||
TEST_CASE("transformIndices")
|
||||
{
|
||||
auto count = 43;
|
||||
std::vector<uint16_t> src(count);
|
||||
std::vector<uint16_t> expected(count);
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
src[i] = i;
|
||||
for (int i = 0; i < count; ++i)
|
||||
{
|
||||
src[i] = i;
|
||||
expected[i] = i + 5;
|
||||
}
|
||||
|
||||
uint16_t offset = 5;
|
||||
|
||||
SUBCASE("MathUtilC") {
|
||||
SUBCASE("MathUtilC")
|
||||
{
|
||||
std::vector<uint16_t> dst(count);
|
||||
MathUtilC::transformIndices(dst.data(), src.data(), count, offset);
|
||||
for (int i = 0; i < count; ++i)
|
||||
CHECK_EQ(expected[i], dst[i]);
|
||||
}
|
||||
|
||||
#if INCLUDE_NEON64
|
||||
SUBCASE("MathUtilNeon64") {
|
||||
std::vector<uint16_t> dst(count);
|
||||
MathUtilNeon64::transformIndices(dst.data(), src.data(), count, offset);
|
||||
for (int i = 0; i < count; ++i)
|
||||
CHECK_EQ(expected[i], dst[i]);
|
||||
}
|
||||
#endif
|
||||
#if defined(AX_NEON_INTRINSICS) && AX_64BITS
|
||||
SUBCASE("MathUtilNeon")
|
||||
{
|
||||
std::vector<uint16_t> dst(count);
|
||||
MathUtilNeon::transformIndices(dst.data(), src.data(), count, offset);
|
||||
for (int i = 0; i < count; ++i)
|
||||
CHECK_EQ(expected[i], dst[i]);
|
||||
}
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
SUBCASE("MathUtilSSE")
|
||||
{
|
||||
std::vector<uint16_t> dst(count);
|
||||
MathUtilSSE::transformIndices(dst.data(), src.data(), count, offset);
|
||||
for (int i = 0; i < count; ++i)
|
||||
CHECK_EQ(expected[i], dst[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
||||
TEST_CASE("old_tests") {
|
||||
TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST)
|
||||
{
|
||||
TEST_CASE("old_tests")
|
||||
{
|
||||
// I know the next line looks ugly, but it's a way to test MathUtil. :)
|
||||
using namespace UnitTest::ax;
|
||||
|
||||
|
@ -213,20 +198,18 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void addMatrix(const float* m, float scalar, float* dst);
|
||||
MathUtilC::addMatrix(inMat41, scalar, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::addMatrix(inMat41, scalar, outMat4Opt);
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(inMat41), scalar,
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::addMatrix(inMat41, scalar, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_SSE_INTRINSICS
|
||||
MathUtilSSE::addMatrix(reinterpret_cast<const _xm128_t*>(inMat41), scalar,
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void addMatrix(const float* m, float scalar, float* dst);", outMat4C,
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -234,20 +217,16 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void addMatrix(const float* m1, const float* m2, float* dst);
|
||||
MathUtilC::addMatrix(inMat41, inMat42, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::addMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::addMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::addMatrix(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::addMatrix(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void addMatrix(const float* m1, const float* m2, float* dst);", outMat4C,
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -255,20 +234,18 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void subtractMatrix(const float* m1, const float* m2, float* dst);
|
||||
MathUtilC::subtractMatrix(inMat41, inMat42, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::subtractMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::subtractMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::subtractMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::subtractMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void subtractMatrix(const float* m1, const float* m2, float* dst);", outMat4C,
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
__checkMathUtilResult("inline static void subtractMatrix(const float* m1, const float* m2, float* dst);",
|
||||
outMat4C, outMat4Opt, MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -276,20 +253,16 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void multiplyMatrix(const float* m, float scalar, float* dst);
|
||||
MathUtilC::multiplyMatrix(inMat41, scalar, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::multiplyMatrix(inMat41, scalar, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::multiplyMatrix(inMat41, scalar, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(inMat41), scalar,
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::multiplyMatrix(reinterpret_cast<const _xm128_t*>(inMat41), scalar,
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void multiplyMatrix(const float* m, float scalar, float* dst);", outMat4C,
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -297,20 +270,18 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);
|
||||
MathUtilC::multiplyMatrix(inMat41, inMat42, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::multiplyMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::multiplyMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::multiplyMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<const _xm128_t*>(inMat42),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::multiplyMatrix(inMat41, inMat42, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);", outMat4C,
|
||||
outMat4Opt, MAT4_SIZE);
|
||||
__checkMathUtilResult("inline static void multiplyMatrix(const float* m1, const float* m2, float* dst);",
|
||||
outMat4C, outMat4Opt, MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -318,20 +289,14 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void negateMatrix(const float* m, float* dst);
|
||||
MathUtilC::negateMatrix(inMat41, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::negateMatrix(inMat41, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::negateMatrix(inMat41, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::negateMatrix(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::negateMatrix(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void negateMatrix(const float* m, float* dst);", outMat4C, outMat4Opt,
|
||||
MAT4_SIZE);
|
||||
MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -339,20 +304,16 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void transposeMatrix(const float* m, float* dst);
|
||||
MathUtilC::transposeMatrix(inMat41, outMat4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::transposeMatrix(inMat41, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::transposeMatrix(inMat41, outMat4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::transposeMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transposeMatrix(reinterpret_cast<const _xm128_t*>(inMat41),
|
||||
reinterpret_cast<_xm128_t*>(outMat4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void transposeMatrix(const float* m, float* dst);", outMat4C, outMat4Opt,
|
||||
MAT4_SIZE);
|
||||
MAT4_SIZE);
|
||||
// Clean
|
||||
memset(outMat4C, 0, sizeof(outMat4C));
|
||||
memset(outMat4Opt, 0, sizeof(outMat4Opt));
|
||||
|
@ -360,21 +321,16 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);
|
||||
MathUtilC::transformVec4(inMat41, x, y, z, w, outVec4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::transformVec4(inMat41, x, y, z, w, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::transformVec4(inMat41, x, y, z, w, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(inMat41), x, y, z, w, outVec4Opt);
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
// FIXME:
|
||||
MathUtilSSE::transformVec4(reinterpret_cast<const _xm128_t*>(inMat41), x, y, z, w, outVec4Opt);
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult(
|
||||
"inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);", outVec4C,
|
||||
outVec4Opt, VEC4_SIZE);
|
||||
"inline static void transformVec4(const float* m, float x, float y, float z, float w, float* dst);",
|
||||
outVec4C, outVec4Opt, VEC4_SIZE);
|
||||
// Clean
|
||||
memset(outVec4C, 0, sizeof(outVec4C));
|
||||
memset(outVec4Opt, 0, sizeof(outVec4Opt));
|
||||
|
@ -382,20 +338,15 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void transformVec4(const float* m, const float* v, float* dst);
|
||||
MathUtilC::transformVec4(inMat41, inVec4, outVec4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::transformVec4(inMat41, inVec4, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::transformVec4(inMat41, inVec4, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::transformVec4(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<const float*>(inVec4),
|
||||
reinterpret_cast<float*>(outVec4Opt));
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::transformVec4(reinterpret_cast<const _xm128_t*>(inMat41), reinterpret_cast<const float*>(inVec4), reinterpret_cast<float*>(outVec4Opt));
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void transformVec4(const float* m, const float* v, float* dst);", outVec4C,
|
||||
outVec4Opt, VEC4_SIZE);
|
||||
outVec4Opt, VEC4_SIZE);
|
||||
// Clean
|
||||
memset(outVec4C, 0, sizeof(outVec4C));
|
||||
memset(outVec4Opt, 0, sizeof(outVec4Opt));
|
||||
|
@ -403,20 +354,14 @@ TEST_SUITE("math/MathUtil" * SKIP_SIMD_TEST) {
|
|||
// inline static void crossVec3(const float* v1, const float* v2, float* dst);
|
||||
MathUtilC::crossVec3(inVec4, inVec42, outVec4C);
|
||||
|
||||
#ifdef INCLUDE_NEON32
|
||||
MathUtilNeon::crossVec3(inVec4, inVec42, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_NEON64
|
||||
MathUtilNeon64::crossVec3(inVec4, inVec42, outVec4Opt);
|
||||
#endif
|
||||
|
||||
#ifdef INCLUDE_SSE
|
||||
// FIXME:
|
||||
#endif
|
||||
#ifdef AX_NEON_INTRINSICS
|
||||
MathUtilNeon::crossVec3(inVec4, inVec42, outVec4Opt);
|
||||
#elif defined(AX_SSE_INTRINSICS)
|
||||
MathUtilSSE::crossVec3(inVec4, inVec42, outVec4Opt);
|
||||
#endif
|
||||
|
||||
__checkMathUtilResult("inline static void crossVec3(const float* v1, const float* v2, float* dst);", outVec4C,
|
||||
outVec4Opt, VEC4_SIZE);
|
||||
outVec4Opt, VEC4_SIZE);
|
||||
// Clean
|
||||
memset(outVec4C, 0, sizeof(outVec4C));
|
||||
memset(outVec4Opt, 0, sizeof(outVec4Opt));
|
||||
|
|
Loading…
Reference in New Issue