axmol/thirdparty/astc/astcenc_mathlib.h

// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2021 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

/*
 * This module implements a variety of mathematical data types and library
 * functions used by the codec.
 */

#ifndef ASTC_MATHLIB_H_INCLUDED
#define ASTC_MATHLIB_H_INCLUDED

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Instruction-set feature selection.
//
// Each ASTCENC_* macro below may be pre-defined by the build system, in which
// case that value is kept. Otherwise a default is derived from the macros the
// compiler predefines for the target being built.

// POPCNT: 1 if the compiler advertises __POPCNT__, otherwise 0.
#ifndef ASTCENC_POPCNT
  #if defined(__POPCNT__)
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif

// F16C: 1 if the compiler advertises __F16C__, otherwise 0.
#ifndef ASTCENC_F16C
  #if defined(__F16C__)
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif

// SSE: highest advertised level encoded as 42 (SSE4.2), 41 (SSE4.1),
// 30 (SSE3) or 20 (SSE2); 0 when none of these is advertised.
#ifndef ASTCENC_SSE
  #if defined(__SSE4_2__)
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE3__)
    #define ASTCENC_SSE 30
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif

// AVX: 2 for AVX2, 1 for AVX, 0 when neither is advertised.
#ifndef ASTCENC_AVX
  #if defined(__AVX2__)
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif

// NEON: 1 when compiling for AArch64 (__aarch64__), otherwise 0.
#ifndef ASTCENC_NEON
  #if defined(__aarch64__)
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif

// VECALIGN is 32 for any non-zero ASTCENC_AVX and 16 otherwise.
// NOTE(review): presumably the byte alignment required for vector data -
// confirm at the use sites, which are not in this header.
#if ASTCENC_AVX
  #define ASTCENC_VECALIGN 32
#else
  #define ASTCENC_VECALIGN 16
#endif

#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
	#include <immintrin.h>
#endif

/* ============================================================================
  Fast math library; note that many of the higher-order functions in this set
  use approximations which are less accurate, but faster, than <cmath> standard
  library equivalents.

  Note: Many of these are not necessarily faster than simple C versions when
  used on a single scalar value, but are included for testing purposes as most
  have an option based on SSE intrinsics and therefore provide an obvious route
  to future vectorization.
============================================================================ */

// Overlay of a 32-bit float with unsigned and signed 32-bit integers, used to
// inspect or edit the bit pattern of a float. All three members share the same
// storage; the member order is significant for aggregate initialization.
union if32
{
	uint32_t u;  // Bit pattern as an unsigned integer.
	int32_t s;   // Bit pattern as a signed integer.
	float f;     // The value as a single-precision float.
};

// These are namespaced to avoid colliding with C standard library functions.
namespace astc
{

// Single-precision pi and pi/2. The literals carry more digits than a float
// can hold and are rounded to the nearest representable value.
static constexpr float PI = 3.14159265358979323846f;
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;

/**
 * @brief Compute the magnitude of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The absolute value of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}

/**
 * @brief Test whether a float value is a NaN.
 *
 * Uses the property that a NaN never compares equal to anything, including
 * itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	const bool equals_itself = (v == v);
	return !equals_itself;
}

/**
 * @brief Select the smaller of two values.
 *
 * @c p is returned only when <tt>p < q</tt> holds. For floats that comparison
 * is false if either operand is a NaN, so the result is @c q in that case.
 *
 * @param p   The first candidate.
 * @param q   The second candidate, also the fallback result.
 *
 * @return The smaller of the two values.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}

/**
 * @brief Select the smallest of three values.
 *
 * Reduces @c p and @c q first, then compares that result against @c r using
 * the two-value form. For floats, a NaN reaching the final comparison yields
 * @c r.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 *
 * @return The smallest of the three values.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	T smallest_pq = min(p, q);
	return min(smallest_pq, r);
}

/**
 * @brief Select the smallest of four values.
 *
 * Reduces the pairs (@c p, @c q) and (@c r, @c s) independently and then
 * compares the two winners using the two-value form. For floats, a NaN in
 * either pair result makes the final step return the (@c r, @c s) result.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 * @param s   The fourth candidate.
 *
 * @return The smallest of the four values.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	T smallest_pq = min(p, q);
	T smallest_rs = min(r, s);
	return min(smallest_pq, smallest_rs);
}

/**
 * @brief Select the larger of two values.
 *
 * @c p is returned only when <tt>p > q</tt> holds. For floats that comparison
 * is false if either operand is a NaN, so the result is @c q in that case.
 *
 * @param p   The first candidate.
 * @param q   The second candidate, also the fallback result.
 *
 * @return The larger of the two values.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}

/**
 * @brief Select the largest of three values.
 *
 * Reduces @c p and @c q first, then compares that result against @c r using
 * the two-value form. For floats, a NaN reaching the final comparison yields
 * @c r.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 *
 * @return The largest of the three values.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	T largest_pq = max(p, q);
	return max(largest_pq, r);
}

/**
 * @brief Select the largest of four values.
 *
 * Reduces the pairs (@c p, @c q) and (@c r, @c s) independently and then
 * compares the two winners using the two-value form. For floats, a NaN in
 * either pair result makes the final step return the (@c r, @c s) result.
 *
 * @param p   The first candidate.
 * @param q   The second candidate.
 * @param r   The third candidate.
 * @param s   The fourth candidate.
 *
 * @return The largest of the four values.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	T largest_pq = max(p, q);
	T largest_rs = max(r, s);
	return max(largest_pq, largest_rs);
}

/**
 * @brief Clamp a value to the range between @c mn and @c mx.
 *
 * For floats, a NaN input is turned into @c mn because every comparison
 * against a NaN is false.
 *
 * @param v      The value to clamp.
 * @param mn     The lower bound (inclusive).
 * @param mx     The upper bound (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The order and direction of these comparisons is deliberate: both are
	// false for a NaN input, so evaluation ends on the lower bound.
	return (v > mx) ? mx : ((v > mn) ? v : mn);
}

/**
 * @brief Clamp a float value to the range 0.0f to 1.0f.
 *
 * Delegates to @c astc::clamp, so a NaN input is turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp1f(float v)
{
	const float lower = 0.0f;
	const float upper = 1.0f;
	return astc::clamp(v, lower, upper);
}

/**
 * @brief Clamp a float value to the range 0.0f to 255.0f.
 *
 * Delegates to @c astc::clamp, so a NaN input is turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp255f(float v)
{
	const float lower = 0.0f;
	const float upper = 255.0f;
	return astc::clamp(v, lower, upper);
}

/**
 * @brief Round a single-precision float down to a whole number.
 *
 * Thin wrapper that forwards to @c std::floor; the result stays a float.
 *
 * @param v   The value to round.
 *
 * @return The largest whole number that is not greater than @c v.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}

/**
 * @brief Convert a single-precision float to the nearest integer.
 *
 * Implemented as "add one half, then truncate". The integer conversion
 * truncates toward zero, so the result is a true round-to-nearest only for
 * non-negative inputs; for example -1.4f yields 0.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	return static_cast<int>(v + 0.5f);
}

/**
 * @brief Convert a single-precision float to an integer by dropping the
 *        fractional part.
 *
 * The conversion truncates toward zero, which equals rounding down only for
 * non-negative inputs; for example -1.9f yields -1.
 *
 * @param v   The value to convert.
 *
 * @return The truncated value.
 */
static inline int flt2int_rd(float v)
{
	return static_cast<int>(v);
}

/**
 * @brief Count the set bits in a 64-bit value.
 *
 * Uses the @c _mm_popcnt_u64 intrinsic when @c ASTCENC_POPCNT is enabled, and
 * a portable bit-parallel reduction otherwise.
 *
 * @param v   The value to population count.
 *
 * @return The number of 1 bits.
 */
static inline int popcount(uint64_t v)
{
#if ASTCENC_POPCNT >= 1
	return static_cast<int>(_mm_popcnt_u64(v));
#else
	const uint64_t pair_mask   = 0x5555555555555555ULL;
	const uint64_t nibble_mask = 0x3333333333333333ULL;
	const uint64_t byte_mask   = 0x0F0F0F0F0F0F0F0FULL;
	const uint64_t byte_ones   = 0x0101010101010101ULL;

	// Per-2-bit counts, then per-4-bit counts, then per-byte counts.
	uint64_t counts = v - ((v >> 1) & pair_mask);
	counts = (counts & nibble_mask) + ((counts >> 2) & nibble_mask);
	counts = (counts + (counts >> 4)) & byte_mask;

	// The multiply sums every byte count into the top byte.
	return static_cast<int>((counts * byte_ones) >> 56);
#endif
}

/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * This implementation is not an approximation; it divides by the result of
 * @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float root = std::sqrt(v);
	return 1.0f / root;
}

/**
 * @brief Compute sqrt(val).
 *
 * This implementation is not an approximation; it forwards to @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}

/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Works directly on the IEEE 754 binary32 bit pattern: the 8-bit exponent
 * field is read and rebased, and then overwritten so that the returned value
 * has the same sign and mantissa bits as @c v but a magnitude in [0.5, 1).
 * The result satisfies <tt>v == mantissa * 2^expo</tt> for normal values.
 *
 * No special handling is applied to zero, denormal, infinite or NaN inputs;
 * their exponent field is rebased and replaced like any other (e.g. 0.0f
 * returns 0.5f with an exponent of -126).
 *
 * The bit pattern is accessed through @c std::memcpy rather than a union, as
 * reading an inactive union member is undefined behaviour in C++.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Bits 30:23 hold the biased exponent. Convert to int before subtracting
	// so small exponents give a negative result without unsigned wraparound.
	*expo = static_cast<int>((bits >> 23) & 0xFF) - 126;

	// Keep sign and mantissa bits; force the biased exponent to 126 (2^-1).
	bits = (bits & 0x807fffff) | 0x3f000000;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}

/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers to
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * Only the declaration is provided here; the definition lives outside this
 * header.
 *
 * @param state The two-element state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * Only the declaration is provided here; the definition lives outside this
 * header.
 *
 * @param state The two-element state structure to use/update.
 *
 * @return The next 64-bit value in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}

/* ============================================================================
  Softfloat library with fp32 and fp16 conversion functionality.

  These software conversion routines are only declared when neither F16C nor
  NEON support is enabled. Their definitions live outside this header.
============================================================================ */
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
	/* float_to_sf16 narrows a float to a 16-bit result; sf16_to_float widens a
	   16-bit input back to a float.
	   NOTE(review): presumably the uint16_t carries the raw fp16 bit pattern -
	   confirm against the definitions. */
	uint16_t float_to_sf16(float val);
	float sf16_to_float(uint16_t val);
#endif

/*********************************
  Vector library
*********************************/
#include "astcenc_vecmathlib.h"

/*********************************
  Declaration of line types
*********************************/
// Parametric line, 2D: the line is given by line = a + b * t, so a is the
// point at t = 0 and b is the step taken per unit of t.
// NOTE(review): both members are full vfloat4 values even for the 2D case;
// which lanes are meaningful is not visible in this header.

struct line2
{
	vfloat4 a;
	vfloat4 b;
};

// Parametric line, 3D. Same member layout as line2; presumably the same
// a + b * t parameterization - confirm at the use sites.
struct line3
{
	vfloat4 a;
	vfloat4 b;
};

// Same member layout as line2 and line3; presumably the 4D parametric line -
// confirm at the use sites.
struct line4
{
	vfloat4 a;
	vfloat4 b;
};


// NOTE(review): the three processed_lineN structs share one layout of three
// vfloat4 members (amod, bs, bis). Their meaning is not visible in this
// header; presumably they hold terms precomputed from the matching lineN -
// confirm against the code that fills them in.
struct processed_line2
{
	vfloat4 amod;
	vfloat4 bs;
	vfloat4 bis;
};

// Same layout as processed_line2; presumably the counterpart for line3.
struct processed_line3
{
	vfloat4 amod;
	vfloat4 bs;
	vfloat4 bis;
};

// Same layout as processed_line2; presumably the counterpart for line4.
struct processed_line4
{
	vfloat4 amod;
	vfloat4 bs;
	vfloat4 bis;
};

#endif
Update astc [ci build] 2021-06-01 23:43:28 +08:00			`// SPDX-License-Identifier: Apache-2.0`
			`// ----------------------------------------------------------------------------`
			`// Copyright 2011-2021 Arm Limited`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License"); you may not`
			`// use this file except in compliance with the License. You may obtain a copy`
			`// of the License at:`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`// License for the specific language governing permissions and limitations`
			`// under the License.`
			`// ----------------------------------------------------------------------------`

			`/*`
			`* This module implements a variety of mathematical data types and library`
			`* functions used by the codec.`
			`*/`

			`#ifndef ASTC_MATHLIB_H_INCLUDED`
			`#define ASTC_MATHLIB_H_INCLUDED`

			`#include <cassert>`
			`#include <cstdint>`
			`#include <cmath>`

			`#ifndef ASTCENC_POPCNT`
			`#if defined(__POPCNT__)`
			`#define ASTCENC_POPCNT 1`
			`#else`
			`#define ASTCENC_POPCNT 0`
			`#endif`
			`#endif`

			`#ifndef ASTCENC_F16C`
			`#if defined(__F16C__)`
			`#define ASTCENC_F16C 1`
			`#else`
			`#define ASTCENC_F16C 0`
			`#endif`
			`#endif`

			`#ifndef ASTCENC_SSE`
			`#if defined(__SSE4_2__)`
			`#define ASTCENC_SSE 42`
			`#elif defined(__SSE4_1__)`
			`#define ASTCENC_SSE 41`
			`#elif defined(__SSE3__)`
			`#define ASTCENC_SSE 30`
			`#elif defined(__SSE2__)`
			`#define ASTCENC_SSE 20`
			`#else`
			`#define ASTCENC_SSE 0`
			`#endif`
			`#endif`

			`#ifndef ASTCENC_AVX`
			`#if defined(__AVX2__)`
			`#define ASTCENC_AVX 2`
			`#elif defined(__AVX__)`
			`#define ASTCENC_AVX 1`
			`#else`
			`#define ASTCENC_AVX 0`
			`#endif`
			`#endif`

			`#ifndef ASTCENC_NEON`
Update astcenc to 3.3 2021-11-11 18:43:05 +08:00			`#if defined(__aarch64__)`
Update astc [ci build] 2021-06-01 23:43:28 +08:00			`#define ASTCENC_NEON 1`
			`#else`
			`#define ASTCENC_NEON 0`
			`#endif`
			`#endif`

			`#if ASTCENC_AVX`
			`#define ASTCENC_VECALIGN 32`
			`#else`
			`#define ASTCENC_VECALIGN 16`
			`#endif`

			`#if ASTCENC_SSE != 0 \|\| ASTCENC_AVX != 0 \|\| ASTCENC_POPCNT != 0`
			`#include <immintrin.h>`
			`#endif`

			`/* ============================================================================`
			`Fast math library; note that many of the higher-order functions in this set`
			`use approximations which are less accurate, but faster, than <cmath> standard`
			`library equivalents.`

			`Note: Many of these are not necessarily faster than simple C versions when`
			`used on a single scalar value, but are included for testing purposes as most`
			`have an option based on SSE intrinsics and therefore provide an obvious route`
			`to future vectorization.`
			`============================================================================ */`

			`// Union for manipulation of float bit patterns`
			`typedef union`
			`{`
			`uint32_t u;`
			`int32_t s;`
			`float f;`
			`} if32;`

			`// These are namespaced to avoid colliding with C standard library functions.`
			`namespace astc`
			`{`

			`static const float PI = 3.14159265358979323846f;`
			`static const float PI_OVER_TWO = 1.57079632679489661923f;`

			`/**`
			`* @brief SP float absolute value.`
			`*`
			`* @param v The value to make absolute.`
			`*`
			`* @return The absolute value.`
			`*/`
			`static inline float fabs(float v)`
			`{`
			`return std::fabs(v);`
			`}`

			`/**`
			`* @brief Test if a float value is a nan.`
			`*`
			`* @param v The value test.`
			`*`
			`* @return Zero is not a NaN, non-zero otherwise.`
			`*/`
			`static inline bool isnan(float v)`
			`{`
			`return v != v;`
			`}`

			`/**`
			`* @brief Return the minimum of two values.`
			`*`
			`* For floats, NaNs are turned into @c q.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`*`
			`* @return The smallest value.`
			`*/`
			`template<typename T>`
			`static inline T min(T p, T q)`
			`{`
			`return p < q ? p : q;`
			`}`

			`/**`
			`* @brief Return the minimum of three values.`
			`*`
			`* For floats, NaNs are turned into @c r.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`* @param r The third value to compare.`
			`*`
			`* @return The smallest value.`
			`*/`
			`template<typename T>`
			`static inline T min(T p, T q, T r)`
			`{`
			`return min(min(p, q), r);`
			`}`

			`/**`
			`* @brief Return the minimum of four values.`
			`*`
			`* For floats, NaNs are turned into @c s.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`* @param r The third value to compare.`
			`* @param s The fourth value to compare.`
			`*`
			`* @return The smallest value.`
			`*/`
			`template<typename T>`
			`static inline T min(T p, T q, T r, T s)`
			`{`
			`return min(min(p, q), min(r, s));`
			`}`

			`/**`
			`* @brief Return the maximum of two values.`
			`*`
			`* For floats, NaNs are turned into @c q.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`*`
			`* @return The largest value.`
			`*/`
			`template<typename T>`
			`static inline T max(T p, T q)`
			`{`
			`return p > q ? p : q;`
			`}`

			`/**`
			`* @brief Return the maximum of three values.`
			`*`
			`* For floats, NaNs are turned into @c r.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`* @param r The third value to compare.`
			`*`
			`* @return The largest value.`
			`*/`
			`template<typename T>`
			`static inline T max(T p, T q, T r)`
			`{`
			`return max(max(p, q), r);`
			`}`

			`/**`
			`* @brief Return the maximum of four values.`
			`*`
			`* For floats, NaNs are turned into @c s.`
			`*`
			`* @param p The first value to compare.`
			`* @param q The second value to compare.`
			`* @param r The third value to compare.`
			`* @param s The fourth value to compare.`
			`*`
			`* @return The largest value.`
			`*/`
			`template<typename T>`
			`static inline T max(T p, T q, T r, T s)`
			`{`
			`return max(max(p, q), max(r, s));`
			`}`

			`/**`
			`* @brief Clamp a value value between @c mn and @c mx.`
			`*`
			`* For floats, NaNs are turned into @c mn.`
			`*`
			`* @param v The value to clamp.`
			`* @param mn The min value (inclusive).`
			`* @param mx The max value (inclusive).`
			`*`
			`* @return The clamped value.`
			`*/`
			`template<typename T>`
			`inline T clamp(T v, T mn, T mx)`
			`{`
			`// Do not reorder; correct NaN handling relies on the fact that comparison`
			`// with NaN returns false and will fall-though to the "min" value.`
			`if (v > mx) return mx;`
			`if (v > mn) return v;`
			`return mn;`
			`}`

			`/**`
			`* @brief Clamp a float value between 0.0f and 1.0f.`
			`*`
			`* NaNs are turned into 0.0f.`
			`*`
			`* @param v The value to clamp.`
			`*`
			`* @return The clamped value.`
			`*/`
			`static inline float clamp1f(float v)`
			`{`
			`return astc::clamp(v, 0.0f, 1.0f);`
			`}`

			`/**`
			`* @brief Clamp a float value between 0.0f and 255.0f.`
			`*`
			`* NaNs are turned into 0.0f.`
			`*`
			`* @param v The value to clamp.`
			`*`
			`* @return The clamped value.`
			`*/`
			`static inline float clamp255f(float v)`
			`{`
			`return astc::clamp(v, 0.0f, 255.0f);`
			`}`

			`/**`
			`* @brief SP float round-down.`
			`*`
			`* @param v The value to round.`
			`*`
			`* @return The rounded value.`
			`*/`
			`static inline float flt_rd(float v)`
			`{`
			`return std::floor(v);`
			`}`

			`/**`
			`* @brief SP float round-to-nearest and convert to integer.`
			`*`
			`* @param v The value to round.`
			`*`
			`* @return The rounded value.`
			`*/`
			`static inline int flt2int_rtn(float v)`
			`{`

			`return (int)(v + 0.5f);`
			`}`

			`/**`
			`* @brief SP float round down and convert to integer.`
			`*`
			`* @param v The value to round.`
			`*`
			`* @return The rounded value.`
			`*/`
			`static inline int flt2int_rd(float v)`
			`{`
			`return (int)(v);`
			`}`

			`/**`
			`* @brief Population bit count.`
			`*`
			`* @param v The value to population count.`
			`*`
			`* @return The number of 1 bits.`
			`*/`
			`static inline int popcount(uint64_t v)`
			`{`
			`#if ASTCENC_POPCNT >= 1`
			`return (int)_mm_popcnt_u64(v);`
			`#else`
			`uint64_t mask1 = 0x5555555555555555ULL;`
			`uint64_t mask2 = 0x3333333333333333ULL;`
			`uint64_t mask3 = 0x0F0F0F0F0F0F0F0FULL;`
			`v -= (v >> 1) & mask1;`
			`v = (v & mask2) + ((v >> 2) & mask2);`
			`v += v >> 4;`
			`v &= mask3;`
			`v *= 0x0101010101010101ULL;`
			`v >>= 56;`
			`return (int)v;`
			`#endif`
			`}`

			`/**`
			`* @brief Fast approximation of 1.0 / sqrt(val).`
			`*`
			`* @param v The input value.`
			`*`
			`* @return The approximated result.`
			`*/`
			`static inline float rsqrt(float v)`
			`{`
			`return 1.0f / std::sqrt(v);`
			`}`

			`/**`
			`* @brief Fast approximation of sqrt(val).`
			`*`
			`* @param v The input value.`
			`*`
			`* @return The approximated result.`
			`*/`
			`static inline float sqrt(float v)`
			`{`
			`return std::sqrt(v);`
			`}`

			`/**`
			`* @brief Extract mantissa and exponent of a float value.`
			`*`
			`* @param v The input value.`
			`* @param[out] expo The output exponent.`
			`*`
			`* @return The mantissa.`
			`*/`
			`static inline float frexp(float v, int* expo)`
			`{`
			`if32 p;`
			`p.f = v;`
			`*expo = ((p.u >> 23) & 0xFF) - 126;`
			`p.u = (p.u & 0x807fffff) \| 0x3f000000;`
			`return p.f;`
			`}`

			`/**`
			`* @brief Initialize the seed structure for a random number generator.`
			`*`
			`* Important note: For the purposes of ASTC we want sets of random numbers to`
			`* use the codec, but we want the same seed value across instances and threads`
			`* to ensure that image output is stable across compressor runs and across`
			`* platforms. Every PRNG created by this call will therefore return the same`
			`* sequence of values ...`
			`*`
			`* @param state The state structure to initialize.`
			`*/`
			`void rand_init(uint64_t state[2]);`

			`/**`
			`* @brief Return the next random number from the generator.`
			`*`
			`* This RNG is an implementation of the "xoroshoro-128+ 1.0" PRNG, based on the`
			`* public-domain implementation given by David Blackman & Sebastiano Vigna at`
			`* http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c`
			`*`
			`* @param state The state structure to use/update.`
			`*/`
			`uint64_t rand(uint64_t state[2]);`

			`}`

			`/* ============================================================================`
			`Softfloat library with fp32 and fp16 conversion functionality.`
			`============================================================================ */`
Add decompress astc parallel support [ci build] a. Define macro ASTC_ENABLE_PARALLEL_DECOMPRESS to enable. 2021-07-02 00:18:02 +08:00			`#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)`
Update astc [ci build] 2021-06-01 23:43:28 +08:00			`/* narrowing float->float conversions */`
			`uint16_t float_to_sf16(float val);`
			`float sf16_to_float(uint16_t val);`
			`#endif`

			`/*********************************`
			`Vector library`
			`*********************************/`
			`#include "astcenc_vecmathlib.h"`

			`/*********************************`
			`Declaration of line types`
			`*********************************/`
			`// parametric line, 2D: The line is given by line = a + b * t.`

			`struct line2`
			`{`
			`vfloat4 a;`
			`vfloat4 b;`
			`};`

			`// parametric line, 3D`
			`struct line3`
			`{`
			`vfloat4 a;`
			`vfloat4 b;`
			`};`

			`struct line4`
			`{`
			`vfloat4 a;`
			`vfloat4 b;`
			`};`


			`struct processed_line2`
			`{`
			`vfloat4 amod;`
			`vfloat4 bs;`
			`vfloat4 bis;`
			`};`

			`struct processed_line3`
			`{`
			`vfloat4 amod;`
			`vfloat4 bs;`
			`vfloat4 bis;`
			`};`

			`struct processed_line4`
			`{`
			`vfloat4 amod;`
			`vfloat4 bs;`
			`vfloat4 bis;`
			`};`

			`#endif`