mirror of https://github.com/axmolengine/axmol.git
2010 lines
61 KiB
C++
2010 lines
61 KiB
C++
// ----------------------------------------------------------------------------
|
|
// This confidential and proprietary software may be used only as authorised
|
|
// by a licensing agreement from Arm Limited.
|
|
// (C) COPYRIGHT 2011-2020 Arm Limited, ALL RIGHTS RESERVED
|
|
// The entire notice above must be reproduced on all authorised copies and
|
|
// copies may only be made to the extent permitted by a licensing agreement
|
|
// from Arm Limited.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/**
|
|
* @brief Functions for computing color endpoints and texel weights.
|
|
*/
|
|
|
|
#include "astc_codec_internals.h"
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
#include <stdio.h>
|
|
#endif
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
|
|
#include <fenv.h>
|
|
#endif
|
|
|
|
static void compute_endpoints_and_ideal_weights_1_component(int xdim, int ydim, int zdim,
|
|
const partition_info * pt, const imageblock * blk,
|
|
const error_weight_block * ewb, endpoints_and_weights * ei,
|
|
int component)
|
|
{
|
|
int i;
|
|
|
|
int partition_count = pt->partition_count;
|
|
ei->ep.partition_count = partition_count;
|
|
|
|
float lowvalues[4], highvalues[4];
|
|
float partition_error_scale[4];
|
|
float linelengths_rcp[4];
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
const float *error_weights;
|
|
switch (component)
|
|
{
|
|
case 0:
|
|
error_weights = ewb->texel_weight_r;
|
|
break;
|
|
case 1:
|
|
error_weights = ewb->texel_weight_g;
|
|
break;
|
|
case 2:
|
|
error_weights = ewb->texel_weight_b;
|
|
break;
|
|
case 3:
|
|
error_weights = ewb->texel_weight_a;
|
|
break;
|
|
default:
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lowvalues[i] = 1e10;
|
|
highvalues[i] = -1e10;
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (error_weights[i] > 1e-10)
|
|
{
|
|
float value = blk->work_data[4 * i + component];
|
|
int partition = pt->partition_of_texel[i];
|
|
if (value < lowvalues[partition])
|
|
lowvalues[partition] = value;
|
|
if (value > highvalues[partition])
|
|
highvalues[partition] = value;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float diff = highvalues[i] - lowvalues[i];
|
|
if (diff < 0)
|
|
{
|
|
lowvalues[i] = 0;
|
|
highvalues[i] = 0;
|
|
}
|
|
if (diff < 1e-7f)
|
|
diff = 1e-7f;
|
|
partition_error_scale[i] = diff * diff;
|
|
linelengths_rcp[i] = 1.0f / diff;
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
float value = blk->work_data[4 * i + component];
|
|
int partition = pt->partition_of_texel[i];
|
|
value -= lowvalues[partition];
|
|
value *= linelengths_rcp[partition];
|
|
if (value > 1.0f)
|
|
value = 1.0f;
|
|
else if (!(value > 0.0f))
|
|
value = 0.0f;
|
|
|
|
ei->weights[i] = value;
|
|
ei->weight_error_scale[i] = partition_error_scale[partition] * error_weights[i];
|
|
if (astc_isnan(ei->weight_error_scale[i]))
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min);
|
|
ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max);
|
|
switch (component)
|
|
{
|
|
case 0: // red/x
|
|
ei->ep.endpt0[i].x = lowvalues[i];
|
|
ei->ep.endpt1[i].x = highvalues[i];
|
|
break;
|
|
case 1: // green/y
|
|
ei->ep.endpt0[i].y = lowvalues[i];
|
|
ei->ep.endpt1[i].y = highvalues[i];
|
|
break;
|
|
case 2: // blue/z
|
|
ei->ep.endpt0[i].z = lowvalues[i];
|
|
ei->ep.endpt1[i].z = highvalues[i];
|
|
break;
|
|
case 3: // alpha/w
|
|
ei->ep.endpt0[i].w = lowvalues[i];
|
|
ei->ep.endpt1[i].w = highvalues[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
// print all the data that this function computes.
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s: %dx%dx%d texels, %d partitions, component=%d\n", __func__, xdim, ydim, zdim, partition_count, component);
|
|
printf("Endpoints:\n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low: <%g> => <%g %g %g %g>\n", i, lowvalues[i], ei->ep.endpt0[i].x, ei->ep.endpt0[i].y, ei->ep.endpt0[i].z, ei->ep.endpt0[i].w);
|
|
printf("%d High: <%g> => <%g %g %g %g>\n", i, highvalues[i], ei->ep.endpt1[i].x, ei->ep.endpt1[i].y, ei->ep.endpt1[i].z, ei->ep.endpt1[i].w);
|
|
}
|
|
printf("Ideal-weights:\n");
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
printf("%3d <%2d %2d %2d>=> %g (weight=%g)\n", i, i % xdim, (i / xdim) % ydim, i / (xdim * ydim), ei->weights[i], ei->weight_error_scale[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void compute_endpoints_and_ideal_weights_2_components(int xdim, int ydim, int zdim, const partition_info * pt,
|
|
const imageblock * blk, const error_weight_block * ewb,
|
|
endpoints_and_weights * ei, int component1, int component2)
|
|
{
|
|
int i;
|
|
|
|
int partition_count = pt->partition_count;
|
|
ei->ep.partition_count = partition_count;
|
|
|
|
float4 error_weightings[4];
|
|
float4 color_scalefactors[4];
|
|
|
|
float2 scalefactors[4];
|
|
|
|
const float *error_weights;
|
|
if (component1 == 0 && component2 == 1)
|
|
error_weights = ewb->texel_weight_rg;
|
|
else if (component1 == 0 && component2 == 2)
|
|
error_weights = ewb->texel_weight_rb;
|
|
else if (component1 == 1 && component2 == 2)
|
|
error_weights = ewb->texel_weight_gb;
|
|
else
|
|
{
|
|
error_weights = ewb->texel_weight_rg;
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, color_scalefactors);
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float s1 = 0, s2 = 0;
|
|
switch (component1)
|
|
{
|
|
case 0:
|
|
s1 = color_scalefactors[i].x;
|
|
break;
|
|
case 1:
|
|
s1 = color_scalefactors[i].y;
|
|
break;
|
|
case 2:
|
|
s1 = color_scalefactors[i].z;
|
|
break;
|
|
case 3:
|
|
s1 = color_scalefactors[i].w;
|
|
break;
|
|
}
|
|
|
|
switch (component2)
|
|
{
|
|
case 0:
|
|
s2 = color_scalefactors[i].x;
|
|
break;
|
|
case 1:
|
|
s2 = color_scalefactors[i].y;
|
|
break;
|
|
case 2:
|
|
s2 = color_scalefactors[i].z;
|
|
break;
|
|
case 3:
|
|
s2 = color_scalefactors[i].w;
|
|
break;
|
|
}
|
|
scalefactors[i] = normalize(float2(s1, s2)) * 1.41421356f;
|
|
}
|
|
|
|
float lowparam[4], highparam[4];
|
|
|
|
float2 averages[4];
|
|
float2 directions[4];
|
|
|
|
line2 lines[4];
|
|
float scale[4];
|
|
float length_squared[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lowparam[i] = 1e10;
|
|
highparam[i] = -1e10;
|
|
}
|
|
|
|
compute_averages_and_directions_2_components(pt, blk, ewb, scalefactors, component1, component2, averages, directions);
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float2 egv = directions[i];
|
|
if (egv.x + egv.y < 0.0f)
|
|
directions[i] = float2(0, 0) - egv;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lines[i].a = averages[i];
|
|
if (dot(directions[i], directions[i]) == 0.0f)
|
|
lines[i].b = normalize(float2(1, 1));
|
|
else
|
|
lines[i].b = normalize(directions[i]);
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (error_weights[i] > 1e-10)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float2 point = float2(blk->work_data[4 * i + component1], blk->work_data[4 * i + component2]) * scalefactors[partition];
|
|
line2 l = lines[partition];
|
|
float param = dot(point - l.a, l.b);
|
|
ei->weights[i] = param;
|
|
if (param < lowparam[partition])
|
|
lowparam[partition] = param;
|
|
if (param > highparam[partition])
|
|
highparam[partition] = param;
|
|
}
|
|
else
|
|
{
|
|
ei->weights[i] = -1e38f;
|
|
}
|
|
}
|
|
|
|
float2 lowvalues[4];
|
|
float2 highvalues[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float length = highparam[i] - lowparam[i];
|
|
if (length < 0) // case for when none of the texels had any weight
|
|
{
|
|
lowparam[i] = 0.0f;
|
|
highparam[i] = 1e-7f;
|
|
}
|
|
|
|
// it is possible for a uniform-color partition to produce length=0; this
|
|
// causes NaN-production and NaN-propagation later on. Set length to
|
|
// a small value to avoid this problem.
|
|
if (length < 1e-7f)
|
|
length = 1e-7f;
|
|
|
|
length_squared[i] = length * length;
|
|
scale[i] = 1.0f / length;
|
|
|
|
float2 ep0 = lines[i].a + lines[i].b * lowparam[i];
|
|
float2 ep1 = lines[i].a + lines[i].b * highparam[i];
|
|
|
|
ep0 = ep0 / scalefactors[i];
|
|
ep1 = ep1 / scalefactors[i];
|
|
|
|
lowvalues[i] = ep0;
|
|
highvalues[i] = ep1;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min);
|
|
ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max);
|
|
|
|
float2 ep0 = lowvalues[i];
|
|
float2 ep1 = highvalues[i];
|
|
|
|
switch (component1)
|
|
{
|
|
case 0:
|
|
ei->ep.endpt0[i].x = ep0.x;
|
|
ei->ep.endpt1[i].x = ep1.x;
|
|
break;
|
|
case 1:
|
|
ei->ep.endpt0[i].y = ep0.x;
|
|
ei->ep.endpt1[i].y = ep1.x;
|
|
break;
|
|
case 2:
|
|
ei->ep.endpt0[i].z = ep0.x;
|
|
ei->ep.endpt1[i].z = ep1.x;
|
|
break;
|
|
case 3:
|
|
ei->ep.endpt0[i].w = ep0.x;
|
|
ei->ep.endpt1[i].w = ep1.x;
|
|
break;
|
|
}
|
|
|
|
switch (component2)
|
|
{
|
|
case 0:
|
|
ei->ep.endpt0[i].x = ep0.y;
|
|
ei->ep.endpt1[i].x = ep1.y;
|
|
break;
|
|
case 1:
|
|
ei->ep.endpt0[i].y = ep0.y;
|
|
ei->ep.endpt1[i].y = ep1.y;
|
|
break;
|
|
case 2:
|
|
ei->ep.endpt0[i].z = ep0.y;
|
|
ei->ep.endpt1[i].z = ep1.y;
|
|
break;
|
|
case 3:
|
|
ei->ep.endpt0[i].w = ep0.y;
|
|
ei->ep.endpt1[i].w = ep1.y;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float idx = (ei->weights[i] - lowparam[partition]) * scale[partition];
|
|
if (idx > 1.0f)
|
|
idx = 1.0f;
|
|
else if (!(idx > 0.0f))
|
|
idx = 0.0f;
|
|
|
|
ei->weights[i] = idx;
|
|
ei->weight_error_scale[i] = length_squared[partition] * error_weights[i];
|
|
if (astc_isnan(ei->weight_error_scale[i]))
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
}
|
|
|
|
// print all the data that this function computes.
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s: %dx%dx%d texels, %d partitions, component1=%d, component2=%d\n", __func__, xdim, ydim, zdim, partition_count, component1, component2);
|
|
printf("Endpoints:\n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low: <%g %g> => <%g %g %g %g>\n", i, lowvalues[i].x, lowvalues[i].y, ei->ep.endpt0[i].x, ei->ep.endpt0[i].y, ei->ep.endpt0[i].z, ei->ep.endpt0[i].w);
|
|
printf("%d High: <%g %g> => <%g %g %g %g>\n", i, highvalues[i].x, highvalues[i].y, ei->ep.endpt1[i].x, ei->ep.endpt1[i].y, ei->ep.endpt1[i].z, ei->ep.endpt1[i].w);
|
|
}
|
|
printf("Ideal-weights:\n");
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
printf("%3d <%2d %2d %2d>=> %g (weight=%g)\n", i, i % xdim, (i / xdim) % ydim, i / (xdim * ydim), ei->weights[i], ei->weight_error_scale[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void compute_endpoints_and_ideal_weights_3_components(int xdim, int ydim, int zdim, const partition_info * pt,
|
|
const imageblock * blk, const error_weight_block * ewb,
|
|
endpoints_and_weights * ei, int component1, int component2, int component3)
|
|
{
|
|
int i;
|
|
|
|
int partition_count = pt->partition_count;
|
|
ei->ep.partition_count = partition_count;
|
|
|
|
float4 error_weightings[4];
|
|
float4 color_scalefactors[4];
|
|
|
|
float3 scalefactors[4];
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
const float *error_weights;
|
|
if (component1 == 1 && component2 == 2 && component3 == 3)
|
|
error_weights = ewb->texel_weight_gba;
|
|
else if (component1 == 0 && component2 == 2 && component3 == 3)
|
|
error_weights = ewb->texel_weight_rba;
|
|
else if (component1 == 0 && component2 == 1 && component3 == 3)
|
|
error_weights = ewb->texel_weight_rga;
|
|
else if (component1 == 0 && component2 == 1 && component3 == 2)
|
|
error_weights = ewb->texel_weight_rgb;
|
|
else
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
|
|
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, color_scalefactors);
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float s1 = 0, s2 = 0, s3 = 0;
|
|
switch (component1)
|
|
{
|
|
case 0:
|
|
s1 = color_scalefactors[i].x;
|
|
break;
|
|
case 1:
|
|
s1 = color_scalefactors[i].y;
|
|
break;
|
|
case 2:
|
|
s1 = color_scalefactors[i].z;
|
|
break;
|
|
case 3:
|
|
s1 = color_scalefactors[i].w;
|
|
break;
|
|
}
|
|
|
|
switch (component2)
|
|
{
|
|
case 0:
|
|
s2 = color_scalefactors[i].x;
|
|
break;
|
|
case 1:
|
|
s2 = color_scalefactors[i].y;
|
|
break;
|
|
case 2:
|
|
s2 = color_scalefactors[i].z;
|
|
break;
|
|
case 3:
|
|
s2 = color_scalefactors[i].w;
|
|
break;
|
|
}
|
|
|
|
switch (component3)
|
|
{
|
|
case 0:
|
|
s3 = color_scalefactors[i].x;
|
|
break;
|
|
case 1:
|
|
s3 = color_scalefactors[i].y;
|
|
break;
|
|
case 2:
|
|
s3 = color_scalefactors[i].z;
|
|
break;
|
|
case 3:
|
|
s3 = color_scalefactors[i].w;
|
|
break;
|
|
}
|
|
scalefactors[i] = normalize(float3(s1, s2, s3)) * 1.73205080f;
|
|
}
|
|
|
|
float lowparam[4], highparam[4];
|
|
|
|
float3 averages[4];
|
|
float3 directions[4];
|
|
|
|
line3 lines[4];
|
|
float scale[4];
|
|
float length_squared[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lowparam[i] = 1e10;
|
|
highparam[i] = -1e10;
|
|
}
|
|
|
|
compute_averages_and_directions_3_components(pt, blk, ewb, scalefactors, component1, component2, component3, averages, directions);
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float3 direc = directions[i];
|
|
if (direc.x + direc.y + direc.z < 0.0f)
|
|
directions[i] = float3(0, 0, 0) - direc;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lines[i].a = averages[i];
|
|
if (dot(directions[i], directions[i]) == 0.0f)
|
|
lines[i].b = normalize(float3(1, 1, 1));
|
|
else
|
|
lines[i].b = normalize(directions[i]);
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (error_weights[i] > 1e-10)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float3 point = float3(blk->work_data[4 * i + component1], blk->work_data[4 * i + component2], blk->work_data[4 * i + component3]) * scalefactors[partition];
|
|
line3 l = lines[partition];
|
|
float param = dot(point - l.a, l.b);
|
|
ei->weights[i] = param;
|
|
if (param < lowparam[partition])
|
|
lowparam[partition] = param;
|
|
if (param > highparam[partition])
|
|
highparam[partition] = param;
|
|
}
|
|
else
|
|
{
|
|
ei->weights[i] = -1e38f;
|
|
}
|
|
}
|
|
|
|
float3 lowvalues[4];
|
|
float3 highvalues[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float length = highparam[i] - lowparam[i];
|
|
if (length < 0) // case for when none of the texels had any weight
|
|
{
|
|
lowparam[i] = 0.0f;
|
|
highparam[i] = 1e-7f;
|
|
}
|
|
|
|
// it is possible for a uniform-color partition to produce length=0; this
|
|
// causes NaN-production and NaN-propagation later on. Set length to
|
|
// a small value to avoid this problem.
|
|
if (length < 1e-7f)
|
|
length = 1e-7f;
|
|
|
|
length_squared[i] = length * length;
|
|
scale[i] = 1.0f / length;
|
|
|
|
float3 ep0 = lines[i].a + lines[i].b * lowparam[i];
|
|
float3 ep1 = lines[i].a + lines[i].b * highparam[i];
|
|
|
|
ep0 = ep0 / scalefactors[i];
|
|
ep1 = ep1 / scalefactors[i];
|
|
|
|
|
|
lowvalues[i] = ep0;
|
|
highvalues[i] = ep1;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min);
|
|
ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max);
|
|
|
|
|
|
float3 ep0 = lowvalues[i];
|
|
float3 ep1 = highvalues[i];
|
|
|
|
switch (component1)
|
|
{
|
|
case 0:
|
|
ei->ep.endpt0[i].x = ep0.x;
|
|
ei->ep.endpt1[i].x = ep1.x;
|
|
break;
|
|
case 1:
|
|
ei->ep.endpt0[i].y = ep0.x;
|
|
ei->ep.endpt1[i].y = ep1.x;
|
|
break;
|
|
case 2:
|
|
ei->ep.endpt0[i].z = ep0.x;
|
|
ei->ep.endpt1[i].z = ep1.x;
|
|
break;
|
|
case 3:
|
|
ei->ep.endpt0[i].w = ep0.x;
|
|
ei->ep.endpt1[i].w = ep1.x;
|
|
break;
|
|
}
|
|
|
|
switch (component2)
|
|
{
|
|
case 0:
|
|
ei->ep.endpt0[i].x = ep0.y;
|
|
ei->ep.endpt1[i].x = ep1.y;
|
|
break;
|
|
case 1:
|
|
ei->ep.endpt0[i].y = ep0.y;
|
|
ei->ep.endpt1[i].y = ep1.y;
|
|
break;
|
|
case 2:
|
|
ei->ep.endpt0[i].z = ep0.y;
|
|
ei->ep.endpt1[i].z = ep1.y;
|
|
break;
|
|
case 3:
|
|
ei->ep.endpt0[i].w = ep0.y;
|
|
ei->ep.endpt1[i].w = ep1.y;
|
|
break;
|
|
}
|
|
|
|
switch (component3)
|
|
{
|
|
case 0:
|
|
ei->ep.endpt0[i].x = ep0.z;
|
|
ei->ep.endpt1[i].x = ep1.z;
|
|
break;
|
|
case 1:
|
|
ei->ep.endpt0[i].y = ep0.z;
|
|
ei->ep.endpt1[i].y = ep1.z;
|
|
break;
|
|
case 2:
|
|
ei->ep.endpt0[i].z = ep0.z;
|
|
ei->ep.endpt1[i].z = ep1.z;
|
|
break;
|
|
case 3:
|
|
ei->ep.endpt0[i].w = ep0.z;
|
|
ei->ep.endpt1[i].w = ep1.z;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float idx = (ei->weights[i] - lowparam[partition]) * scale[partition];
|
|
if (idx > 1.0f)
|
|
idx = 1.0f;
|
|
else if (!(idx > 0.0f))
|
|
idx = 0.0f;
|
|
|
|
ei->weights[i] = idx;
|
|
ei->weight_error_scale[i] = length_squared[partition] * error_weights[i];
|
|
if (astc_isnan(ei->weight_error_scale[i]))
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
}
|
|
|
|
// print all the data that this function computes.
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s: %dx%dx%d texels, %d partitions, component1=%d, component2=%d, component3=%d\n", __func__, xdim, ydim, zdim, partition_count, component1, component2, component3);
|
|
printf("Endpoints:\n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low: <%g %g %f> => <%g %g %g %g>\n", i, lowvalues[i].x, lowvalues[i].y, lowvalues[i].z, ei->ep.endpt0[i].x, ei->ep.endpt0[i].y, ei->ep.endpt0[i].z, ei->ep.endpt0[i].w);
|
|
printf("%d High: <%g %g %g> => <%g %g %g %g>\n", i, highvalues[i].x, highvalues[i].y, highvalues[i].z, ei->ep.endpt1[i].x, ei->ep.endpt1[i].y, ei->ep.endpt1[i].z, ei->ep.endpt1[i].w);
|
|
}
|
|
printf("Ideal-weights:\n");
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
printf("%3d <%2d %2d %2d>=> %g (weight=%g)\n", i, (i % xdim), (i / xdim) % ydim, i / (xdim * ydim), ei->weights[i], ei->weight_error_scale[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void compute_endpoints_and_ideal_weights_rgba(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, endpoints_and_weights * ei)
|
|
{
|
|
int i;
|
|
|
|
const float *error_weights = ewb->texel_weight;
|
|
|
|
int partition_count = pt->partition_count;
|
|
float lowparam[4], highparam[4];
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lowparam[i] = 1e10;
|
|
highparam[i] = -1e10;
|
|
}
|
|
|
|
float4 averages[4];
|
|
float4 directions_rgba[4];
|
|
float3 directions_gba[4];
|
|
float3 directions_rba[4];
|
|
float3 directions_rga[4];
|
|
float3 directions_rgb[4];
|
|
|
|
line4 lines[4];
|
|
|
|
float scale[4];
|
|
float length_squared[4];
|
|
|
|
float4 error_weightings[4];
|
|
float4 color_scalefactors[4];
|
|
float4 scalefactors[4];
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, color_scalefactors);
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
scalefactors[i] = normalize(color_scalefactors[i]) * 2.0f;
|
|
|
|
compute_averages_and_directions_rgba(pt, blk, ewb, scalefactors, averages, directions_rgba, directions_gba, directions_rba, directions_rga, directions_rgb);
|
|
|
|
// if the direction-vector ends up pointing from light to dark, FLIP IT!
|
|
// this will make the first endpoint the darkest one.
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float4 direc = directions_rgba[i];
|
|
if (direc.x + direc.y + direc.z < 0.0f)
|
|
directions_rgba[i] = float4(0, 0, 0, 0) - direc;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
lines[i].a = averages[i];
|
|
if (dot(directions_rgba[i], directions_rgba[i]) == 0.0f)
|
|
lines[i].b = normalize(float4(1, 1, 1, 1));
|
|
else
|
|
lines[i].b = normalize(directions_rgba[i]);
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("Direction-vector %d: <%f %f %f %f>\n", i, directions_rgba[i].x, directions_rgba[i].y, directions_rgba[i].z, directions_rgba[i].w);
|
|
printf("Line %d A: <%f %f %f %f>\n", i, lines[i].a.x, lines[i].a.y, lines[i].a.z, lines[i].a.w);
|
|
printf("Line %d B: <%f %f %f %f>\n", i, lines[i].b.x, lines[i].b.y, lines[i].b.z, lines[i].b.w);
|
|
printf("Scalefactors %d: <%f %f %f %f>\n", i, scalefactors[i].x, scalefactors[i].y, scalefactors[i].z, scalefactors[i].w);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (error_weights[i] > 1e-10)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
|
|
float4 point = float4(blk->work_data[4 * i], blk->work_data[4 * i + 1], blk->work_data[4 * i + 2], blk->work_data[4 * i + 3]) * scalefactors[partition];
|
|
line4 l = lines[partition];
|
|
|
|
float param = dot(point - l.a, l.b);
|
|
ei->weights[i] = param;
|
|
if (param < lowparam[partition])
|
|
lowparam[partition] = param;
|
|
if (param > highparam[partition])
|
|
highparam[partition] = param;
|
|
}
|
|
else
|
|
{
|
|
ei->weights[i] = -1e38f;
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
for (i = 0; i < partition_count; i++)
|
|
printf("Partition %d: Lowparam=%f Highparam=%f\n", i, lowparam[i], highparam[i]);
|
|
}
|
|
#endif
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
float length = highparam[i] - lowparam[i];
|
|
if (length < 0)
|
|
{
|
|
lowparam[i] = 0.0f;
|
|
highparam[i] = 1e-7f;
|
|
}
|
|
|
|
// it is possible for a uniform-color partition to produce length=0; this
|
|
// causes NaN-production and NaN-propagation later on. Set length to
|
|
// a small value to avoid this problem.
|
|
if (length < 1e-7f)
|
|
length = 1e-7f;
|
|
|
|
length_squared[i] = length * length;
|
|
scale[i] = 1.0f / length;
|
|
|
|
ei->ep.endpt0[i] = (lines[i].a + lines[i].b * lowparam[i]) / scalefactors[i];
|
|
ei->ep.endpt1[i] = (lines[i].a + lines[i].b * highparam[i]) / scalefactors[i];
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float idx = (ei->weights[i] - lowparam[partition]) * scale[partition];
|
|
if (idx > 1.0f)
|
|
idx = 1.0f;
|
|
else if (!(idx > 0.0f))
|
|
idx = 0.0f;
|
|
ei->weights[i] = idx;
|
|
ei->weight_error_scale[i] = error_weights[i] * length_squared[partition];
|
|
if (astc_isnan(ei->weight_error_scale[i]))
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
}
|
|
|
|
// print all the data that this function computes.
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s: %dx%dx%d texels, %d partitions\n", __func__, xdim, ydim, zdim, partition_count);
|
|
printf("Endpoints:\n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low: <%g %g %g %g>\n", i, ei->ep.endpt0[i].x, ei->ep.endpt0[i].y, ei->ep.endpt0[i].z, ei->ep.endpt0[i].w);
|
|
printf("%d High: <%g %g %g %g>\n", i, ei->ep.endpt1[i].x, ei->ep.endpt1[i].y, ei->ep.endpt1[i].z, ei->ep.endpt1[i].w);
|
|
}
|
|
printf("\nIdeal-weights:\n");
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
printf("%3d <%2d %2d %2d>=> %g (weight=%g)\n", i, i % xdim, (i / xdim) % ydim, i / (xdim * ydim), ei->weights[i], ei->weight_error_scale[i]);
|
|
}
|
|
printf("\n\n");
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
/*
	For a given partitioning, compute:
	* for each partition, the ideal endpoint colors; these define a color
	  line for the partition.
	* for each pixel, the ideal position of the pixel on its partition's
	  color line.
	* for each pixel, the length of the color line.

	These data allow us to assess the error introduced by removing and
	quantizing the per-pixel weights.
*/
|
|
void compute_endpoints_and_ideal_weights_1_plane(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, endpoints_and_weights * ei)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("%s: texels_per_block=%dx%dx%d\n\n", __func__, xdim, ydim, zdim);
|
|
#endif
|
|
|
|
int uses_alpha = imageblock_uses_alpha(blk);
|
|
if (uses_alpha)
|
|
{
|
|
compute_endpoints_and_ideal_weights_rgba(xdim, ydim, zdim, pt, blk, ewb, ei);
|
|
}
|
|
else
|
|
{
|
|
compute_endpoints_and_ideal_weights_3_components(xdim, ydim, zdim, pt, blk, ewb, ei, 0, 1, 2);
|
|
}
|
|
}
|
|
|
|
void compute_endpoints_and_ideal_weights_2_planes(int xdim, int ydim, int zdim, const partition_info * pt,
|
|
const imageblock * blk, const error_weight_block * ewb, int separate_component,
|
|
endpoints_and_weights * ei1, endpoints_and_weights * ei2)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("%s: texels_per_block=%dx%dx%d, separate_component=%d\n\n", __func__, xdim, ydim, zdim, separate_component);
|
|
#endif
|
|
|
|
int uses_alpha = imageblock_uses_alpha(blk);
|
|
switch (separate_component)
|
|
{
|
|
case 0: // separate weights for red
|
|
if (uses_alpha == 1)
|
|
compute_endpoints_and_ideal_weights_3_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 1, 2, 3);
|
|
else
|
|
compute_endpoints_and_ideal_weights_2_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 1, 2);
|
|
compute_endpoints_and_ideal_weights_1_component(xdim, ydim, zdim, pt, blk, ewb, ei2, 0);
|
|
break;
|
|
|
|
case 1: // separate weights for green
|
|
if (uses_alpha == 1)
|
|
compute_endpoints_and_ideal_weights_3_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 0, 2, 3);
|
|
else
|
|
compute_endpoints_and_ideal_weights_2_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 0, 2);
|
|
compute_endpoints_and_ideal_weights_1_component(xdim, ydim, zdim, pt, blk, ewb, ei2, 1);
|
|
break;
|
|
|
|
case 2: // separate weights for blue
|
|
if (uses_alpha == 1)
|
|
compute_endpoints_and_ideal_weights_3_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 0, 1, 3);
|
|
else
|
|
compute_endpoints_and_ideal_weights_2_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 0, 1);
|
|
compute_endpoints_and_ideal_weights_1_component(xdim, ydim, zdim, pt, blk, ewb, ei2, 2);
|
|
break;
|
|
|
|
case 3: // separate weights for alpha
|
|
if (uses_alpha == 0)
|
|
{
|
|
ASTC_CODEC_INTERNAL_ERROR();
|
|
}
|
|
compute_endpoints_and_ideal_weights_3_components(xdim, ydim, zdim, pt, blk, ewb, ei1, 0, 1, 2);
|
|
|
|
compute_endpoints_and_ideal_weights_1_component(xdim, ydim, zdim, pt, blk, ewb, ei2, 3);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
	After having computed ideal weights for the case where a weight exists for
	every texel, we want to compute the ideal weights for the case where weights
	exist only for some texels.

	We do this with a steepest-descent grid solver; this works as follows:

	* First, for each actual weight, perform a weighted averaging based on the
	  texels affected by the weight.
	* Then, set step size to <some initial value>.
	* Then, repeat:
		1: First, compute for each weight how much the error will change
		   if we change the weight by an infinitesimal amount.
		2: This produces a vector that points in the direction we should
		   step in. Normalize this vector.
		3: Perform a step.
		4: Check if the step actually improved the error. If it did, perform
		   another step in the same direction; repeat until the error no
		   longer improves. If the *first* step did not improve the error,
		   then we halve the step size.
		5: If the step size dropped below <some threshold value>,
		   then we quit, else we go back to #1.

	Subroutines:
	* one routine to apply a step and compute the step's effect on the error
	* one routine to compute the error change of an infinitesimal weight change

	Data structures needed:
	For every decimation pattern, we need:
	* For each weight, a list of <texel, weight> tuples that tell which texels
	  the weight influences.
	* For each texel, a list of <texel, weight> tuples that tell which weights
	  go into a given texel.
*/
|
|
|
|
float compute_value_of_texel_flt(int texel_to_get, const decimation_table * it, const float *weights)
|
|
{
|
|
const uint8_t *texel_weights = it->texel_weights[texel_to_get];
|
|
const float *texel_weights_float = it->texel_weights_float[texel_to_get];
|
|
|
|
return
|
|
(weights[texel_weights[0]] * texel_weights_float[0] + weights[texel_weights[1]] * texel_weights_float[1]) + (weights[texel_weights[2]] * texel_weights_float[2] + weights[texel_weights[3]] * texel_weights_float[3]);
|
|
}
|
|
|
|
static inline float compute_error_of_texel(const endpoints_and_weights * eai, int texel_to_get, const decimation_table * it, const float *weights)
|
|
{
|
|
float current_value = compute_value_of_texel_flt(texel_to_get, it, weights);
|
|
float valuedif = current_value - eai->weights[texel_to_get];
|
|
return valuedif * valuedif * eai->weight_error_scale[texel_to_get];
|
|
}
|
|
|
|
/*
	Helper function: given
	* for each texel, an ideal weight and an error-modifier; these are
	  contained in an endpoints_and_weights data structure,
	* a decimation_table data structure,
	* for each weight, its current value,
	compute the change to the overall error that results from adding N to
	the weight.
*/
|
|
void compute_two_error_changes_from_perturbing_weight_infill(const endpoints_and_weights * eai, const decimation_table * it,
|
|
float *infilled_weights, int weight_to_perturb,
|
|
float perturbation1, float perturbation2, float *res1, float *res2)
|
|
{
|
|
int num_weights = it->weight_num_texels[weight_to_perturb];
|
|
float error_change0 = 0.0f;
|
|
float error_change1 = 0.0f;
|
|
int i;
|
|
|
|
const uint8_t *weight_texel_ptr = it->weight_texel[weight_to_perturb];
|
|
const float *weights_ptr = it->weights_flt[weight_to_perturb];
|
|
for (i = num_weights - 1; i >= 0; i--)
|
|
{
|
|
uint8_t weight_texel = weight_texel_ptr[i];
|
|
float weights = weights_ptr[i];
|
|
|
|
float scale = eai->weight_error_scale[weight_texel] * weights;
|
|
float old_weight = infilled_weights[weight_texel];
|
|
float ideal_weight = eai->weights[weight_texel];
|
|
|
|
error_change0 += weights * scale;
|
|
error_change1 += (old_weight - ideal_weight) * scale;
|
|
}
|
|
|
|
*res1 = error_change0 * (perturbation1 * perturbation1 * (1.0f / (TEXEL_WEIGHT_SUM * TEXEL_WEIGHT_SUM))) + error_change1 * (perturbation1 * (2.0f / TEXEL_WEIGHT_SUM));
|
|
*res2 = error_change0 * (perturbation2 * perturbation2 * (1.0f / (TEXEL_WEIGHT_SUM * TEXEL_WEIGHT_SUM))) + error_change1 * (perturbation2 * (2.0f / TEXEL_WEIGHT_SUM));
|
|
}
|
|
|
|
float compute_error_of_weight_set(const endpoints_and_weights * eai, const decimation_table * it, const float *weights)
|
|
{
|
|
int i;
|
|
int texel_count = it->num_texels;
|
|
float error_summa = 0.0;
|
|
for (i = 0; i < texel_count; i++)
|
|
error_summa += compute_error_of_texel(eai, i, it, weights);
|
|
return error_summa;
|
|
}
|
|
|
|
/*
	Given a complete weight set and a decimation table, try to
	compute the optimal weight set (assuming infinite precision)
	for the selected decimation table.
*/
|
|
void compute_ideal_weights_for_decimation_table(const endpoints_and_weights * eai, const decimation_table * it, float *weight_set, float *weights)
|
|
{
|
|
int i, j, k;
|
|
|
|
int texels_per_block = it->num_texels;
|
|
int weight_count = it->num_weights;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
int blockdim = (int)floor(sqrt((float)it->num_texels) + 0.5f);
|
|
printf("%s : decimation from %d to %d weights\n\n", __func__, it->num_texels, it->num_weights);
|
|
printf("Input weight set:\n");
|
|
for (i = 0; i < it->num_texels; i++)
|
|
{
|
|
printf("%3d <%2d %2d> : %g\n", i, i % blockdim, i / blockdim, eai->weights[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
|
|
// perform a shortcut in the case of a complete decimation table
|
|
if (texels_per_block == weight_count)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("%s : no decimation actually needed: early-out\n\n", __func__);
|
|
#endif
|
|
|
|
for (i = 0; i < it->num_texels; i++)
|
|
{
|
|
int texel = it->weight_texel[i][0];
|
|
weight_set[i] = eai->weights[texel];
|
|
weights[i] = eai->weight_error_scale[texel];
|
|
}
|
|
return;
|
|
}
|
|
|
|
// if the shortcut is not available, we will instead compute a simple estimate
|
|
// and perform three rounds of refinement on that estimate.
|
|
float infilled_weights[MAX_TEXELS_PER_BLOCK];
|
|
|
|
// compute an initial average for each weight.
|
|
for (i = 0; i < weight_count; i++)
|
|
{
|
|
int texel_count = it->weight_num_texels[i];
|
|
|
|
float weight_weight = 1e-10f; // to avoid 0/0 later on
|
|
float initial_weight = 0.0f;
|
|
for (j = 0; j < texel_count; j++)
|
|
{
|
|
int texel = it->weight_texel[i][j];
|
|
float weight = it->weights_flt[i][j];
|
|
float contrib_weight = weight * eai->weight_error_scale[texel];
|
|
weight_weight += contrib_weight;
|
|
initial_weight += eai->weights[texel] * contrib_weight;
|
|
}
|
|
|
|
weights[i] = weight_weight;
|
|
weight_set[i] = initial_weight / weight_weight; // this is the 0/0 that is to be avoided.
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
float initial_weight_set[MAX_WEIGHTS_PER_BLOCK];
|
|
if (print_diagnostics)
|
|
{
|
|
// stash away the initial-weight estimates for later printing
|
|
for (i = 0; i < weight_count; i++)
|
|
initial_weight_set[i] = weight_set[i];
|
|
}
|
|
#endif
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
infilled_weights[i] = compute_value_of_texel_flt(i, it, weight_set);
|
|
}
|
|
|
|
const float stepsizes[2] = { 0.25f, 0.125f };
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
float stepsize = stepsizes[j];
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Pass %d, step=%f \n", j, stepsize);
|
|
#endif
|
|
|
|
for (i = 0; i < weight_count; i++)
|
|
{
|
|
float weight_val = weight_set[i];
|
|
float error_change_up, error_change_down;
|
|
compute_two_error_changes_from_perturbing_weight_infill(eai, it, infilled_weights, i, stepsize, -stepsize, &error_change_up, &error_change_down);
|
|
|
|
/*
|
|
assume that the error-change function behaves like a quadratic function in the interval examined,
|
|
with "error_change_up" and "error_change_down" defining the function at the endpoints
|
|
of the interval. Then, find the position where the function's derivative is zero.
|
|
|
|
The "fabs(b) >= a" check tests several conditions in one:
|
|
if a is negative, then the 2nd derivative of the function is negative;
|
|
in this case, f'(x)=0 will maximize error.
|
|
If fabs(b) > fabs(a), then f'(x)=0 will lie outside the interval altogether.
|
|
If a and b are both 0, then set step to 0;
|
|
otherwise, we end up computing 0/0, which produces a lethal NaN.
|
|
We can get an a=b=0 situation if an error weight is 0 in the wrong place.
|
|
*/
|
|
|
|
float step;
|
|
float a = (error_change_up + error_change_down) * 2.0f;
|
|
float b = error_change_down - error_change_up;
|
|
if (fabs(b) >= a)
|
|
{
|
|
if (a <= 0.0f)
|
|
{
|
|
if (error_change_up < error_change_down)
|
|
step = 1;
|
|
else if (error_change_up > error_change_down)
|
|
step = -1;
|
|
|
|
else
|
|
step = 0;
|
|
}
|
|
else
|
|
{
|
|
if (a < 1e-10f)
|
|
a = 1e-10f;
|
|
step = b / a;
|
|
if (step < -1.0f)
|
|
step = -1.0f;
|
|
else if (step > 1.0f)
|
|
step = 1.0f;
|
|
}
|
|
}
|
|
else
|
|
step = b / a;
|
|
|
|
step *= stepsize;
|
|
float new_weight_val = weight_val + step;
|
|
|
|
// update the weight
|
|
weight_set[i] = new_weight_val;
|
|
// update the infilled-weights
|
|
int num_weights = it->weight_num_texels[i];
|
|
float perturbation = (new_weight_val - weight_val) * (1.0f / TEXEL_WEIGHT_SUM);
|
|
const uint8_t *weight_texel_ptr = it->weight_texel[i];
|
|
const float *weights_ptr = it->weights_flt[i];
|
|
for (k = num_weights - 1; k >= 0; k--)
|
|
{
|
|
uint8_t weight_texel = weight_texel_ptr[k];
|
|
float weight_weight = weights_ptr[k];
|
|
infilled_weights[weight_texel] += perturbation * weight_weight;
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("\n");
|
|
#endif
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("Error weights, initial-estimates, final-results\n");
|
|
for (i = 0; i < weight_count; i++)
|
|
{
|
|
printf("%2d -> weight=%g, initial=%g final=%g\n", i, weights[i], initial_weight_set[i], weight_set[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
For a decimation table, try to compute an optimal weight set, assuming
|
|
that the weights are quantized and subject to a transfer function.
|
|
|
|
We do this as follows:
|
|
First, we take the initial weights and quantize them. This is our initial estimate.
|
|
Then, go through the weights one by one; try to perturb then up and down one weight at a
|
|
time; apply any perturbations that improve overall error
|
|
Repeat until we have made a complete processing pass over all weights without
|
|
triggering any perturbations *OR* we have run 4 full passes.
|
|
*/
|
|
void compute_ideal_quantized_weights_for_decimation_table(const endpoints_and_weights * eai,
|
|
const decimation_table * it,
|
|
float low_bound, float high_bound, const float *weight_set_in, float *weight_set_out, uint8_t * quantized_weight_set, int quantization_level)
|
|
{
|
|
int i;
|
|
int weight_count = it->num_weights;
|
|
int texels_per_block = it->num_texels;
|
|
|
|
const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quantization_level]);
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s : texels-per-block=%d, weights=%d, quantization-level=%d\n\n", __func__, texels_per_block, weight_count, quantization_level);
|
|
|
|
printf("Weight values before quantization:\n");
|
|
for (i = 0; i < weight_count; i++)
|
|
printf("%3d : %g\n", i, weight_set_in[i]);
|
|
|
|
printf("Low-bound: %f High-bound: %f\n", low_bound, high_bound);
|
|
}
|
|
#endif
|
|
|
|
// quantize the weight set using both the specified low/high bounds and the
|
|
// standard 0..1 weight bounds.
|
|
|
|
/*
|
|
TODO: WTF issue that we need to examine some time
|
|
*/
|
|
if (!((high_bound - low_bound) > 0.5f))
|
|
{
|
|
low_bound = 0.0f;
|
|
high_bound = 1.0f;
|
|
}
|
|
|
|
float rscale = high_bound - low_bound;
|
|
float scale = 1.0f / rscale;
|
|
|
|
// rescale the weights so that
|
|
// low_bound -> 0
|
|
// high_bound -> 1
|
|
// OK: first, subtract low_bound, then divide by (high_bound - low_bound)
|
|
for (i = 0; i < weight_count; i++)
|
|
weight_set_out[i] = (weight_set_in[i] - low_bound) * scale;
|
|
|
|
static const float quantization_step_table[12] = {
|
|
1.0f / 1.0f,
|
|
1.0f / 2.0f,
|
|
1.0f / 3.0f,
|
|
1.0f / 4.0f,
|
|
1.0f / 5.0f,
|
|
1.0f / 7.0f,
|
|
1.0f / 9.0f,
|
|
1.0f / 11.0f,
|
|
1.0f / 15.0f,
|
|
1.0f / 19.0f,
|
|
1.0f / 23.0f,
|
|
1.0f / 31.0f,
|
|
};
|
|
|
|
float quantization_cutoff = quantization_step_table[quantization_level] * 0.333f;
|
|
|
|
int is_perturbable[MAX_WEIGHTS_PER_BLOCK];
|
|
int perturbable_count = 0;
|
|
|
|
// quantize the weight set
|
|
for (i = 0; i < weight_count; i++)
|
|
{
|
|
float ix0 = weight_set_out[i];
|
|
if (ix0 < 0.0f)
|
|
ix0 = 0.0f;
|
|
if (ix0 > 1.0f)
|
|
ix0 = 1.0f;
|
|
float ix = ix0;
|
|
|
|
ix *= 1024.0f;
|
|
int ix2 = (int)floor(ix + 0.5f);
|
|
int weight = qat->closest_quantized_weight[ix2];
|
|
|
|
ix = qat->unquantized_value_flt[weight];
|
|
weight_set_out[i] = ix;
|
|
quantized_weight_set[i] = weight;
|
|
|
|
// test whether the error of the weight is greater than 1/3 of the weight spacing;
|
|
// if it is not, then it is flagged as "not perturbable". This causes a
|
|
// quality loss of about 0.002 dB, which is totally worth the speedup we're getting.
|
|
is_perturbable[i] = 0;
|
|
if (fabs(ix - ix0) > quantization_cutoff)
|
|
{
|
|
is_perturbable[i] = 1;
|
|
perturbable_count++;
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("Weight values after initial quantization:\n");
|
|
for (i = 0; i < weight_count; i++)
|
|
printf("%3d : %g <%d>\n", i, weight_set_out[i], quantized_weight_set[i]);
|
|
}
|
|
#endif
|
|
|
|
// if the decimation table is complete, the quantization above was all we needed to do,
|
|
// so we can early-out.
|
|
if (it->num_weights == it->num_texels)
|
|
{
|
|
// invert the weight-scaling that was done initially
|
|
// 0 -> low_bound
|
|
// 1 -> high_bound
|
|
|
|
rscale = high_bound - low_bound;
|
|
for (i = 0; i < weight_count; i++)
|
|
weight_set_out[i] = (weight_set_out[i] * rscale) + low_bound;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("Weight values after adjustment:\n");
|
|
for (i = 0; i < weight_count; i++)
|
|
printf("%3d : %g <%d> <error=%g>\n", i, weight_set_out[i], quantized_weight_set[i], weight_set_out[i] - weight_set_in[i]);
|
|
printf("\n");
|
|
printf("%s: Early-out\n\n", __func__);
|
|
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
int weights_tested = 0;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
int perturbation_count = 0;
|
|
#endif
|
|
|
|
// if no weights are flagged as perturbable, don't try to perturb them.
|
|
// if only one weight is flagged as perturbable, perturbation is also pointless.
|
|
if (perturbable_count > 1)
|
|
{
|
|
endpoints_and_weights eaix;
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
eaix.weights[i] = (eai->weights[i] - low_bound) * scale;
|
|
eaix.weight_error_scale[i] = eai->weight_error_scale[i];
|
|
}
|
|
|
|
float infilled_weights[MAX_TEXELS_PER_BLOCK];
|
|
for (i = 0; i < texels_per_block; i++)
|
|
infilled_weights[i] = compute_value_of_texel_flt(i, it, weight_set_out);
|
|
|
|
int weight_to_perturb = 0;
|
|
int weights_since_last_perturbation = 0;
|
|
int num_weights = it->num_weights;
|
|
|
|
while (weights_since_last_perturbation < num_weights && weights_tested < num_weights * 4)
|
|
{
|
|
int do_quant_mod = 0;
|
|
if (is_perturbable[weight_to_perturb])
|
|
{
|
|
|
|
int weight_val = quantized_weight_set[weight_to_perturb];
|
|
int weight_next_up = qat->next_quantized_value[weight_val];
|
|
int weight_next_down = qat->prev_quantized_value[weight_val];
|
|
float flt_weight_val = qat->unquantized_value_flt[weight_val];
|
|
float flt_weight_next_up = qat->unquantized_value_flt[weight_next_up];
|
|
float flt_weight_next_down = qat->unquantized_value_flt[weight_next_down];
|
|
float error_change_up, error_change_down;
|
|
|
|
// compute the error change from perturbing the weight either up or down.
|
|
compute_two_error_changes_from_perturbing_weight_infill(&eaix,
|
|
it,
|
|
infilled_weights,
|
|
weight_to_perturb,
|
|
(flt_weight_next_up - flt_weight_val), (flt_weight_next_down - flt_weight_val), &error_change_up, &error_change_down);
|
|
|
|
int new_weight_val;
|
|
float flt_new_weight_val;
|
|
if (weight_val != weight_next_up && error_change_up < 0.0f)
|
|
{
|
|
do_quant_mod = 1;
|
|
new_weight_val = weight_next_up;
|
|
flt_new_weight_val = flt_weight_next_up;
|
|
}
|
|
else if (weight_val != weight_next_down && error_change_down < 0.0f)
|
|
{
|
|
do_quant_mod = 1;
|
|
new_weight_val = weight_next_down;
|
|
flt_new_weight_val = flt_weight_next_down;
|
|
}
|
|
|
|
if (do_quant_mod)
|
|
{
|
|
|
|
// update the weight.
|
|
weight_set_out[weight_to_perturb] = flt_new_weight_val;
|
|
quantized_weight_set[weight_to_perturb] = new_weight_val;
|
|
|
|
// update the infilled-weights
|
|
int num_weights_infill = it->weight_num_texels[weight_to_perturb];
|
|
float perturbation = (flt_new_weight_val - flt_weight_val) * (1.0f / TEXEL_WEIGHT_SUM);
|
|
const uint8_t *weight_texel_ptr = it->weight_texel[weight_to_perturb];
|
|
const float *weights_ptr = it->weights_flt[weight_to_perturb];
|
|
for (i = num_weights_infill - 1; i >= 0; i--)
|
|
{
|
|
uint8_t weight_texel = weight_texel_ptr[i];
|
|
float weights = weights_ptr[i];
|
|
infilled_weights[weight_texel] += perturbation * weights;
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("Perturbation of weight %d : %g\n", weight_to_perturb, perturbation * (float)TEXEL_WEIGHT_SUM);
|
|
perturbation_count++;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
if (do_quant_mod)
|
|
weights_since_last_perturbation = 0;
|
|
else
|
|
weights_since_last_perturbation++;
|
|
|
|
weight_to_perturb++;
|
|
if (weight_to_perturb >= num_weights)
|
|
weight_to_perturb -= num_weights;
|
|
|
|
weights_tested++;
|
|
}
|
|
}
|
|
|
|
// invert the weight-scaling that was done initially
|
|
// 0 -> low_bound
|
|
// 1 -> high_bound
|
|
|
|
for (i = 0; i < weight_count; i++)
|
|
weight_set_out[i] = (weight_set_out[i] * rscale) + low_bound;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%d weights, %d weight tests, %d perturbations\n", weight_count, weights_tested, perturbation_count);
|
|
printf("Weight values after adjustment:\n");
|
|
for (i = 0; i < weight_count; i++)
|
|
printf("%3d : %g <%d>\n", i, weight_set_out[i], quantized_weight_set[i]);
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
	Sum of the squares of the four elements of a 2x2 matrix
	(i.e. its squared Frobenius norm).
*/
static inline float mat_square_sum(mat2 p)
{
	const float m00 = p.v[0].x;
	const float m01 = p.v[0].y;
	const float m10 = p.v[1].x;
	const float m11 = p.v[1].y;

	return m00 * m00 + m01 * m01 + m10 * m10 + m11 * m11;
}
|
|
|
|
/* For a given weight set, recompute the endpoint colors so that they are optimal for that weight set. */
|
|
void recompute_ideal_colors(int xdim, int ydim, int zdim, int weight_quantization_mode, endpoints * ep, // contains the endpoints we wish to update
|
|
float4 * rgbs_vectors, // used to return RGBS-vectors. (endpoint mode #6)
|
|
float4 * rgbo_vectors, // used to return RGBO-vectors. (endpoint mode #7)
|
|
const uint8_t * weight_set8, // the current set of weight values
|
|
const uint8_t * plane2_weight_set8, // NULL if plane 2 is not actually used.
|
|
int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present
|
|
const partition_info * pi, const decimation_table * it, const imageblock * pb, // picture-block containing the actual data.
|
|
const error_weight_block * ewb)
|
|
{
|
|
int i, j;
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_mode]);
|
|
|
|
float weight_set[MAX_WEIGHTS_PER_BLOCK];
|
|
float plane2_weight_set[MAX_WEIGHTS_PER_BLOCK];
|
|
|
|
for (i = 0; i < it->num_weights; i++)
|
|
{
|
|
weight_set[i] = qat->unquantized_value_flt[weight_set8[i]];
|
|
}
|
|
|
|
if (plane2_weight_set8)
|
|
{
|
|
for (i = 0; i < it->num_weights; i++)
|
|
plane2_weight_set[i] = qat->unquantized_value_flt[plane2_weight_set8[i]];
|
|
}
|
|
|
|
int partition_count = pi->partition_count;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("%s : %dx%dx%d texels_per_block, %d partitions, plane2-color-component=%d\n\n", __func__, xdim, ydim, zdim, partition_count, plane2_color_component);
|
|
|
|
printf("Pre-adjustment endpoint-colors: \n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low <%g %g %g %g>\n", i, ep->endpt0[i].x, ep->endpt0[i].y, ep->endpt0[i].z, ep->endpt0[i].w);
|
|
printf("%d High <%g %g %g %g>\n", i, ep->endpt1[i].x, ep->endpt1[i].y, ep->endpt1[i].z, ep->endpt1[i].w);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
mat2 pmat1_red[4], pmat1_green[4], pmat1_blue[4], pmat1_alpha[4], pmat1_scale[4]; // matrices for plane of weights 1
|
|
mat2 pmat2_red[4], pmat2_green[4], pmat2_blue[4], pmat2_alpha[4]; // matrices for plane of weights 2
|
|
float2 red_vec[4];
|
|
float2 green_vec[4];
|
|
float2 blue_vec[4];
|
|
float2 alpha_vec[4];
|
|
float2 scale_vec[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
pmat1_red[i].v[j] = float2(0, 0);
|
|
pmat2_red[i].v[j] = float2(0, 0);
|
|
pmat1_green[i].v[j] = float2(0, 0);
|
|
pmat2_green[i].v[j] = float2(0, 0);
|
|
pmat1_blue[i].v[j] = float2(0, 0);
|
|
pmat2_blue[i].v[j] = float2(0, 0);
|
|
pmat1_alpha[i].v[j] = float2(0, 0);
|
|
pmat2_alpha[i].v[j] = float2(0, 0);
|
|
pmat1_scale[i].v[j] = float2(0, 0);
|
|
}
|
|
|
|
red_vec[i] = float2(0, 0);
|
|
green_vec[i] = float2(0, 0);
|
|
blue_vec[i] = float2(0, 0);
|
|
alpha_vec[i] = float2(0, 0);
|
|
scale_vec[i] = float2(0, 0);
|
|
}
|
|
|
|
float wmin1[4], wmax1[4];
|
|
float wmin2[4], wmax2[4];
|
|
float red_weight_sum[4];
|
|
float green_weight_sum[4];
|
|
float blue_weight_sum[4];
|
|
float alpha_weight_sum[4];
|
|
float scale_weight_sum[4];
|
|
|
|
float red_weight_weight_sum[4];
|
|
float green_weight_weight_sum[4];
|
|
float blue_weight_weight_sum[4];
|
|
|
|
float psum[4]; // sum of (weight * qweight^2) across (red,green,blue)
|
|
float qsum[4]; // sum of (weight * qweight * texelval) across (red,green,blue)
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
wmin1[i] = 1.0f;
|
|
wmax1[i] = 0.0f;
|
|
wmin2[i] = 1.0f;
|
|
wmax2[i] = 0.0f;
|
|
red_weight_sum[i] = 1e-17f;
|
|
green_weight_sum[i] = 1e-17f;
|
|
blue_weight_sum[i] = 1e-17f;
|
|
alpha_weight_sum[i] = 1e-17f;
|
|
|
|
scale_weight_sum[i] = 1e-17f;
|
|
|
|
red_weight_weight_sum[i] = 1e-17f;
|
|
green_weight_weight_sum[i] = 1e-17f;
|
|
blue_weight_weight_sum[i] = 1e-17f;
|
|
|
|
psum[i] = 1e-17f;
|
|
qsum[i] = 1e-17f;
|
|
}
|
|
|
|
// for each partition, compute the direction that an RGB-scale color endpoint pair would have.
|
|
float3 rgb_sum[4];
|
|
float3 rgb_weight_sum[4];
|
|
float3 scale_directions[4];
|
|
float scale_min[4];
|
|
float scale_max[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
rgb_sum[i] = float3(1e-17f, 1e-17f, 1e-17f);
|
|
rgb_weight_sum[i] = float3(1e-17f, 1e-17f, 1e-17f);
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
float3 rgb = float3(pb->work_data[4 * i], pb->work_data[4 * i + 1], pb->work_data[4 * i + 2]);
|
|
float3 rgb_weight = float3(ewb->texel_weight_r[i],
|
|
ewb->texel_weight_g[i],
|
|
ewb->texel_weight_b[i]);
|
|
|
|
int part = pi->partition_of_texel[i];
|
|
rgb_sum[part] = rgb_sum[part] + (rgb * rgb_weight);
|
|
rgb_weight_sum[part] = rgb_weight_sum[part] + rgb_weight;
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
scale_directions[i] = normalize(rgb_sum[i] / rgb_weight_sum[i]);
|
|
scale_max[i] = 0.0f;
|
|
scale_min[i] = 1e10f;
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
float r = pb->work_data[4 * i];
|
|
float g = pb->work_data[4 * i + 1];
|
|
float b = pb->work_data[4 * i + 2];
|
|
float a = pb->work_data[4 * i + 3];
|
|
|
|
int part = pi->partition_of_texel[i];
|
|
float idx0 = it ? compute_value_of_texel_flt(i, it, weight_set) : weight_set[i];
|
|
float om_idx0 = 1.0f - idx0;
|
|
|
|
if (idx0 > wmax1[part])
|
|
wmax1[part] = idx0;
|
|
if (idx0 < wmin1[part])
|
|
wmin1[part] = idx0;
|
|
|
|
float red_weight = ewb->texel_weight_r[i];
|
|
float green_weight = ewb->texel_weight_g[i];
|
|
float blue_weight = ewb->texel_weight_b[i];
|
|
float alpha_weight = ewb->texel_weight_a[i];
|
|
|
|
float scale_weight = (red_weight + green_weight + blue_weight);
|
|
|
|
float3 scale_direction = scale_directions[part];
|
|
float scale = dot(scale_direction, float3(r, g, b));
|
|
if (scale < scale_min[part])
|
|
scale_min[part] = scale;
|
|
if (scale > scale_max[part])
|
|
scale_max[part] = scale;
|
|
|
|
red_weight_sum[part] += red_weight;
|
|
green_weight_sum[part] += green_weight;
|
|
blue_weight_sum[part] += blue_weight;
|
|
alpha_weight_sum[part] += alpha_weight;
|
|
scale_weight_sum[part] += scale_weight;
|
|
|
|
pmat1_red[part].v[0].x += om_idx0 * om_idx0 * red_weight;
|
|
pmat1_red[part].v[0].y += idx0 * om_idx0 * red_weight;
|
|
pmat1_red[part].v[1].x += idx0 * om_idx0 * red_weight;
|
|
pmat1_red[part].v[1].y += idx0 * idx0 * red_weight;
|
|
|
|
pmat1_green[part].v[0].x += om_idx0 * om_idx0 * green_weight;
|
|
pmat1_green[part].v[0].y += idx0 * om_idx0 * green_weight;
|
|
pmat1_green[part].v[1].x += idx0 * om_idx0 * green_weight;
|
|
pmat1_green[part].v[1].y += idx0 * idx0 * green_weight;
|
|
|
|
pmat1_blue[part].v[0].x += om_idx0 * om_idx0 * blue_weight;
|
|
pmat1_blue[part].v[0].y += idx0 * om_idx0 * blue_weight;
|
|
pmat1_blue[part].v[1].x += idx0 * om_idx0 * blue_weight;
|
|
pmat1_blue[part].v[1].y += idx0 * idx0 * blue_weight;
|
|
|
|
pmat1_alpha[part].v[0].x += om_idx0 * om_idx0 * alpha_weight;
|
|
pmat1_alpha[part].v[0].y += idx0 * om_idx0 * alpha_weight;
|
|
pmat1_alpha[part].v[1].x += idx0 * om_idx0 * alpha_weight;
|
|
pmat1_alpha[part].v[1].y += idx0 * idx0 * alpha_weight;
|
|
|
|
pmat1_scale[part].v[0].x += om_idx0 * om_idx0 * scale_weight;
|
|
pmat1_scale[part].v[0].y += idx0 * om_idx0 * scale_weight;
|
|
pmat1_scale[part].v[1].x += idx0 * om_idx0 * scale_weight;
|
|
pmat1_scale[part].v[1].y += idx0 * idx0 * scale_weight;
|
|
|
|
float idx1 = 0.0f, om_idx1 = 0.0f;
|
|
if (plane2_weight_set8)
|
|
{
|
|
idx1 = it ? compute_value_of_texel_flt(i, it, plane2_weight_set) : plane2_weight_set[i];
|
|
om_idx1 = 1.0f - idx1;
|
|
if (idx1 > wmax2[part])
|
|
wmax2[part] = idx1;
|
|
if (idx1 < wmin2[part])
|
|
wmin2[part] = idx1;
|
|
|
|
pmat2_red[part].v[0].x += om_idx1 * om_idx1 * red_weight;
|
|
pmat2_red[part].v[0].y += idx1 * om_idx1 * red_weight;
|
|
pmat2_red[part].v[1].x += idx1 * om_idx1 * red_weight;
|
|
pmat2_red[part].v[1].y += idx1 * idx1 * red_weight;
|
|
|
|
pmat2_green[part].v[0].x += om_idx1 * om_idx1 * green_weight;
|
|
pmat2_green[part].v[0].y += idx1 * om_idx1 * green_weight;
|
|
pmat2_green[part].v[1].x += idx1 * om_idx1 * green_weight;
|
|
pmat2_green[part].v[1].y += idx1 * idx1 * green_weight;
|
|
|
|
pmat2_blue[part].v[0].x += om_idx1 * om_idx1 * blue_weight;
|
|
pmat2_blue[part].v[0].y += idx1 * om_idx1 * blue_weight;
|
|
pmat2_blue[part].v[1].x += idx1 * om_idx1 * blue_weight;
|
|
pmat2_blue[part].v[1].y += idx1 * idx1 * blue_weight;
|
|
|
|
pmat2_alpha[part].v[0].x += om_idx1 * om_idx1 * alpha_weight;
|
|
pmat2_alpha[part].v[0].y += idx1 * om_idx1 * alpha_weight;
|
|
pmat2_alpha[part].v[1].x += idx1 * om_idx1 * alpha_weight;
|
|
pmat2_alpha[part].v[1].y += idx1 * idx1 * alpha_weight;
|
|
}
|
|
|
|
float red_idx = (plane2_color_component == 0) ? idx1 : idx0;
|
|
float green_idx = (plane2_color_component == 1) ? idx1 : idx0;
|
|
float blue_idx = (plane2_color_component == 2) ? idx1 : idx0;
|
|
float alpha_idx = (plane2_color_component == 3) ? idx1 : idx0;
|
|
|
|
red_vec[part].x += (red_weight * r) * (1.0f - red_idx);
|
|
green_vec[part].x += (green_weight * g) * (1.0f - green_idx);
|
|
blue_vec[part].x += (blue_weight * b) * (1.0f - blue_idx);
|
|
alpha_vec[part].x += (alpha_weight * a) * (1.0f - alpha_idx);
|
|
scale_vec[part].x += (scale_weight * scale) * om_idx0;
|
|
|
|
red_vec[part].y += (red_weight * r) * red_idx;
|
|
green_vec[part].y += (green_weight * g) * green_idx;
|
|
blue_vec[part].y += (blue_weight * b) * blue_idx;
|
|
alpha_vec[part].y += (alpha_weight * a) * alpha_idx;
|
|
scale_vec[part].y += (scale_weight * scale) * idx0;
|
|
|
|
red_weight_weight_sum[part] += red_weight * red_idx;
|
|
green_weight_weight_sum[part] += green_weight * green_idx;
|
|
blue_weight_weight_sum[part] += blue_weight * blue_idx;
|
|
|
|
psum[part] += red_weight * red_idx * red_idx + green_weight * green_idx * green_idx + blue_weight * blue_idx * blue_idx;
|
|
}
|
|
|
|
// calculations specific to mode #7, the HDR RGB-scale mode.
|
|
float red_sum[4];
|
|
float green_sum[4];
|
|
float blue_sum[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
red_sum[i] = red_vec[i].x + red_vec[i].y;
|
|
green_sum[i] = green_vec[i].x + green_vec[i].y;
|
|
blue_sum[i] = blue_vec[i].x + blue_vec[i].y;
|
|
qsum[i] = red_vec[i].y + green_vec[i].y + blue_vec[i].y;
|
|
}
|
|
|
|
// RGB+offset for HDR endpoint mode #7
|
|
int rgbo_fail[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
mat4 mod7_mat;
|
|
mod7_mat.v[0] = float4(red_weight_sum[i], 0.0f, 0.0f, red_weight_weight_sum[i]);
|
|
mod7_mat.v[1] = float4(0.0f, green_weight_sum[i], 0.0f, green_weight_weight_sum[i]);
|
|
mod7_mat.v[2] = float4(0.0f, 0.0f, blue_weight_sum[i], blue_weight_weight_sum[i]);
|
|
mod7_mat.v[3] = float4(red_weight_weight_sum[i], green_weight_weight_sum[i], blue_weight_weight_sum[i], psum[i]);
|
|
|
|
float4 vect = float4(red_sum[i], green_sum[i], blue_sum[i], qsum[i]);
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
fedisableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
|
|
mat4 rmod7_mat = invert(mod7_mat);
|
|
float4 rgbovec = transform(rmod7_mat, vect);
|
|
rgbo_vectors[i] = rgbovec;
|
|
|
|
// we will occasionally get a failure due to a singular matrix. Record whether such a
|
|
// failure has taken place; if it did, compute rgbo_vectors[] with a different method
|
|
// later on.
|
|
float chkval = dot(rgbovec, rgbovec);
|
|
rgbo_fail[i] = chkval != chkval;
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
feenableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
}
|
|
|
|
// initialize the luminance and scale vectors with a reasonable default,
|
|
// just in case the subsequent calculation blows up.
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
fedisableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
|
|
float scalediv = scale_min[i] / scale_max[i];
|
|
if (!(scalediv > 0.0f))
|
|
scalediv = 0.0f; // set to zero if scalediv is zero, negative, or NaN.
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
feenableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
|
|
if (scalediv > 1.0f)
|
|
scalediv = 1.0f;
|
|
|
|
rgbs_vectors[i] = float4(scale_directions[i] * scale_max[i], scalediv);
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
if (wmin1[i] >= wmax1[i] * 0.999)
|
|
{
|
|
// if all weights in the partition were equal, then just take average
|
|
// of all colors in the partition and use that as both endpoint colors.
|
|
float4 avg = float4((red_vec[i].x + red_vec[i].y) / red_weight_sum[i],
|
|
(green_vec[i].x + green_vec[i].y) / green_weight_sum[i],
|
|
(blue_vec[i].x + blue_vec[i].y) / blue_weight_sum[i],
|
|
(alpha_vec[i].x + alpha_vec[i].y) / alpha_weight_sum[i]);
|
|
|
|
if (plane2_color_component != 0 && avg.x == avg.x)
|
|
ep->endpt0[i].x = ep->endpt1[i].x = avg.x;
|
|
if (plane2_color_component != 1 && avg.y == avg.y)
|
|
ep->endpt0[i].y = ep->endpt1[i].y = avg.y;
|
|
if (plane2_color_component != 2 && avg.z == avg.z)
|
|
ep->endpt0[i].z = ep->endpt1[i].z = avg.z;
|
|
if (plane2_color_component != 3 && avg.w == avg.w)
|
|
ep->endpt0[i].w = ep->endpt1[i].w = avg.w;
|
|
|
|
rgbs_vectors[i] = float4(scale_directions[i] * scale_max[i], 1.0f);
|
|
}
|
|
else
|
|
{
|
|
// otherwise, complete the analytic calculation of ideal-endpoint-values
|
|
// for the given set of texel weights and pixel colors.
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
fedisableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
|
|
float red_det1 = determinant(pmat1_red[i]);
|
|
float green_det1 = determinant(pmat1_green[i]);
|
|
float blue_det1 = determinant(pmat1_blue[i]);
|
|
float alpha_det1 = determinant(pmat1_alpha[i]);
|
|
float scale_det1 = determinant(pmat1_scale[i]);
|
|
|
|
float red_mss1 = mat_square_sum(pmat1_red[i]);
|
|
float green_mss1 = mat_square_sum(pmat1_green[i]);
|
|
float blue_mss1 = mat_square_sum(pmat1_blue[i]);
|
|
float alpha_mss1 = mat_square_sum(pmat1_alpha[i]);
|
|
float scale_mss1 = mat_square_sum(pmat1_scale[i]);
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Plane-1 partition %d determinants: R=%g G=%g B=%g A=%g S=%g\n", i, red_det1, green_det1, blue_det1, alpha_det1, scale_det1);
|
|
#endif
|
|
|
|
pmat1_red[i] = invert(pmat1_red[i]);
|
|
pmat1_green[i] = invert(pmat1_green[i]);
|
|
pmat1_blue[i] = invert(pmat1_blue[i]);
|
|
pmat1_alpha[i] = invert(pmat1_alpha[i]);
|
|
pmat1_scale[i] = invert(pmat1_scale[i]);
|
|
|
|
float4 ep0 = float4(dot(pmat1_red[i].v[0], red_vec[i]),
|
|
dot(pmat1_green[i].v[0], green_vec[i]),
|
|
dot(pmat1_blue[i].v[0], blue_vec[i]),
|
|
dot(pmat1_alpha[i].v[0], alpha_vec[i]));
|
|
float4 ep1 = float4(dot(pmat1_red[i].v[1], red_vec[i]),
|
|
dot(pmat1_green[i].v[1], green_vec[i]),
|
|
dot(pmat1_blue[i].v[1], blue_vec[i]),
|
|
dot(pmat1_alpha[i].v[1], alpha_vec[i]));
|
|
|
|
float scale_ep0 = dot(pmat1_scale[i].v[0], scale_vec[i]);
|
|
float scale_ep1 = dot(pmat1_scale[i].v[1], scale_vec[i]);
|
|
|
|
if (plane2_color_component != 0 && fabs(red_det1) > (red_mss1 * 1e-4f) && ep0.x == ep0.x && ep1.x == ep1.x)
|
|
{
|
|
ep->endpt0[i].x = ep0.x;
|
|
ep->endpt1[i].x = ep1.x;
|
|
}
|
|
if (plane2_color_component != 1 && fabs(green_det1) > (green_mss1 * 1e-4f) && ep0.y == ep0.y && ep1.y == ep1.y)
|
|
{
|
|
ep->endpt0[i].y = ep0.y;
|
|
ep->endpt1[i].y = ep1.y;
|
|
}
|
|
if (plane2_color_component != 2 && fabs(blue_det1) > (blue_mss1 * 1e-4f) && ep0.z == ep0.z && ep1.z == ep1.z)
|
|
{
|
|
ep->endpt0[i].z = ep0.z;
|
|
ep->endpt1[i].z = ep1.z;
|
|
}
|
|
if (plane2_color_component != 3 && fabs(alpha_det1) > (alpha_mss1 * 1e-4f) && ep0.w == ep0.w && ep1.w == ep1.w)
|
|
{
|
|
ep->endpt0[i].w = ep0.w;
|
|
ep->endpt1[i].w = ep1.w;
|
|
}
|
|
|
|
if (fabs(scale_det1) > (scale_mss1 * 1e-4f) && scale_ep0 == scale_ep0 && scale_ep1 == scale_ep1 && scale_ep0 < scale_ep1)
|
|
{
|
|
float scalediv = scale_ep0 / scale_ep1;
|
|
rgbs_vectors[i] = float4(scale_directions[i] * scale_ep1, scalediv);
|
|
}
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
feenableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
}
|
|
|
|
if (plane2_weight_set8)
|
|
{
|
|
if (wmin2[i] >= wmax2[i] * 0.999)
|
|
{
|
|
// if all weights in the partition were equal, then just take average
|
|
// of all colors in the partition and use that as both endpoint colors.
|
|
float4 avg = float4((red_vec[i].x + red_vec[i].y) / red_weight_sum[i],
|
|
(green_vec[i].x + green_vec[i].y) / green_weight_sum[i],
|
|
(blue_vec[i].x + blue_vec[i].y) / blue_weight_sum[i],
|
|
(alpha_vec[i].x + alpha_vec[i].y) / alpha_weight_sum[i]);
|
|
|
|
if (plane2_color_component == 0 && avg.x == avg.x)
|
|
ep->endpt0[i].x = ep->endpt1[i].x = avg.x;
|
|
if (plane2_color_component == 1 && avg.y == avg.y)
|
|
ep->endpt0[i].y = ep->endpt1[i].y = avg.y;
|
|
if (plane2_color_component == 2 && avg.z == avg.z)
|
|
ep->endpt0[i].z = ep->endpt1[i].z = avg.z;
|
|
if (plane2_color_component == 3 && avg.w == avg.w)
|
|
ep->endpt0[i].w = ep->endpt1[i].w = avg.w;
|
|
}
|
|
else
|
|
{
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
fedisableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
|
|
// otherwise, complete the analytic calculation of ideal-endpoint-values
|
|
// for the given set of texel weights and pixel colors.
|
|
float red_det2 = determinant(pmat2_red[i]);
|
|
float green_det2 = determinant(pmat2_green[i]);
|
|
float blue_det2 = determinant(pmat2_blue[i]);
|
|
float alpha_det2 = determinant(pmat2_alpha[i]);
|
|
|
|
float red_mss2 = mat_square_sum(pmat2_red[i]);
|
|
float green_mss2 = mat_square_sum(pmat2_green[i]);
|
|
float blue_mss2 = mat_square_sum(pmat2_blue[i]);
|
|
float alpha_mss2 = mat_square_sum(pmat2_alpha[i]);
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Plane-2 partition %d determinants: R=%g G=%g B=%g A=%g\n", i, red_det2, green_det2, blue_det2, alpha_det2);
|
|
#endif
|
|
|
|
pmat2_red[i] = invert(pmat2_red[i]);
|
|
pmat2_green[i] = invert(pmat2_green[i]);
|
|
pmat2_blue[i] = invert(pmat2_blue[i]);
|
|
pmat2_alpha[i] = invert(pmat2_alpha[i]);
|
|
float4 ep0 = float4(dot(pmat2_red[i].v[0], red_vec[i]),
|
|
dot(pmat2_green[i].v[0], green_vec[i]),
|
|
dot(pmat2_blue[i].v[0], blue_vec[i]),
|
|
dot(pmat2_alpha[i].v[0], alpha_vec[i]));
|
|
float4 ep1 = float4(dot(pmat2_red[i].v[1], red_vec[i]),
|
|
dot(pmat2_green[i].v[1], green_vec[i]),
|
|
dot(pmat2_blue[i].v[1], blue_vec[i]),
|
|
dot(pmat2_alpha[i].v[1], alpha_vec[i]));
|
|
|
|
if (plane2_color_component == 0 && fabs(red_det2) > (red_mss2 * 1e-4f) && ep0.x == ep0.x && ep1.x == ep1.x)
|
|
{
|
|
ep->endpt0[i].x = ep0.x;
|
|
ep->endpt1[i].x = ep1.x;
|
|
}
|
|
|
|
if (plane2_color_component == 1 && fabs(green_det2) > (green_mss2 * 1e-4f) && ep0.y == ep0.y && ep1.y == ep1.y)
|
|
{
|
|
ep->endpt0[i].y = ep0.y;
|
|
ep->endpt1[i].y = ep1.y;
|
|
}
|
|
|
|
if (plane2_color_component == 2 && fabs(blue_det2) > (blue_mss2 * 1e-4f) && ep0.z == ep0.z && ep1.z == ep1.z)
|
|
{
|
|
ep->endpt0[i].z = ep0.z;
|
|
ep->endpt1[i].z = ep1.z;
|
|
}
|
|
|
|
if (plane2_color_component == 3 && fabs(alpha_det2) > (alpha_mss2 * 1e-4f) && ep0.w == ep0.w && ep1.w == ep1.w)
|
|
{
|
|
ep->endpt0[i].w = ep0.w;
|
|
ep->endpt1[i].w = ep1.w;
|
|
}
|
|
|
|
#ifdef DEBUG_CAPTURE_NAN
|
|
feenableexcept(FE_DIVBYZERO | FE_INVALID);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
// if the calculation of an RGB-offset vector failed, try to compute
|
|
// a somewhat-sensible value anyway
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
if (rgbo_fail[i])
|
|
{
|
|
float4 v0 = ep->endpt0[i];
|
|
float4 v1 = ep->endpt1[i];
|
|
float avgdif = dot(v1.xyz - v0.xyz, float3(1, 1, 1)) * (1.0f / 3.0f);
|
|
if (avgdif <= 0.0f)
|
|
avgdif = 0.0f;
|
|
float4 avg = (v0 + v1) * 0.5f;
|
|
float4 ep0 = avg - float4(avgdif, avgdif, avgdif, avgdif) * 0.5f;
|
|
|
|
rgbo_vectors[i] = float4(ep0.xyz, avgdif);
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
{
|
|
printf("Post-adjustment endpoint-colors: \n");
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
printf("%d Low <%g %g %g %g>\n", i, ep->endpt0[i].x, ep->endpt0[i].y, ep->endpt0[i].z, ep->endpt0[i].w);
|
|
printf("%d High <%g %g %g %g>\n", i, ep->endpt1[i].x, ep->endpt1[i].y, ep->endpt1[i].z, ep->endpt1[i].w);
|
|
printf("%d RGBS: <%g %g %g %g>\n", i, rgbs_vectors[i].x, rgbs_vectors[i].y, rgbs_vectors[i].z, rgbs_vectors[i].w);
|
|
printf("%d RGBO <%g %g %g %g>\n", i, rgbo_vectors[i].x, rgbo_vectors[i].y, rgbo_vectors[i].z, rgbo_vectors[i].w);
|
|
}
|
|
}
|
|
#endif
|
|
}
|