mirror of https://github.com/axmolengine/axmol.git
836 lines
31 KiB
C++
836 lines
31 KiB
C++
// ----------------------------------------------------------------------------
|
|
// This confidential and proprietary software may be used only as authorised
|
|
// by a licensing agreement from Arm Limited.
|
|
// (C) COPYRIGHT 2011-2019 Arm Limited, ALL RIGHTS RESERVED
|
|
// The entire notice above must be reproduced on all authorised copies and
|
|
// copies may only be made to the extent permitted by a licensing agreement
|
|
// from Arm Limited.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/**
|
|
* @brief Functions for finding best partition for a block.
|
|
*
|
|
* Major step 1:
|
|
* - find best partitioning assuming uncorrelated colors
|
|
* - find best partitioning assuming RGBS color representation
|
|
*
|
|
* Finding best partitioning for a block:
|
|
*
|
|
* foreach available partitioning:
|
|
* - compute mean-color-value and dominant direction.
|
|
* - this defines two lines, both of which go through the mean-color-value.
|
|
* - one line has a direction defined by the dominant direction; this is used
|
|
* to assess the error from using an uncorrelated color representation.
|
|
* - the other line goes through (0,0,0,1) and is used to assess the error from
|
|
* using an RGBS color representation.
|
|
* - we then compute, as a sum across the block, the squared-errors that result
|
|
* from using the dominant-direction-lines and the squared-errors that result
|
|
* from using the 0001-lines.
|
|
*
|
|
* Partition table representation:
|
|
* We have 3 tables, each with 1024 partitions
|
|
* (these correspond to the 3x128 hardware partitions crossed with all the
|
|
* partition-transform modes in the hardware.)
|
|
*
|
|
* For each partitioning, we have:
|
|
* * a 4-entry table indicating how many texels there are in each of the 4
|
|
* partitions. this may be from 2 to about 60 or so.
|
|
* * a 64-entry table indicating the partition index of each of the 64 texels
|
|
* in the block. each index may be 0, 1, 2 or 3.
|
|
*
|
|
* each element in the table is an uint8_t indicating partition index (0, 1, 2 or 3)
|
|
*/
|
|
|
|
#include "astc_codec_internals.h"
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
#include <stdio.h>
|
|
#endif
|
|
|
|
int imageblock_uses_alpha(const imageblock * pb)
|
|
{
|
|
return pb->alpha_max != pb->alpha_min;
|
|
}
|
|
|
|
static void compute_alpha_minmax(int xdim, int ydim, int zdim, const partition_info * pt, const imageblock * blk, const error_weight_block * ewb, float *alpha_min, float *alpha_max)
|
|
{
|
|
int i;
|
|
int partition_count = pt->partition_count;
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
alpha_min[i] = 1e38f;
|
|
alpha_max[i] = -1e38f;
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (ewb->texel_weight[i] > 1e-10)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float alphaval = blk->work_data[4 * i + 3];
|
|
if (alphaval > alpha_max[partition])
|
|
alpha_max[partition] = alphaval;
|
|
if (alphaval < alpha_min[partition])
|
|
alpha_min[partition] = alphaval;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
if (alpha_min[i] >= alpha_max[i])
|
|
{
|
|
alpha_min[i] = 0;
|
|
alpha_max[i] = 1e-10f;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void compute_rgb_minmax(int xdim,
|
|
int ydim,
|
|
int zdim,
|
|
const partition_info * pt,
|
|
const imageblock * blk, const error_weight_block * ewb, float *red_min, float *red_max, float *green_min, float *green_max, float *blue_min, float *blue_max)
|
|
{
|
|
int i;
|
|
int partition_count = pt->partition_count;
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
red_min[i] = 1e38f;
|
|
red_max[i] = -1e38f;
|
|
green_min[i] = 1e38f;
|
|
green_max[i] = -1e38f;
|
|
blue_min[i] = 1e38f;
|
|
blue_max[i] = -1e38f;
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
if (ewb->texel_weight[i] > 1e-10f)
|
|
{
|
|
int partition = pt->partition_of_texel[i];
|
|
float redval = blk->work_data[4 * i];
|
|
float greenval = blk->work_data[4 * i + 1];
|
|
float blueval = blk->work_data[4 * i + 2];
|
|
if (redval > red_max[partition])
|
|
red_max[partition] = redval;
|
|
if (redval < red_min[partition])
|
|
red_min[partition] = redval;
|
|
if (greenval > green_max[partition])
|
|
green_max[partition] = greenval;
|
|
if (greenval < green_min[partition])
|
|
green_min[partition] = greenval;
|
|
if (blueval > blue_max[partition])
|
|
blue_max[partition] = blueval;
|
|
if (blueval < blue_min[partition])
|
|
blue_min[partition] = blueval;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
{
|
|
if (red_min[i] >= red_max[i])
|
|
{
|
|
red_min[i] = 0.0f;
|
|
red_max[i] = 1e-10f;
|
|
}
|
|
if (green_min[i] >= green_max[i])
|
|
{
|
|
green_min[i] = 0.0f;
|
|
green_max[i] = 1e-10f;
|
|
}
|
|
if (blue_min[i] >= blue_max[i])
|
|
{
|
|
blue_min[i] = 0.0f;
|
|
blue_max[i] = 1e-10f;
|
|
}
|
|
}
|
|
}
|
|
|
|
void compute_partition_error_color_weightings(int xdim, int ydim, int zdim, const error_weight_block * ewb, const partition_info * pi, float4 error_weightings[4], float4 color_scalefactors[4])
|
|
{
|
|
int i;
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
int pcnt = pi->partition_count;
|
|
|
|
for (i = 0; i < pcnt; i++)
|
|
error_weightings[i] = float4(1e-12f, 1e-12f, 1e-12f, 1e-12f);
|
|
|
|
for (i = 0; i < texels_per_block; i++)
|
|
{
|
|
int part = pi->partition_of_texel[i];
|
|
error_weightings[part] = error_weightings[part] + ewb->error_weights[i];
|
|
}
|
|
|
|
for (i = 0; i < pcnt; i++)
|
|
{
|
|
error_weightings[i] = error_weightings[i] * (1.0f / pi->texels_per_partition[i]);
|
|
}
|
|
|
|
for (i = 0; i < pcnt; i++)
|
|
{
|
|
color_scalefactors[i].x = sqrt(error_weightings[i].x);
|
|
color_scalefactors[i].y = sqrt(error_weightings[i].y);
|
|
color_scalefactors[i].z = sqrt(error_weightings[i].z);
|
|
color_scalefactors[i].w = sqrt(error_weightings[i].w);
|
|
}
|
|
}
|
|
|
|
/* main function to identify the best partitioning for a given number of texels */
|
|
void find_best_partitionings(int partition_search_limit, int xdim, int ydim, int zdim, int partition_count,
|
|
const imageblock * pb, const error_weight_block * ewb, int candidates_to_return,
|
|
// best partitions to use if the endpoint colors are assumed to be uncorrelated
|
|
int *best_partitions_uncorrelated,
|
|
// best partitions to use if the endpoint colors have the same chroma
|
|
int *best_partitions_samechroma,
|
|
// best partitions to use if using dual plane of weights
|
|
int *best_partitions_dual_weight_planes)
|
|
{
|
|
int i, j;
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
// constant used to estimate quantization error for a given partitioning;
|
|
// the optimal value for this constant depends on bitrate.
|
|
// These constants have been determined empirically.
|
|
|
|
float weight_imprecision_estim = 100;
|
|
|
|
if (texels_per_block <= 20)
|
|
weight_imprecision_estim = 0.03f;
|
|
else if (texels_per_block <= 31)
|
|
weight_imprecision_estim = 0.04f;
|
|
else if (texels_per_block <= 41)
|
|
weight_imprecision_estim = 0.05f;
|
|
else
|
|
weight_imprecision_estim = 0.055f;
|
|
|
|
int partition_sequence[PARTITION_COUNT];
|
|
|
|
kmeans_compute_partition_ordering(xdim, ydim, zdim, partition_count, pb, partition_sequence);
|
|
|
|
float weight_imprecision_estim_squared = weight_imprecision_estim * weight_imprecision_estim;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("weight_imprecision_estim = %g\n", weight_imprecision_estim);
|
|
#endif
|
|
|
|
int uses_alpha = imageblock_uses_alpha(pb);
|
|
|
|
const partition_info *ptab = get_partition_table(xdim, ydim, zdim, partition_count);
|
|
|
|
// partitioning errors assuming uncorrelated-chrominance endpoints
|
|
float uncorr_errors[PARTITION_COUNT];
|
|
// partitioning errors assuming same-chrominance endpoints
|
|
float samechroma_errors[PARTITION_COUNT];
|
|
|
|
// partitioning errors assuming that one of the color channels
|
|
// is uncorrelated from all the other ones
|
|
float separate_errors[4 * PARTITION_COUNT];
|
|
|
|
float *separate_red_errors = separate_errors;
|
|
float *separate_green_errors = separate_errors + PARTITION_COUNT;
|
|
float *separate_blue_errors = separate_errors + 2 * PARTITION_COUNT;
|
|
float *separate_alpha_errors = separate_errors + 3 * PARTITION_COUNT;
|
|
|
|
int defacto_search_limit = PARTITION_COUNT - 1;
|
|
|
|
if (uses_alpha)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partition testing with alpha, %d partitions\n\n", partition_count);
|
|
#endif
|
|
|
|
for (i = 0; i < PARTITION_COUNT; i++)
|
|
{
|
|
int partition = partition_sequence[i];
|
|
int bk_partition_count = ptab[partition].partition_count;
|
|
|
|
if (bk_partition_count < partition_count)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partitioning %d-%d: invalid\n", partition_count, partition);
|
|
#endif
|
|
|
|
uncorr_errors[i] = 1e35f;
|
|
samechroma_errors[i] = 1e35f;
|
|
separate_red_errors[i] = 1e35f;
|
|
separate_green_errors[i] = 1e35f;
|
|
separate_blue_errors[i] = 1e35f;
|
|
separate_alpha_errors[i] = 1e35f;
|
|
continue;
|
|
}
|
|
// the sentinel value for partitions above the search limit must be smaller
|
|
// than the sentinel value for invalid partitions
|
|
if (i >= partition_search_limit)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partitioning %d-%d: excluded from testing\n", partition_count, partition);
|
|
#endif
|
|
|
|
defacto_search_limit = i;
|
|
|
|
uncorr_errors[i] = 1e34f;
|
|
samechroma_errors[i] = 1e34f;
|
|
separate_red_errors[i] = 1e34f;
|
|
separate_green_errors[i] = 1e34f;
|
|
separate_blue_errors[i] = 1e34f;
|
|
separate_alpha_errors[i] = 1e34f;
|
|
break;
|
|
}
|
|
|
|
// compute the weighting to give to each color channel
|
|
// in each partition.
|
|
float4 error_weightings[4];
|
|
float4 color_scalefactors[4];
|
|
float4 inverse_color_scalefactors[4];
|
|
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
|
|
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
|
|
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
|
|
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
|
|
}
|
|
|
|
float4 averages[4];
|
|
float4 directions_rgba[4];
|
|
float3 directions_gba[4];
|
|
float3 directions_rba[4];
|
|
float3 directions_rga[4];
|
|
float3 directions_rgb[4];
|
|
|
|
compute_averages_and_directions_rgba(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgba, directions_gba, directions_rba, directions_rga, directions_rgb);
|
|
|
|
line4 uncorr_lines[4];
|
|
line4 samechroma_lines[4];
|
|
line3 separate_red_lines[4];
|
|
line3 separate_green_lines[4];
|
|
line3 separate_blue_lines[4];
|
|
line3 separate_alpha_lines[4];
|
|
|
|
processed_line4 proc_uncorr_lines[4];
|
|
processed_line4 proc_samechroma_lines[4];
|
|
processed_line3 proc_separate_red_lines[4];
|
|
processed_line3 proc_separate_green_lines[4];
|
|
processed_line3 proc_separate_blue_lines[4];
|
|
processed_line3 proc_separate_alpha_lines[4];
|
|
|
|
float uncorr_linelengths[4];
|
|
float samechroma_linelengths[4];
|
|
float separate_red_linelengths[4];
|
|
float separate_green_linelengths[4];
|
|
float separate_blue_linelengths[4];
|
|
float separate_alpha_linelengths[4];
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
uncorr_lines[j].a = averages[j];
|
|
if (dot(directions_rgba[j], directions_rgba[j]) == 0.0f)
|
|
uncorr_lines[j].b = normalize(float4(1, 1, 1, 1));
|
|
else
|
|
uncorr_lines[j].b = normalize(directions_rgba[j]);
|
|
|
|
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j];
|
|
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j]);
|
|
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j]);
|
|
|
|
samechroma_lines[j].a = float4(0, 0, 0, 0);
|
|
if (dot(averages[j], averages[j]) == 0)
|
|
samechroma_lines[j].b = normalize(float4(1, 1, 1, 1));
|
|
else
|
|
samechroma_lines[j].b = normalize(averages[j]);
|
|
|
|
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j];
|
|
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j]);
|
|
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j]);
|
|
|
|
separate_red_lines[j].a = averages[j].yzw;
|
|
if (dot(directions_gba[j], directions_gba[j]) == 0.0f)
|
|
separate_red_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
separate_red_lines[j].b = normalize(directions_gba[j]);
|
|
|
|
separate_green_lines[j].a = averages[j].xzw;
|
|
if (dot(directions_rba[j], directions_rba[j]) == 0.0f)
|
|
separate_green_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
separate_green_lines[j].b = normalize(directions_rba[j]);
|
|
|
|
separate_blue_lines[j].a = averages[j].xyw;
|
|
if (dot(directions_rga[j], directions_rga[j]) == 0.0f)
|
|
separate_blue_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
separate_blue_lines[j].b = normalize(directions_rga[j]);
|
|
|
|
separate_alpha_lines[j].a = averages[j].xyz;
|
|
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
|
|
separate_alpha_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
separate_alpha_lines[j].b = normalize(directions_rgb[j]);
|
|
|
|
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yzw;
|
|
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yzw);
|
|
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yzw);
|
|
|
|
proc_separate_green_lines[j].amod =
|
|
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xzw;
|
|
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xzw);
|
|
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xzw);
|
|
|
|
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xyw;
|
|
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xyw);
|
|
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xyw);
|
|
|
|
proc_separate_alpha_lines[j].amod =
|
|
(separate_alpha_lines[j].a - separate_alpha_lines[j].b * dot(separate_alpha_lines[j].a, separate_alpha_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
|
proc_separate_alpha_lines[j].bs = (separate_alpha_lines[j].b * color_scalefactors[j].xyz);
|
|
proc_separate_alpha_lines[j].bis = (separate_alpha_lines[j].b * inverse_color_scalefactors[j].xyz);
|
|
}
|
|
|
|
float uncorr_error = compute_error_squared_rgba(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_uncorr_lines,
|
|
uncorr_linelengths);
|
|
|
|
float samechroma_error = compute_error_squared_rgba(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_samechroma_lines,
|
|
samechroma_linelengths);
|
|
|
|
|
|
float separate_red_error = compute_error_squared_gba(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_red_lines,
|
|
separate_red_linelengths);
|
|
|
|
float separate_green_error = compute_error_squared_rba(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_green_lines,
|
|
separate_green_linelengths);
|
|
|
|
float separate_blue_error = compute_error_squared_rga(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_blue_lines,
|
|
separate_blue_linelengths);
|
|
|
|
float separate_alpha_error = compute_error_squared_rgb(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_alpha_lines,
|
|
separate_alpha_linelengths);
|
|
|
|
// compute minimum & maximum alpha values in each partition
|
|
float red_min[4], red_max[4];
|
|
float green_min[4], green_max[4];
|
|
float blue_min[4], blue_max[4];
|
|
float alpha_min[4], alpha_max[4];
|
|
compute_alpha_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, alpha_min, alpha_max);
|
|
|
|
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
|
|
|
|
/*
|
|
Compute an estimate of error introduced by weight quantization imprecision.
|
|
This error is computed as follows, for each partition
|
|
1: compute the principal-axis vector (full length) in error-space
|
|
2: convert the principal-axis vector to regular RGB-space
|
|
3: scale the vector by a constant that estimates average quantization error
|
|
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
|
|
sum up the results across all texels.
|
|
4(optimized): square the vector once, then do a dot-product with the average texel error,
|
|
then multiply by the number of texels.
|
|
*/
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
float tpp = (float)(ptab[partition].texels_per_partition[j]);
|
|
|
|
float4 ics = inverse_color_scalefactors[j];
|
|
float4 error_weights = error_weightings[j] * (tpp * weight_imprecision_estim_squared);
|
|
|
|
float4 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
|
|
float4 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
|
|
float3 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yzw;
|
|
float3 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xzw;
|
|
float3 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xyw;
|
|
float3 separate_alpha_vector = (separate_alpha_lines[j].b * separate_alpha_linelengths[j]) * ics.xyz;
|
|
|
|
uncorr_vector = uncorr_vector * uncorr_vector;
|
|
samechroma_vector = samechroma_vector * samechroma_vector;
|
|
separate_red_vector = separate_red_vector * separate_red_vector;
|
|
separate_green_vector = separate_green_vector * separate_green_vector;
|
|
separate_blue_vector = separate_blue_vector * separate_blue_vector;
|
|
separate_alpha_vector = separate_alpha_vector * separate_alpha_vector;
|
|
|
|
uncorr_error += dot(uncorr_vector, error_weights);
|
|
samechroma_error += dot(samechroma_vector, error_weights);
|
|
separate_red_error += dot(separate_red_vector, error_weights.yzw);
|
|
separate_green_error += dot(separate_green_vector, error_weights.xzw);
|
|
separate_blue_error += dot(separate_blue_vector, error_weights.xyw);
|
|
separate_alpha_error += dot(separate_alpha_vector, error_weights.xyz);
|
|
|
|
float red_scalar = (red_max[j] - red_min[j]);
|
|
float green_scalar = (green_max[j] - green_min[j]);
|
|
float blue_scalar = (blue_max[j] - blue_min[j]);
|
|
float alpha_scalar = (alpha_max[j] - alpha_min[j]);
|
|
red_scalar *= red_scalar;
|
|
green_scalar *= green_scalar;
|
|
blue_scalar *= blue_scalar;
|
|
alpha_scalar *= alpha_scalar;
|
|
separate_red_error += red_scalar * error_weights.x;
|
|
separate_green_error += green_scalar * error_weights.y;
|
|
separate_blue_error += blue_scalar * error_weights.z;
|
|
separate_alpha_error += alpha_scalar * error_weights.w;
|
|
}
|
|
|
|
uncorr_errors[i] = uncorr_error;
|
|
samechroma_errors[i] = samechroma_error;
|
|
separate_red_errors[i] = separate_red_error;
|
|
separate_green_errors[i] = separate_green_error;
|
|
separate_blue_errors[i] = separate_blue_error;
|
|
separate_alpha_errors[i] = separate_alpha_error;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partitioning %d-%d errors: uncorr=%g, samechroma=%g, sep-alpha=%g\n", partition_count, i, uncorr_error, samechroma_error, separate_alpha_error);
|
|
#endif
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partition testing without alpha, %d partitions\n", partition_count);
|
|
#endif
|
|
|
|
for (i = 0; i < PARTITION_COUNT; i++)
|
|
{
|
|
int partition = partition_sequence[i];
|
|
|
|
int bk_partition_count = ptab[partition].partition_count;
|
|
if (bk_partition_count < partition_count)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partitioning %d-%d: invalid\n", partition_count, i);
|
|
#endif
|
|
|
|
uncorr_errors[i] = 1e35f;
|
|
samechroma_errors[i] = 1e35f;
|
|
separate_red_errors[i] = 1e35f;
|
|
separate_green_errors[i] = 1e35f;
|
|
separate_blue_errors[i] = 1e35f;
|
|
continue;
|
|
}
|
|
|
|
// the sentinel value for valid partitions above the search limit must be smaller
|
|
// than the sentinel value for invalid partitions
|
|
if (i >= partition_search_limit)
|
|
{
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf(" Partitioning %d-%d: excluded from testing\n", partition_count, partition);
|
|
#endif
|
|
|
|
defacto_search_limit = i;
|
|
uncorr_errors[i] = 1e34f;
|
|
samechroma_errors[i] = 1e34f;
|
|
separate_red_errors[i] = 1e34f;
|
|
separate_green_errors[i] = 1e34f;
|
|
separate_blue_errors[i] = 1e34f;
|
|
break;
|
|
}
|
|
|
|
// compute the weighting to give to each color channel
|
|
// in each partition.
|
|
float4 error_weightings[4];
|
|
float4 color_scalefactors[4];
|
|
float4 inverse_color_scalefactors[4];
|
|
|
|
compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, ptab + partition, error_weightings, color_scalefactors);
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
inverse_color_scalefactors[j].x = 1.0f / MAX(color_scalefactors[j].x, 1e-7f);
|
|
inverse_color_scalefactors[j].y = 1.0f / MAX(color_scalefactors[j].y, 1e-7f);
|
|
inverse_color_scalefactors[j].z = 1.0f / MAX(color_scalefactors[j].z, 1e-7f);
|
|
inverse_color_scalefactors[j].w = 1.0f / MAX(color_scalefactors[j].w, 1e-7f);
|
|
}
|
|
|
|
float3 averages[4];
|
|
float3 directions_rgb[4];
|
|
float2 directions_rg[4];
|
|
float2 directions_rb[4];
|
|
float2 directions_gb[4];
|
|
|
|
compute_averages_and_directions_rgb(ptab + partition, pb, ewb, color_scalefactors, averages, directions_rgb, directions_rg, directions_rb, directions_gb);
|
|
|
|
line3 uncorr_lines[4];
|
|
line3 samechroma_lines[4];
|
|
line2 separate_red_lines[4];
|
|
line2 separate_green_lines[4];
|
|
line2 separate_blue_lines[4];
|
|
|
|
processed_line3 proc_uncorr_lines[4];
|
|
processed_line3 proc_samechroma_lines[4];
|
|
|
|
processed_line2 proc_separate_red_lines[4];
|
|
processed_line2 proc_separate_green_lines[4];
|
|
processed_line2 proc_separate_blue_lines[4];
|
|
|
|
float uncorr_linelengths[4];
|
|
float samechroma_linelengths[4];
|
|
float separate_red_linelengths[4];
|
|
float separate_green_linelengths[4];
|
|
float separate_blue_linelengths[4];
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
uncorr_lines[j].a = averages[j];
|
|
if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f)
|
|
uncorr_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
uncorr_lines[j].b = normalize(directions_rgb[j]);
|
|
|
|
|
|
samechroma_lines[j].a = float3(0, 0, 0);
|
|
|
|
if (dot(averages[j], averages[j]) == 0.0f)
|
|
samechroma_lines[j].b = normalize(float3(1, 1, 1));
|
|
else
|
|
samechroma_lines[j].b = normalize(averages[j]);
|
|
|
|
proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
|
proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j].xyz);
|
|
proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j].xyz);
|
|
|
|
proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j].xyz;
|
|
proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j].xyz);
|
|
proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j].xyz);
|
|
|
|
separate_red_lines[j].a = averages[j].yz;
|
|
if (dot(directions_gb[j], directions_gb[j]) == 0.0f)
|
|
separate_red_lines[j].b = normalize(float2(1, 1));
|
|
else
|
|
separate_red_lines[j].b = normalize(directions_gb[j]);
|
|
|
|
separate_green_lines[j].a = averages[j].xz;
|
|
if (dot(directions_rb[j], directions_rb[j]) == 0.0f)
|
|
separate_green_lines[j].b = normalize(float2(1, 1));
|
|
else
|
|
separate_green_lines[j].b = normalize(directions_rb[j]);
|
|
|
|
separate_blue_lines[j].a = averages[j].xy;
|
|
if (dot(directions_rg[j], directions_rg[j]) == 0.0f)
|
|
separate_blue_lines[j].b = normalize(float2(1, 1));
|
|
else
|
|
separate_blue_lines[j].b = normalize(directions_rg[j]);
|
|
|
|
proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * inverse_color_scalefactors[j].yz;
|
|
proc_separate_red_lines[j].bs = (separate_red_lines[j].b * color_scalefactors[j].yz);
|
|
proc_separate_red_lines[j].bis = (separate_red_lines[j].b * inverse_color_scalefactors[j].yz);
|
|
|
|
proc_separate_green_lines[j].amod =
|
|
(separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * inverse_color_scalefactors[j].xz;
|
|
proc_separate_green_lines[j].bs = (separate_green_lines[j].b * color_scalefactors[j].xz);
|
|
proc_separate_green_lines[j].bis = (separate_green_lines[j].b * inverse_color_scalefactors[j].xz);
|
|
|
|
proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * inverse_color_scalefactors[j].xy;
|
|
proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * color_scalefactors[j].xy);
|
|
proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * inverse_color_scalefactors[j].xy);
|
|
}
|
|
|
|
float uncorr_error = compute_error_squared_rgb(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_uncorr_lines,
|
|
uncorr_linelengths);
|
|
|
|
float samechroma_error = compute_error_squared_rgb(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_samechroma_lines,
|
|
samechroma_linelengths);
|
|
|
|
float separate_red_error = compute_error_squared_gb(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_red_lines,
|
|
separate_red_linelengths);
|
|
|
|
float separate_green_error = compute_error_squared_rb(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_green_lines,
|
|
separate_green_linelengths);
|
|
|
|
float separate_blue_error = compute_error_squared_rg(ptab + partition,
|
|
pb,
|
|
ewb,
|
|
proc_separate_blue_lines,
|
|
separate_blue_linelengths);
|
|
|
|
float red_min[4], red_max[4];
|
|
float green_min[4], green_max[4];
|
|
float blue_min[4], blue_max[4];
|
|
|
|
compute_rgb_minmax(xdim, ydim, zdim, ptab + partition, pb, ewb, red_min, red_max, green_min, green_max, blue_min, blue_max);
|
|
|
|
/*
|
|
compute an estimate of error introduced by weight imprecision.
|
|
This error is computed as follows, for each partition
|
|
1: compute the principal-axis vector (full length) in error-space
|
|
2: convert the principal-axis vector to regular RGB-space
|
|
3: scale the vector by a constant that estimates average quantization error.
|
|
4: for each texel, square the vector, then do a dot-product with the texel's error weight;
|
|
sum up the results across all texels.
|
|
4(optimized): square the vector once, then do a dot-product with the average texel error,
|
|
then multiply by the number of texels.
|
|
*/
|
|
|
|
for (j = 0; j < partition_count; j++)
|
|
{
|
|
float tpp = (float)(ptab[partition].texels_per_partition[j]);
|
|
|
|
float3 ics = inverse_color_scalefactors[j].xyz;
|
|
float3 error_weights = error_weightings[j].xyz * (tpp * weight_imprecision_estim_squared);
|
|
|
|
float3 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics;
|
|
float3 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics;
|
|
|
|
float2 separate_red_vector = (separate_red_lines[j].b * separate_red_linelengths[j]) * ics.yz;
|
|
float2 separate_green_vector = (separate_green_lines[j].b * separate_green_linelengths[j]) * ics.xz;
|
|
float2 separate_blue_vector = (separate_blue_lines[j].b * separate_blue_linelengths[j]) * ics.xy;
|
|
|
|
uncorr_vector = uncorr_vector * uncorr_vector;
|
|
samechroma_vector = samechroma_vector * samechroma_vector;
|
|
separate_red_vector = separate_red_vector * separate_red_vector;
|
|
separate_green_vector = separate_green_vector * separate_green_vector;
|
|
separate_blue_vector = separate_blue_vector * separate_blue_vector;
|
|
|
|
uncorr_error += dot(uncorr_vector, error_weights);
|
|
samechroma_error += dot(samechroma_vector, error_weights);
|
|
separate_red_error += dot(separate_red_vector, error_weights.yz);
|
|
separate_green_error += dot(separate_green_vector, error_weights.xz);
|
|
separate_blue_error += dot(separate_blue_vector, error_weights.xy);
|
|
|
|
float red_scalar = (red_max[j] - red_min[j]);
|
|
float green_scalar = (green_max[j] - green_min[j]);
|
|
float blue_scalar = (blue_max[j] - blue_min[j]);
|
|
|
|
red_scalar *= red_scalar;
|
|
green_scalar *= green_scalar;
|
|
blue_scalar *= blue_scalar;
|
|
|
|
separate_red_error += red_scalar * error_weights.x;
|
|
separate_green_error += green_scalar * error_weights.y;
|
|
separate_blue_error += blue_scalar * error_weights.z;
|
|
}
|
|
|
|
uncorr_errors[i] = uncorr_error;
|
|
samechroma_errors[i] = samechroma_error;
|
|
|
|
separate_red_errors[i] = separate_red_error;
|
|
separate_green_errors[i] = separate_green_error;
|
|
separate_blue_errors[i] = separate_blue_error;
|
|
|
|
#ifdef DEBUG_PRINT_DIAGNOSTICS
|
|
if (print_diagnostics)
|
|
printf("Partitioning %d-%d errors: uncorr=%f, samechroma=%f, sep-red=%f, sep-green=%f, sep-blue=%f\n",
|
|
partition_count, partition, uncorr_error, samechroma_error, separate_red_error, separate_green_error, separate_blue_error);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < candidates_to_return; i++)
|
|
{
|
|
int best_uncorr_partition = 0;
|
|
int best_samechroma_partition = 0;
|
|
float best_uncorr_error = 1e30f;
|
|
float best_samechroma_error = 1e30f;
|
|
|
|
for (j = 0; j <= defacto_search_limit; j++)
|
|
{
|
|
if (uncorr_errors[j] < best_uncorr_error)
|
|
{
|
|
best_uncorr_partition = j;
|
|
best_uncorr_error = uncorr_errors[j];
|
|
}
|
|
}
|
|
|
|
best_partitions_uncorrelated[i] = partition_sequence[best_uncorr_partition];
|
|
uncorr_errors[best_uncorr_partition] = 1e30f;
|
|
samechroma_errors[best_uncorr_partition] = 1e30f;
|
|
|
|
for (j = 0; j <= defacto_search_limit; j++)
|
|
{
|
|
if (samechroma_errors[j] < best_samechroma_error)
|
|
{
|
|
best_samechroma_partition = j;
|
|
best_samechroma_error = samechroma_errors[j];
|
|
}
|
|
}
|
|
|
|
best_partitions_samechroma[i] = partition_sequence[best_samechroma_partition];
|
|
samechroma_errors[best_samechroma_partition] = 1e30f;
|
|
uncorr_errors[best_samechroma_partition] = 1e30f;
|
|
}
|
|
|
|
for (i = 0; i < 2 * candidates_to_return; i++)
|
|
{
|
|
int best_partition = 0;
|
|
float best_partition_error = 1e30f;
|
|
|
|
for (j = 0; j <= defacto_search_limit; j++)
|
|
{
|
|
// TODO: Review this heuristic
|
|
if (1 || !uses_alpha)
|
|
{
|
|
if (separate_errors[j] < best_partition_error)
|
|
{
|
|
best_partition = j;
|
|
best_partition_error = separate_errors[j];
|
|
}
|
|
if (separate_errors[j + PARTITION_COUNT] < best_partition_error)
|
|
{
|
|
best_partition = j + PARTITION_COUNT;
|
|
best_partition_error = separate_errors[j + PARTITION_COUNT];
|
|
}
|
|
if (separate_errors[j + 2 * PARTITION_COUNT] < best_partition_error)
|
|
{
|
|
best_partition = j + 2 * PARTITION_COUNT;
|
|
best_partition_error = separate_errors[j + 2 * PARTITION_COUNT];
|
|
}
|
|
}
|
|
|
|
if (uses_alpha)
|
|
{
|
|
if (separate_errors[j + 3 * PARTITION_COUNT] < best_partition_error)
|
|
{
|
|
best_partition = j + 3 * PARTITION_COUNT;
|
|
best_partition_error = separate_errors[j + 3 * PARTITION_COUNT];
|
|
}
|
|
}
|
|
}
|
|
|
|
separate_errors[best_partition] = 1e30f;
|
|
best_partition = ((best_partition >> PARTITION_BITS) << PARTITION_BITS) | partition_sequence[best_partition & (PARTITION_COUNT - 1)];
|
|
best_partitions_dual_weight_planes[i] = best_partition;
|
|
}
|
|
}
|