// ---------------------------------------------------------------------------- // This confidential and proprietary software may be used only as authorised // by a licensing agreement from Arm Limited. // (C) COPYRIGHT 2011-2019 Arm Limited, ALL RIGHTS RESERVED // The entire notice above must be reproduced on all authorised copies and // copies may only be made to the extent permitted by a licensing agreement // from Arm Limited. // ---------------------------------------------------------------------------- /** * @brief Functions for loading/storing ASTC compressed images. */ #include "astc_codec_internals.h" #include #include "softfloat.h" void destroy_image(astc_codec_image * img) { if (img == NULL) return; if (img->imagedata8) { delete[]img->imagedata8[0][0]; delete[]img->imagedata8[0]; delete[]img->imagedata8; } if (img->imagedata16) { delete[]img->imagedata16[0][0]; delete[]img->imagedata16[0]; delete[]img->imagedata16; } delete img; } astc_codec_image *allocate_image(int bitness, int xsize, int ysize, int zsize, int padding) { int i, j; astc_codec_image *img = new astc_codec_image; img->xsize = xsize; img->ysize = ysize; img->zsize = zsize; img->padding = padding; int exsize = xsize + 2 * padding; int eysize = ysize + 2 * padding; int ezsize = (zsize == 1) ? 1 : zsize + 2 * padding; if (bitness == 8) { img->imagedata8 = new uint8_t **[ezsize]; img->imagedata8[0] = new uint8_t *[ezsize * eysize]; img->imagedata8[0][0] = new uint8_t[4 * ezsize * eysize * exsize]; for (i = 1; i < ezsize; i++) { img->imagedata8[i] = img->imagedata8[0] + i * eysize; img->imagedata8[i][0] = img->imagedata8[0][0] + 4 * i * exsize * eysize; } for (i = 0; i < ezsize; i++) for (j = 1; j < eysize; j++) img->imagedata8[i][j] = img->imagedata8[i][0] + 4 * j * exsize; img->imagedata16 = NULL; } else if (bitness == 16) { img->imagedata16 = new uint16_t **[ezsize]; img->imagedata16[0] = new uint16_t *[ezsize * eysize]; img->imagedata16[0][0] = new uint16_t[4 * ezsize * eysize * exsize]; for (i = 1; i < ezsize; i++) { img->imagedata16[i] = img->imagedata16[0] + i * eysize; img->imagedata16[i][0] = img->imagedata16[0][0] + 4 * i * exsize * eysize; } for (i = 0; i < ezsize; i++) for (j = 1; j < eysize; j++) img->imagedata16[i][j] = img->imagedata16[i][0] + 4 * j * exsize; img->imagedata8 = NULL; } else { ASTC_CODEC_INTERNAL_ERROR(); } return img; } void initialize_image(astc_codec_image * img) { int x, y, z; int exsize = img->xsize + 2 * img->padding; int eysize = img->ysize + 2 * img->padding; int ezsize = (img->zsize == 1) ? 1 : img->zsize + 2 * img->padding; if (img->imagedata8) { for (z = 0; z < ezsize; z++) for (y = 0; y < eysize; y++) for (x = 0; x < exsize; x++) { img->imagedata8[z][y][4 * x] = 0; img->imagedata8[z][y][4 * x + 1] = 0; img->imagedata8[z][y][4 * x + 2] = 0; img->imagedata8[z][y][4 * x + 3] = 0xFF; } } else if (img->imagedata16) { for (z = 0; z < ezsize; z++) for (y = 0; y < eysize; y++) for (x = 0; x < exsize; x++) { img->imagedata16[z][y][4 * x] = 0; img->imagedata16[z][y][4 * x + 1] = 0; img->imagedata16[z][y][4 * x + 2] = 0; img->imagedata16[z][y][4 * x + 3] = 0x3C00; } } else { ASTC_CODEC_INTERNAL_ERROR(); } } // fill the padding area of the input-file buffer with clamp-to-edge data // Done inefficiently, in that it will overwrite all the interior data at least once; // this is not considered a problem, since this makes up a very small part of total // running time. void fill_image_padding_area(astc_codec_image * img) { if (img->padding == 0) return; int x, y, z, i; int exsize = img->xsize + 2 * img->padding; int eysize = img->ysize + 2 * img->padding; int ezsize = (img->zsize == 1) ? 1 : (img->zsize + 2 * img->padding); int xmin = img->padding; int ymin = img->padding; int zmin = (img->zsize == 1) ? 0 : img->padding; int xmax = img->xsize + img->padding - 1; int ymax = img->ysize + img->padding - 1; int zmax = (img->zsize == 1) ? 0 : img->zsize + img->padding - 1; // This is a very simple implementation. Possible optimizations include: // * Testing if texel is outside the edge. // * Looping over texels that we know are outside the edge. if (img->imagedata8) { for (z = 0; z < ezsize; z++) { int zc = MIN(MAX(z, zmin), zmax); for (y = 0; y < eysize; y++) { int yc = MIN(MAX(y, ymin), ymax); for (x = 0; x < exsize; x++) { int xc = MIN(MAX(x, xmin), xmax); for (i = 0; i < 4; i++) { img->imagedata8[z][y][4 * x + i] = img->imagedata8[zc][yc][4 * xc + i]; } } } } } else if (img->imagedata16) { for (z = 0; z < ezsize; z++) { int zc = MIN(MAX(z, zmin), zmax); for (y = 0; y < eysize; y++) { int yc = MIN(MAX(y, ymin), ymax); for (x = 0; x < exsize; x++) { int xc = MIN(MAX(x, xmin), xmax); for (i = 0; i < 4; i++) { img->imagedata16[z][y][4 * x + i] = img->imagedata16[zc][yc][4 * xc + i]; } } } } } } int determine_image_channels(const astc_codec_image * img) { int x, y, z; int xsize = img->xsize; int ysize = img->ysize; int zsize = img->zsize; // scan through the image data // to determine how many color channels the image has. int lum_mask; int alpha_mask; int alpha_mask_ref; if (img->imagedata8) { alpha_mask_ref = 0xFF; alpha_mask = 0xFF; lum_mask = 0; for (z = 0; z < zsize; z++) { for (y = 0; y < ysize; y++) { for (x = 0; x < xsize; x++) { int r = img->imagedata8[z][y][4 * x]; int g = img->imagedata8[z][y][4 * x + 1]; int b = img->imagedata8[z][y][4 * x + 2]; int a = img->imagedata8[z][y][4 * x + 3]; lum_mask |= (r ^ g) | (r ^ b); alpha_mask &= a; } } } } else // if( bitness == 16 ) { alpha_mask_ref = 0xFFFF; alpha_mask = 0xFFFF; lum_mask = 0; for (z = 0; z < zsize; z++) { for (y = 0; y < ysize; y++) { for (x = 0; x < xsize; x++) { int r = img->imagedata16[z][y][4 * x]; int g = img->imagedata16[z][y][4 * x + 1]; int b = img->imagedata16[z][y][4 * x + 2]; int a = img->imagedata16[z][y][4 * x + 3]; lum_mask |= (r ^ g) | (r ^ b); alpha_mask &= (a ^ 0xC3FF); // a ^ 0xC3FF returns FFFF if and only if the input is 1.0 } } } } int image_channels = 1 + (lum_mask == 0 ? 0 : 2) + (alpha_mask == alpha_mask_ref ? 0 : 1); return image_channels; } // conversion functions between the LNS representation and the FP16 representation. float float_to_lns(float p) { if (astc_isnan(p) || p <= 1.0f / 67108864.0f) { // underflow or NaN value, return 0. // We count underflow if the input value is smaller than 2^-26. return 0; } if (fabs(p) >= 65536.0f) { // overflow, return a +INF value return 65535; } int expo; float normfrac = frexp(p, &expo); float p1; if (expo < -13) { // input number is smaller than 2^-14. In this case, multiply by 2^25. p1 = p * 33554432.0f; expo = 0; } else { expo += 14; p1 = (normfrac - 0.5f) * 4096.0f; } if (p1 < 384.0f) p1 *= 4.0f / 3.0f; else if (p1 <= 1408.0f) p1 += 128.0f; else p1 = (p1 + 512.0f) * (4.0f / 5.0f); p1 += expo * 2048.0f; return p1 + 1.0f; } uint16_t lns_to_sf16(uint16_t p) { uint16_t mc = p & 0x7FF; uint16_t ec = p >> 11; uint16_t mt; if (mc < 512) mt = 3 * mc; else if (mc < 1536) mt = 4 * mc - 512; else mt = 5 * mc - 2048; uint16_t res = (ec << 10) | (mt >> 3); if (res >= 0x7BFF) res = 0x7BFF; return res; } // conversion function from 16-bit LDR value to FP16. // note: for LDR interpolation, it is impossible to get a denormal result; // this simplifies the conversion. // FALSE; we can receive a very small UNORM16 through the constant-block. uint16_t unorm16_to_sf16(uint16_t p) { if (p == 0xFFFF) return 0x3C00; // value of 1.0 . if (p < 4) return p << 8; int lz = clz32(p) - 16; p <<= (lz + 1); p >>= 6; p |= (14 - lz) << 10; return p; } void imageblock_initialize_deriv_from_work_and_orig(imageblock * pb, int pixelcount) { int i; const float *fptr = pb->orig_data; const float *wptr = pb->work_data; float *dptr = pb->deriv_data; for (i = 0; i < pixelcount; i++) { // compute derivatives for RGB first if (pb->rgb_lns[i]) { float r = MAX(fptr[0], 6e-5f); float g = MAX(fptr[1], 6e-5f); float b = MAX(fptr[2], 6e-5f); float rderiv = (float_to_lns(r * 1.05f) - float_to_lns(r)) / (r * 0.05f); float gderiv = (float_to_lns(g * 1.05f) - float_to_lns(g)) / (g * 0.05f); float bderiv = (float_to_lns(b * 1.05f) - float_to_lns(b)) / (b * 0.05f); // the derivative may not actually take values smaller than 1/32 or larger than 2^25; // if it does, we clamp it. if (rderiv < (1.0f / 32.0f)) rderiv = (1.0f / 32.0f); else if (rderiv > 33554432.0f) rderiv = 33554432.0f; if (gderiv < (1.0f / 32.0f)) gderiv = (1.0f / 32.0f); else if (gderiv > 33554432.0f) gderiv = 33554432.0f; if (bderiv < (1.0f / 32.0f)) bderiv = (1.0f / 32.0f); else if (bderiv > 33554432.0f) bderiv = 33554432.0f; dptr[0] = rderiv; dptr[1] = gderiv; dptr[2] = bderiv; } else { dptr[0] = 65535.0f; dptr[1] = 65535.0f; dptr[2] = 65535.0f; } // then compute derivatives for Alpha if (pb->alpha_lns[i]) { float a = MAX(fptr[3], 6e-5f); float aderiv = (float_to_lns(a * 1.05f) - float_to_lns(a)) / (a * 0.05f); // the derivative may not actually take values smaller than 1/32 or larger than 2^25; // if it does, we clamp it. if (aderiv < (1.0f / 32.0f)) aderiv = (1.0f / 32.0f); else if (aderiv > 33554432.0f) aderiv = 33554432.0f; dptr[3] = aderiv; } else { dptr[3] = 65535.0f; } fptr += 4; wptr += 4; dptr += 4; } } // helper function to initialize the work-data from the orig-data void imageblock_initialize_work_from_orig(imageblock * pb, int pixelcount) { int i; float *fptr = pb->orig_data; float *wptr = pb->work_data; for (i = 0; i < pixelcount; i++) { if (pb->rgb_lns[i]) { wptr[0] = float_to_lns(fptr[0]); wptr[1] = float_to_lns(fptr[1]); wptr[2] = float_to_lns(fptr[2]); } else { wptr[0] = fptr[0] * 65535.0f; wptr[1] = fptr[1] * 65535.0f; wptr[2] = fptr[2] * 65535.0f; } if (pb->alpha_lns[i]) { wptr[3] = float_to_lns(fptr[3]); } else { wptr[3] = fptr[3] * 65535.0f; } fptr += 4; wptr += 4; } imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount); } // helper function to initialize the orig-data from the work-data void imageblock_initialize_orig_from_work(imageblock * pb, int pixelcount) { int i; float *fptr = pb->orig_data; float *wptr = pb->work_data; for (i = 0; i < pixelcount; i++) { if (pb->rgb_lns[i]) { fptr[0] = sf16_to_float(lns_to_sf16((uint16_t) wptr[0])); fptr[1] = sf16_to_float(lns_to_sf16((uint16_t) wptr[1])); fptr[2] = sf16_to_float(lns_to_sf16((uint16_t) wptr[2])); } else { fptr[0] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[0])); fptr[1] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[1])); fptr[2] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[2])); } if (pb->alpha_lns[i]) { fptr[3] = sf16_to_float(lns_to_sf16((uint16_t) wptr[3])); } else { fptr[3] = sf16_to_float(unorm16_to_sf16((uint16_t) wptr[3])); } fptr += 4; wptr += 4; } imageblock_initialize_deriv_from_work_and_orig(pb, pixelcount); } // fetch an imageblock from the input file. void fetch_imageblock(const astc_codec_image * img, imageblock * pb, // picture-block to initialize with image data // block dimensions int xdim, int ydim, int zdim, // position in texture. int xpos, int ypos, int zpos, swizzlepattern swz) { float *fptr = pb->orig_data; int xsize = img->xsize + 2 * img->padding; int ysize = img->ysize + 2 * img->padding; int zsize = (img->zsize == 1) ? 1 : img->zsize + 2 * img->padding; int x, y, z, i; pb->xpos = xpos; pb->ypos = ypos; pb->zpos = zpos; xpos += img->padding; ypos += img->padding; if (img->zsize > 1) zpos += img->padding; float data[6]; data[4] = 0; data[5] = 1; if (img->imagedata8) { for (z = 0; z < zdim; z++) { for (y = 0; y < ydim; y++) { for (x = 0; x < xdim; x++) { int xi = xpos + x; int yi = ypos + y; int zi = zpos + z; // clamp XY coordinates to the picture. if (xi < 0) xi = 0; if (yi < 0) yi = 0; if (zi < 0) zi = 0; if (xi >= xsize) xi = xsize - 1; if (yi >= ysize) yi = ysize - 1; if (zi >= zsize) zi = zsize - 1; int r = img->imagedata8[zi][yi][4 * xi]; int g = img->imagedata8[zi][yi][4 * xi + 1]; int b = img->imagedata8[zi][yi][4 * xi + 2]; int a = img->imagedata8[zi][yi][4 * xi + 3]; data[0] = r / 255.0f; data[1] = g / 255.0f; data[2] = b / 255.0f; data[3] = a / 255.0f; fptr[0] = data[swz.r]; fptr[1] = data[swz.g]; fptr[2] = data[swz.b]; fptr[3] = data[swz.a]; fptr += 4; } } } } else if (img->imagedata16) { for (z = 0; z < zdim; z++) { for (y = 0; y < ydim; y++) { for (x = 0; x < xdim; x++) { int xi = xpos + x; int yi = ypos + y; int zi = zpos + z; // clamp XY coordinates to the picture. if (xi < 0) xi = 0; if (yi < 0) yi = 0; if (zi < 0) zi = 0; if (xi >= xsize) xi = xsize - 1; if (yi >= ysize) yi = ysize - 1; if (zi >= ysize) zi = zsize - 1; int r = img->imagedata16[zi][yi][4 * xi]; int g = img->imagedata16[zi][yi][4 * xi + 1]; int b = img->imagedata16[zi][yi][4 * xi + 2]; int a = img->imagedata16[zi][yi][4 * xi + 3]; float rf = sf16_to_float(r); float gf = sf16_to_float(g); float bf = sf16_to_float(b); float af = sf16_to_float(a); // equalize the color components somewhat, and get rid of negative values. rf = MAX(rf, 1e-8f); gf = MAX(gf, 1e-8f); bf = MAX(bf, 1e-8f); af = MAX(af, 1e-8f); data[0] = rf; data[1] = gf; data[2] = bf; data[3] = af; fptr[0] = data[swz.r]; fptr[1] = data[swz.g]; fptr[2] = data[swz.b]; fptr[3] = data[swz.a]; fptr += 4; } } } } // perform sRGB-to-linear transform on input data, if requested. int pixelcount = xdim * ydim * zdim; if (perform_srgb_transform) { fptr = pb->orig_data; for (i = 0; i < pixelcount; i++) { float r = fptr[0]; float g = fptr[1]; float b = fptr[2]; if (r <= 0.04045f) r = r * (1.0f / 12.92f); else if (r <= 1) r = pow((r + 0.055f) * (1.0f / 1.055f), 2.4f); if (g <= 0.04045f) g = g * (1.0f / 12.92f); else if (g <= 1) g = pow((g + 0.055f) * (1.0f / 1.055f), 2.4f); if (b <= 0.04045f) b = b * (1.0f / 12.92f); else if (b <= 1) b = pow((b + 0.055f) * (1.0f / 1.055f), 2.4f); fptr[0] = r; fptr[1] = g; fptr[2] = b; fptr += 4; } } // collect color max-value, in order to determine whether to use LDR or HDR // interpolation. float max_red, max_green, max_blue, max_alpha; max_red = 0.0f; max_green = 0.0f; max_blue = 0.0f; max_alpha = 0.0f; fptr = pb->orig_data; for (i = 0; i < pixelcount; i++) { float r = fptr[0]; float g = fptr[1]; float b = fptr[2]; float a = fptr[3]; if (r > max_red) max_red = r; if (g > max_green) max_green = g; if (b > max_blue) max_blue = b; if (a > max_alpha) max_alpha = a; fptr += 4; } float max_rgb = MAX(max_red, MAX(max_green, max_blue)); // use LNS if: // * RGB-maximum is less than 0.15 // * RGB-maximum is greater than 1 // * Alpha-maximum is greater than 1 int rgb_lns = (max_rgb < 0.15f || max_rgb > 1.0f || max_alpha > 1.0f) ? 1 : 0; int alpha_lns = rgb_lns ? (max_alpha > 1.0f || max_alpha < 0.15f) : 0; // not yet though; for the time being, just obey the command line. rgb_lns = rgb_force_use_of_hdr; alpha_lns = alpha_force_use_of_hdr; // impose the choice on every pixel when encoding. for (i = 0; i < pixelcount; i++) { pb->rgb_lns[i] = rgb_lns; pb->alpha_lns[i] = alpha_lns; pb->nan_texel[i] = 0; } imageblock_initialize_work_from_orig(pb, pixelcount); update_imageblock_flags(pb, xdim, ydim, zdim); } void write_imageblock(astc_codec_image * img, const imageblock * pb, // picture-block to initialize with image data. We assume that orig_data is valid. // block dimensions int xdim, int ydim, int zdim, // position to write the block to int xpos, int ypos, int zpos, swizzlepattern swz) { const float *fptr = pb->orig_data; const uint8_t *nptr = pb->nan_texel; int xsize = img->xsize; int ysize = img->ysize; int zsize = img->zsize; int x, y, z; float data[7]; data[4] = 0.0f; data[5] = 1.0f; if (img->imagedata8) { for (z = 0; z < zdim; z++) { for (y = 0; y < ydim; y++) { for (x = 0; x < xdim; x++) { int xi = xpos + x; int yi = ypos + y; int zi = zpos + z; if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) { if (*nptr) { // NaN-pixel, but we can't display it. Display purple instead. img->imagedata8[zi][yi][4 * xi] = 0xFF; img->imagedata8[zi][yi][4 * xi + 1] = 0x00; img->imagedata8[zi][yi][4 * xi + 2] = 0xFF; img->imagedata8[zi][yi][4 * xi + 3] = 0xFF; } else { // apply swizzle if (perform_srgb_transform) { float r = fptr[0]; float g = fptr[1]; float b = fptr[2]; if (r <= 0.0031308f) r = r * 12.92f; else if (r <= 1) r = 1.055f * pow(r, (1.0f / 2.4f)) - 0.055f; if (g <= 0.0031308f) g = g * 12.92f; else if (g <= 1) g = 1.055f * pow(g, (1.0f / 2.4f)) - 0.055f; if (b <= 0.0031308f) b = b * 12.92f; else if (b <= 1) b = 1.055f * pow(b, (1.0f / 2.4f)) - 0.055f; data[0] = r; data[1] = g; data[2] = b; } else { float r = fptr[0]; float g = fptr[1]; float b = fptr[2]; data[0] = r; data[1] = g; data[2] = b; } data[3] = fptr[3]; float xcoord = (data[0] * 2.0f) - 1.0f; float ycoord = (data[3] * 2.0f) - 1.0f; float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord; if (zcoord < 0.0f) zcoord = 0.0f; data[6] = (sqrt(zcoord) * 0.5f) + 0.5f; // clamp to [0,1] if (data[0] > 1.0f) data[0] = 1.0f; if (data[1] > 1.0f) data[1] = 1.0f; if (data[2] > 1.0f) data[2] = 1.0f; if (data[3] > 1.0f) data[3] = 1.0f; // pack the data int ri = static_cast < int >(floor(data[swz.r] * 255.0f + 0.5f)); int gi = static_cast < int >(floor(data[swz.g] * 255.0f + 0.5f)); int bi = static_cast < int >(floor(data[swz.b] * 255.0f + 0.5f)); int ai = static_cast < int >(floor(data[swz.a] * 255.0f + 0.5f)); img->imagedata8[zi][yi][4 * xi] = ri; img->imagedata8[zi][yi][4 * xi + 1] = gi; img->imagedata8[zi][yi][4 * xi + 2] = bi; img->imagedata8[zi][yi][4 * xi + 3] = ai; } } fptr += 4; nptr++; } } } } else if (img->imagedata16) { for (z = 0; z < zdim; z++) { for (y = 0; y < ydim; y++) { for (x = 0; x < xdim; x++) { int xi = xpos + x; int yi = ypos + y; int zi = zpos + z; if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) { if (*nptr) { img->imagedata16[zi][yi][4 * xi] = 0xFFFF; img->imagedata16[zi][yi][4 * xi + 1] = 0xFFFF; img->imagedata16[zi][yi][4 * xi + 2] = 0xFFFF; img->imagedata16[zi][yi][4 * xi + 3] = 0xFFFF; } else { // apply swizzle if (perform_srgb_transform) { float r = fptr[0]; float g = fptr[1]; float b = fptr[2]; if (r <= 0.0031308f) r = r * 12.92f; else if (r <= 1) r = 1.055f * pow(r, (1.0f / 2.4f)) - 0.055f; if (g <= 0.0031308f) g = g * 12.92f; else if (g <= 1) g = 1.055f * pow(g, (1.0f / 2.4f)) - 0.055f; if (b <= 0.0031308f) b = b * 12.92f; else if (b <= 1) b = 1.055f * pow(b, (1.0f / 2.4f)) - 0.055f; data[0] = r; data[1] = g; data[2] = b; } else { data[0] = fptr[0]; data[1] = fptr[1]; data[2] = fptr[2]; } data[3] = fptr[3]; float xN = (data[0] * 2.0f) - 1.0f; float yN = (data[3] * 2.0f) - 1.0f; float zN = 1.0f - xN * xN - yN * yN; if (zN < 0.0f) zN = 0.0f; data[6] = (sqrt(zN) * 0.5f) + 0.5f; int r = float_to_sf16(data[swz.r], SF_NEARESTEVEN); int g = float_to_sf16(data[swz.g], SF_NEARESTEVEN); int b = float_to_sf16(data[swz.b], SF_NEARESTEVEN); int a = float_to_sf16(data[swz.a], SF_NEARESTEVEN); img->imagedata16[zi][yi][4 * xi] = r; img->imagedata16[zi][yi][4 * xi + 1] = g; img->imagedata16[zi][yi][4 * xi + 2] = b; img->imagedata16[zi][yi][4 * xi + 3] = a; } } fptr += 4; nptr++; } } } } } /* For an imageblock, update its flags. The updating is done based on work_data, not orig_data. */ void update_imageblock_flags(imageblock * pb, int xdim, int ydim, int zdim) { int i; float red_min = 1e38f, red_max = -1e38f; float green_min = 1e38f, green_max = -1e38f; float blue_min = 1e38f, blue_max = -1e38f; float alpha_min = 1e38f, alpha_max = -1e38f; int texels_per_block = xdim * ydim * zdim; int grayscale = 1; for (i = 0; i < texels_per_block; i++) { float red = pb->work_data[4 * i]; float green = pb->work_data[4 * i + 1]; float blue = pb->work_data[4 * i + 2]; float alpha = pb->work_data[4 * i + 3]; if (red < red_min) red_min = red; if (red > red_max) red_max = red; if (green < green_min) green_min = green; if (green > green_max) green_max = green; if (blue < blue_min) blue_min = blue; if (blue > blue_max) blue_max = blue; if (alpha < alpha_min) alpha_min = alpha; if (alpha > alpha_max) alpha_max = alpha; if (grayscale == 1 && (red != green || red != blue)) grayscale = 0; } pb->red_min = red_min; pb->red_max = red_max; pb->green_min = green_min; pb->green_max = green_max; pb->blue_min = blue_min; pb->blue_max = blue_max; pb->alpha_min = alpha_min; pb->alpha_max = alpha_max; pb->grayscale = grayscale; } // Helper functions for various error-metric calculations double clampx(double p) { if (astc_isnan(p) || p < 0.0f) p = 0.0f; else if (p > 65504.0f) p = 65504.0f; return p; } // logarithm-function, linearized from 2^-14. double xlog2(double p) { if (p >= 0.00006103515625) return log(p) * 1.44269504088896340735; // log(x)/log(2) else return -15.44269504088896340735 + p * 23637.11554992477646609062; } // mPSNR tone-mapping operator double mpsnr_operator(double v, int fstop) { int64_t vl = 1LL << (fstop + 32); double vl2 = (double)vl * (1.0 / 4294967296.0); v *= vl2; v = pow(v, (1.0 / 2.2)); v *= 255.0f; if (astc_isnan(v) || v < 0.0f) v = 0.0f; else if (v > 255.0f) v = 255.0f; return v; } double mpsnr_sumdiff(double v1, double v2, int low_fstop, int high_fstop) { int i; double summa = 0.0; for (i = low_fstop; i <= high_fstop; i++) { double mv1 = mpsnr_operator(v1, i); double mv2 = mpsnr_operator(v2, i); double mdiff = mv1 - mv2; summa += mdiff * mdiff; } return summa; } // Compute PSNR and other error metrics between input and output image void compute_error_metrics(int compute_hdr_error_metrics, int input_components, const astc_codec_image * img1, const astc_codec_image * img2, int low_fstop, int high_fstop, int psnrmode) { int x, y, z; static int channelmasks[5] = { 0x00, 0x07, 0x0C, 0x07, 0x0F }; int channelmask; channelmask = channelmasks[input_components]; double4 errorsum = double4(0, 0, 0, 0); double4 alpha_scaled_errorsum = double4(0, 0, 0, 0); double4 log_errorsum = double4(0, 0, 0, 0); double4 mpsnr_errorsum = double4(0, 0, 0, 0); int xsize = MIN(img1->xsize, img2->xsize); int ysize = MIN(img1->ysize, img2->ysize); int zsize = MIN(img1->zsize, img2->zsize); if (img1->xsize != img2->xsize || img1->ysize != img2->ysize || img1->zsize != img2->zsize) { printf("Warning: comparing images of different size:\n" "Image 1: %dx%dx%d\n" "Image 2: %dx%dx%d\n" "Only intersection region will be compared.\n", img1->xsize, img1->ysize, img1->zsize, img2->xsize, img2->ysize, img2->zsize); } if (compute_hdr_error_metrics) { printf("Computing error metrics ... "); fflush(stdout); } int img1pad = img1->padding; int img2pad = img2->padding; double rgb_peak = 0.0f; for (z = 0; z < zsize; z++) { for (y = 0; y < ysize; y++) { int ze1 = (img1->zsize == 1) ? z : z + img1pad; int ze2 = (img2->zsize == 1) ? z : z + img2pad; int ye1 = y + img1pad; int ye2 = y + img2pad; for (x = 0; x < xsize; x++) { double4 input_color1; double4 input_color2; int xe1 = 4 * x + 4 * img1pad; int xe2 = 4 * x + 4 * img2pad; if (img1->imagedata8) { input_color1 = double4(img1->imagedata8[ze1][ye1][xe1] * (1.0f / 255.0f), img1->imagedata8[ze1][ye1][xe1 + 1] * (1.0f / 255.0f), img1->imagedata8[ze1][ye1][xe1 + 2] * (1.0f / 255.0f), img1->imagedata8[ze1][ye1][xe1 + 3] * (1.0f / 255.0f)); } else { input_color1 = double4(clampx(sf16_to_float(img1->imagedata16[ze1][ye1][xe1])), clampx(sf16_to_float(img1->imagedata16[ze1][ye1][xe1 + 1])), clampx(sf16_to_float(img1->imagedata16[ze1][ye1][xe1 + 2])), clampx(sf16_to_float(img1->imagedata16[ze1][ye1][xe1 + 3]))); } if (img2->imagedata8) { input_color2 = double4(img2->imagedata8[ze2][ye2][xe2] * (1.0f / 255.0f), img2->imagedata8[ze2][ye2][xe2 + 1] * (1.0f / 255.0f), img2->imagedata8[ze2][ye2][xe2 + 2] * (1.0f / 255.0f), img2->imagedata8[ze2][ye2][xe2 + 3] * (1.0f / 255.0f)); } else { input_color2 = double4(clampx(sf16_to_float(img2->imagedata16[ze2][ye2][xe2])), clampx(sf16_to_float(img2->imagedata16[ze2][ye2][xe2 + 1])), clampx(sf16_to_float(img2->imagedata16[ze2][ye2][xe2 + 2])), clampx(sf16_to_float(img2->imagedata16[ze2][ye2][xe2 + 3]))); } rgb_peak = MAX(MAX(input_color1.x, input_color1.y), MAX(input_color1.z, rgb_peak)); double4 diffcolor = input_color1 - input_color2; errorsum = errorsum + diffcolor * diffcolor; double4 alpha_scaled_diffcolor = double4(diffcolor.xyz * input_color1.w, diffcolor.w); alpha_scaled_errorsum = alpha_scaled_errorsum + alpha_scaled_diffcolor * alpha_scaled_diffcolor; if (compute_hdr_error_metrics) { double4 log_input_color1 = double4(xlog2(input_color1.x), xlog2(input_color1.y), xlog2(input_color1.z), xlog2(input_color1.w)); double4 log_input_color2 = double4(xlog2(input_color2.x), xlog2(input_color2.y), xlog2(input_color2.z), xlog2(input_color2.w)); double4 log_diffcolor = log_input_color1 - log_input_color2; log_errorsum = log_errorsum + log_diffcolor * log_diffcolor; double4 mpsnr_error = double4(mpsnr_sumdiff(input_color1.x, input_color2.x, low_fstop, high_fstop), mpsnr_sumdiff(input_color1.y, input_color2.y, low_fstop, high_fstop), mpsnr_sumdiff(input_color1.z, input_color2.z, low_fstop, high_fstop), mpsnr_sumdiff(input_color1.w, input_color2.w, low_fstop, high_fstop)); mpsnr_errorsum = mpsnr_errorsum + mpsnr_error; } } } } if (compute_hdr_error_metrics) { printf("done\n"); } double pixels = xsize * ysize * zsize; double num = 0.0; double alpha_num = 0.0; double log_num = 0.0; double mpsnr_num = 0.0; double samples = 0.0; if (channelmask & 1) { num += errorsum.x; alpha_num += alpha_scaled_errorsum.x; log_num += log_errorsum.x; mpsnr_num += mpsnr_errorsum.x; samples += pixels; } if (channelmask & 2) { num += errorsum.y; alpha_num += alpha_scaled_errorsum.y; log_num += log_errorsum.y; mpsnr_num += mpsnr_errorsum.y; samples += pixels; } if (channelmask & 4) { num += errorsum.z; alpha_num += alpha_scaled_errorsum.z; log_num += log_errorsum.z; mpsnr_num += mpsnr_errorsum.z; samples += pixels; } if (channelmask & 8) { num += errorsum.w; alpha_num += alpha_scaled_errorsum.w; /* log_num += log_errorsum.w; mpsnr_num += mpsnr_errorsum.w; */ samples += pixels; } double denom = samples; double mpsnr_denom = pixels * 3.0 * (high_fstop - low_fstop + 1) * 255.0f * 255.0f; double psnr; if (num == 0) psnr = 999.0; else psnr = 10.0 * log10((double)denom / (double)num); double rgb_psnr = psnr; if(psnrmode == 1) { if (channelmask & 8) { printf("PSNR (LDR-RGBA): %.6lf dB\n", psnr); double alpha_psnr; if (alpha_num == 0) alpha_psnr = 999.0; else alpha_psnr = 10.0 * log10((double)denom / (double)alpha_num); printf("Alpha-Weighted PSNR: %.6lf dB\n", alpha_psnr); double rgb_num = errorsum.x + errorsum.y + errorsum.z; if (rgb_num == 0) rgb_psnr = 999.0; else rgb_psnr = 10.0 * log10((double)pixels * 3 / (double)rgb_num); printf("PSNR (LDR-RGB): %.6lf dB\n", rgb_psnr); } else printf("PSNR (LDR-RGB): %.6lf dB\n", psnr); if (compute_hdr_error_metrics) { printf("Color peak value: %f\n", rgb_peak); printf("PSNR (RGB normalized to peak): %f dB\n", rgb_psnr + 20.0 * log10(rgb_peak)); double mpsnr; if (mpsnr_num == 0) mpsnr = 999.0; else mpsnr = 10.0 * log10((double)mpsnr_denom / (double)mpsnr_num); printf("mPSNR (RGB) [fstops: %+d to %+d] : %.6lf dB\n", low_fstop, high_fstop, mpsnr); double logrmse = sqrt((double)log_num / (double)pixels); printf("LogRMSE (RGB): %.6lf\n", logrmse); } } } /* Main image loader function. We have specialized loaders for DDS, KTX and HTGA; for other formats, we use stb_image. This image loader will choose one based on filename. */ astc_codec_image *astc_codec_load_image(const char *input_filename, int padding, int *load_result) { #define LOAD_HTGA 0 #define LOAD_KTX 1 #define LOAD_DDS 2 #define LOAD_STB_IMAGE 3 // check the ending of the input filename int load_fileformat = LOAD_STB_IMAGE; size_t filename_len = strlen(input_filename); const char *eptr = input_filename + filename_len - 5; if (eptr > input_filename && (strcmp(eptr, ".htga") == 0 || strcmp(eptr, ".HTGA") == 0)) load_fileformat = LOAD_HTGA; eptr = input_filename + filename_len - 4; if (eptr > input_filename && (strcmp(eptr, ".ktx") == 0 || strcmp(eptr, ".KTX") == 0)) load_fileformat = LOAD_KTX; if (eptr > input_filename && (strcmp(eptr, ".dds") == 0 || strcmp(eptr, ".DDS") == 0)) load_fileformat = LOAD_DDS; // OpenEXR support: call exr_to_htga to convert from EXR to HTGA. char htga_load_filename[300]; int load_exr = 0; if (eptr > input_filename && (strcmp(eptr, ".exr") == 0 || strcmp(eptr, ".EXR") == 0)) { // don't support filenames longer than 250 characters; this way, we // cannot get a buffer overflow from the sprintfs below. if (filename_len > 250) { *load_result = -1; return NULL; } char exr_to_htga_command[550]; sprintf(htga_load_filename, "%s.htga", input_filename); sprintf(exr_to_htga_command, "exr_to_htga -q %s %s", input_filename, htga_load_filename); //int retval = system(exr_to_htga_command); int retval = 0; if (retval != 0) { printf("Failed to run exr_to_htga to convert input .exr file.\n"); exit(1); } input_filename = htga_load_filename; load_fileformat = LOAD_HTGA; load_exr = 1; } astc_codec_image *input_image; switch (load_fileformat) { case LOAD_KTX: input_image = load_ktx_uncompressed_image(input_filename, padding, load_result); break; case LOAD_DDS: input_image = load_dds_uncompressed_image(input_filename, padding, load_result); break; case LOAD_HTGA: input_image = load_tga_image(input_filename, padding, load_result); break; case LOAD_STB_IMAGE: input_image = load_image_with_stb(input_filename, padding, load_result); break; default: ASTC_CODEC_INTERNAL_ERROR(); } if (load_exr) astc_codec_unlink(htga_load_filename); return input_image; } int get_output_filename_enforced_bitness(const char *output_filename) { if (output_filename == NULL) return -1; size_t filename_len = strlen(output_filename); const char *eptr = output_filename + filename_len - 5; if (eptr > output_filename && (strcmp(eptr, ".htga") == 0 || strcmp(eptr, ".HTGA") == 0)) { return 16; } eptr = output_filename + filename_len - 4; if (eptr > output_filename && (strcmp(eptr, ".tga") == 0 || strcmp(eptr, ".TGA") == 0)) { return 8; } if (eptr > output_filename && (strcmp(eptr, ".exr") == 0 || strcmp(eptr, ".EXR") == 0)) { return 16; } // file formats that don't match any of the templates above are capable of accommodating // both 8-bit and 16-bit data (DDS, KTX) return -1; } int astc_codec_store_image(const astc_codec_image * output_image, const char *output_filename, int bitness, const char **format_string) { #define STORE_TGA 0 #define STORE_HTGA 1 #define STORE_KTX 2 #define STORE_DDS 3 #define STORE_EXR 4 size_t filename_len = strlen(output_filename); int store_fileformat = STORE_TGA; const char *eptr = output_filename + filename_len - 5; if (eptr > output_filename && (strcmp(eptr, ".htga") == 0 || strcmp(eptr, ".HTGA") == 0)) { store_fileformat = STORE_HTGA; } eptr = output_filename + filename_len - 4; if (eptr > output_filename && (strcmp(eptr, ".ktx") == 0 || strcmp(eptr, ".KTX") == 0)) { store_fileformat = STORE_KTX; } if (eptr > output_filename && (strcmp(eptr, ".dds") == 0 || strcmp(eptr, ".DDS") == 0)) { store_fileformat = STORE_DDS; } if (eptr > output_filename && (strcmp(eptr, ".exr") == 0 || strcmp(eptr, ".EXR") == 0)) { store_fileformat = STORE_EXR; } if (store_fileformat == STORE_TGA && bitness == 16) store_fileformat = STORE_HTGA; // guard against OpenEXR files with too-long names if (store_fileformat == STORE_EXR && filename_len > 250) { *format_string = "EXR"; return -1; } char htga_output_filename[300]; char htga_output_command[550]; int system_retval; int store_result = -1; switch (store_fileformat) { case STORE_TGA: case STORE_HTGA: *format_string = bitness == 16 ? "HTGA" : "TGA"; store_result = store_tga_image(output_image, output_filename, bitness); break; case STORE_KTX: *format_string = "KTX"; store_result = store_ktx_uncompressed_image(output_image, output_filename, bitness); break; case STORE_DDS: *format_string = "DDS"; store_result = store_dds_uncompressed_image(output_image, output_filename, bitness); break; case STORE_EXR: *format_string = "EXR"; sprintf(htga_output_filename, "%s.htga", output_filename); store_result = store_tga_image(output_image, htga_output_filename, 16); sprintf(htga_output_command, "exr_to_htga -e %s %s", htga_output_filename, output_filename); //system_retval = system(htga_output_command); astc_codec_unlink(htga_output_filename); if (system_retval != 0) store_result = -99; break; default: ASTC_CODEC_INTERNAL_ERROR(); }; return store_result; }