TextureTaffy/Source/ispc_texcomp/ispc_texcomp.cpp

561 lines
15 KiB
C++
Raw Normal View History

2023-04-28 15:47:06 +03:00
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-2019, Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
2023-04-28 19:09:20 +03:00
#if defined(_MSC_VER)
#include <intrin.h>
#endif
2023-04-28 15:47:06 +03:00
#include "ispc_texcomp.h"
#include "kernel_ispc.h"
#include <memory.h> // memcpy
// Prototypes for the "_sse4"-suffixed kernel entry points. They live in
// namespace ispc so the wrappers in this file can call them as ispc::<name>,
// and they are given C linkage so the names are not mangled.
// NOTE(review): presumably kernel_ispc.h only declares the unsuffixed entry
// points, which is why these are spelled out by hand — confirm.
namespace ispc {
extern "C" {
    // ISA query.
    int32_t ISPCIsa_ispc_sse4();

    // Kernels that take only a source surface and an output buffer.
    void CompressBlocksBC1_ispc_sse4(const rgba_surface* src, uint8_t* dst);
    void CompressBlocksBC3_ispc_sse4(const rgba_surface* src, uint8_t* dst);
    void CompressBlocksBC4_ispc_sse4(const rgba_surface* src, uint8_t* dst);
    void CompressBlocksBC5_ispc_sse4(const rgba_surface* src, uint8_t* dst);

    // Kernels that additionally take a per-format settings structure.
    void CompressBlocksBC6H_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings);
    void CompressBlocksBC7_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings);
    void CompressBlocksETC1_ispc_sse4(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings);
    void CompressBlocksASTC_ispc_sse4(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings);
} // extern "C"
} // namespace ispc
// Selects the "_sse4" kernel variants in the wrappers of this file when true.
// Stays false until ISPCInit() has run.
static bool isAmd = false;

// Queries CPUID leaf 0x80000001 and stores bit 6 of the returned ECX value in
// isAmd.
// NOTE(review): per AMD's CPUID documentation that bit is the SSE4A feature
// flag, so it appears to be used as a stand-in for "running on an AMD CPU" —
// confirm against the vendor manuals.
void ISPCInit()
{
    const int kEcxBit6 = 1 << 6;

#if defined(_MSC_VER)
    // MSVC intrinsic: fills regs with EAX, EBX, ECX, EDX in that order.
    int regs[4];
    __cpuid(regs, 0x80000001);
    const bool flagged = (regs[2] & kEcxBit6) != 0;
#else
    // EAX carries the leaf number in and the first result register out.
    unsigned int regA = 0x80000001;
    unsigned int regB = 0;
    unsigned int regC = 0;
    unsigned int regD = 0;
    asm volatile("cpuid"
                 : "=a"(regA), "=b"(regB), "=c"(regC), "=d"(regD)
                 : "a"(regA));
    const bool flagged = (regC & kEcxBit6) != 0;
#endif

    isAmd = flagged;
}
// BC7 preset "ultrafast": sets channels to 3 and fills in the mode switches,
// fast-skip thresholds and refinement counts listed below.
// refineIterations[7] is not written by this preset.
void GetProfile_ultrafast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 3;

    // Mode-group switches; slots are labelled mode02, mode13, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = false;
    cfg.mode_selection[2] = false;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 3;
    cfg.fastSkipTreshold_mode3 = 1;
    cfg.fastSkipTreshold_mode7 = 0;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = 0;

    // Refinement iteration counts, slots 0..6.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 2;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 2;
    cfg.refineIterations[5] = 2;
    cfg.refineIterations[6] = 1;
}
// BC7 preset "veryfast": sets channels to 3 and fills in the mode switches,
// fast-skip thresholds and refinement counts listed below.
// refineIterations[7] is not written by this preset.
void GetProfile_veryfast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 3;

    // Mode-group switches; slots are labelled mode02, mode13, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = true;
    cfg.mode_selection[2] = false;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 3;
    cfg.fastSkipTreshold_mode3 = 1;
    cfg.fastSkipTreshold_mode7 = 0;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = 0;

    // Refinement iteration counts, slots 0..6.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 2;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 2;
    cfg.refineIterations[5] = 2;
    cfg.refineIterations[6] = 1;
}
// BC7 preset "fast": sets channels to 3 and fills in the mode switches,
// fast-skip thresholds and refinement counts listed below.
// refineIterations[7] is not written by this preset.
void GetProfile_fast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 3;

    // Mode-group switches; slots are labelled mode02, mode13, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = true;
    cfg.mode_selection[2] = false;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 12;
    cfg.fastSkipTreshold_mode3 = 4;
    cfg.fastSkipTreshold_mode7 = 0;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = 0;

    // Refinement iteration counts, slots 0..6.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 2;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 2;
    cfg.refineIterations[5] = 2;
    cfg.refineIterations[6] = 2;
}
// BC7 preset "basic": sets channels to 3, switches every mode group on and
// uses two refinement iterations for slots 0..6 and for the mode45 channel.
// refineIterations[7] is not written by this preset.
void GetProfile_basic(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 3;

    // All four mode groups (mode02, mode13, mode45, mode6) are enabled.
    for (int group = 0; group < 4; ++group)
        cfg.mode_selection[group] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 8 + 4;
    cfg.fastSkipTreshold_mode3 = 8;
    cfg.fastSkipTreshold_mode7 = 0;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = 2;

    // The same refinement count for every slot this preset touches.
    for (int slot = 0; slot <= 6; ++slot)
        cfg.refineIterations[slot] = 2;
}
// BC7 preset "slow": sets channels to 3, switches every mode group on, does
// not skip mode 2 and raises each refinement count by two over the base of 2.
// refineIterations[7] is not written by this preset.
void GetProfile_slow(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    const int baseRefine = 2;
    const int extraRefine = 2;
    const int refine = baseRefine + extraRefine;

    cfg.channels = 3;

    // All four mode groups (mode02, mode13, mode45, mode6) are enabled.
    for (int group = 0; group < 4; ++group)
        cfg.mode_selection[group] = true;
    cfg.skip_mode2 = false;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 64;
    cfg.fastSkipTreshold_mode3 = 64;
    cfg.fastSkipTreshold_mode7 = 0;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = refine;

    // The same refinement count for every slot this preset touches.
    for (int slot = 0; slot <= 6; ++slot)
        cfg.refineIterations[slot] = refine;
}
// BC7 preset "alpha_ultrafast": sets channels to 4 and fills in the mode
// switches, fast-skip thresholds and refinement counts (slots 0..7) below.
void GetProfile_alpha_ultrafast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 4;

    // Mode-group switches; slots are labelled mode02, mode137, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = false;
    cfg.mode_selection[2] = true;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 0;
    cfg.fastSkipTreshold_mode3 = 0;
    cfg.fastSkipTreshold_mode7 = 4;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 3;
    cfg.refineIterations_channel = 1;

    // Refinement iteration counts, slots 0..7.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 1;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 1;
    cfg.refineIterations[5] = 1;
    cfg.refineIterations[6] = 2;
    cfg.refineIterations[7] = 2;
}
// BC7 preset "alpha_veryfast": sets channels to 4 and fills in the mode
// switches, fast-skip thresholds and refinement counts (slots 0..7) below.
void GetProfile_alpha_veryfast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 4;

    // Mode-group switches; slots are labelled mode02, mode137, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = true;
    cfg.mode_selection[2] = true;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 0;
    cfg.fastSkipTreshold_mode3 = 0;
    cfg.fastSkipTreshold_mode7 = 4;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 3;
    cfg.refineIterations_channel = 2;

    // Refinement iteration counts, slots 0..7.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 1;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 2;
    cfg.refineIterations[5] = 2;
    cfg.refineIterations[6] = 2;
    cfg.refineIterations[7] = 2;
}
// BC7 preset "alpha_fast": sets channels to 4 and fills in the mode switches,
// fast-skip thresholds and refinement counts (slots 0..7) below.
void GetProfile_alpha_fast(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 4;

    // Mode-group switches; slots are labelled mode02, mode137, mode45, mode6.
    cfg.mode_selection[0] = false;
    cfg.mode_selection[1] = true;
    cfg.mode_selection[2] = true;
    cfg.mode_selection[3] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 4;
    cfg.fastSkipTreshold_mode3 = 4;
    cfg.fastSkipTreshold_mode7 = 8;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 3;
    cfg.refineIterations_channel = 2;

    // Refinement iteration counts, slots 0..7.
    cfg.refineIterations[0] = 2;
    cfg.refineIterations[1] = 1;
    cfg.refineIterations[2] = 2;
    cfg.refineIterations[3] = 1;
    cfg.refineIterations[4] = 2;
    cfg.refineIterations[5] = 2;
    cfg.refineIterations[6] = 2;
    cfg.refineIterations[7] = 2;
}
// BC7 preset "alpha_basic": sets channels to 4, switches every mode group on
// and uses two refinement iterations for all slots 0..7 and for the mode45
// channel.
void GetProfile_alpha_basic(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    cfg.channels = 4;

    // All four mode groups (mode02, mode137, mode45, mode6) are enabled.
    for (int group = 0; group < 4; ++group)
        cfg.mode_selection[group] = true;
    cfg.skip_mode2 = true;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 8 + 4;
    cfg.fastSkipTreshold_mode3 = 8;
    cfg.fastSkipTreshold_mode7 = 8;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = 2;

    // The same refinement count for every slot.
    for (int slot = 0; slot <= 7; ++slot)
        cfg.refineIterations[slot] = 2;
}
// BC7 preset "alpha_slow": sets channels to 4, switches every mode group on,
// does not skip mode 2 and raises each refinement count (slots 0..7 and the
// mode45 channel) by two over the base of 2.
void GetProfile_alpha_slow(bc7_enc_settings* settings)
{
    bc7_enc_settings& cfg = *settings;

    const int baseRefine = 2;
    const int extraRefine = 2;
    const int refine = baseRefine + extraRefine;

    cfg.channels = 4;

    // All four mode groups (mode02, mode137, mode45, mode6) are enabled.
    for (int group = 0; group < 4; ++group)
        cfg.mode_selection[group] = true;
    cfg.skip_mode2 = false;

    // Fast-skip thresholds.
    cfg.fastSkipTreshold_mode1 = 64;
    cfg.fastSkipTreshold_mode3 = 64;
    cfg.fastSkipTreshold_mode7 = 64;

    // mode45 channel parameters.
    cfg.mode45_channel0 = 0;
    cfg.refineIterations_channel = refine;

    // The same refinement count for every slot.
    for (int slot = 0; slot <= 7; ++slot)
        cfg.refineIterations[slot] = refine;
}
// BC6H preset "veryfast": fast mode on, slow mode off, zero fast-skip
// threshold and no refinement iterations for either the 1p or 2p setting.
void GetProfile_bc6h_veryfast(bc6h_enc_settings* settings)
{
    bc6h_enc_settings& cfg = *settings;

    cfg.fast_mode = true;
    cfg.slow_mode = false;

    cfg.fastSkipTreshold = 0;

    cfg.refineIterations_1p = 0;
    cfg.refineIterations_2p = 0;
}
// BC6H preset "fast": fast mode on, slow mode off, fast-skip threshold 2,
// no 1p refinement and a single 2p refinement iteration.
void GetProfile_bc6h_fast(bc6h_enc_settings* settings)
{
    bc6h_enc_settings& cfg = *settings;

    cfg.fast_mode = true;
    cfg.slow_mode = false;

    cfg.fastSkipTreshold = 2;

    cfg.refineIterations_1p = 0;
    cfg.refineIterations_2p = 1;
}
// BC6H preset "basic": neither fast nor slow mode, fast-skip threshold 4 and
// two refinement iterations for both the 1p and 2p settings.
void GetProfile_bc6h_basic(bc6h_enc_settings* settings)
{
    bc6h_enc_settings& cfg = *settings;

    cfg.fast_mode = false;
    cfg.slow_mode = false;

    cfg.fastSkipTreshold = 4;

    cfg.refineIterations_1p = 2;
    cfg.refineIterations_2p = 2;
}
// BC6H preset "slow": slow mode on, fast mode off, fast-skip threshold 10 and
// two refinement iterations for both the 1p and 2p settings.
void GetProfile_bc6h_slow(bc6h_enc_settings* settings)
{
    bc6h_enc_settings& cfg = *settings;

    cfg.fast_mode = false;
    cfg.slow_mode = true;

    cfg.fastSkipTreshold = 10;

    cfg.refineIterations_1p = 2;
    cfg.refineIterations_2p = 2;
}
// BC6H preset "veryslow": slow mode on, fast mode off, fast-skip threshold 32
// and two refinement iterations for both the 1p and 2p settings.
void GetProfile_bc6h_veryslow(bc6h_enc_settings* settings)
{
    bc6h_enc_settings& cfg = *settings;

    cfg.fast_mode = false;
    cfg.slow_mode = true;

    cfg.fastSkipTreshold = 32;

    cfg.refineIterations_1p = 2;
    cfg.refineIterations_2p = 2;
}
// ETC preset "slow": the only field it writes is the fast-skip threshold,
// which is set to 6.
void GetProfile_etc_slow(etc_enc_settings* settings)
{
    etc_enc_settings& cfg = *settings;
    cfg.fastSkipTreshold = 6;
}
// Fills dst_slice (dst_slice->width x dst_slice->height pixels) from the window
// of src_tex whose top-left corner is (start_x, start_y). Coordinates that fall
// past the right or bottom edge of src_tex are clamped to the last column/row,
// so the edge pixels are repeated into the part of the slice that overhangs
// the texture.
//
// bpp is the pixel size in bits; whole bytes per pixel are taken as bpp >> 3.
//
// If dst_slice->ptr already points at pixel (start_x, start_y) of src_tex the
// slice aliases the texture, and pixels lying inside src_tex are skipped
// because they are already in place.
//
// Byte offsets are computed in 64 bits: "stride * row" evaluated in int is a
// signed overflow (undefined behaviour) once a surface reaches 2 GiB.
// Nothing is copied when the pixel size rounds to zero bytes or less, or when
// src_tex has no pixels (there is no edge pixel to replicate in that case).
void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* src_tex, int start_x, int start_y, int bpp)
{
    const int bytes_per_pixel = bpp >> 3;
    if (bytes_per_pixel <= 0) return;
    if (src_tex->width <= 0 || src_tex->height <= 0) return;

    const int64_t pixel_bytes = bytes_per_pixel;
    const int64_t src_stride = src_tex->stride;
    const int64_t dst_stride = dst_slice->stride;
    const int last_x = src_tex->width - 1;
    const int last_y = src_tex->height - 1;

    const bool aliasing =
        &src_tex->ptr[src_stride * start_y + pixel_bytes * start_x] == dst_slice->ptr;

    for (int y = 0; y < dst_slice->height; y++)
    {
        // Row-dependent values are the same for every pixel of the row.
        const int yy = start_y + y;
        const bool row_inside = yy < src_tex->height;
        const int64_t src_row_offset = src_stride * (row_inside ? yy : last_y);
        const int64_t dst_row_offset = dst_stride * y;

        for (int x = 0; x < dst_slice->width; x++)
        {
            const int xx = start_x + x;
            const bool col_inside = xx < src_tex->width;

            // In-bounds pixels of an aliased slice already hold the right data.
            if (aliasing && col_inside && row_inside) continue;

            const int src_x = col_inside ? xx : last_x;

            void* dst = &dst_slice->ptr[dst_row_offset + pixel_bytes * x];
            const void* src = &src_tex->ptr[src_row_offset + pixel_bytes * src_x];
            memcpy(dst, src, static_cast<size_t>(bytes_per_pixel));
        }
    }
}
void CompressBlocksBC1(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC1_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC1_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC3(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC3_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC3_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC4_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC4_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC5_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC5_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksBC7_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
} else {
ispc::CompressBlocksBC7_ispc((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
}
}
void CompressBlocksBC6H(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksBC6H_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
} else {
ispc::CompressBlocksBC6H_ispc((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
}
}
void CompressBlocksETC1(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksETC1_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
} else {
ispc::CompressBlocksETC1_ispc((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
}
}
// Returns the value reported by the ISPC ISA query: the "_sse4" variant when
// isAmd is set, the default variant otherwise.
int ISPCIsa()
{
    if (!isAmd) {
        return ispc::ISPCIsa_ispc();
    }
    return ispc::ISPCIsa_ispc_sse4();
}