558 lines
No EOL
15 KiB
C++
558 lines
No EOL
15 KiB
C++
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-2019, Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(_MSC_VER)
|
|
#include <intrin.h>
|
|
#else
|
|
#include <cpuid.h>
|
|
#endif
|
|
|
|
#include "ispc_texcomp.h"
|
|
#include "kernel_ispc.h"
|
|
#include <memory.h> // memcpy
|
|
|
|
namespace ispc {
|
|
extern "C" {
|
|
extern int32_t ISPCIsa_ispc_sse4();
|
|
extern "C" void CompressBlocksBC1_ispc_sse4(const rgba_surface* src, uint8_t* dst);
|
|
extern "C" void CompressBlocksBC3_ispc_sse4(const rgba_surface* src, uint8_t* dst);
|
|
extern "C" void CompressBlocksBC4_ispc_sse4(const rgba_surface* src, uint8_t* dst);
|
|
extern "C" void CompressBlocksBC5_ispc_sse4(const rgba_surface* src, uint8_t* dst);
|
|
extern "C" void CompressBlocksBC6H_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings);
|
|
extern "C" void CompressBlocksBC7_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings);
|
|
extern "C" void CompressBlocksETC1_ispc_sse4(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings);
|
|
extern "C" void CompressBlocksASTC_ispc_sse4(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings);
|
|
}
|
|
}
|
|
|
|
// Dispatch flag written by ISPCInit(): true when ECX bit 6 of CPUID leaf
// 0x80000001 is set. The public wrappers in this file call the `_ispc_sse4`
// entry points when it is true and the unsuffixed `_ispc` ones otherwise.
// Remains false if ISPCInit() is never called.
static bool isAmd = false;
|
|
|
|
void ISPCInit()
|
|
{
|
|
#if defined(_MSC_VER)
|
|
{
|
|
int CPUInfo[4];
|
|
__cpuid(CPUInfo, 0x80000001);
|
|
isAmd = (CPUInfo[2] & (1 << 6)) != 0;
|
|
}
|
|
#else
|
|
{
|
|
unsigned int eax, ebx, ecx, edx;
|
|
__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
|
|
isAmd = (ecx & (1 << 6)) != 0;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Fills *settings with the BC7 "ultrafast" profile (channels = 3).
// refineIterations[7] is not written by this profile.
void GetProfile_ultrafast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..6], indexed by position.
    static const int kRefine[7] = { 2, 2, 2, 1, 2, 2, 1 };

    settings->channels = 3;

    // Group switches: [0] modes 0/2, [1] modes 1/3, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = false;
    settings->mode_selection[2] = false;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 3;
    settings->fastSkipTreshold_mode3 = 1;
    settings->fastSkipTreshold_mode7 = 0;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = 0;

    for (int i = 0; i < 7; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "veryfast" profile (channels = 3).
// refineIterations[7] is not written by this profile.
void GetProfile_veryfast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..6], indexed by position.
    static const int kRefine[7] = { 2, 2, 2, 1, 2, 2, 1 };

    settings->channels = 3;

    // Group switches: [0] modes 0/2, [1] modes 1/3, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = true;
    settings->mode_selection[2] = false;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 3;
    settings->fastSkipTreshold_mode3 = 1;
    settings->fastSkipTreshold_mode7 = 0;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = 0;

    for (int i = 0; i < 7; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "fast" profile (channels = 3).
// refineIterations[7] is not written by this profile.
void GetProfile_fast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..6], indexed by position.
    static const int kRefine[7] = { 2, 2, 2, 1, 2, 2, 2 };

    settings->channels = 3;

    // Group switches: [0] modes 0/2, [1] modes 1/3, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = true;
    settings->mode_selection[2] = false;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 12;
    settings->fastSkipTreshold_mode3 = 4;
    settings->fastSkipTreshold_mode7 = 0;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = 0;

    for (int i = 0; i < 7; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "basic" profile (channels = 3).
// refineIterations[7] is not written by this profile.
void GetProfile_basic(bc7_enc_settings* settings)
{
    // Every entry of refineIterations[0..6] receives the same count.
    const int kIterations = 2;

    settings->channels = 3;

    // Group switches: [0] modes 0/2, [1] modes 1/3, [2] modes 4/5, [3] mode 6.
    for (int group = 0; group < 4; group++)
    {
        settings->mode_selection[group] = true;
    }
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 8 + 4;
    settings->fastSkipTreshold_mode3 = 8;
    settings->fastSkipTreshold_mode7 = 0;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = 2;

    for (int i = 0; i < 7; i++)
    {
        settings->refineIterations[i] = kIterations;
    }
}
|
|
|
|
// Fills *settings with the BC7 "slow" profile (channels = 3).
// All refinement counts are the base value of 2 plus 2 extra iterations.
// refineIterations[7] is not written by this profile.
void GetProfile_slow(bc7_enc_settings* settings)
{
    const int baseIterations = 2;
    const int extraIterations = 2;
    const int iterations = baseIterations + extraIterations;

    settings->channels = 3;

    // Group switches: [0] modes 0/2, [1] modes 1/3, [2] modes 4/5, [3] mode 6.
    for (int group = 0; group < 4; group++)
    {
        settings->mode_selection[group] = true;
    }
    settings->skip_mode2 = false;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 64;
    settings->fastSkipTreshold_mode3 = 64;
    settings->fastSkipTreshold_mode7 = 0;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = iterations;

    for (int i = 0; i < 7; i++)
    {
        settings->refineIterations[i] = iterations;
    }
}
|
|
|
|
// Fills *settings with the BC7 "alpha ultrafast" profile (channels = 4).
void GetProfile_alpha_ultrafast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..7], indexed by position.
    static const int kRefine[8] = { 2, 1, 2, 1, 1, 1, 2, 2 };

    settings->channels = 4;

    // Group switches: [0] modes 0/2, [1] modes 1/3/7, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = false;
    settings->mode_selection[2] = true;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 0;
    settings->fastSkipTreshold_mode3 = 0;
    settings->fastSkipTreshold_mode7 = 4;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 3;
    settings->refineIterations_channel = 1;

    for (int i = 0; i < 8; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "alpha veryfast" profile (channels = 4).
void GetProfile_alpha_veryfast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..7], indexed by position.
    static const int kRefine[8] = { 2, 1, 2, 1, 2, 2, 2, 2 };

    settings->channels = 4;

    // Group switches: [0] modes 0/2, [1] modes 1/3/7, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = true;
    settings->mode_selection[2] = true;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 0;
    settings->fastSkipTreshold_mode3 = 0;
    settings->fastSkipTreshold_mode7 = 4;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 3;
    settings->refineIterations_channel = 2;

    for (int i = 0; i < 8; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "alpha fast" profile (channels = 4).
void GetProfile_alpha_fast(bc7_enc_settings* settings)
{
    // Values stored into refineIterations[0..7], indexed by position.
    static const int kRefine[8] = { 2, 1, 2, 1, 2, 2, 2, 2 };

    settings->channels = 4;

    // Group switches: [0] modes 0/2, [1] modes 1/3/7, [2] modes 4/5, [3] mode 6.
    settings->mode_selection[0] = false;
    settings->mode_selection[1] = true;
    settings->mode_selection[2] = true;
    settings->mode_selection[3] = true;
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 4;
    settings->fastSkipTreshold_mode3 = 4;
    settings->fastSkipTreshold_mode7 = 8;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 3;
    settings->refineIterations_channel = 2;

    for (int i = 0; i < 8; i++)
    {
        settings->refineIterations[i] = kRefine[i];
    }
}
|
|
|
|
// Fills *settings with the BC7 "alpha basic" profile (channels = 4).
void GetProfile_alpha_basic(bc7_enc_settings* settings)
{
    // Every entry of refineIterations[0..7] receives the same count.
    const int kIterations = 2;

    settings->channels = 4;

    // Group switches: [0] modes 0/2, [1] modes 1/3/7, [2] modes 4/5, [3] mode 6.
    for (int group = 0; group < 4; group++)
    {
        settings->mode_selection[group] = true;
    }
    settings->skip_mode2 = true;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 8 + 4;
    settings->fastSkipTreshold_mode3 = 8;
    settings->fastSkipTreshold_mode7 = 8;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = 2;

    for (int i = 0; i < 8; i++)
    {
        settings->refineIterations[i] = kIterations;
    }
}
|
|
|
|
// Fills *settings with the BC7 "alpha slow" profile (channels = 4).
// All refinement counts are the base value of 2 plus 2 extra iterations.
void GetProfile_alpha_slow(bc7_enc_settings* settings)
{
    const int baseIterations = 2;
    const int extraIterations = 2;
    const int iterations = baseIterations + extraIterations;

    settings->channels = 4;

    // Group switches: [0] modes 0/2, [1] modes 1/3/7, [2] modes 4/5, [3] mode 6.
    for (int group = 0; group < 4; group++)
    {
        settings->mode_selection[group] = true;
    }
    settings->skip_mode2 = false;

    // Fast-skip thresholds.
    settings->fastSkipTreshold_mode1 = 64;
    settings->fastSkipTreshold_mode3 = 64;
    settings->fastSkipTreshold_mode7 = 64;

    // Mode 4/5 specific parameters.
    settings->mode45_channel0 = 0;
    settings->refineIterations_channel = iterations;

    for (int i = 0; i < 8; i++)
    {
        settings->refineIterations[i] = iterations;
    }
}
|
|
|
|
// Fills *settings with the BC6H "veryfast" profile:
// fast mode on, slow mode off, skip threshold 0, no refinement iterations.
void GetProfile_bc6h_veryfast(bc6h_enc_settings* settings)
{
    // Mode flags.
    settings->fast_mode = true;
    settings->slow_mode = false;

    // Refinement iteration counts (the 1p and 2p fields).
    settings->refineIterations_1p = 0;
    settings->refineIterations_2p = 0;

    settings->fastSkipTreshold = 0;
}
|
|
|
|
// Fills *settings with the BC6H "fast" profile:
// fast mode on, slow mode off, skip threshold 2, refinement only for the 2p field.
void GetProfile_bc6h_fast(bc6h_enc_settings* settings)
{
    // Mode flags.
    settings->fast_mode = true;
    settings->slow_mode = false;

    // Refinement iteration counts (the 1p and 2p fields).
    settings->refineIterations_1p = 0;
    settings->refineIterations_2p = 1;

    settings->fastSkipTreshold = 2;
}
|
|
|
|
// Fills *settings with the BC6H "basic" profile:
// both fast and slow mode off, skip threshold 4, two refinement iterations each.
void GetProfile_bc6h_basic(bc6h_enc_settings* settings)
{
    // Mode flags.
    settings->fast_mode = false;
    settings->slow_mode = false;

    // Refinement iteration counts (the 1p and 2p fields).
    settings->refineIterations_1p = 2;
    settings->refineIterations_2p = 2;

    settings->fastSkipTreshold = 4;
}
|
|
|
|
// Fills *settings with the BC6H "slow" profile:
// slow mode on, fast mode off, skip threshold 10, two refinement iterations each.
void GetProfile_bc6h_slow(bc6h_enc_settings* settings)
{
    // Mode flags.
    settings->fast_mode = false;
    settings->slow_mode = true;

    // Refinement iteration counts (the 1p and 2p fields).
    settings->refineIterations_1p = 2;
    settings->refineIterations_2p = 2;

    settings->fastSkipTreshold = 10;
}
|
|
|
|
// Fills *settings with the BC6H "veryslow" profile:
// slow mode on, fast mode off, skip threshold 32, two refinement iterations each.
void GetProfile_bc6h_veryslow(bc6h_enc_settings* settings)
{
    // Mode flags.
    settings->fast_mode = false;
    settings->slow_mode = true;

    // Refinement iteration counts (the 1p and 2p fields).
    settings->refineIterations_1p = 2;
    settings->refineIterations_2p = 2;

    settings->fastSkipTreshold = 32;
}
|
|
|
|
// Fills *settings with the ETC "slow" profile; the only field written is
// fastSkipTreshold.
void GetProfile_etc_slow(etc_enc_settings* settings)
{
    const int kSlowSkipThreshold = 6;

    settings->fastSkipTreshold = kSlowSkipThreshold;
}
|
|
|
|
// Fills dst_slice from the region of src_tex that starts at (start_x, start_y).
// Source coordinates at or beyond src_tex->width / src_tex->height are clamped
// to the last column / row, so pixels past the texture edge repeat the edge.
//
// bpp is the pixel size in bits; bpp >> 3 bytes are copied per pixel.
// If dst_slice->ptr is exactly the address of pixel (start_x, start_y) inside
// src_tex, pixels that lie within the source extents are skipped rather than
// copied.
void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* src_tex, int start_x, int start_y, int bpp)
{
    const int pixel_bytes = bpp >> 3;

    // True when the destination slice begins at the source pixel itself.
    const bool in_place =
        (&src_tex->ptr[src_tex->stride * start_y + pixel_bytes * start_x] == dst_slice->ptr);

    for (int row = 0; row < dst_slice->height; row++)
    {
        for (int col = 0; col < dst_slice->width; col++)
        {
            int sx = start_x + col;
            int sy = start_y + row;

            const bool inside_source = (sx < src_tex->width) && (sy < src_tex->height);
            if (in_place && inside_source)
            {
                continue;
            }

            // Clamp to the last valid column / row of the source.
            if (sx >= src_tex->width)
            {
                sx = src_tex->width - 1;
            }
            if (sy >= src_tex->height)
            {
                sy = src_tex->height - 1;
            }

            void* to = &dst_slice->ptr[dst_slice->stride * row + pixel_bytes * col];
            void* from = &src_tex->ptr[src_tex->stride * sy + pixel_bytes * sx];

            memcpy(to, from, pixel_bytes);
        }
    }
}
|
|
|
|
void CompressBlocksBC1(const rgba_surface* src, uint8_t* dst)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC1_ispc_sse4((ispc::rgba_surface*)src, dst);
|
|
} else {
|
|
ispc::CompressBlocksBC1_ispc((ispc::rgba_surface*)src, dst);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksBC3(const rgba_surface* src, uint8_t* dst)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC3_ispc_sse4((ispc::rgba_surface*)src, dst);
|
|
} else {
|
|
ispc::CompressBlocksBC3_ispc((ispc::rgba_surface*)src, dst);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC4_ispc_sse4((ispc::rgba_surface*)src, dst);
|
|
} else {
|
|
ispc::CompressBlocksBC4_ispc((ispc::rgba_surface*)src, dst);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC5_ispc_sse4((ispc::rgba_surface*)src, dst);
|
|
} else {
|
|
ispc::CompressBlocksBC5_ispc((ispc::rgba_surface*)src, dst);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC7_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
|
|
} else {
|
|
ispc::CompressBlocksBC7_ispc((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksBC6H(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksBC6H_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
|
|
} else {
|
|
ispc::CompressBlocksBC6H_ispc((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
|
|
}
|
|
}
|
|
|
|
void CompressBlocksETC1(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings)
|
|
{
|
|
if (isAmd) {
|
|
ispc::CompressBlocksETC1_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
|
|
} else {
|
|
ispc::CompressBlocksETC1_ispc((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
|
|
}
|
|
}
|
|
|
|
int ISPCIsa()
|
|
{
|
|
if (isAmd) {
|
|
return ispc::ISPCIsa_ispc_sse4();
|
|
} else {
|
|
return ispc::ISPCIsa_ispc();
|
|
}
|
|
} |