Initial code commit

This commit is contained in:
leblane 2023-04-28 15:47:06 +03:00
parent 7485d9cbe1
commit 029dc67524
25 changed files with 21809 additions and 0 deletions

View file

@ -0,0 +1,557 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-2019, Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
#include "ispc_texcomp.h"
#include "kernel_ispc.h"
#include <memory.h> // memcpy
namespace ispc {
extern "C" {
extern int32_t ISPCIsa_ispc_sse4();
extern "C" void CompressBlocksBC1_ispc_sse4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC3_ispc_sse4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC4_ispc_sse4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC5_ispc_sse4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC6H_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings);
extern "C" void CompressBlocksBC7_ispc_sse4(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings);
extern "C" void CompressBlocksETC1_ispc_sse4(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings);
extern "C" void CompressBlocksASTC_ispc_sse4(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings);
}
}
static bool isAmd = false;
void ISPCInit()
{
#if defined(_MSC_VER)
{
int CPUInfo[4];
__cpuid(CPUInfo, 0x80000001);
isAmd = (CPUInfo[2] & (1 << 6)) != 0;
}
#else
{
unsigned int eax = 0x80000001;
unsigned int ebx = 0;
unsigned int ecx = 0;
unsigned int edx = 0;
asm volatile("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax));
isAmd = (ecx & (1 << 6)) != 0;
}
#endif
}
void GetProfile_ultrafast(bc7_enc_settings* settings)
{
settings->channels = 3;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode13
settings->mode_selection[1] = false;
settings->fastSkipTreshold_mode1 = 3;
settings->fastSkipTreshold_mode3 = 1;
settings->fastSkipTreshold_mode7 = 0;
settings->refineIterations[1] = 2;
settings->refineIterations[3] = 1;
// mode45
settings->mode_selection[2] = false;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 0;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 1;
}
void GetProfile_veryfast(bc7_enc_settings* settings)
{
settings->channels = 3;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode13
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 3;
settings->fastSkipTreshold_mode3 = 1;
settings->fastSkipTreshold_mode7 = 0;
settings->refineIterations[1] = 2;
settings->refineIterations[3] = 1;
// mode45
settings->mode_selection[2] = false;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 0;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 1;
}
void GetProfile_fast(bc7_enc_settings* settings)
{
settings->channels = 3;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode13
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 12;
settings->fastSkipTreshold_mode3 = 4;
settings->fastSkipTreshold_mode7 = 0;
settings->refineIterations[1] = 2;
settings->refineIterations[3] = 1;
// mode45
settings->mode_selection[2] = false;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 0;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_basic(bc7_enc_settings* settings)
{
settings->channels = 3;
// mode02
settings->mode_selection[0] = true;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode13
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 8+4;
settings->fastSkipTreshold_mode3 = 8;
settings->fastSkipTreshold_mode7 = 0;
settings->refineIterations[1] = 2;
settings->refineIterations[3] = 2;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 2;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_slow(bc7_enc_settings* settings)
{
settings->channels = 3;
int moreRefine = 2;
// mode02
settings->mode_selection[0] = true;
settings->skip_mode2 = false;
settings->refineIterations[0] = 2+moreRefine;
settings->refineIterations[2] = 2+moreRefine;
// mode13
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 64;
settings->fastSkipTreshold_mode3 = 64;
settings->fastSkipTreshold_mode7 = 0;
settings->refineIterations[1] = 2+moreRefine;
settings->refineIterations[3] = 2+moreRefine;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 2+moreRefine;
settings->refineIterations[4] = 2+moreRefine;
settings->refineIterations[5] = 2+moreRefine;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2+moreRefine;
}
void GetProfile_alpha_ultrafast(bc7_enc_settings* settings)
{
settings->channels = 4;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode137
settings->mode_selection[1] = false;
settings->fastSkipTreshold_mode1 = 0;
settings->fastSkipTreshold_mode3 = 0;
settings->fastSkipTreshold_mode7 = 4;
settings->refineIterations[1] = 1;
settings->refineIterations[3] = 1;
settings->refineIterations[7] = 2;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 3;
settings->refineIterations_channel = 1;
settings->refineIterations[4] = 1;
settings->refineIterations[5] = 1;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_alpha_veryfast(bc7_enc_settings* settings)
{
settings->channels = 4;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode137
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 0;
settings->fastSkipTreshold_mode3 = 0;
settings->fastSkipTreshold_mode7 = 4;
settings->refineIterations[1] = 1;
settings->refineIterations[3] = 1;
settings->refineIterations[7] = 2;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 3;
settings->refineIterations_channel = 2;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_alpha_fast(bc7_enc_settings* settings)
{
settings->channels = 4;
// mode02
settings->mode_selection[0] = false;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode137
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 4;
settings->fastSkipTreshold_mode3 = 4;
settings->fastSkipTreshold_mode7 = 8;
settings->refineIterations[1] = 1;
settings->refineIterations[3] = 1;
settings->refineIterations[7] = 2;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 3;
settings->refineIterations_channel = 2;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_alpha_basic(bc7_enc_settings* settings)
{
settings->channels = 4;
// mode02
settings->mode_selection[0] = true;
settings->skip_mode2 = true;
settings->refineIterations[0] = 2;
settings->refineIterations[2] = 2;
// mode137
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 8+4;
settings->fastSkipTreshold_mode3 = 8;
settings->fastSkipTreshold_mode7 = 8;
settings->refineIterations[1] = 2;
settings->refineIterations[3] = 2;
settings->refineIterations[7] = 2;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 2;
settings->refineIterations[4] = 2;
settings->refineIterations[5] = 2;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2;
}
void GetProfile_alpha_slow(bc7_enc_settings* settings)
{
settings->channels = 4;
int moreRefine = 2;
// mode02
settings->mode_selection[0] = true;
settings->skip_mode2 = false;
settings->refineIterations[0] = 2+moreRefine;
settings->refineIterations[2] = 2+moreRefine;
// mode137
settings->mode_selection[1] = true;
settings->fastSkipTreshold_mode1 = 64;
settings->fastSkipTreshold_mode3 = 64;
settings->fastSkipTreshold_mode7 = 64;
settings->refineIterations[1] = 2+moreRefine;
settings->refineIterations[3] = 2+moreRefine;
settings->refineIterations[7] = 2+moreRefine;
// mode45
settings->mode_selection[2] = true;
settings->mode45_channel0 = 0;
settings->refineIterations_channel = 2+moreRefine;
settings->refineIterations[4] = 2+moreRefine;
settings->refineIterations[5] = 2+moreRefine;
// mode6
settings->mode_selection[3] = true;
settings->refineIterations[6] = 2+moreRefine;
}
void GetProfile_bc6h_veryfast(bc6h_enc_settings* settings)
{
settings->slow_mode = false;
settings->fast_mode = true;
settings->fastSkipTreshold = 0;
settings->refineIterations_1p = 0;
settings->refineIterations_2p = 0;
}
void GetProfile_bc6h_fast(bc6h_enc_settings* settings)
{
settings->slow_mode = false;
settings->fast_mode = true;
settings->fastSkipTreshold = 2;
settings->refineIterations_1p = 0;
settings->refineIterations_2p = 1;
}
void GetProfile_bc6h_basic(bc6h_enc_settings* settings)
{
settings->slow_mode = false;
settings->fast_mode = false;
settings->fastSkipTreshold = 4;
settings->refineIterations_1p = 2;
settings->refineIterations_2p = 2;
}
void GetProfile_bc6h_slow(bc6h_enc_settings* settings)
{
settings->slow_mode = true;
settings->fast_mode = false;
settings->fastSkipTreshold = 10;
settings->refineIterations_1p = 2;
settings->refineIterations_2p = 2;
}
void GetProfile_bc6h_veryslow(bc6h_enc_settings* settings)
{
settings->slow_mode = true;
settings->fast_mode = false;
settings->fastSkipTreshold = 32;
settings->refineIterations_1p = 2;
settings->refineIterations_2p = 2;
}
void GetProfile_etc_slow(etc_enc_settings* settings)
{
settings->fastSkipTreshold = 6;
}
void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* src_tex, int start_x, int start_y, int bpp)
{
int bytes_per_pixel = bpp >> 3;
bool aliasing = false;
if (&src_tex->ptr[src_tex->stride * start_y + bytes_per_pixel * start_x] == dst_slice->ptr) aliasing = true;
for (int y = 0; y < dst_slice->height; y++)
for (int x = 0; x < dst_slice->width; x++)
{
int xx = start_x + x;
int yy = start_y + y;
if (aliasing && xx < src_tex->width && yy < src_tex->height) continue;
if (xx >= src_tex->width) xx = src_tex->width - 1;
if (yy >= src_tex->height) yy = src_tex->height - 1;
void* dst = &dst_slice->ptr[dst_slice->stride * y + bytes_per_pixel * x];
void* src = &src_tex->ptr[src_tex->stride * yy + bytes_per_pixel * xx];
memcpy(dst, src, bytes_per_pixel);
}
}
void CompressBlocksBC1(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC1_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC1_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC3(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC3_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC3_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC4_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC4_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst)
{
if (isAmd) {
ispc::CompressBlocksBC5_ispc_sse4((ispc::rgba_surface*)src, dst);
} else {
ispc::CompressBlocksBC5_ispc((ispc::rgba_surface*)src, dst);
}
}
void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksBC7_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
} else {
ispc::CompressBlocksBC7_ispc((ispc::rgba_surface*)src, dst, (ispc::bc7_enc_settings*)settings);
}
}
void CompressBlocksBC6H(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksBC6H_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
} else {
ispc::CompressBlocksBC6H_ispc((ispc::rgba_surface*)src, dst, (ispc::bc6h_enc_settings*)settings);
}
}
void CompressBlocksETC1(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings)
{
if (isAmd) {
ispc::CompressBlocksETC1_ispc_sse4((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
} else {
ispc::CompressBlocksETC1_ispc((ispc::rgba_surface*)src, dst, (ispc::etc_enc_settings*)settings);
}
}
int ISPCIsa()
{
if (isAmd) {
return ispc::ISPCIsa_ispc_sse4();
} else {
return ispc::ISPCIsa_ispc();
}
}

View file

@ -0,0 +1,30 @@
LIBRARY ispc_texcomp
EXPORTS
CompressBlocksBC1
CompressBlocksBC3
CompressBlocksBC4
CompressBlocksBC5
CompressBlocksBC6H
CompressBlocksBC7
CompressBlocksETC1
CompressBlocksASTC
GetProfile_ultrafast
GetProfile_veryfast
GetProfile_fast
GetProfile_basic
GetProfile_slow
GetProfile_alpha_ultrafast
GetProfile_alpha_veryfast
GetProfile_alpha_fast
GetProfile_alpha_basic
GetProfile_alpha_slow
GetProfile_bc6h_veryfast
GetProfile_bc6h_fast
GetProfile_bc6h_basic
GetProfile_bc6h_slow
GetProfile_bc6h_veryslow
GetProfile_etc_slow
GetProfile_astc_fast
GetProfile_astc_alpha_fast
GetProfile_astc_alpha_slow
ReplicateBorders

View file

@ -0,0 +1,128 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016-2019, Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
#include <stdint.h>
struct rgba_surface
{
uint8_t* ptr;
int32_t width;
int32_t height;
int32_t stride; // in bytes
};
struct bc7_enc_settings
{
bool mode_selection[4];
int refineIterations[8];
bool skip_mode2;
int fastSkipTreshold_mode1;
int fastSkipTreshold_mode3;
int fastSkipTreshold_mode7;
int mode45_channel0;
int refineIterations_channel;
int channels;
};
struct bc6h_enc_settings
{
bool slow_mode;
bool fast_mode;
int refineIterations_1p;
int refineIterations_2p;
int fastSkipTreshold;
};
struct etc_enc_settings
{
int fastSkipTreshold;
};
struct astc_enc_settings
{
int block_width;
int block_height;
int channels;
int fastSkipTreshold;
int refineIterations;
};
// profiles for RGB data (alpha channel will be ignored)
extern "C" void GetProfile_ultrafast(bc7_enc_settings* settings);
extern "C" void GetProfile_veryfast(bc7_enc_settings* settings);
extern "C" void GetProfile_fast(bc7_enc_settings* settings);
extern "C" void GetProfile_basic(bc7_enc_settings* settings);
extern "C" void GetProfile_slow(bc7_enc_settings* settings);
// profiles for RGBA inputs
extern "C" void GetProfile_alpha_ultrafast(bc7_enc_settings* settings);
extern "C" void GetProfile_alpha_veryfast(bc7_enc_settings* settings);
extern "C" void GetProfile_alpha_fast(bc7_enc_settings* settings);
extern "C" void GetProfile_alpha_basic(bc7_enc_settings* settings);
extern "C" void GetProfile_alpha_slow(bc7_enc_settings* settings);
// profiles for BC6H (RGB HDR)
extern "C" void GetProfile_bc6h_veryfast(bc6h_enc_settings* settings);
extern "C" void GetProfile_bc6h_fast(bc6h_enc_settings* settings);
extern "C" void GetProfile_bc6h_basic(bc6h_enc_settings* settings);
extern "C" void GetProfile_bc6h_slow(bc6h_enc_settings* settings);
extern "C" void GetProfile_bc6h_veryslow(bc6h_enc_settings* settings);
// profiles for ETC
extern "C" void GetProfile_etc_slow(etc_enc_settings* settings);
// profiles for ASTC
extern "C" void GetProfile_astc_fast(astc_enc_settings* settings, int block_width, int block_height);
extern "C" void GetProfile_astc_alpha_fast(astc_enc_settings* settings, int block_width, int block_height);
extern "C" void GetProfile_astc_alpha_slow(astc_enc_settings* settings, int block_width, int block_height);
// helper function to replicate border pixels for the desired block sizes (bpp = 32 or 64)
extern "C" void ReplicateBorders(rgba_surface* dst_slice, const rgba_surface* src_tex, int x, int y, int bpp);
/*
Notes:
- input width and height need to be a multiple of block size
- LDR input is 32 bit/pixel (sRGB), HDR is 64 bit/pixel (half float)
- for BC4 input is 8bit/pixel (R8), for BC5 input is 16bit/pixel (RG8)
- dst buffer must be allocated with enough space for the compressed texture:
- 8 bytes/block for BC1/BC4/ETC1,
- 16 bytes/block for BC3/BC5/BC6H/BC7/ASTC
- the blocks are stored in raster scan order (natural CPU texture layout)
- use the GetProfile_* functions to select various speed/quality tradeoffs
- the RGB profiles are slightly faster as they ignore the alpha channel
*/
extern "C" void CompressBlocksBC1(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC3(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC4(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC5(const rgba_surface* src, uint8_t* dst);
extern "C" void CompressBlocksBC6H(const rgba_surface* src, uint8_t* dst, bc6h_enc_settings* settings);
extern "C" void CompressBlocksBC7(const rgba_surface* src, uint8_t* dst, bc7_enc_settings* settings);
extern "C" void CompressBlocksETC1(const rgba_surface* src, uint8_t* dst, etc_enc_settings* settings);
extern "C" void CompressBlocksASTC(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings);
extern "C" void ISPCInit();
extern "C" int ISPCIsa();

View file

@ -0,0 +1,177 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{9B44F7B9-A9AF-45A4-8695-96792A18B052}</ProjectGuid>
<RootNamespace>ispc_texcomp</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ExecutablePath>$(SolutionDir);$(ExecutablePath)</ExecutablePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ExecutablePath>$(SolutionDir);$(ExecutablePath)</ExecutablePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ExecutablePath>$(SolutionDir);$(ExecutablePath)</ExecutablePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ExecutablePath>$(SolutionDir);$(ExecutablePath)</ExecutablePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Windows</SubSystem>
<ModuleDefinitionFile>ispc_texcomp.def</ModuleDefinitionFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ModuleDefinitionFile>ispc_texcomp.def</ModuleDefinitionFile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<ModuleDefinitionFile>ispc_texcomp.def</ModuleDefinitionFile>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<ModuleDefinitionFile>ispc_texcomp.def</ModuleDefinitionFile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="ispc_texcomp.cpp" />
<ClCompile Include="ispc_texcomp_astc.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="ispc_texcomp.h" />
<ClInclude Include="kernel_astc_ispc.h" />
<ClInclude Include="kernel_astc_ispc_avx.h" />
<ClInclude Include="kernel_astc_ispc_avx2.h" />
<ClInclude Include="kernel_astc_ispc_sse2.h" />
<ClInclude Include="kernel_astc_ispc_sse4.h" />
<ClInclude Include="kernel_ispc.h" />
<ClInclude Include="kernel_ispc_avx.h" />
<ClInclude Include="kernel_ispc_avx2.h" />
<ClInclude Include="kernel_ispc_sse2.h" />
<ClInclude Include="kernel_ispc_sse4.h" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="kernel.ispc">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --arch=x86 --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --arch=x86 --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="kernel_astc.ispc">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --target=avx --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(TargetDir)%(Filename).obj;</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --arch=x86 --target=sse2,sse4,avx,avx2 --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_avx2.obj;</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">"$(ProjectDir)..\ISPC\win\ispc.exe" -O2 "%(Filename).ispc" -o "$(TargetDir)%(Filename).obj" -h "$(ProjectDir)%(Filename)_ispc.h" --arch=x86 --target=avx --opt=fast-math</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(TargetDir)%(Filename).obj;</Outputs>
</CustomBuild>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View file

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
</Filter>
<Filter Include="Generated Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ispc_texcomp.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="ispc_texcomp_astc.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="kernel.ispc">
<Filter>Source Files</Filter>
</CustomBuild>
<CustomBuild Include="kernel_astc.ispc">
<Filter>Source Files</Filter>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ispc_texcomp.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="kernel_astc_ispc_sse2.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_astc_ispc_sse4.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_ispc.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_ispc_avx.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_ispc_avx2.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_ispc_sse2.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_ispc_sse4.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_astc_ispc.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_astc_ispc_avx.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
<ClInclude Include="kernel_astc_ispc_avx2.h">
<Filter>Generated Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

View file

@ -0,0 +1,564 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2016, Intel Corporation
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
// the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#include "ispc_texcomp.h"
#include "kernel_astc_ispc.h"
#include <cassert>
#include <cstring>
#include <algorithm>
#include <vector>
#include <limits>
void GetProfile_astc_fast(astc_enc_settings* settings, int block_width, int block_height)
{
settings->block_width = block_width;
settings->block_height = block_height;
settings->channels = 3;
settings->fastSkipTreshold = 5;
settings->refineIterations = 2;
}
void GetProfile_astc_alpha_fast(astc_enc_settings* settings, int block_width, int block_height)
{
settings->block_width = block_width;
settings->block_height = block_height;
settings->channels = 4;
settings->fastSkipTreshold = 5;
settings->refineIterations = 2;
}
void GetProfile_astc_alpha_slow(astc_enc_settings* settings, int block_width, int block_height)
{
settings->block_width = block_width;
settings->block_height = block_height;
settings->channels = 4;
settings->fastSkipTreshold = 64;
settings->refineIterations = 2;
}
struct astc_block
{
int width;
int height;
uint8_t dual_plane;
int weight_range;
uint8_t weights[64];
int color_component_selector;
int partitions;
int partition_id;
int color_endpoint_pairs;
int channels;
int color_endpoint_modes[4];
int endpoint_range;
uint8_t endpoints[18];
};
bool can_store(int value, int bits)
{
if (value < 0) return false;
if (value >= 1 << bits) return false;
return true;
}
int pack_block_mode(astc_block* block)
{
int block_mode = 0;
int D = block->dual_plane;
int H = block->weight_range >= 6;
int DH = D * 2 + H;
int R = block->weight_range + 2 - ((H > 0) ? 6 : 0);
R = R / 2 + R % 2 * 4;
if (can_store(block->width - 4, 2) && can_store(block->height - 2, 2))
{
int B = block->width - 4;
int A = block->height - 2;
block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | (R & 3);
}
if (can_store(block->width - 8, 2) && can_store(block->height - 2, 2))
{
int B = block->width - 8;
int A = block->height - 2;
block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 4 | (R & 3);
}
if (can_store(block->width - 2, 2) && can_store(block->height - 8, 2))
{
int A = block->width - 2;
int B = block->height - 8;
block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 8 | (R & 3);
}
if (can_store(block->width - 2, 2) && can_store(block->height - 6, 1))
{
int A = block->width - 2;
int B = block->height - 6;
block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 12 | (R & 3);
}
if (can_store(block->width - 2, 1) && can_store(block->height - 2, 2))
{
int B = block->width;
int A = block->height - 2;
block_mode = (DH << 9) | (B << 7) | (A << 5) | ((R & 4) << 2) | 12 | (R & 3);
}
if (DH == 0 && can_store(block->width - 6, 2) && can_store(block->height - 6, 2))
{
int A = block->width - 6;
int B = block->height - 6;
block_mode = (B << 9) | 256 | (A << 5) | (R << 2);
}
return block_mode;
}
int range_table[][3] =
{
//2^ 3^ 5^
{ 1, 0, 0 }, // 0..1
{ 0, 1, 0 }, // 0..2
{ 2, 0, 0 }, // 0..3
{ 0, 0, 1 }, // 0..4
{ 1, 1, 0 }, // 0..5
{ 3, 0, 0 }, // 0..7
{ 1, 0, 1 }, // 0..9
{ 2, 1, 0 }, // 0..11
{ 4, 0, 0 }, // 0..15
{ 2, 0, 1 }, // 0..19
{ 3, 1, 0 }, // 0..23
{ 5, 0, 0 }, // 0..31
{ 3, 0, 1 }, // 0..39
{ 4, 1, 0 }, // 0..47
{ 6, 0, 0 }, // 0..63
{ 4, 0, 1 }, // 0..79
{ 5, 1, 0 }, // 0..95
{ 7, 0, 0 }, // 0..127
{ 5, 0, 1 }, // 0..159
{ 6, 1, 0 }, // 0..191
{ 8, 0, 0 }, // 0..255
};
int get_levels(int range)
{
return (1 + 2 * range_table[range][1] + 4 * range_table[range][2]) << range_table[range][0];
}
int sequence_bits(int count, int range)
{
int bits = count * range_table[range][0];
bits += (count * range_table[range][1] * 8 + 4) / 5;
bits += (count * range_table[range][2] * 7 + 2) / 3;
return bits;
}
void set_bits(uint32_t data[4], int* pos, int bits, uint32_t value)
{
assert(bits <= 25);
uint32_t word = *(uint32_t*)(((uint8_t*)data) + *pos / 8);
uint32_t mask = (1 << bits) - 1;
word |= value << (*pos % 8);
*(uint32_t*)(((uint8_t*)data) + *pos / 8) = word;
*pos += bits;
}
uint32_t get_field(uint32_t input, int a, int b)
{
assert(a >= b);
return (input >> b) & ((1 << (a - b + 1)) - 1);
}
uint32_t get_bit(uint32_t input, int a)
{
return get_field(input, a, a);
}
void pack_five_trits(uint32_t data[4], int sequence[5], int* pos, int n)
{
int t[5];
int m[5];
for (int i = 0; i < 5; i++)
{
t[i] = sequence[i] >> n;
m[i] = sequence[i] - (t[i] << n);
}
int C;
if (t[1] == 2 && t[2] == 2)
{
C = 3 * 4 + t[0];
}
else if (t[2] == 2)
{
C = t[1] * 16 + t[0] * 4 + 3;
}
else
{
C = t[2] * 16 + t[1] * 4 + t[0];
}
int T;
if (t[3] == 2 && t[4] == 2)
{
T = get_field(C, 4, 2) * 32 + 7 * 4 + get_field(C, 1, 0);
}
else
{
T = get_field(C, 4, 0);
if (t[4] == 2)
{
T += t[3] * 128 + 3 * 32;
}
else
{
T += t[4] * 128 + t[3] * 32;
}
}
uint32_t pack1 = 0;
pack1 |= m[0];
pack1 |= get_field(T, 1, 0) << n;
pack1 |= m[1] << (2 + n);
uint32_t pack2 = 0;
pack2 |= get_field(T, 3, 2);
pack2 |= m[2] << 2;
pack2 |= get_field(T, 4, 4) << (2 + n);
pack2 |= m[3] << (3 + n);
pack2 |= get_field(T, 6, 5) << (3 + n * 2);
pack2 |= m[4] << (5 + n * 2);
pack2 |= get_field(T, 7, 7) << (5 + n * 3);
set_bits(data, pos, 2 + n * 2, pack1);
set_bits(data, pos, 6 + n * 3, pack2);
}
void pack_three_quint(uint32_t data[4], int sequence[3], int* pos, int n)
{
int q[3];
int m[3];
for (int i = 0; i < 3; i++)
{
q[i] = sequence[i] >> n;
m[i] = sequence[i] - (q[i] << n);
}
int Q;
if (q[0] == 4 && q[1] == 4)
{
Q = get_field(q[2], 1, 0) * 8 + 3 * 2 + get_bit(q[2], 2);
}
else
{
int C;
if (q[1] == 4)
{
C = (q[0] << 3) + 5;
}
else
{
C = (q[1] << 3) + q[0];
}
if (q[2] == 4)
{
Q = get_field(~C, 2, 1) * 32 + get_field(C, 4, 3) * 8 + 3 * 2 + get_bit(C, 0);
}
else
{
Q = q[2] * 32 + get_field(C, 4, 0);
}
}
uint32_t pack = 0;
pack |= m[0];
pack |= get_field(Q, 2, 0) << n;
pack |= m[1] << (3 + n);
pack |= get_field(Q, 4, 3) << (3 + n * 2);
pack |= m[2] << (5 + n * 2);
pack |= get_field(Q, 6, 5) << (5 + n * 3);
set_bits(data, pos, 7 + n * 3, pack);
}
void pack_integer_sequence(uint32_t output_data[4], uint8_t sequence[], int pos, int count, int range)
{
int n = range_table[range][0];
int bits = sequence_bits(count, range);
int pos0 = pos;
uint32_t data[5] = { 0 };
if (range_table[range][1] == 1)
{
for (int j = 0; j < (count + 4) / 5; j++)
{
int temp[5] = { 0 };
for (int i = 0; i < std::min(count - j * 5, 5); i++) temp[i] = sequence[j * 5 + i];
pack_five_trits(data, temp, &pos, n);
}
}
else if (range_table[range][2] == 1)
{
for (int j = 0; j < (count + 2) / 3; j++)
{
int temp[3] = { 0 };
for (int i = 0; i < std::min(count - j * 3, 3); i++) temp[i] = sequence[j * 3 + i];
pack_three_quint(data, temp, &pos, n);
}
}
else
{
for (int i = 0; i < count; i++)
{
set_bits(data, &pos, n, sequence[i]);
}
}
if (pos0 + bits < 96) data[3] = 0;
if (pos0 + bits < 64) data[2] = 0;
if (pos0 + bits < 32) data[1] = 0;
data[(pos0 + bits) / 32] &= (1 << ((pos0 + bits) % 32)) - 1;
for (int k = 0; k < 4; k++) output_data[k] |= data[k];
}
uint32_t reverse_bits_32(uint32_t input)
{
uint32_t t = input;
t = (t << 16) | (t >> 16);
t = ((t & 0x00FF00FF) << 8) | ((t & 0xFF00FF00) >> 8);
t = ((t & 0x0F0F0F0F) << 4) | ((t & 0xF0F0F0F0) >> 4);
t = ((t & 0x33333333) << 2) | ((t & 0xCCCCCCCC) >> 2);
t = ((t & 0x55555555) << 1) | ((t & 0xAAAAAAAA) >> 1);
return t;
}
void pack_block(uint32_t data[4], astc_block* block)
{
memset(data, 0, 16);
int pos = 0;
set_bits(data, &pos, 11, pack_block_mode(block));
int num_weights = block->width * block->height * (block->dual_plane ? 2 : 1);
int weight_bits = sequence_bits(num_weights, block->weight_range);
int extra_bits = 0;
assert(num_weights <= 64);
assert(24 <= weight_bits && weight_bits <= 96);
set_bits(data, &pos, 2, block->partitions - 1);
if (block->partitions > 1)
{
set_bits(data, &pos, 10, block->partition_id);
int min_cem = 16;
int max_cem = 0;
for (int j = 0; j < block->partitions; j++)
{
min_cem = std::min(min_cem, block->color_endpoint_modes[j]);
max_cem = std::max(max_cem, block->color_endpoint_modes[j]);
}
assert(max_cem / 4 <= min_cem / 4 + 1);
int CEM = block->color_endpoint_modes[0] << 2;
if (max_cem != min_cem)
{
CEM = std::min(3, min_cem / 4 + 1);
for (int j = 0; j < block->partitions; j++)
{
int c = block->color_endpoint_modes[j] / 4 - ((CEM & 3) - 1);
int m = block->color_endpoint_modes[j] % 4;
assert(c == 0 || c == 1);
CEM |= c << (2 + j);
CEM |= m << (2 + block->partitions + 2 * j);
}
extra_bits = 3 * block->partitions - 4;
int pos2 = 128 - weight_bits - extra_bits;
set_bits(data, &pos2, extra_bits, CEM >> 6);
}
set_bits(data, &pos, 6, CEM & 63);
}
else
{
set_bits(data, &pos, 4, block->color_endpoint_modes[0]);
}
if (block->dual_plane)
{
assert(block->partitions < 4);
extra_bits += 2;
int pos2 = 128 - weight_bits - extra_bits;
set_bits(data, &pos2, 2, block->color_component_selector);
}
int config_bits = pos + extra_bits;
int remaining_bits = 128 - config_bits - weight_bits;
int num_cem_pairs = 0;
for (int j = 0; j < block->partitions; j++) num_cem_pairs += 1 + block->color_endpoint_modes[j] / 4;
assert(num_cem_pairs <= 9);
int endpoint_range = -1;
for (int range = 20; range>0; range--)
{
int bits = sequence_bits(2 * num_cem_pairs, range);
if (bits <= remaining_bits)
{
endpoint_range = range;
break;
}
}
assert(endpoint_range >= 4);
assert(block->endpoint_range == endpoint_range);
pack_integer_sequence(data, block->endpoints, pos, 2 * num_cem_pairs, endpoint_range);
uint32_t rdata[4] = { 0, 0, 0, 0 };
pack_integer_sequence(rdata, block->weights, 0, num_weights, block->weight_range);
for (int i = 0; i < 4; i++) data[i] |= reverse_bits_32(rdata[3 - i]);
}
void atsc_rank(const rgba_surface* src, int xx, int yy, uint32_t* mode_buffer, astc_enc_settings* settings)
{
ispc::astc_rank_ispc((ispc::rgba_surface*)src, xx, yy, mode_buffer, (ispc::astc_enc_settings*)settings);
}
extern "C" void pack_block_c(uint32_t data[4], ispc::astc_block* block)
{
assert(sizeof(ispc::astc_block) == sizeof(astc_block));
pack_block(data, (astc_block*)block);
}
void setup_list_context(ispc::astc_enc_context* ctx, uint32_t packed_mode)
{
ctx->width = 2 + get_field(packed_mode, 15, 13); // 2..8 <= 2^3
ctx->height = 2 + get_field(packed_mode, 18, 16); // 2..8 <= 2^3
ctx->dual_plane = get_field(packed_mode, 19, 19); // 0 or 1
ctx->partitions = 1;
int color_endpoint_modes0 = get_field(packed_mode, 7, 6) * 2 + 6; // 6, 8, 10 or 12
ctx->color_endpoint_pairs = 1 + (color_endpoint_modes0 / 4);
ctx->channels = (color_endpoint_modes0 > 8) ? 4 : 3;
}
void astc_encode(const rgba_surface* src, float* block_scores, uint8_t* dst, uint64_t* list, astc_enc_settings* settings)
{
ispc::astc_enc_context list_context;
setup_list_context(&list_context, uint32_t(list[1] & 0xFFFFFFFF));
assert(sizeof(ispc::rgba_surface) == sizeof(rgba_surface));
assert(sizeof(ispc::astc_enc_settings) == sizeof(astc_enc_settings));
ispc::astc_encode_ispc((ispc::rgba_surface*)src, block_scores, dst, list, &list_context, (ispc::astc_enc_settings*)settings);
}
void CompressBlocksASTC(const rgba_surface* src, uint8_t* dst, astc_enc_settings* settings)
{
assert(src->height % settings->block_height == 0);
assert(src->width % settings->block_width == 0);
assert(settings->block_height <= 8);
assert(settings->block_width <= 8);
int tex_width = src->width / settings->block_width;
int programCount = ispc::get_programCount();
std::vector<float> block_scores(tex_width * src->height / settings->block_height);
for (int yy = 0; yy < src->height / settings->block_height; yy++)
for (int xx = 0; xx < tex_width; xx++)
{
block_scores[yy * tex_width + xx] = std::numeric_limits<float>::infinity();
}
int mode_list_size = 3334;
int list_size = programCount;
std::vector<uint64_t> mode_lists(list_size * mode_list_size);
std::vector<uint32_t> mode_buffer(programCount * settings->fastSkipTreshold);
for (int yy = 0; yy < src->height / settings->block_height; yy++)
for (int _x = 0; _x < (tex_width + programCount - 1) / programCount; _x++)
{
int xx = _x * programCount;
atsc_rank(src, xx, yy, mode_buffer.data(), settings);
for (int i = 0; i < settings->fastSkipTreshold; i++)
for (int k = 0; k < programCount; k++)
{
if (xx + k >= tex_width) continue;
uint32_t offset = (yy << 16) + (xx + k);
uint32_t mode = mode_buffer[programCount * i + k];
int mode_bin = mode >> 20;
uint64_t* mode_list = &mode_lists[list_size * mode_bin];
if (*mode_list < programCount - 1)
{
int index = int(mode_list[0] + 1);
mode_list[0] = index;
mode_list[index] = (uint64_t(offset) << 32) + mode;
}
else
{
mode_list[0] = (uint64_t(offset) << 32) + mode;
astc_encode(src, block_scores.data(), dst, mode_list, settings);
memset(mode_list, 0, list_size * sizeof(uint64_t));
}
}
}
for (int mode_bin = 0; mode_bin < mode_list_size; mode_bin++)
{
uint64_t* mode_list = &mode_lists[list_size * mode_bin];
if (mode_list[0] == 0) continue;
mode_list[0] = 0;
astc_encode(src, block_scores.data(), dst, mode_list, settings);
memset(mode_list, 0, list_size * sizeof(uint64_t));
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff