Browse Source

Use a C array for the transparency LUT to improve debug-build performance

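In a debug (unoptimized) build, every `std::array` element access is an actual function call rather than an inlined memory read, which is costly for a lookup table that is read this often.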

Timedemo on my machine:
* Before: 45.39 seconds	141.3 FPS
* After:  40.20 seconds	159.5 FPS
pull/6585/head
Gleb Mazovetskiy 3 years ago
parent
commit
b83637add8
  1. 3
      Source/engine.cpp
  2. 14
      Source/engine/palette.cpp
  3. 2
      Source/engine/palette.h
  4. 2
      Source/engine/render/blit_impl.hpp
  5. 2
      Source/utils/sdl_bilinear_scale.cpp
  6. 3
      Source/utils/sdl_bilinear_scale.hpp

3
Source/engine.cpp

@ -11,7 +11,6 @@
* - Video playback
*/
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>
@ -26,7 +25,7 @@ namespace {
void DrawHalfTransparentUnalignedBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
{
uint8_t *pix = out.at(static_cast<int>(sx), static_cast<int>(sy));
const std::array<uint8_t, 256> &lookupTable = paletteTransparencyLookup[0];
const uint8_t *const lookupTable = paletteTransparencyLookup[0];
const unsigned skipX = out.pitch() - width;
for (unsigned y = 0; y < height; ++y) {
for (unsigned x = 0; x < width; ++x, ++pix) {

14
Source/engine/palette.cpp

@ -24,7 +24,11 @@ namespace devilution {
std::array<SDL_Color, 256> logical_palette;
std::array<SDL_Color, 256> system_palette;
std::array<SDL_Color, 256> orig_palette;
std::array<std::array<Uint8, 256>, 256> paletteTransparencyLookup;
// This array is read many times on every frame.
// We do not use `std::array` here to improve debug build performance.
// In a debug build, `std::array` accesses are function calls.
Uint8 paletteTransparencyLookup[256][256];
#if DEVILUTIONX_PALETTE_TRANSPARENCY_BLACK_16_LUT
uint16_t paletteTransparencyLookupBlack16[65536];
@ -124,10 +128,10 @@ void CycleColors(int from, int to)
std::rotate(system_palette.begin() + from, system_palette.begin() + from + 1, system_palette.begin() + to + 1);
for (auto &palette : paletteTransparencyLookup) {
std::rotate(palette.begin() + from, palette.begin() + from + 1, palette.begin() + to + 1);
std::rotate(std::begin(palette) + from, std::begin(palette) + from + 1, std::begin(palette) + to + 1);
}
std::rotate(paletteTransparencyLookup.begin() + from, paletteTransparencyLookup.begin() + from + 1, paletteTransparencyLookup.begin() + to + 1);
std::rotate(&paletteTransparencyLookup[from][0], &paletteTransparencyLookup[from + 1][0], &paletteTransparencyLookup[to + 1][0]);
}
/**
@ -140,10 +144,10 @@ void CycleColorsReverse(int from, int to)
std::rotate(system_palette.begin() + from, system_palette.begin() + to, system_palette.begin() + to + 1);
for (auto &palette : paletteTransparencyLookup) {
std::rotate(palette.begin() + from, palette.begin() + to, palette.begin() + to + 1);
std::rotate(std::begin(palette) + from, std::begin(palette) + to, std::begin(palette) + to + 1);
}
std::rotate(paletteTransparencyLookup.begin() + from, paletteTransparencyLookup.begin() + to, paletteTransparencyLookup.begin() + to + 1);
std::rotate(&paletteTransparencyLookup[from][0], &paletteTransparencyLookup[to][0], &paletteTransparencyLookup[to + 1][0]);
}
} // namespace

2
Source/engine/palette.h

@ -36,7 +36,7 @@ extern std::array<SDL_Color, 256> logical_palette;
extern std::array<SDL_Color, 256> system_palette;
extern std::array<SDL_Color, 256> orig_palette;
/** Lookup table for transparency */
extern std::array<std::array<Uint8, 256>, 256> paletteTransparencyLookup;
extern Uint8 paletteTransparencyLookup[256][256];
#if DEVILUTIONX_PALETTE_TRANSPARENCY_BLACK_16_LUT
/**

2
Source/engine/render/blit_impl.hpp

@ -72,7 +72,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillBlended(uint8_t *dst, unsigned
{
assert(length != 0);
const uint8_t *end = dst + length;
const std::array<uint8_t, 256> &tbl = paletteTransparencyLookup[color];
const uint8_t *const tbl = paletteTransparencyLookup[color];
while (dst + 3 < end) {
*dst = tbl[*dst];
++dst;

2
Source/utils/sdl_bilinear_scale.cpp

@ -148,7 +148,7 @@ void BilinearScale32(SDL_Surface *src, SDL_Surface *dst)
}
}
void BilinearDownscaleByHalf8(const SDL_Surface *src, const std::array<std::array<Uint8, 256>, 256> &paletteBlendingTable, SDL_Surface *dst, uint8_t transparentIndex)
void BilinearDownscaleByHalf8(const SDL_Surface *src, const Uint8 paletteBlendingTable[256][256], SDL_Surface *dst, uint8_t transparentIndex)
{
const auto *const srcPixelsBegin = static_cast<const uint8_t *>(src->pixels)
+ static_cast<size_t>(src->clip_rect.y * src->pitch + src->clip_rect.x);

3
Source/utils/sdl_bilinear_scale.hpp

@ -1,6 +1,5 @@
#pragma once
#include <array>
#include <cstdint>
#include <SDL_version.h>
@ -23,6 +22,6 @@ void BilinearScale32(SDL_Surface *src, SDL_Surface *dst);
* @brief Streamlined bilinear downscaling using blended transparency table.
* Requires `src` and `dst` to have the same pixel format (INDEX8).
*/
void BilinearDownscaleByHalf8(const SDL_Surface *src, const std::array<std::array<Uint8, 256>, 256> &paletteBlendingTable, SDL_Surface *dst, uint8_t transparentIndex);
void BilinearDownscaleByHalf8(const SDL_Surface *src, const Uint8 paletteBlendingTable[256][256], SDL_Surface *dst, uint8_t transparentIndex);
} // namespace devilution

Loading…
Cancel
Save