From b83637add8464cfaf3db7d79b616d7703c8bb049 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Sat, 9 Sep 2023 13:52:24 +0100 Subject: [PATCH] Use a C array for transparency LUT for debug perf In a debug build, `std::array` accesses are function calls. Timedemo on my machine: * Before: 45.39 seconds 141.3 FPS * After: 40.20 seconds 159.5 FPS --- Source/engine.cpp | 3 +-- Source/engine/palette.cpp | 14 +++++++++----- Source/engine/palette.h | 2 +- Source/engine/render/blit_impl.hpp | 2 +- Source/utils/sdl_bilinear_scale.cpp | 2 +- Source/utils/sdl_bilinear_scale.hpp | 3 +-- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Source/engine.cpp b/Source/engine.cpp index 909733cd8..8c553c814 100644 --- a/Source/engine.cpp +++ b/Source/engine.cpp @@ -11,7 +11,6 @@ * - Video playback */ -#include #include #include #include @@ -26,7 +25,7 @@ namespace { void DrawHalfTransparentUnalignedBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height) { uint8_t *pix = out.at(static_cast(sx), static_cast(sy)); - const std::array &lookupTable = paletteTransparencyLookup[0]; + const uint8_t *const lookupTable = paletteTransparencyLookup[0]; const unsigned skipX = out.pitch() - width; for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x, ++pix) { diff --git a/Source/engine/palette.cpp b/Source/engine/palette.cpp index 9e1271095..43bc05c25 100644 --- a/Source/engine/palette.cpp +++ b/Source/engine/palette.cpp @@ -24,7 +24,11 @@ namespace devilution { std::array logical_palette; std::array system_palette; std::array orig_palette; -std::array, 256> paletteTransparencyLookup; + +// This array is read from a lot on every frame. +// We do not use `std::array` here to improve debug build performance. +// In a debug build, `std::array` accesses are function calls. 
+Uint8 paletteTransparencyLookup[256][256]; #if DEVILUTIONX_PALETTE_TRANSPARENCY_BLACK_16_LUT uint16_t paletteTransparencyLookupBlack16[65536]; @@ -124,10 +128,10 @@ void CycleColors(int from, int to) std::rotate(system_palette.begin() + from, system_palette.begin() + from + 1, system_palette.begin() + to + 1); for (auto &palette : paletteTransparencyLookup) { - std::rotate(palette.begin() + from, palette.begin() + from + 1, palette.begin() + to + 1); + std::rotate(std::begin(palette) + from, std::begin(palette) + from + 1, std::begin(palette) + to + 1); } - std::rotate(paletteTransparencyLookup.begin() + from, paletteTransparencyLookup.begin() + from + 1, paletteTransparencyLookup.begin() + to + 1); + std::rotate(&paletteTransparencyLookup[from][0], &paletteTransparencyLookup[from + 1][0], &paletteTransparencyLookup[to + 1][0]); } /** @@ -140,10 +144,10 @@ void CycleColorsReverse(int from, int to) std::rotate(system_palette.begin() + from, system_palette.begin() + to, system_palette.begin() + to + 1); for (auto &palette : paletteTransparencyLookup) { - std::rotate(palette.begin() + from, palette.begin() + to, palette.begin() + to + 1); + std::rotate(std::begin(palette) + from, std::begin(palette) + to, std::begin(palette) + to + 1); } - std::rotate(paletteTransparencyLookup.begin() + from, paletteTransparencyLookup.begin() + to, paletteTransparencyLookup.begin() + to + 1); + std::rotate(&paletteTransparencyLookup[from][0], &paletteTransparencyLookup[to][0], &paletteTransparencyLookup[to + 1][0]); } } // namespace diff --git a/Source/engine/palette.h b/Source/engine/palette.h index b1c33dc8b..7989f3af1 100644 --- a/Source/engine/palette.h +++ b/Source/engine/palette.h @@ -36,7 +36,7 @@ extern std::array logical_palette; extern std::array system_palette; extern std::array orig_palette; /** Lookup table for transparency */ -extern std::array, 256> paletteTransparencyLookup; +extern Uint8 paletteTransparencyLookup[256][256]; #if 
DEVILUTIONX_PALETTE_TRANSPARENCY_BLACK_16_LUT /** diff --git a/Source/engine/render/blit_impl.hpp b/Source/engine/render/blit_impl.hpp index 5066b7d51..c336ea0b1 100644 --- a/Source/engine/render/blit_impl.hpp +++ b/Source/engine/render/blit_impl.hpp @@ -72,7 +72,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillBlended(uint8_t *dst, unsigned { assert(length != 0); const uint8_t *end = dst + length; - const std::array &tbl = paletteTransparencyLookup[color]; + const uint8_t *const tbl = paletteTransparencyLookup[color]; while (dst + 3 < end) { *dst = tbl[*dst]; ++dst; diff --git a/Source/utils/sdl_bilinear_scale.cpp b/Source/utils/sdl_bilinear_scale.cpp index 9d9b1a2f7..9164d7097 100644 --- a/Source/utils/sdl_bilinear_scale.cpp +++ b/Source/utils/sdl_bilinear_scale.cpp @@ -148,7 +148,7 @@ void BilinearScale32(SDL_Surface *src, SDL_Surface *dst) } } -void BilinearDownscaleByHalf8(const SDL_Surface *src, const std::array, 256> &paletteBlendingTable, SDL_Surface *dst, uint8_t transparentIndex) +void BilinearDownscaleByHalf8(const SDL_Surface *src, const Uint8 paletteBlendingTable[256][256], SDL_Surface *dst, uint8_t transparentIndex) { const auto *const srcPixelsBegin = static_cast(src->pixels) + static_cast(src->clip_rect.y * src->pitch + src->clip_rect.x); diff --git a/Source/utils/sdl_bilinear_scale.hpp b/Source/utils/sdl_bilinear_scale.hpp index 220d9ddcb..b35670363 100644 --- a/Source/utils/sdl_bilinear_scale.hpp +++ b/Source/utils/sdl_bilinear_scale.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -23,6 +22,6 @@ void BilinearScale32(SDL_Surface *src, SDL_Surface *dst); * @brief Streamlined bilinear downscaling using blended transparency table. * Requires `src` and `dst` to have the same pixel format (INDEX8). 
*/ -void BilinearDownscaleByHalf8(const SDL_Surface *src, const std::array<std::array<Uint8, 256>, 256> &paletteBlendingTable, SDL_Surface *dst, uint8_t transparentIndex); +void BilinearDownscaleByHalf8(const SDL_Surface *src, const Uint8 paletteBlendingTable[256][256], SDL_Surface *dst, uint8_t transparentIndex); } // namespace devilution