From 397529bf0f34c7f6e3bbd7dfa7917b1908a1d1af Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Tue, 21 Mar 2023 03:16:56 +0000 Subject: [PATCH] Rendering: Unify and optimize pixel blitters 1. Unifies the underlying CLX and dun_render blitters. 2. Optimizes them by unrolling loops and using pointer comparison rather than length comparison (saves a length decrement). 3. In `dun_render`, extracts `RenderLineTransparent/Opaque` branches into functions via explicit template specialization. Example RG-99 FPS (non-PGO'd): 17.4->18.4 --- Source/engine/render/blit_impl.hpp | 85 +++++++++++++++++++++++------ Source/engine/render/dun_render.cpp | 76 +++++++++++++++----------- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/Source/engine/render/blit_impl.hpp b/Source/engine/render/blit_impl.hpp index a8a8535ac..08d9689a7 100644 --- a/Source/engine/render/blit_impl.hpp +++ b/Source/engine/render/blit_impl.hpp @@ -26,13 +26,13 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillDirect(uint8_t *dst, unsigned l std::memset(dst, color, length); } -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsDirect(uint8_t *dst, const uint8_t *src, unsigned length) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsDirect(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length) { std::memcpy(dst, src, length); } struct BlitDirect { - DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *dst, const uint8_t *src) + DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src) { switch (cmd.type) { case BlitType::Fill: @@ -47,21 +47,68 @@ struct BlitDirect { } }; -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillWithMap(uint8_t *dst, unsigned length, uint8_t color, const uint8_t *colorMap) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillWithMap(uint8_t *dst, unsigned length, uint8_t color, const uint8_t *DVL_RESTRICT colorMap) { std::memset(dst, colorMap[color], length); } -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsWithMap(uint8_t *dst, const uint8_t *src, unsigned length, const uint8_t *colorMap) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsWithMap(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length, const uint8_t *DVL_RESTRICT colorMap) { - while (length-- > 0) + const uint8_t *end = src + length; + while (src < end - 3) { *dst++ = colorMap[*src++]; + *dst++ = colorMap[*src++]; + *dst++ = colorMap[*src++]; + *dst++ = colorMap[*src++]; + } + while (src < end) { + *dst++ = colorMap[*src++]; + } +} + +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillBlended(uint8_t *dst, unsigned length, uint8_t color) +{ + const uint8_t *end = dst + length; + const uint8_t *tbl = paletteTransparencyLookup[color]; + while (dst < end - 3) { + *dst = tbl[*dst]; + ++dst; + *dst = tbl[*dst]; + ++dst; + *dst = tbl[*dst]; + ++dst; + *dst = tbl[*dst]; + ++dst; + } + while (dst < end) { + *dst = tbl[*dst]; + ++dst; + } +} + +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlended(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length) +{ + const uint8_t *end = src + length; + while (src < end - 3) { + *dst = paletteTransparencyLookup[*dst][*src++]; + ++dst; + *dst = paletteTransparencyLookup[*dst][*src++]; + ++dst; + *dst = paletteTransparencyLookup[*dst][*src++]; + ++dst; + *dst = paletteTransparencyLookup[*dst][*src++]; + ++dst; + } + while (src < end) { + *dst = paletteTransparencyLookup[*dst][*src++]; + ++dst; + } } struct BlitWithMap { - const uint8_t *colorMap; + const uint8_t *DVL_RESTRICT colorMap; - DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *dst, const uint8_t *src) const + DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src) const { switch (cmd.type) { case BlitType::Fill: @@ -76,18 +123,20 @@ struct BlitWithMap { } }; -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillBlendedWithMap(uint8_t *dst, unsigned length, uint8_t color, const uint8_t *colorMap) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlendedWithMap(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length, const uint8_t *DVL_RESTRICT colorMap) { - color = colorMap[color]; - while (length-- > 0) { - *dst = paletteTransparencyLookup[*dst][color]; + const uint8_t *end = src + length; + while (src < end - 3) { + *dst = paletteTransparencyLookup[*dst][colorMap[*src++]]; + ++dst; + *dst = paletteTransparencyLookup[*dst][colorMap[*src++]]; + ++dst; + *dst = paletteTransparencyLookup[*dst][colorMap[*src++]]; + ++dst; + *dst = paletteTransparencyLookup[*dst][colorMap[*src++]]; ++dst; } -} - -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlendedWithMap(uint8_t *dst, const uint8_t *src, unsigned length, const uint8_t *colorMap) -{ - while (length-- > 0) { + while (src < end) { *dst = paletteTransparencyLookup[*dst][colorMap[*src++]]; ++dst; } @@ -96,11 +145,11 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlendedWithMap(uint8_t *dst, struct BlitBlendedWithMap { const uint8_t *colorMap; - DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *dst, const uint8_t *src) const + DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void operator()(BlitCommand cmd, uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src) const { switch (cmd.type) { case BlitType::Fill: - BlitFillBlendedWithMap(dst, cmd.length, cmd.color, colorMap); + BlitFillBlended(dst, cmd.length, colorMap[cmd.color]); return; case BlitType::Pixels: BlitPixelsBlendedWithMap(dst, src, cmd.length, colorMap); diff --git a/Source/engine/render/dun_render.cpp b/Source/engine/render/dun_render.cpp index 22ea46a7b..f30c96720 100644 --- a/Source/engine/render/dun_render.cpp +++ b/Source/engine/render/dun_render.cpp @@ -18,6 +18,7 @@ #include #include +#include "engine/render/blit_impl.hpp" #include "lighting.h" #include "options.h" #include "utils/attributes.h" @@ -152,55 +153,64 @@ enum class LightType : uint8_t { }; template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl); + +template <> +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl) +{ + BlitFillDirect(dst, n, 0); +} + +template <> +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl) { - if (Light == LightType::FullyDark) { - memset(dst, 0, n); - } else if (Light == LightType::FullyLit) { #ifndef DEBUG_RENDER_COLOR - memcpy(dst, src, n); + BlitPixelsDirect(dst, src, n); #else - memset(dst, DBGCOLOR, n); + BlitFillDirect(dst, n, DBGCOLOR); #endif - } else { // Partially lit +} + +template <> +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) +{ #ifndef DEBUG_RENDER_COLOR - while (n-- != 0) { - *dst++ = tbl[*src++]; - } + BlitPixelsWithMap(dst, src, n, tbl); #else - memset(dst, tbl[DBGCOLOR], n); + BlitFillDirect(dst, n, tbl[DBGCOLOR]); #endif - } } +#ifndef DEBUG_RENDER_COLOR +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl); + +template <> +void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl) +{ + BlitFillBlended(dst, n, 0); +} + +template <> +void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl) +{ + BlitPixelsBlended(dst, src, n); +} + +template <> +void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) +{ + BlitPixelsBlendedWithMap(dst, src, n, tbl); +} +#else // DEBUG_RENDER_COLOR template DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) { -#ifndef DEBUG_RENDER_COLOR - if (Light == LightType::FullyDark) { - while (n-- != 0) { - *dst = paletteTransparencyLookup[0][*dst]; - ++dst; - } - } else if (Light == LightType::FullyLit) { - while (n-- != 0) { - *dst = paletteTransparencyLookup[*dst][*src]; - ++dst; - ++src; - } - } else { // Partially lit - while (n-- != 0) { - *dst = paletteTransparencyLookup[*dst][tbl[*src]]; - ++dst; - ++src; - } - } -#else for (size_t i = 0; i < n; i++) { dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR + 4]]; } -#endif } +#endif template DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentOrOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl)