From 44ed0296d881948abb7c3e5a2d48df56f39eb596 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Wed, 30 Nov 2022 22:37:39 +0000 Subject: [PATCH] dun_render: Overhaul mask handling We notice that masks can be described by 2 parameters: 1. Whether they have 0 or 1 as their high bits. 2. Whether they shift to the left or to the right on the next line. Describing masks this way allows us to lift them to template variables and simplify the code. We also avoid handling the mask in the `RenderLine` loop entirely. Also fixes a foliage rendering bug: Transparent foliage pixels were previously blended but they should have been simply skipped. --- Source/engine/render/dun_render.cpp | 1231 ++++++++++++++------------- Source/engine/render/dun_render.hpp | 29 +- Source/engine/render/scrollrt.cpp | 27 + Source/utils/attributes.h | 6 + 4 files changed, 696 insertions(+), 597 deletions(-) diff --git a/Source/engine/render/dun_render.cpp b/Source/engine/render/dun_render.cpp index a8bf57946..b32bcdede 100644 --- a/Source/engine/render/dun_render.cpp +++ b/Source/engine/render/dun_render.cpp @@ -3,6 +3,13 @@ * * Implementation of functionality for rendering the level tiles. */ + +// Debugging variables +// #define DEBUG_STR +// #define DEBUG_RENDER_COLOR +// #define DEBUG_RENDER_OFFSET_X 5 +// #define DEBUG_RENDER_OFFSET_Y 5 + #include "engine/render/dun_render.hpp" #include @@ -10,47 +17,66 @@ #include #include "lighting.h" +#include "utils/stdcompat/algorithm.hpp" #ifdef _DEBUG #include "miniwin/misc_msg.h" #endif #include "options.h" #include "utils/attributes.h" +#ifdef DEBUG_STR +#include "engine/render/text_render.hpp" +#endif +#if defined(DEBUG_STR) || defined(DUN_RENDER_STATS) +#include "utils/str_cat.hpp" +#endif namespace devilution { namespace { /** Width of a tile rendering primitive. */ -constexpr std::int_fast16_t Width = TILE_WIDTH / 2; +constexpr int_fast16_t Width = TILE_WIDTH / 2; /** Height of a tile rendering primitive (except triangles). */ -constexpr std::int_fast16_t Height = TILE_HEIGHT; +constexpr int_fast16_t Height = TILE_HEIGHT; /** Height of the lower triangle of a triangular or a trapezoid tile. */ -constexpr std::int_fast16_t LowerHeight = TILE_HEIGHT / 2; +constexpr int_fast16_t LowerHeight = TILE_HEIGHT / 2; /** Height of the upper triangle of a triangular tile. */ -constexpr std::int_fast16_t TriangleUpperHeight = TILE_HEIGHT / 2 - 1; +constexpr int_fast16_t TriangleUpperHeight = TILE_HEIGHT / 2 - 1; /** Height of the upper rectangle of a trapezoid tile. */ -constexpr std::int_fast16_t TrapezoidUpperHeight = TILE_HEIGHT / 2; +constexpr int_fast16_t TrapezoidUpperHeight = TILE_HEIGHT / 2; -constexpr std::int_fast16_t TriangleHeight = LowerHeight + TriangleUpperHeight; +constexpr int_fast16_t TriangleHeight = LowerHeight + TriangleUpperHeight; /** For triangles, for each pixel drawn vertically, this many pixels are drawn horizontally. */ -constexpr std::int_fast16_t XStep = 2; +constexpr int_fast16_t XStep = 2; -std::int_fast16_t GetTileHeight(TileType tile) +int_fast16_t GetTileHeight(TileType tile) { if (tile == TileType::LeftTriangle || tile == TileType::RightTriangle) return TriangleHeight; return Height; } -// Debugging variables -// #define DEBUG_RENDER_COLOR -// #define DEBUG_RENDER_OFFSET_X 5 -// #define DEBUG_RENDER_OFFSET_Y 5 +#ifdef DEBUG_STR +std::pair GetTileDebugStr(TileType tile) +{ + // clang-format off + switch (tile) { + case TileType::Square: return {"S", UiFlags::AlignCenter | UiFlags::VerticalCenter}; + case TileType::TransparentSquare: return {"T", UiFlags::AlignCenter | UiFlags::VerticalCenter}; + case TileType::LeftTriangle: return {"<", UiFlags::AlignRight | UiFlags::VerticalCenter}; + case TileType::RightTriangle: return {">", UiFlags::VerticalCenter}; + case TileType::LeftTrapezoid: return {"\\", UiFlags::AlignCenter}; + case TileType::RightTrapezoid: return {"/", UiFlags::AlignCenter}; + default: return {"", {}}; + } + // clang-format on +} +#endif #ifdef DEBUG_RENDER_COLOR int DBGCOLOR = 0; @@ -67,224 +93,67 @@ int GetTileDebugColor(TileType tile) case TileType::RightTrapezoid: return PAL16_BLUE + 5; default: return 0; } - // clang-format on } #endif // DEBUG_RENDER_COLOR -/** Fully transparent variant of WallMask. */ -const std::uint32_t WallMaskFullyTrasparent[TILE_HEIGHT] = { - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000 -}; -/** Transparent variant of RightMask. */ -const std::uint32_t RightMaskTransparent[TILE_HEIGHT] = { - 0xC0000000, - 0xF0000000, - 0xFC000000, - 0xFF000000, - 0xFFC00000, - 0xFFF00000, - 0xFFFC0000, - 0xFFFF0000, - 0xFFFFC000, - 0xFFFFF000, - 0xFFFFFC00, - 0xFFFFFF00, - 0xFFFFFFC0, - 0xFFFFFFF0, - 0xFFFFFFFC, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Transparent variant of LeftMask. */ -const std::uint32_t LeftMaskTransparent[TILE_HEIGHT] = { - 0x00000003, - 0x0000000F, - 0x0000003F, - 0x000000FF, - 0x000003FF, - 0x00000FFF, - 0x00003FFF, - 0x0000FFFF, - 0x0003FFFF, - 0x000FFFFF, - 0x003FFFFF, - 0x00FFFFFF, - 0x03FFFFFF, - 0x0FFFFFFF, - 0x3FFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Fully opaque mask */ -const std::uint32_t SolidMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Used to mask out the left half of the tile diamond and only render additional content */ -const std::uint32_t RightFoliageMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0x3FFFFFFF, - 0x0FFFFFFF, - 0x03FFFFFF, - 0x00FFFFFF, - 0x003FFFFF, - 0x000FFFFF, - 0x0003FFFF, - 0x0000FFFF, - 0x00003FFF, - 0x00000FFF, - 0x000003FF, - 0x000000FF, - 0x0000003F, - 0x0000000F, - 0x00000003, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, -}; -/** Used to mask out the left half of the tile diamond and only render additional content */ -const std::uint32_t LeftFoliageMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0xFFFFFFFC, - 0xFFFFFFF0, - 0xFFFFFFC0, - 0xFFFFFF00, - 0xFFFFFC00, - 0xFFFFF000, - 0xFFFFC000, - 0xFFFF0000, - 0xFFFC0000, - 0xFFF00000, - 0xFFC00000, - 0xFF000000, - 0xFC000000, - 0xF0000000, - 0xC0000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, -}; +// Masks are defined by 2 template variables: +// +// 1. `OpaquePrefix`: Whether the line starts with opaque pixels +// followed by blended pixels or the other way around. +// 2. `PrefixIncrement`: The change to the prefix when going +// up 1 line. +// +// The Left mask can only be applied to LeftTrapezoid and TransparentSquare. +// The Right mask can only be applied to RightTrapezoid and TransparentSquare. +// The Left/RightFoliage masks can only be applied to TransparentSquare. + +// True if the given OpaquePrefix and PrefixIncrement represent foliage. +// For foliage, we skip transparent pixels instead of blending them. +template +constexpr bool IsFoliage = PrefixIncrement != 0 && (OpaquePrefix == (PrefixIncrement > 0)); + +// True for foliage: +template +constexpr bool SkipTransparentPixels = IsFoliage; + +// True if the entire lower half of the mask is transparent. +// True for Transparent, LeftFoliage, and RightFoliage. +template +constexpr bool LowerHalfTransparent = (OpaquePrefix == (PrefixIncrement >= 0)); + +// The initial value for the prefix: +template +DVL_ALWAYS_INLINE int8_t InitPrefix() +{ + return PrefixIncrement >= 0 ? -32 : 64; +} + +// The initial value for the prefix at y-th line (counting from the bottom). +template +DVL_ALWAYS_INLINE int8_t InitPrefix(int8_t y) +{ + return InitPrefix() + PrefixIncrement * y; +} -enum class TransparencyType : uint8_t { +#ifdef DEBUG_STR +template +std::string prefixDebugString(int8_t prefix) { + std::string out(32, OpaquePrefix ? '0' : '1'); + const uint8_t clamped = clamp(prefix, 0, 32); + out.replace(0, clamped, clamped, OpaquePrefix ? '1' : '0'); + StrAppend(out, " prefix=", prefix, " OpaquePrefix=", OpaquePrefix, " PrefixIncrement=", PrefixIncrement); + return out; +} +#endif + +enum class MaskType { + Invalid, Solid, - Blended, + Transparent, + Right, + Left, + RightFoliage, + LeftFoliage, }; enum class LightType : uint8_t { @@ -294,7 +163,7 @@ enum class LightType : uint8_t { }; template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) { if (Light == LightType::FullyDark) { memset(dst, 0, n); @@ -306,8 +175,8 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, con #endif } else { // Partially lit #ifndef DEBUG_RENDER_COLOR - for (size_t i = 0; i < n; i++) { - dst[i] = tbl[src[i]]; + while (n-- != 0) { + *dst++ = tbl[*src++]; } #else memset(dst, tbl[DBGCOLOR], n); @@ -316,72 +185,78 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, con } template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineBlended(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) { #ifndef DEBUG_RENDER_COLOR if (Light == LightType::FullyDark) { - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = 0; - else - dst[i] = paletteTransparencyLookup[0][dst[i]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[0][*dst]; + ++dst; } } else if (Light == LightType::FullyLit) { - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = src[i]; - else - dst[i] = paletteTransparencyLookup[dst[i]][src[i]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[*dst][*src]; + ++dst; + ++src; } } else { // Partially lit - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[src[i]]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[src[i]]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[*dst][tbl[*src]]; + ++dst; + ++src; } } #else - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[DBGCOLOR]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR]]; + for (size_t i = 0; i < n; i++) { + dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR + 4]]; } #endif } -template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentOrOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl) { - if (Transparency == TransparencyType::Solid) { - RenderLineOpaque(dst, src, n, tbl); + if (Transparent) { + RenderLineTransparent(dst, src, width, tbl); } else { - // The number of iterations is limited by the size of the mask. - // So we can limit it by ANDing the mask with another mask that only keeps - // iterations that are lower than n. We can now avoid testing if i < n - // at every loop iteration. - assert(n != 0 && n <= sizeof(std::uint32_t) * CHAR_BIT); - const std::uint32_t firstNOnes = std::uint32_t(-1) << ((sizeof(std::uint32_t) * CHAR_BIT) - n); - mask &= firstNOnes; - if (mask == firstNOnes) { - RenderLineOpaque(dst, src, n, tbl); - } else if (Transparency == TransparencyType::Blended) { - RenderLineBlended(dst, src, n, tbl, mask); - } + RenderLineOpaque(dst, src, width, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentAndOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t prefixWidth, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl) +{ + if (OpaquePrefix) { + RenderLineOpaque(dst, src, prefixWidth, tbl); + if (!SkipTransparentPixels) + RenderLineTransparent(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl); + } else { + if (!SkipTransparentPixels) + RenderLineTransparent(dst, src, prefixWidth, tbl); + RenderLineOpaque(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl, int8_t prefix) +{ + if (PrefixIncrement == 0) { + RenderLineTransparentOrOpaque(dst, src, n, tbl); + } else { + RenderLineTransparentAndOpaque(dst, src, clamp(prefix, 0, n), n, tbl); } } struct Clip { - std::int_fast16_t top; - std::int_fast16_t bottom; - std::int_fast16_t left; - std::int_fast16_t right; - std::int_fast16_t width; - std::int_fast16_t height; + int_fast16_t top; + int_fast16_t bottom; + int_fast16_t left; + int_fast16_t right; + int_fast16_t width; + int_fast16_t height; }; -Clip CalculateClip(std::int_fast16_t x, std::int_fast16_t y, std::int_fast16_t w, std::int_fast16_t h, const Surface &out) +DVL_ALWAYS_INLINE Clip CalculateClip(int_fast16_t x, int_fast16_t y, int_fast16_t w, int_fast16_t h, const Surface &out) { Clip clip; clip.top = y + 1 < h ? h - (y + 1) : 0; @@ -393,64 +268,90 @@ Clip CalculateClip(std::int_fast16_t x, std::int_fast16_t y, std::int_fast16_t w return clip; } -template -DVL_ATTRIBUTE_HOT void RenderSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) -{ - for (auto i = 0; i < Height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareLowerHalf(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { + for (auto i = 0; i < LowerHeight; ++i, dst -= dstPitch) { + RenderLineTransparentOrOpaque(dst, src, Width, tbl); src += Width; } } -template -DVL_ATTRIBUTE_HOT void RenderSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareUpperHalf(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { + uint_fast8_t prefixWidth = PrefixIncrement < 0 ? 32 : 0; + for (auto i = 0; i < TrapezoidUpperHeight; ++i, dst -= dstPitch) { + RenderLineTransparentAndOpaque(dst, src, prefixWidth, Width, tbl); + if (PrefixIncrement != 0) + prefixWidth += PrefixIncrement; + src += Width; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { + if (PrefixIncrement == 0) { + // Fast path for MaskType::Solid and MaskType::Transparent + for (auto i = 0; i < Height; ++i, dst -= dstPitch) { + RenderLineTransparentOrOpaque(dst, src, Width, tbl); + src += Width; + } + } else { + RenderSquareLowerHalf>(dst, dstPitch, src, tbl); + RenderSquareUpperHalf(dst, dstPitch, src, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareClipped(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); src += clip.bottom * Height + clip.left; - for (auto i = 0; i < clip.height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clip.left); + for (auto i = 0; i < clip.height; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - (clip.left)); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquare(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderSquareFull(dst, dstPitch, src, mask, tbl); + RenderSquareFull(dst, dstPitch, src, tbl); } else { - RenderSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderSquareClipped(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 0; i < Height; ++i, dst -= dstPitch + Width, --mask) { - constexpr unsigned MaxMaskShift = 32; - std::uint_fast8_t drawWidth = Width; - std::uint32_t m = *mask; + int8_t prefix = InitPrefix(); + for (auto i = 0; i < Height; ++i, dst -= dstPitch + Width) { + uint_fast8_t drawWidth = Width; while (drawWidth > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, prefix - (Width - drawWidth)); src += v; } else { v = -v; } dst += v; drawWidth -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } + prefix += PrefixIncrement; } } -template +template // NOLINTNEXTLINE(readability-function-cognitive-complexity): Actually complex and has to be fast. -DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto skipRestOfTheLine = [&src](std::int_fast16_t remainingWidth) { + const auto skipRestOfTheLine = [&src](int_fast16_t remainingWidth) { while (remainingWidth > 0) { - const auto v = static_cast(*src++); + const auto v = static_cast(*src++); if (v > 0) { src += v; remainingWidth -= v; @@ -466,20 +367,19 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst skipRestOfTheLine(Width); } - for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width, --mask) { - constexpr unsigned MaxMaskShift = 32; + int8_t prefix = InitPrefix(clip.bottom); + for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width) { auto drawWidth = clip.width; - std::uint32_t m = *mask; // Skip initial src if clipping on the left. // Handles overshoot, i.e. when the RLE segment goes into the unclipped area. auto remainingLeftClip = clip.left; while (remainingLeftClip > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { if (v > remainingLeftClip) { const auto overshoot = v - remainingLeftClip; - RenderLine(dst, src + remainingLeftClip, overshoot, tbl, m); + RenderLine(dst, src + remainingLeftClip, overshoot, tbl, prefix - (Width - remainingLeftClip)); dst += overshoot; drawWidth -= overshoot; } @@ -493,21 +393,20 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst } } remainingLeftClip -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } // Draw the non-clipped segment while (drawWidth > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { if (v > drawWidth) { - RenderLine(dst, src, drawWidth, tbl, m); + RenderLine(dst, src, drawWidth, tbl, prefix - (Width - drawWidth)); src += v; dst += drawWidth; drawWidth -= v; break; } - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, prefix - (Width - drawWidth)); src += v; } else { v = -v; @@ -519,35 +418,35 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst } dst += v; drawWidth -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } // Skip the rest of src line if clipping on the right assert(drawWidth <= 0); skipRestOfTheLine(clip.right + drawWidth); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderTransparentSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquare(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderTransparentSquareFull(dst, dstPitch, src, mask, tbl); + RenderTransparentSquareFull(dst, dstPitch, src, tbl); } else { - RenderTransparentSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderTransparentSquareClipped(dst, dstPitch, src, tbl, clip); } } /** Vertical clip for the lower and upper triangles of a diamond tile (L/RTRIANGLE).*/ struct DiamondClipY { - std::int_fast16_t lowerBottom; - std::int_fast16_t lowerTop; - std::int_fast16_t upperBottom; - std::int_fast16_t upperTop; + int_fast16_t lowerBottom; + int_fast16_t lowerTop; + int_fast16_t upperBottom; + int_fast16_t upperTop; }; -template -DiamondClipY CalculateDiamondClipY(const Clip &clip) +template +DVL_ALWAYS_INLINE DiamondClipY CalculateDiamondClipY(const Clip &clip) { DiamondClipY result; if (clip.bottom > LowerHeight) { @@ -566,492 +465,586 @@ DiamondClipY CalculateDiamondClipY(const Clip &clip) return result; } -std::size_t CalculateTriangleSourceSkipLowerBottom(std::int_fast16_t numLines) +DVL_ALWAYS_INLINE std::size_t CalculateTriangleSourceSkipLowerBottom(int_fast16_t numLines) { return XStep * numLines * (numLines + 1) / 2 + 2 * ((numLines + 1) / 2); } -std::size_t CalculateTriangleSourceSkipUpperBottom(std::int_fast16_t numLines) +DVL_ALWAYS_INLINE std::size_t CalculateTriangleSourceSkipUpperBottom(int_fast16_t numLines) { return 2 * TriangleUpperHeight * numLines - numLines * (numLines - 1) + 2 * ((numLines + 1) / 2); } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLower(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { dst += XStep * (LowerHeight - 1); - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } - dst += 2 * XStep; - for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep, --mask) { - src += 2 * (i % 2); - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLineTransparentOrOpaque(dst, src, width, tbl); src += width; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipVertical(int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); dst += XStep * (LowerHeight - clipY.lowerBottom - 1); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } - src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); - dst += 2 * XStep + XStep * clipY.upperBottom; - const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { - src += 2 * (i % 2); - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft; const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; const auto startX = Width - XStep * i; const auto skip = startX < clipLeft ? clipLeft - startX : 0; if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst + skip, src + skip, width - skip, tbl, prefix - (skip)); src += width; + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipRightAndVertical(int_fast16_t clipRight, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) +{ + src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); + dst += XStep * (LowerHeight - clipY.lowerBottom - 1); + const auto lowerMax = LowerHeight - clipY.lowerTop; + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { + src += 2 * (i % 2); + const auto width = XStep * i; + if (width > clipRight) + RenderLine(dst, src, width - clipRight, tbl, prefix); + src += width; + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) +{ + RenderLeftTriangleLower>(dst, dstPitch, src, tbl); + int8_t prefix = InitPrefix(LowerHeight); + dst += 2 * XStep; + for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep) { + src += 2 * (i % 2); + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); + src += width; + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderLeftTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); dst += 2 * XStep + XStep * clipY.upperBottom; const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); const auto width = Width - XStep * i; - const auto startX = XStep * i; - const auto skip = startX < clipLeft ? clipLeft - startX : 0; - if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst, src, width, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderLeftTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); + src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); + dst += 2 * XStep + XStep * clipY.upperBottom; + const auto upperMax = TriangleUpperHeight - clipY.upperTop; + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); - const auto width = XStep * i; - if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); + const auto width = Width - XStep * i; + const auto startX = XStep * i; + const auto skip = startX < clipLeft ? clipLeft - startX : 0; + if (width > skip) + RenderLine(dst + skip, src + skip, width - skip, tbl, prefix - (skip)); src += width; + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderLeftTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); dst += 2 * XStep + XStep * clipY.upperBottom; const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); const auto width = Width - XStep * i; if (width <= clipRight) break; - RenderLine(dst, src, width - clipRight, tbl, *mask); + RenderLine(dst, src, width - clipRight, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangle(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderLeftTriangleFull(dst, dstPitch, src, mask, tbl); + RenderLeftTriangleFull(dst, dstPitch, src, tbl); } else { - RenderLeftTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLower(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { + for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } - for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch, --mask) { - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLineTransparentOrOpaque(dst, src, width, tbl); src += width + 2 * (i % 2); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipVertical(int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } - src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); - const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) +{ + src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); + const auto lowerMax = LowerHeight - clipY.lowerTop; + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { + const auto width = XStep * i; + if (width > clipLeft) + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, prefix - clipLeft); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipRightAndVertical(int_fast16_t clipRight, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { const auto width = XStep * i; - if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; + if (width > skip) + RenderLine(dst, src, width - skip, tbl, prefix); + src += width + 2 * (i % 2); + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) +{ + RenderRightTriangleLower>(dst, dstPitch, src, tbl); + int8_t prefix = InitPrefix(LowerHeight); + for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch) { + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); + src += width + 2 * (i % 2); + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderRightTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); + src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); + const auto upperMax = TriangleUpperHeight - clipY.upperTop; + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderRightTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { const auto width = Width - XStep * i; if (width <= clipLeft) break; - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, prefix - clipLeft); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; - if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderRightTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { const auto width = Width - XStep * i; const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); + RenderLine(dst, src, width - skip, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangle(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderRightTriangleFull(dst, dstPitch, src, mask, tbl); + RenderRightTriangleFull(dst, dstPitch, src, tbl); } else { - RenderRightTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - dst += XStep * (LowerHeight - 1); - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } + RenderLeftTriangleLower>(dst, dstPitch, src, tbl); dst += XStep; - for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); - src += Width; - } + RenderSquareUpperHalf(dst, dstPitch, src, tbl); } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderLeftTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, Width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft; - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - const auto startX = Width - XStep * i; - const auto skip = startX < clipLeft ? clipLeft - startX : 0; - if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderLeftTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width + clipLeft; dst += XStep + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - clipLeft); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderLeftTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderLeftTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderLeftTrapezoidFull(dst, dstPitch, src, tbl); } else { - RenderLeftTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } - for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); - src += Width; - } + RenderRightTriangleLower>(dst, dstPitch, src, tbl); + RenderSquareUpperHalf(dst, dstPitch, src, tbl); } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderRightTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, Width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderRightTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - clipLeft); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; - if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderRightTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderRightTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderRightTrapezoidFull(dst, dstPitch, src, tbl); } else { - RenderRightTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { switch (tile) { case TileType::Square: - RenderSquare(dst, dstPitch, src, mask, tbl, clip); + RenderSquare(dst, dstPitch, src, tbl, clip); break; case TileType::TransparentSquare: - RenderTransparentSquare(dst, dstPitch, src, mask, tbl, clip); + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); break; case TileType::LeftTriangle: - RenderLeftTriangle(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangle(dst, dstPitch, src, tbl, clip); break; case TileType::RightTriangle: - RenderRightTriangle(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangle(dst, dstPitch, src, tbl, clip); break; case TileType::LeftTrapezoid: - RenderLeftTrapezoid(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoid(dst, dstPitch, src, tbl, clip); break; case TileType::RightTrapezoid: - RenderRightTrapezoid(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoid(dst, dstPitch, src, tbl, clip); break; } } -/** Returns the mask that defines what parts of the tile are opaque. */ -const std::uint32_t *GetMask(TileType tile, uint16_t levelPieceId, ArchType archType, bool transparency, bool foliage) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareDispatch(uint8_t lightTableIndex, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } else { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + switch (tile) { + case TileType::TransparentSquare: + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + break; + case TileType::LeftTrapezoid: + RenderLeftTrapezoid(dst, dstPitch, src, tbl, clip); + break; + default: + app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles"); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + switch (tile) { + case TileType::TransparentSquare: + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + break; + case TileType::RightTrapezoid: + RenderRightTrapezoid(dst, dstPitch, src, tbl, clip); + break; + default: + app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles"); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquareDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquareDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } +} + +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT MaskType GetMask(TileType tile, uint16_t levelPieceId, ArchType archType, bool transparency, bool foliage) { #ifdef _DEBUG if ((SDL_GetModState() & KMOD_ALT) != 0) { - return &SolidMask[TILE_HEIGHT - 1]; + return MaskType::Solid; } #endif if (transparency) { if (archType == ArchType::None) { - return &WallMaskFullyTrasparent[TILE_HEIGHT - 1]; + return MaskType::Transparent; } if (archType == ArchType::Left && tile != TileType::LeftTriangle) { if (TileHasAny(levelPieceId, TileProperties::TransparentLeft)) { - return &LeftMaskTransparent[TILE_HEIGHT - 1]; + return MaskType::Left; } } if (archType == ArchType::Right && tile != TileType::RightTriangle) { if (TileHasAny(levelPieceId, TileProperties::TransparentRight)) { - return &RightMaskTransparent[TILE_HEIGHT - 1]; + return MaskType::Right; } } } else if (archType != ArchType::None && foliage) { if (tile != TileType::TransparentSquare) - return nullptr; + return MaskType::Invalid; if (archType == ArchType::Left) - return &LeftFoliageMask[TILE_HEIGHT - 1]; + return MaskType::LeftFoliage; if (archType == ArchType::Right) - return &RightFoliageMask[TILE_HEIGHT - 1]; + return MaskType::RightFoliage; } - return &SolidMask[TILE_HEIGHT - 1]; + return MaskType::Solid; } // Blit with left and vertical clipping. -void RenderBlackTileClipLeftAndVertical(std::uint8_t *dst, int dstPitch, int sx, DiamondClipY clipY) +void RenderBlackTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, int sx, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1082,7 +1075,7 @@ void RenderBlackTileClipLeftAndVertical(std::uint8_t *dst, int dstPitch, int sx, } // Blit with right and vertical clipping. -void RenderBlackTileClipRightAndVertical(std::uint8_t *dst, int dstPitch, std::int_fast16_t maxWidth, DiamondClipY clipY) +void RenderBlackTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, int_fast16_t maxWidth, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1108,7 +1101,7 @@ void RenderBlackTileClipRightAndVertical(std::uint8_t *dst, int dstPitch, std::i } // Blit with vertical clipping only. -void RenderBlackTileClipY(std::uint8_t *dst, int dstPitch, DiamondClipY clipY) +void RenderBlackTileClipY(uint8_t *DVL_RESTRICT dst, int dstPitch, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1125,7 +1118,7 @@ void RenderBlackTileClipY(std::uint8_t *dst, int dstPitch, DiamondClipY clipY) } // Blit a black tile without clipping (must be fully in bounds). -void RenderBlackTileFull(std::uint8_t *dst, int dstPitch) +void RenderBlackTileFull(uint8_t *DVL_RESTRICT dst, int dstPitch) { dst += XStep * (LowerHeight - 1); // Tile is fully in bounds, can use constant loop boundaries. @@ -1142,15 +1135,47 @@ void RenderBlackTileFull(std::uint8_t *dst, int dstPitch) } // namespace +#ifdef DUN_RENDER_STATS +std::unordered_map DunRenderStats; + +string_view TileTypeToString(TileType tileType) +{ + // clang-format off + switch (tileType) { + case TileType::Square: return "Square"; + case TileType::TransparentSquare: return "TransparentSquare"; + case TileType::LeftTriangle: return "LeftTriangle"; + case TileType::RightTriangle: return "RightTriangle"; + case TileType::LeftTrapezoid: return "LeftTrapezoid"; + case TileType::RightTrapezoid: return "RightTrapezoid"; + default: return "???"; + } + // clang-format on +} + +string_view MaskTypeToString(uint8_t maskType) +{ + // clang-format off + switch (static_cast(maskType)) { + case MaskType::Invalid: return "Invalid"; + case MaskType::Solid: return "Solid"; + case MaskType::Transparent: return "Transparent"; + case MaskType::Right: return "Right"; + case MaskType::Left: return "Left"; + case MaskType::RightFoliage: return "RightFoliage"; + case MaskType::LeftFoliage: return "LeftFoliage"; + default: return "???"; + } + // clang-format on +} +#endif + void RenderTile(const Surface &out, Point position, LevelCelBlock levelCelBlock, uint16_t levelPieceId, uint8_t lightTableIndex, ArchType archType, bool transparency, bool foliage) { const TileType tile = levelCelBlock.type(); - const uint32_t *mask = GetMask(tile, levelPieceId, archType, transparency, foliage); - if (mask == nullptr) - return; #ifdef DEBUG_RENDER_OFFSET_X position.x += DEBUG_RENDER_OFFSET_X; @@ -1166,30 +1191,44 @@ void RenderTile(const Surface &out, Point position, if (clip.width <= 0 || clip.height <= 0) return; - const std::uint8_t *tbl = &LightTables[256 * lightTableIndex]; - const auto *pFrameTable = reinterpret_cast(pDungeonCels.get()); - const auto *src = reinterpret_cast(&pDungeonCels[pFrameTable[levelCelBlock.frame()]]); - std::uint8_t *dst = out.at(static_cast(position.x + clip.left), static_cast(position.y - clip.bottom)); + MaskType maskType = GetMask(tile, levelPieceId, archType, transparency, foliage); + const uint8_t *tbl = &LightTables[256 * lightTableIndex]; + const auto *pFrameTable = reinterpret_cast(pDungeonCels.get()); + const auto *src = reinterpret_cast(&pDungeonCels[pFrameTable[levelCelBlock.frame()]]); + uint8_t *dst = out.at(static_cast(position.x + clip.left), static_cast(position.y - clip.bottom)); const auto dstPitch = out.pitch(); - if (mask == &SolidMask[TILE_HEIGHT - 1]) { - if (lightTableIndex == LightsMax) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else if (lightTableIndex == 0) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } - } else { - mask -= clip.bottom; - if (lightTableIndex == LightsMax) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else if (lightTableIndex == 0) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } +#ifdef DUN_RENDER_STATS + ++DunRenderStats[DunRenderType { tile, static_cast(maskType) }]; +#endif + + switch (maskType) { + case MaskType::Invalid: + break; + case MaskType::Solid: + RenderTileDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Transparent: + RenderTileDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Left: + RenderLeftTrapezoidOrTransparentSquareDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Right: + RenderRightTrapezoidOrTransparentSquareDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::LeftFoliage: + RenderTransparentSquareDispatch(lightTableIndex, dst, dstPitch, src, tbl, clip); + break; + case MaskType::RightFoliage: + RenderTransparentSquareDispatch(lightTableIndex, dst, dstPitch, src, tbl, clip); + break; } + +#ifdef DEBUG_STR + const std::pair debugStr = GetTileDebugStr(tile); + DrawString(out, debugStr.first, Rectangle { Point { position.x + 2, position.y - 29 }, Size { 28, 28 } }, debugStr.second); +#endif } void world_draw_black_tile(const Surface &out, int sx, int sy) @@ -1205,7 +1244,7 @@ void world_draw_black_tile(const Surface &out, int sx, int sy) return; auto clipY = CalculateDiamondClipY(clip); - std::uint8_t *dst = out.at(sx, static_cast(sy - clip.bottom)); + uint8_t *dst = out.at(sx, static_cast(sy - clip.bottom)); if (clip.width == TILE_WIDTH) { if (clip.height == TriangleHeight) { RenderBlackTileFull(dst, out.pitch()); diff --git a/Source/engine/render/dun_render.hpp b/Source/engine/render/dun_render.hpp index c4fe3f5ed..ed6880c62 100644 --- a/Source/engine/render/dun_render.hpp +++ b/Source/engine/render/dun_render.hpp @@ -11,6 +11,11 @@ #include "engine.h" +// #define DUN_RENDER_STATS +#ifdef DUN_RENDER_STATS +#include +#endif + namespace devilution { /** @@ -110,13 +115,35 @@ public: [[nodiscard]] uint16_t frame() const { - return SDL_SwapLE32(data_ & 0xFFF); + return data_ & 0xFFF; } private: uint16_t data_; }; +#ifdef DUN_RENDER_STATS +struct DunRenderType { + TileType tileType; + uint8_t maskType; + bool operator==(const DunRenderType &other) const + { + return tileType == other.tileType && maskType == other.maskType; + } +}; +struct DunRenderTypeHash { + size_t operator()(DunRenderType t) const noexcept + { + return std::hash {}((static_cast(t.tileType) << 1) | t.maskType); + } +}; +extern std::unordered_map DunRenderStats; + +string_view TileTypeToString(TileType tileType); + +string_view MaskTypeToString(uint8_t maskType); +#endif + /** * @brief Blit current world CEL to the given buffer * @param out Target buffer diff --git a/Source/engine/render/scrollrt.cpp b/Source/engine/render/scrollrt.cpp index 842dac99c..0a3489921 100644 --- a/Source/engine/render/scrollrt.cpp +++ b/Source/engine/render/scrollrt.cpp @@ -54,6 +54,10 @@ #include "debug.h" #endif +#ifdef DUN_RENDER_STATS +#include "utils/format_int.hpp" +#endif + namespace devilution { /** @@ -1010,12 +1014,35 @@ void DrawGame(const Surface &fullOut, Point position) } } +#ifdef DUN_RENDER_STATS + DunRenderStats.clear(); +#endif + DrawFloor(out, position, { sx, sy }, rows, columns); DrawTileContent(out, position, { sx, sy }, rows, columns); if (*sgOptions.Graphics.zoom) { Zoom(fullOut.subregionY(0, gnViewportHeight)); } + +#ifdef DUN_RENDER_STATS + std::vector> sortedStats(DunRenderStats.begin(), DunRenderStats.end()); + std::sort(sortedStats.begin(), sortedStats.end(), + [](const std::pair &a, const std::pair &b) { + return a.first.maskType == b.first.maskType + ? static_cast(a.first.tileType) < static_cast(b.first.tileType) + : a.first.maskType < b.first.maskType; + }); + Point pos { 100, 20 }; + for (size_t i = 0; i < sortedStats.size(); ++i) { + const auto &stat = sortedStats[i]; + DrawString(out, StrCat(i, "."), Rectangle(pos, Size { 20, 16 }), UiFlags::AlignRight); + DrawString(out, MaskTypeToString(stat.first.maskType), { pos.x + 24, pos.y }); + DrawString(out, TileTypeToString(stat.first.tileType), { pos.x + 184, pos.y }); + DrawString(out, FormatInteger(stat.second), Rectangle({ pos.x + 354, pos.y }, Size(40, 16)), UiFlags::AlignRight); + pos.y += 16; + } +#endif } /** diff --git a/Source/utils/attributes.h b/Source/utils/attributes.h index cc27a0215..682d1ce31 100644 --- a/Source/utils/attributes.h +++ b/Source/utils/attributes.h @@ -56,3 +56,9 @@ #else #define DVL_EXCEPTIONS 1 #endif + +#if defined(_MSC_VER) +#define DVL_RESTRICT __restrict +#else +#define DVL_RESTRICT __restrict__ +#endif