diff --git a/Source/engine/render/dun_render.cpp b/Source/engine/render/dun_render.cpp index a8bf57946..b32bcdede 100644 --- a/Source/engine/render/dun_render.cpp +++ b/Source/engine/render/dun_render.cpp @@ -3,6 +3,13 @@ * * Implementation of functionality for rendering the level tiles. */ + +// Debugging variables +// #define DEBUG_STR +// #define DEBUG_RENDER_COLOR +// #define DEBUG_RENDER_OFFSET_X 5 +// #define DEBUG_RENDER_OFFSET_Y 5 + #include "engine/render/dun_render.hpp" #include @@ -10,47 +17,66 @@ #include #include "lighting.h" +#include "utils/stdcompat/algorithm.hpp" #ifdef _DEBUG #include "miniwin/misc_msg.h" #endif #include "options.h" #include "utils/attributes.h" +#ifdef DEBUG_STR +#include "engine/render/text_render.hpp" +#endif +#if defined(DEBUG_STR) || defined(DUN_RENDER_STATS) +#include "utils/str_cat.hpp" +#endif namespace devilution { namespace { /** Width of a tile rendering primitive. */ -constexpr std::int_fast16_t Width = TILE_WIDTH / 2; +constexpr int_fast16_t Width = TILE_WIDTH / 2; /** Height of a tile rendering primitive (except triangles). */ -constexpr std::int_fast16_t Height = TILE_HEIGHT; +constexpr int_fast16_t Height = TILE_HEIGHT; /** Height of the lower triangle of a triangular or a trapezoid tile. */ -constexpr std::int_fast16_t LowerHeight = TILE_HEIGHT / 2; +constexpr int_fast16_t LowerHeight = TILE_HEIGHT / 2; /** Height of the upper triangle of a triangular tile. */ -constexpr std::int_fast16_t TriangleUpperHeight = TILE_HEIGHT / 2 - 1; +constexpr int_fast16_t TriangleUpperHeight = TILE_HEIGHT / 2 - 1; /** Height of the upper rectangle of a trapezoid tile. */ -constexpr std::int_fast16_t TrapezoidUpperHeight = TILE_HEIGHT / 2; +constexpr int_fast16_t TrapezoidUpperHeight = TILE_HEIGHT / 2; -constexpr std::int_fast16_t TriangleHeight = LowerHeight + TriangleUpperHeight; +constexpr int_fast16_t TriangleHeight = LowerHeight + TriangleUpperHeight; /** For triangles, for each pixel drawn vertically, this many pixels are drawn horizontally. */ -constexpr std::int_fast16_t XStep = 2; +constexpr int_fast16_t XStep = 2; -std::int_fast16_t GetTileHeight(TileType tile) +int_fast16_t GetTileHeight(TileType tile) { if (tile == TileType::LeftTriangle || tile == TileType::RightTriangle) return TriangleHeight; return Height; } -// Debugging variables -// #define DEBUG_RENDER_COLOR -// #define DEBUG_RENDER_OFFSET_X 5 -// #define DEBUG_RENDER_OFFSET_Y 5 +#ifdef DEBUG_STR +std::pair GetTileDebugStr(TileType tile) +{ + // clang-format off + switch (tile) { + case TileType::Square: return {"S", UiFlags::AlignCenter | UiFlags::VerticalCenter}; + case TileType::TransparentSquare: return {"T", UiFlags::AlignCenter | UiFlags::VerticalCenter}; + case TileType::LeftTriangle: return {"<", UiFlags::AlignRight | UiFlags::VerticalCenter}; + case TileType::RightTriangle: return {">", UiFlags::VerticalCenter}; + case TileType::LeftTrapezoid: return {"\\", UiFlags::AlignCenter}; + case TileType::RightTrapezoid: return {"/", UiFlags::AlignCenter}; + default: return {"", {}}; + } + // clang-format on +} +#endif #ifdef DEBUG_RENDER_COLOR int DBGCOLOR = 0; @@ -67,224 +93,67 @@ int GetTileDebugColor(TileType tile) case TileType::RightTrapezoid: return PAL16_BLUE + 5; default: return 0; } - // clang-format on } #endif // DEBUG_RENDER_COLOR -/** Fully transparent variant of WallMask. */ -const std::uint32_t WallMaskFullyTrasparent[TILE_HEIGHT] = { - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000 -}; -/** Transparent variant of RightMask. */ -const std::uint32_t RightMaskTransparent[TILE_HEIGHT] = { - 0xC0000000, - 0xF0000000, - 0xFC000000, - 0xFF000000, - 0xFFC00000, - 0xFFF00000, - 0xFFFC0000, - 0xFFFF0000, - 0xFFFFC000, - 0xFFFFF000, - 0xFFFFFC00, - 0xFFFFFF00, - 0xFFFFFFC0, - 0xFFFFFFF0, - 0xFFFFFFFC, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Transparent variant of LeftMask. */ -const std::uint32_t LeftMaskTransparent[TILE_HEIGHT] = { - 0x00000003, - 0x0000000F, - 0x0000003F, - 0x000000FF, - 0x000003FF, - 0x00000FFF, - 0x00003FFF, - 0x0000FFFF, - 0x0003FFFF, - 0x000FFFFF, - 0x003FFFFF, - 0x00FFFFFF, - 0x03FFFFFF, - 0x0FFFFFFF, - 0x3FFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Fully opaque mask */ -const std::uint32_t SolidMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF, - 0xFFFFFFFF -}; -/** Used to mask out the left half of the tile diamond and only render additional content */ -const std::uint32_t RightFoliageMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0x3FFFFFFF, - 0x0FFFFFFF, - 0x03FFFFFF, - 0x00FFFFFF, - 0x003FFFFF, - 0x000FFFFF, - 0x0003FFFF, - 0x0000FFFF, - 0x00003FFF, - 0x00000FFF, - 0x000003FF, - 0x000000FF, - 0x0000003F, - 0x0000000F, - 0x00000003, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, -}; -/** Used to mask out the left half of the tile diamond and only render additional content */ -const std::uint32_t LeftFoliageMask[TILE_HEIGHT] = { - 0xFFFFFFFF, - 0xFFFFFFFC, - 0xFFFFFFF0, - 0xFFFFFFC0, - 0xFFFFFF00, - 0xFFFFFC00, - 0xFFFFF000, - 0xFFFFC000, - 0xFFFF0000, - 0xFFFC0000, - 0xFFF00000, - 0xFFC00000, - 0xFF000000, - 0xFC000000, - 0xF0000000, - 0xC0000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, -}; +// Masks are defined by 2 template variables: +// +// 1. `OpaquePrefix`: Whether the line starts with opaque pixels +// followed by blended pixels or the other way around. +// 2. `PrefixIncrement`: The change to the prefix when going +// up 1 line. +// +// The Left mask can only be applied to LeftTrapezoid and TransparentSquare. +// The Right mask can only be applied to RightTrapezoid and TransparentSquare. +// The Left/RightFoliage masks can only be applied to TransparentSquare. + +// True if the given OpaquePrefix and PrefixIncrement represent foliage. +// For foliage, we skip transparent pixels instead of blending them. +template +constexpr bool IsFoliage = PrefixIncrement != 0 && (OpaquePrefix == (PrefixIncrement > 0)); + +// True for foliage: +template +constexpr bool SkipTransparentPixels = IsFoliage; + +// True if the entire lower half of the mask is transparent. +// True for Transparent, LeftFoliage, and RightFoliage. +template +constexpr bool LowerHalfTransparent = (OpaquePrefix == (PrefixIncrement >= 0)); + +// The initial value for the prefix: +template +DVL_ALWAYS_INLINE int8_t InitPrefix() +{ + return PrefixIncrement >= 0 ? -32 : 64; +} + +// The initial value for the prefix at y-th line (counting from the bottom). +template +DVL_ALWAYS_INLINE int8_t InitPrefix(int8_t y) +{ + return InitPrefix() + PrefixIncrement * y; +} -enum class TransparencyType : uint8_t { +#ifdef DEBUG_STR +template +std::string prefixDebugString(int8_t prefix) { + std::string out(32, OpaquePrefix ? '0' : '1'); + const uint8_t clamped = clamp(prefix, 0, 32); + out.replace(0, clamped, clamped, OpaquePrefix ? '1' : '0'); + StrAppend(out, " prefix=", prefix, " OpaquePrefix=", OpaquePrefix, " PrefixIncrement=", PrefixIncrement); + return out; +} +#endif + +enum class MaskType { + Invalid, Solid, - Blended, + Transparent, + Right, + Left, + RightFoliage, + LeftFoliage, }; enum class LightType : uint8_t { @@ -294,7 +163,7 @@ enum class LightType : uint8_t { }; template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) { if (Light == LightType::FullyDark) { memset(dst, 0, n); @@ -306,8 +175,8 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, con #endif } else { // Partially lit #ifndef DEBUG_RENDER_COLOR - for (size_t i = 0; i < n; i++) { - dst[i] = tbl[src[i]]; + while (n-- != 0) { + *dst++ = tbl[*src++]; } #else memset(dst, tbl[DBGCOLOR], n); @@ -316,72 +185,78 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(std::uint8_t *dst, con } template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineBlended(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl) { #ifndef DEBUG_RENDER_COLOR if (Light == LightType::FullyDark) { - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = 0; - else - dst[i] = paletteTransparencyLookup[0][dst[i]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[0][*dst]; + ++dst; } } else if (Light == LightType::FullyLit) { - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = src[i]; - else - dst[i] = paletteTransparencyLookup[dst[i]][src[i]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[*dst][*src]; + ++dst; + ++src; } } else { // Partially lit - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[src[i]]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[src[i]]]; + while (n-- != 0) { + *dst = paletteTransparencyLookup[*dst][tbl[*src]]; + ++dst; + ++src; } } #else - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[DBGCOLOR]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR]]; + for (size_t i = 0; i < n; i++) { + dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR + 4]]; } #endif } -template -DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentOrOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl) { - if (Transparency == TransparencyType::Solid) { - RenderLineOpaque(dst, src, n, tbl); + if (Transparent) { + RenderLineTransparent(dst, src, width, tbl); } else { - // The number of iterations is limited by the size of the mask. - // So we can limit it by ANDing the mask with another mask that only keeps - // iterations that are lower than n. We can now avoid testing if i < n - // at every loop iteration. - assert(n != 0 && n <= sizeof(std::uint32_t) * CHAR_BIT); - const std::uint32_t firstNOnes = std::uint32_t(-1) << ((sizeof(std::uint32_t) * CHAR_BIT) - n); - mask &= firstNOnes; - if (mask == firstNOnes) { - RenderLineOpaque(dst, src, n, tbl); - } else if (Transparency == TransparencyType::Blended) { - RenderLineBlended(dst, src, n, tbl, mask); - } + RenderLineOpaque(dst, src, width, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentAndOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t prefixWidth, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl) +{ + if (OpaquePrefix) { + RenderLineOpaque(dst, src, prefixWidth, tbl); + if (!SkipTransparentPixels) + RenderLineTransparent(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl); + } else { + if (!SkipTransparentPixels) + RenderLineTransparent(dst, src, prefixWidth, tbl); + RenderLineOpaque(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl, int8_t prefix) +{ + if (PrefixIncrement == 0) { + RenderLineTransparentOrOpaque(dst, src, n, tbl); + } else { + RenderLineTransparentAndOpaque(dst, src, clamp(prefix, 0, n), n, tbl); } } struct Clip { - std::int_fast16_t top; - std::int_fast16_t bottom; - std::int_fast16_t left; - std::int_fast16_t right; - std::int_fast16_t width; - std::int_fast16_t height; + int_fast16_t top; + int_fast16_t bottom; + int_fast16_t left; + int_fast16_t right; + int_fast16_t width; + int_fast16_t height; }; -Clip CalculateClip(std::int_fast16_t x, std::int_fast16_t y, std::int_fast16_t w, std::int_fast16_t h, const Surface &out) +DVL_ALWAYS_INLINE Clip CalculateClip(int_fast16_t x, int_fast16_t y, int_fast16_t w, int_fast16_t h, const Surface &out) { Clip clip; clip.top = y + 1 < h ? h - (y + 1) : 0; @@ -393,64 +268,90 @@ Clip CalculateClip(std::int_fast16_t x, std::int_fast16_t y, std::int_fast16_t w return clip; } -template -DVL_ATTRIBUTE_HOT void RenderSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) -{ - for (auto i = 0; i < Height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareLowerHalf(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { + for (auto i = 0; i < LowerHeight; ++i, dst -= dstPitch) { + RenderLineTransparentOrOpaque(dst, src, Width, tbl); src += Width; } } -template -DVL_ATTRIBUTE_HOT void RenderSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareUpperHalf(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { + uint_fast8_t prefixWidth = PrefixIncrement < 0 ? 32 : 0; + for (auto i = 0; i < TrapezoidUpperHeight; ++i, dst -= dstPitch) { + RenderLineTransparentAndOpaque(dst, src, prefixWidth, Width, tbl); + if (PrefixIncrement != 0) + prefixWidth += PrefixIncrement; + src += Width; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { + if (PrefixIncrement == 0) { + // Fast path for MaskType::Solid and MaskType::Transparent + for (auto i = 0; i < Height; ++i, dst -= dstPitch) { + RenderLineTransparentOrOpaque(dst, src, Width, tbl); + src += Width; + } + } else { + RenderSquareLowerHalf>(dst, dstPitch, src, tbl); + RenderSquareUpperHalf(dst, dstPitch, src, tbl); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareClipped(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); src += clip.bottom * Height + clip.left; - for (auto i = 0; i < clip.height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clip.left); + for (auto i = 0; i < clip.height; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - (clip.left)); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquare(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderSquareFull(dst, dstPitch, src, mask, tbl); + RenderSquareFull(dst, dstPitch, src, tbl); } else { - RenderSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderSquareClipped(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 0; i < Height; ++i, dst -= dstPitch + Width, --mask) { - constexpr unsigned MaxMaskShift = 32; - std::uint_fast8_t drawWidth = Width; - std::uint32_t m = *mask; + int8_t prefix = InitPrefix(); + for (auto i = 0; i < Height; ++i, dst -= dstPitch + Width) { + uint_fast8_t drawWidth = Width; while (drawWidth > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, prefix - (Width - drawWidth)); src += v; } else { v = -v; } dst += v; drawWidth -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } + prefix += PrefixIncrement; } } -template +template // NOLINTNEXTLINE(readability-function-cognitive-complexity): Actually complex and has to be fast. -DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto skipRestOfTheLine = [&src](std::int_fast16_t remainingWidth) { + const auto skipRestOfTheLine = [&src](int_fast16_t remainingWidth) { while (remainingWidth > 0) { - const auto v = static_cast(*src++); + const auto v = static_cast(*src++); if (v > 0) { src += v; remainingWidth -= v; @@ -466,20 +367,19 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst skipRestOfTheLine(Width); } - for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width, --mask) { - constexpr unsigned MaxMaskShift = 32; + int8_t prefix = InitPrefix(clip.bottom); + for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width) { auto drawWidth = clip.width; - std::uint32_t m = *mask; // Skip initial src if clipping on the left. // Handles overshoot, i.e. when the RLE segment goes into the unclipped area. auto remainingLeftClip = clip.left; while (remainingLeftClip > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { if (v > remainingLeftClip) { const auto overshoot = v - remainingLeftClip; - RenderLine(dst, src + remainingLeftClip, overshoot, tbl, m); + RenderLine(dst, src + remainingLeftClip, overshoot, tbl, prefix - (Width - remainingLeftClip)); dst += overshoot; drawWidth -= overshoot; } @@ -493,21 +393,20 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst } } remainingLeftClip -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } // Draw the non-clipped segment while (drawWidth > 0) { - auto v = static_cast(*src++); + auto v = static_cast(*src++); if (v > 0) { if (v > drawWidth) { - RenderLine(dst, src, drawWidth, tbl, m); + RenderLine(dst, src, drawWidth, tbl, prefix - (Width - drawWidth)); src += v; dst += drawWidth; drawWidth -= v; break; } - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, prefix - (Width - drawWidth)); src += v; } else { v = -v; @@ -519,35 +418,35 @@ DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(std::uint8_t *dst, int dst } dst += v; drawWidth -= v; - m = (v == MaxMaskShift) ? 0 : (m << v); } // Skip the rest of src line if clipping on the right assert(drawWidth <= 0); skipRestOfTheLine(clip.right + drawWidth); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderTransparentSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquare(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderTransparentSquareFull(dst, dstPitch, src, mask, tbl); + RenderTransparentSquareFull(dst, dstPitch, src, tbl); } else { - RenderTransparentSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderTransparentSquareClipped(dst, dstPitch, src, tbl, clip); } } /** Vertical clip for the lower and upper triangles of a diamond tile (L/RTRIANGLE).*/ struct DiamondClipY { - std::int_fast16_t lowerBottom; - std::int_fast16_t lowerTop; - std::int_fast16_t upperBottom; - std::int_fast16_t upperTop; + int_fast16_t lowerBottom; + int_fast16_t lowerTop; + int_fast16_t upperBottom; + int_fast16_t upperTop; }; -template -DiamondClipY CalculateDiamondClipY(const Clip &clip) +template +DVL_ALWAYS_INLINE DiamondClipY CalculateDiamondClipY(const Clip &clip) { DiamondClipY result; if (clip.bottom > LowerHeight) { @@ -566,492 +465,586 @@ DiamondClipY CalculateDiamondClipY(const Clip &clip) return result; } -std::size_t CalculateTriangleSourceSkipLowerBottom(std::int_fast16_t numLines) +DVL_ALWAYS_INLINE std::size_t CalculateTriangleSourceSkipLowerBottom(int_fast16_t numLines) { return XStep * numLines * (numLines + 1) / 2 + 2 * ((numLines + 1) / 2); } -std::size_t CalculateTriangleSourceSkipUpperBottom(std::int_fast16_t numLines) +DVL_ALWAYS_INLINE std::size_t CalculateTriangleSourceSkipUpperBottom(int_fast16_t numLines) { return 2 * TriangleUpperHeight * numLines - numLines * (numLines - 1) + 2 * ((numLines + 1) / 2); } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLower(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { dst += XStep * (LowerHeight - 1); - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } - dst += 2 * XStep; - for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep, --mask) { - src += 2 * (i % 2); - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLineTransparentOrOpaque(dst, src, width, tbl); src += width; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipVertical(int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); dst += XStep * (LowerHeight - clipY.lowerBottom - 1); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } - src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); - dst += 2 * XStep + XStep * clipY.upperBottom; - const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { - src += 2 * (i % 2); - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft; const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { src += 2 * (i % 2); const auto width = XStep * i; const auto startX = Width - XStep * i; const auto skip = startX < clipLeft ? clipLeft - startX : 0; if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst + skip, src + skip, width - skip, tbl, prefix - (skip)); src += width; + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipRightAndVertical(int_fast16_t clipRight, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) +{ + src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); + dst += XStep * (LowerHeight - clipY.lowerBottom - 1); + const auto lowerMax = LowerHeight - clipY.lowerTop; + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) { + src += 2 * (i % 2); + const auto width = XStep * i; + if (width > clipRight) + RenderLine(dst, src, width - clipRight, tbl, prefix); + src += width; + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) +{ + RenderLeftTriangleLower>(dst, dstPitch, src, tbl); + int8_t prefix = InitPrefix(LowerHeight); + dst += 2 * XStep; + for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep) { + src += 2 * (i % 2); + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); + src += width; + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderLeftTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); dst += 2 * XStep + XStep * clipY.upperBottom; const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); const auto width = Width - XStep * i; - const auto startX = XStep * i; - const auto skip = startX < clipLeft ? clipLeft - startX : 0; - if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst, src, width, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderLeftTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); + src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); + dst += 2 * XStep + XStep * clipY.upperBottom; + const auto upperMax = TriangleUpperHeight - clipY.upperTop; + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); - const auto width = XStep * i; - if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); + const auto width = Width - XStep * i; + const auto startX = XStep * i; + const auto skip = startX < clipLeft ? clipLeft - startX : 0; + if (width > skip) + RenderLine(dst + skip, src + skip, width - skip, tbl, prefix - (skip)); src += width; + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderLeftTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); dst += 2 * XStep + XStep * clipY.upperBottom; const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) { src += 2 * (i % 2); const auto width = Width - XStep * i; if (width <= clipRight) break; - RenderLine(dst, src, width - clipRight, tbl, *mask); + RenderLine(dst, src, width - clipRight, tbl, prefix); src += width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangle(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderLeftTriangleFull(dst, dstPitch, src, mask, tbl); + RenderLeftTriangleFull(dst, dstPitch, src, tbl); } else { - RenderLeftTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLower(uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { + for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } - for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch, --mask) { - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLineTransparentOrOpaque(dst, src, width, tbl); src += width + 2 * (i % 2); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipVertical(int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } - src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); - const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) +{ + src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); + const auto lowerMax = LowerHeight - clipY.lowerTop; + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { + const auto width = XStep * i; + if (width > clipLeft) + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, prefix - clipLeft); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipRightAndVertical(int_fast16_t clipRight, int8_t &prefix, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, int dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) { const auto width = XStep * i; - if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; + if (width > skip) + RenderLine(dst, src, width - skip, tbl, prefix); + src += width + 2 * (i % 2); + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) +{ + RenderRightTriangleLower>(dst, dstPitch, src, tbl); + int8_t prefix = InitPrefix(LowerHeight); + for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch) { + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); + src += width + 2 * (i % 2); + prefix += PrefixIncrement; + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderRightTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); + src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); + const auto upperMax = TriangleUpperHeight - clipY.upperTop; + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + const auto width = Width - XStep * i; + RenderLine(dst, src, width, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderRightTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { const auto width = Width - XStep * i; if (width <= clipLeft) break; - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, prefix - clipLeft); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; - if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderRightTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); const auto upperMax = TriangleUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { const auto width = Width - XStep * i; const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); + RenderLine(dst, src, width - skip, tbl, prefix); src += width + 2 * (i % 2); + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangle(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderRightTriangleFull(dst, dstPitch, src, mask, tbl); + RenderRightTriangleFull(dst, dstPitch, src, tbl); } else { - RenderRightTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - dst += XStep * (LowerHeight - 1); - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } + RenderLeftTriangleLower>(dst, dstPitch, src, tbl); dst += XStep; - for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); - src += Width; - } + RenderSquareUpperHalf(dst, dstPitch, src, tbl); } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderLeftTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, Width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft; - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - const auto startX = Width - XStep * i; - const auto skip = startX < clipLeft ? clipLeft - startX : 0; - if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderLeftTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width + clipLeft; dst += XStep + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - clipLeft); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - dst += XStep * (LowerHeight - clipY.lowerBottom - 1); - const auto lowerMax = LowerHeight - clipY.lowerTop; - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { - src += 2 * (i % 2); - const auto width = XStep * i; - if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); - src += width; - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderLeftTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderLeftTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderLeftTrapezoidFull(dst, dstPitch, src, tbl); } else { - RenderLeftTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl) { - for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } - for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); - src += Width; - } + RenderRightTriangleLower>(dst, dstPitch, src, tbl); + RenderSquareUpperHalf(dst, dstPitch, src, tbl); } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + RenderRightTriangleLowerClipVertical(prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, Width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipLeft = clip.left; - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipLeft = clip.left; + RenderRightTriangleLowerClipLeftAndVertical(clipLeft, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix - clipLeft); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { - const auto clipY = CalculateDiamondClipY(clip); - const auto clipRight = clip.right; - const auto lowerMax = LowerHeight - clipY.lowerTop; - src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); - for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { - const auto width = XStep * i; - const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; - if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); - src += width + 2 * (i % 2); - } + int8_t prefix = InitPrefix(clip.bottom); + const DiamondClipY clipY = CalculateDiamondClipY(clip); + const int_fast16_t clipRight = clip.right; + RenderRightTriangleLowerClipRightAndVertical(clipRight, prefix, clipY, dst, dstPitch, src, tbl); src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; - for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) { + RenderLine(dst, src, clip.width, tbl, prefix); src += Width; + prefix += PrefixIncrement; } } -template -DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderRightTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderRightTrapezoidFull(dst, dstPitch, src, tbl); } else { - RenderRightTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipVertical(dst, dstPitch, src, tbl, clip); } } else if (clip.right == 0) { - RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, tbl, clip); } else { - RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, tbl, clip); } } -template -DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) { switch (tile) { case TileType::Square: - RenderSquare(dst, dstPitch, src, mask, tbl, clip); + RenderSquare(dst, dstPitch, src, tbl, clip); break; case TileType::TransparentSquare: - RenderTransparentSquare(dst, dstPitch, src, mask, tbl, clip); + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); break; case TileType::LeftTriangle: - RenderLeftTriangle(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangle(dst, dstPitch, src, tbl, clip); break; case TileType::RightTriangle: - RenderRightTriangle(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangle(dst, dstPitch, src, tbl, clip); break; case TileType::LeftTrapezoid: - RenderLeftTrapezoid(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoid(dst, dstPitch, src, tbl, clip); break; case TileType::RightTrapezoid: - RenderRightTrapezoid(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoid(dst, dstPitch, src, tbl, clip); break; } } -/** Returns the mask that defines what parts of the tile are opaque. */ -const std::uint32_t *GetMask(TileType tile, uint16_t levelPieceId, ArchType archType, bool transparency, bool foliage) +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareDispatch(uint8_t lightTableIndex, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } else { + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + switch (tile) { + case TileType::TransparentSquare: + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + break; + case TileType::LeftTrapezoid: + RenderLeftTrapezoid(dst, dstPitch, src, tbl, clip); + break; + default: + app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles"); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + switch (tile) { + case TileType::TransparentSquare: + RenderTransparentSquare(dst, dstPitch, src, tbl, clip); + break; + case TileType::RightTrapezoid: + RenderRightTrapezoid(dst, dstPitch, src, tbl, clip); + break; + default: + app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles"); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquareDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderLeftTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquareDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderRightTrapezoidOrTransparentSquare(tile, dst, dstPitch, src, tbl, clip); + } +} + +template +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(uint8_t lightTableIndex, TileType tile, uint8_t *DVL_RESTRICT dst, int dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip) +{ + if (lightTableIndex == LightsMax) { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } else if (lightTableIndex == 0) { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } else { + RenderTileType(tile, dst, dstPitch, src, tbl, clip); + } +} + +DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT MaskType GetMask(TileType tile, uint16_t levelPieceId, ArchType archType, bool transparency, bool foliage) { #ifdef _DEBUG if ((SDL_GetModState() & KMOD_ALT) != 0) { - return &SolidMask[TILE_HEIGHT - 1]; + return MaskType::Solid; } #endif if (transparency) { if (archType == ArchType::None) { - return &WallMaskFullyTrasparent[TILE_HEIGHT - 1]; + return MaskType::Transparent; } if (archType == ArchType::Left && tile != TileType::LeftTriangle) { if (TileHasAny(levelPieceId, TileProperties::TransparentLeft)) { - return &LeftMaskTransparent[TILE_HEIGHT - 1]; + return MaskType::Left; } } if (archType == ArchType::Right && tile != TileType::RightTriangle) { if (TileHasAny(levelPieceId, TileProperties::TransparentRight)) { - return &RightMaskTransparent[TILE_HEIGHT - 1]; + return MaskType::Right; } } } else if (archType != ArchType::None && foliage) { if (tile != TileType::TransparentSquare) - return nullptr; + return MaskType::Invalid; if (archType == ArchType::Left) - return &LeftFoliageMask[TILE_HEIGHT - 1]; + return MaskType::LeftFoliage; if (archType == ArchType::Right) - return &RightFoliageMask[TILE_HEIGHT - 1]; + return MaskType::RightFoliage; } - return &SolidMask[TILE_HEIGHT - 1]; + return MaskType::Solid; } // Blit with left and vertical clipping. -void RenderBlackTileClipLeftAndVertical(std::uint8_t *dst, int dstPitch, int sx, DiamondClipY clipY) +void RenderBlackTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, int sx, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1082,7 +1075,7 @@ void RenderBlackTileClipLeftAndVertical(std::uint8_t *dst, int dstPitch, int sx, } // Blit with right and vertical clipping. -void RenderBlackTileClipRightAndVertical(std::uint8_t *dst, int dstPitch, std::int_fast16_t maxWidth, DiamondClipY clipY) +void RenderBlackTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, int dstPitch, int_fast16_t maxWidth, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1108,7 +1101,7 @@ void RenderBlackTileClipRightAndVertical(std::uint8_t *dst, int dstPitch, std::i } // Blit with vertical clipping only. -void RenderBlackTileClipY(std::uint8_t *dst, int dstPitch, DiamondClipY clipY) +void RenderBlackTileClipY(uint8_t *DVL_RESTRICT dst, int dstPitch, DiamondClipY clipY) { dst += XStep * (LowerHeight - clipY.lowerBottom - 1); // Lower triangle (drawn bottom to top): @@ -1125,7 +1118,7 @@ void RenderBlackTileClipY(std::uint8_t *dst, int dstPitch, DiamondClipY clipY) } // Blit a black tile without clipping (must be fully in bounds). -void RenderBlackTileFull(std::uint8_t *dst, int dstPitch) +void RenderBlackTileFull(uint8_t *DVL_RESTRICT dst, int dstPitch) { dst += XStep * (LowerHeight - 1); // Tile is fully in bounds, can use constant loop boundaries. @@ -1142,15 +1135,47 @@ void RenderBlackTileFull(std::uint8_t *dst, int dstPitch) } // namespace +#ifdef DUN_RENDER_STATS +std::unordered_map DunRenderStats; + +string_view TileTypeToString(TileType tileType) +{ + // clang-format off + switch (tileType) { + case TileType::Square: return "Square"; + case TileType::TransparentSquare: return "TransparentSquare"; + case TileType::LeftTriangle: return "LeftTriangle"; + case TileType::RightTriangle: return "RightTriangle"; + case TileType::LeftTrapezoid: return "LeftTrapezoid"; + case TileType::RightTrapezoid: return "RightTrapezoid"; + default: return "???"; + } + // clang-format on +} + +string_view MaskTypeToString(uint8_t maskType) +{ + // clang-format off + switch (static_cast(maskType)) { + case MaskType::Invalid: return "Invalid"; + case MaskType::Solid: return "Solid"; + case MaskType::Transparent: return "Transparent"; + case MaskType::Right: return "Right"; + case MaskType::Left: return "Left"; + case MaskType::RightFoliage: return "RightFoliage"; + case MaskType::LeftFoliage: return "LeftFoliage"; + default: return "???"; + } + // clang-format on +} +#endif + void RenderTile(const Surface &out, Point position, LevelCelBlock levelCelBlock, uint16_t levelPieceId, uint8_t lightTableIndex, ArchType archType, bool transparency, bool foliage) { const TileType tile = levelCelBlock.type(); - const uint32_t *mask = GetMask(tile, levelPieceId, archType, transparency, foliage); - if (mask == nullptr) - return; #ifdef DEBUG_RENDER_OFFSET_X position.x += DEBUG_RENDER_OFFSET_X; @@ -1166,30 +1191,44 @@ void RenderTile(const Surface &out, Point position, if (clip.width <= 0 || clip.height <= 0) return; - const std::uint8_t *tbl = &LightTables[256 * lightTableIndex]; - const auto *pFrameTable = reinterpret_cast(pDungeonCels.get()); - const auto *src = reinterpret_cast(&pDungeonCels[pFrameTable[levelCelBlock.frame()]]); - std::uint8_t *dst = out.at(static_cast(position.x + clip.left), static_cast(position.y - clip.bottom)); + MaskType maskType = GetMask(tile, levelPieceId, archType, transparency, foliage); + const uint8_t *tbl = &LightTables[256 * lightTableIndex]; + const auto *pFrameTable = reinterpret_cast(pDungeonCels.get()); + const auto *src = reinterpret_cast(&pDungeonCels[pFrameTable[levelCelBlock.frame()]]); + uint8_t *dst = out.at(static_cast(position.x + clip.left), static_cast(position.y - clip.bottom)); const auto dstPitch = out.pitch(); - if (mask == &SolidMask[TILE_HEIGHT - 1]) { - if (lightTableIndex == LightsMax) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else if (lightTableIndex == 0) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } - } else { - mask -= clip.bottom; - if (lightTableIndex == LightsMax) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else if (lightTableIndex == 0) { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } else { - RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); - } +#ifdef DUN_RENDER_STATS + ++DunRenderStats[DunRenderType { tile, static_cast(maskType) }]; +#endif + + switch (maskType) { + case MaskType::Invalid: + break; + case MaskType::Solid: + RenderTileDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Transparent: + RenderTileDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Left: + RenderLeftTrapezoidOrTransparentSquareDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::Right: + RenderRightTrapezoidOrTransparentSquareDispatch(lightTableIndex, tile, dst, dstPitch, src, tbl, clip); + break; + case MaskType::LeftFoliage: + RenderTransparentSquareDispatch(lightTableIndex, dst, dstPitch, src, tbl, clip); + break; + case MaskType::RightFoliage: + RenderTransparentSquareDispatch(lightTableIndex, dst, dstPitch, src, tbl, clip); + break; } + +#ifdef DEBUG_STR + const std::pair debugStr = GetTileDebugStr(tile); + DrawString(out, debugStr.first, Rectangle { Point { position.x + 2, position.y - 29 }, Size { 28, 28 } }, debugStr.second); +#endif } void world_draw_black_tile(const Surface &out, int sx, int sy) @@ -1205,7 +1244,7 @@ void world_draw_black_tile(const Surface &out, int sx, int sy) return; auto clipY = CalculateDiamondClipY(clip); - std::uint8_t *dst = out.at(sx, static_cast(sy - clip.bottom)); + uint8_t *dst = out.at(sx, static_cast(sy - clip.bottom)); if (clip.width == TILE_WIDTH) { if (clip.height == TriangleHeight) { RenderBlackTileFull(dst, out.pitch()); diff --git a/Source/engine/render/dun_render.hpp b/Source/engine/render/dun_render.hpp index c4fe3f5ed..ed6880c62 100644 --- a/Source/engine/render/dun_render.hpp +++ b/Source/engine/render/dun_render.hpp @@ -11,6 +11,11 @@ #include "engine.h" +// #define DUN_RENDER_STATS +#ifdef DUN_RENDER_STATS +#include +#endif + namespace devilution { /** @@ -110,13 +115,35 @@ public: [[nodiscard]] uint16_t frame() const { - return SDL_SwapLE32(data_ & 0xFFF); + return data_ & 0xFFF; } private: uint16_t data_; }; +#ifdef DUN_RENDER_STATS +struct DunRenderType { + TileType tileType; + uint8_t maskType; + bool operator==(const DunRenderType &other) const + { + return tileType == other.tileType && maskType == other.maskType; + } +}; +struct DunRenderTypeHash { + size_t operator()(DunRenderType t) const noexcept + { + return std::hash {}((static_cast(t.tileType) << 1) | t.maskType); + } +}; +extern std::unordered_map DunRenderStats; + +string_view TileTypeToString(TileType tileType); + +string_view MaskTypeToString(uint8_t maskType); +#endif + /** * @brief Blit current world CEL to the given buffer * @param out Target buffer diff --git a/Source/engine/render/scrollrt.cpp b/Source/engine/render/scrollrt.cpp index 842dac99c..0a3489921 100644 --- a/Source/engine/render/scrollrt.cpp +++ b/Source/engine/render/scrollrt.cpp @@ -54,6 +54,10 @@ #include "debug.h" #endif +#ifdef DUN_RENDER_STATS +#include "utils/format_int.hpp" +#endif + namespace devilution { /** @@ -1010,12 +1014,35 @@ void DrawGame(const Surface &fullOut, Point position) } } +#ifdef DUN_RENDER_STATS + DunRenderStats.clear(); +#endif + DrawFloor(out, position, { sx, sy }, rows, columns); DrawTileContent(out, position, { sx, sy }, rows, columns); if (*sgOptions.Graphics.zoom) { Zoom(fullOut.subregionY(0, gnViewportHeight)); } + +#ifdef DUN_RENDER_STATS + std::vector> sortedStats(DunRenderStats.begin(), DunRenderStats.end()); + std::sort(sortedStats.begin(), sortedStats.end(), + [](const std::pair &a, const std::pair &b) { + return a.first.maskType == b.first.maskType + ? static_cast(a.first.tileType) < static_cast(b.first.tileType) + : a.first.maskType < b.first.maskType; + }); + Point pos { 100, 20 }; + for (size_t i = 0; i < sortedStats.size(); ++i) { + const auto &stat = sortedStats[i]; + DrawString(out, StrCat(i, "."), Rectangle(pos, Size { 20, 16 }), UiFlags::AlignRight); + DrawString(out, MaskTypeToString(stat.first.maskType), { pos.x + 24, pos.y }); + DrawString(out, TileTypeToString(stat.first.tileType), { pos.x + 184, pos.y }); + DrawString(out, FormatInteger(stat.second), Rectangle({ pos.x + 354, pos.y }, Size(40, 16)), UiFlags::AlignRight); + pos.y += 16; + } +#endif } /** diff --git a/Source/utils/attributes.h b/Source/utils/attributes.h index cc27a0215..682d1ce31 100644 --- a/Source/utils/attributes.h +++ b/Source/utils/attributes.h @@ -56,3 +56,9 @@ #else #define DVL_EXCEPTIONS 1 #endif + +#if defined(_MSC_VER) +#define DVL_RESTRICT __restrict +#else +#define DVL_RESTRICT __restrict__ +#endif