From fb27ee3907fdb9cea54858aa2771103d13c344f5 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Wed, 12 May 2021 05:58:03 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=A8=20dun=5Frender.cpp:=20Faster=20Ren?= =?UTF-8?q?derLine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns RenderLine line branches into template parameters, allowing the compiler to eliminate the branches and also fully inline it. Example FPS change * In dungeon: 1450 -> 1530 * In town: 1655 -> 1700 Also splits RenderLine into 3 functions Easier to read and also gives more useful profiling. Apparently the most time is spent in `RenderLineOpaque`. Also, mark them `inline` because that apparently hints GCC to inline the function (in a later refactoring we can introduce attribute always_inline instead where supported). --- Source/engine/render/dun_render.cpp | 366 +++++++++++++++++----------- 1 file changed, 228 insertions(+), 138 deletions(-) diff --git a/Source/engine/render/dun_render.cpp b/Source/engine/render/dun_render.cpp index 4e9a1a045..8bbf44590 100644 --- a/Source/engine/render/dun_render.cpp +++ b/Source/engine/render/dun_render.cpp @@ -487,79 +487,111 @@ void ForEachSetBit(std::uint32_t mask, const F &f) } } -inline void RenderLine(std::uint8_t *dst, const std::uint8_t *src, size_t n, const std::uint8_t *tbl, std::uint32_t mask) +enum class TransparencyType { + Solid, + Blended, + Stippled, +}; + +enum class LightType { + FullyDark, + PartiallyLit, + FullyLit, +}; + +template +inline void RenderLineOpaque(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl) { - // The number of iterations is limited by the size of the mask. - // So we can limit it by ANDing the mask with another mask that only keeps - // iterations that are lower than n. We can now avoid testing if i < n - // at every loop iteration. - assert(n != 0 && n <= sizeof(std::uint32_t) * CHAR_BIT); - const std::uint32_t firstNOnes = std::uint32_t(-1) << ((sizeof(std::uint32_t) * CHAR_BIT) - n); - mask &= firstNOnes; - - if (mask == firstNOnes) { // Opaque line - if (light_table_index == lightmax) { // Complete darkness - memset(dst, 0, n); - } else if (light_table_index == 0) { // Fully lit + if (Light == LightType::FullyDark) { + memset(dst, 0, n); + } else if (Light == LightType::FullyLit) { #ifndef DEBUG_RENDER_COLOR - memcpy(dst, src, n); + memcpy(dst, src, n); #else - memset(dst, DBGCOLOR, n); + memset(dst, DBGCOLOR, n); #endif - } else { // Partially lit + } else { // Partially lit #ifndef DEBUG_RENDER_COLOR - for (size_t i = 0; i < n; i++) { - dst[i] = tbl[src[i]]; - } + for (size_t i = 0; i < n; i++) { + dst[i] = tbl[src[i]]; + } #else - memset(dst, tbl[DBGCOLOR], n); + memset(dst, tbl[DBGCOLOR], n); #endif - } - } else { - if (sgOptions.Graphics.bBlendedTransparancy) { // Blended transparancy + } +} + +template +inline void RenderLineBlended(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +{ #ifndef DEBUG_RENDER_COLOR - if (light_table_index == lightmax) { // Complete darkness - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = 0; - else - dst[i] = paletteTransparencyLookup[0][dst[i]]; - } - } else if (light_table_index == 0) { // Fully lit - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = src[i]; - else - dst[i] = paletteTransparencyLookup[dst[i]][src[i]]; - } - } else { // Partially lit - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[src[i]]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[src[i]]]; - } - } + if (Light == LightType::FullyDark) { + for (size_t i = 0; i < n; i++, mask <<= 1) { + if ((mask & 0x80000000) != 0) + dst[i] = 0; + else + dst[i] = paletteTransparencyLookup[0][dst[i]]; + } + } else if (Light == LightType::FullyLit) { + for (size_t i = 0; i < n; i++, mask <<= 1) { + if ((mask & 0x80000000) != 0) + dst[i] = src[i]; + else + dst[i] = paletteTransparencyLookup[dst[i]][src[i]]; + } + } else { // Partially lit + for (size_t i = 0; i < n; i++, mask <<= 1) { + if ((mask & 0x80000000) != 0) + dst[i] = tbl[src[i]]; + else + dst[i] = paletteTransparencyLookup[dst[i]][tbl[src[i]]]; + } + } #else - for (size_t i = 0; i < n; i++, mask <<= 1) { - if ((mask & 0x80000000) != 0) - dst[i] = tbl[DBGCOLOR]; - else - dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR]]; - } + for (size_t i = 0; i < n; i++, mask <<= 1) { + if ((mask & 0x80000000) != 0) + dst[i] = tbl[DBGCOLOR]; + else + dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR]]; + } #endif - } else { // Stippled transparancy - if (light_table_index == lightmax) { // Complete darkness - ForEachSetBit(mask, [=](int i) { dst[i] = 0; }); - } else if (light_table_index == 0) { // Fully lit +} + +template +inline void RenderLineStippled(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +{ + if (Light == LightType::FullyDark) { + ForEachSetBit(mask, [=](int i) { dst[i] = 0; }); + } else if (Light == LightType::FullyLit) { #ifndef DEBUG_RENDER_COLOR - ForEachSetBit(mask, [=](int i) { dst[i] = src[i]; }); + ForEachSetBit(mask, [=](int i) { dst[i] = src[i]; }); #else - ForEachSetBit(mask, [=](int i) { dst[i] = DBGCOLOR; }); + ForEachSetBit(mask, [=](int i) { dst[i] = DBGCOLOR; }); #endif - } else { // Partially lit - ForEachSetBit(mask, [=](int i) { dst[i] = tbl[src[i]]; }); - } + } else { // Partially lit + ForEachSetBit(mask, [=](int i) { dst[i] = tbl[src[i]]; }); + } +} + +template +inline void RenderLine(std::uint8_t *dst, const std::uint8_t *src, std::uint_fast8_t n, const std::uint8_t *tbl, std::uint32_t mask) +{ + if (Transparency == TransparencyType::Solid) { + RenderLineOpaque(dst, src, n, tbl); + } else { + // The number of iterations is limited by the size of the mask. + // So we can limit it by ANDing the mask with another mask that only keeps + // iterations that are lower than n. We can now avoid testing if i < n + // at every loop iteration. + assert(n != 0 && n <= sizeof(std::uint32_t) * CHAR_BIT); + const std::uint32_t firstNOnes = std::uint32_t(-1) << ((sizeof(std::uint32_t) * CHAR_BIT) - n); + mask &= firstNOnes; + if (mask == firstNOnes) { + RenderLineOpaque(dst, src, n, tbl); + } else if (Transparency == TransparencyType::Blended) { + RenderLineBlended(dst, src, n, tbl, mask); + } else { + RenderLineStippled(dst, src, n, tbl, mask); } } } @@ -585,32 +617,36 @@ Clip CalculateClip(std::int_fast16_t x, std::int_fast16_t y, std::int_fast16_t w return clip; } +template void RenderSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { for (auto i = 0; i < Height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + RenderLine(dst, src, Width, tbl, *mask); src += Width; } } +template void RenderSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { src += clip.bottom * Height + clip.left; for (auto i = 0; i < clip.height; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clip.left); + RenderLine(dst, src, clip.width, tbl, (*mask) << clip.left); src += Width; } } +template void RenderSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderSquareFull(dst, dstPitch, src, mask, tbl); + RenderSquareFull(dst, dstPitch, src, mask, tbl); } else { - RenderSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderSquareClipped(dst, dstPitch, src, mask, tbl, clip); } } +template void RenderTransparentSquareFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { for (auto i = 0; i < Height; ++i, dst -= dstPitch + Width, --mask) { @@ -620,7 +656,7 @@ void RenderTransparentSquareFull(std::uint8_t *dst, int dstPitch, const std::uin while (drawWidth > 0) { auto v = static_cast(*src++); if (v > 0) { - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, m); src += v; } else { v = -v; @@ -632,6 +668,7 @@ void RenderTransparentSquareFull(std::uint8_t *dst, int dstPitch, const std::uin } } +template // NOLINTNEXTLINE(readability-function-cognitive-complexity): Actually complex and has to be fast. void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { @@ -666,7 +703,7 @@ void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std:: if (v > 0) { if (v > remainingLeftClip) { const auto overshoot = v - remainingLeftClip; - RenderLine(dst, src + remainingLeftClip, overshoot, tbl, m); + RenderLine(dst, src + remainingLeftClip, overshoot, tbl, m); dst += overshoot; drawWidth -= overshoot; } @@ -688,13 +725,13 @@ void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std:: auto v = static_cast(*src++); if (v > 0) { if (v > drawWidth) { - RenderLine(dst, src, drawWidth, tbl, m); + RenderLine(dst, src, drawWidth, tbl, m); src += v; dst += drawWidth; drawWidth -= v; break; } - RenderLine(dst, src, v, tbl, m); + RenderLine(dst, src, v, tbl, m); src += v; } else { v = -v; @@ -715,12 +752,13 @@ void RenderTransparentSquareClipped(std::uint8_t *dst, int dstPitch, const std:: } } +template void RenderTransparentSquare(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width && clip.height == Height) { - RenderTransparentSquareFull(dst, dstPitch, src, mask, tbl); + RenderTransparentSquareFull(dst, dstPitch, src, mask, tbl); } else { - RenderTransparentSquareClipped(dst, dstPitch, src, mask, tbl, clip); + RenderTransparentSquareClipped(dst, dstPitch, src, mask, tbl, clip); } } @@ -762,24 +800,26 @@ std::size_t CalculateTriangleSourceSkipUpperBottom(std::int_fast16_t numLines) return 2 * TriangleUpperHeight * numLines - numLines * (numLines - 1) + 2 * ((numLines + 1) / 2); } +template void RenderLeftTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { dst += XStep * (LowerHeight - 1); for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } dst += 2 * XStep; for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep, --mask) { src += 2 * (i % 2); const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } } +template void RenderLeftTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -789,7 +829,7 @@ void RenderLeftTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std:: for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); @@ -798,11 +838,12 @@ void RenderLeftTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std:: for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep, --mask) { src += 2 * (i % 2); const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } } +template void RenderLeftTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -816,7 +857,7 @@ void RenderLeftTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, cons const auto startX = Width - XStep * i; const auto skip = startX < clipLeft ? clipLeft - startX : 0; if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); src += width; } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); @@ -828,11 +869,12 @@ void RenderLeftTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, cons const auto startX = XStep * i; const auto skip = startX < clipLeft ? clipLeft - startX : 0; if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); src += width; } } +template void RenderLeftTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -844,7 +886,7 @@ void RenderLeftTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, con src += 2 * (i % 2); const auto width = XStep * i; if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); + RenderLine(dst, src, width - clipRight, tbl, *mask); src += width; } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); @@ -855,40 +897,43 @@ void RenderLeftTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, con const auto width = Width - XStep * i; if (width <= clipRight) break; - RenderLine(dst, src, width - clipRight, tbl, *mask); + RenderLine(dst, src, width - clipRight, tbl, *mask); src += width; } } +template void RenderLeftTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderLeftTriangleFull(dst, dstPitch, src, mask, tbl); + RenderLeftTriangleFull(dst, dstPitch, src, mask, tbl); } else { - RenderLeftTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); } else { - RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); } } +template void RenderRightTriangleFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } for (auto i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch, --mask) { const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } } +template void RenderRightTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -896,18 +941,19 @@ void RenderRightTriangleClipVertical(std::uint8_t *dst, int dstPitch, const std: const auto lowerMax = LowerHeight - clipY.lowerTop; for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); const auto upperMax = TriangleUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { const auto width = Width - XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } } +template void RenderRightTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -917,7 +963,7 @@ void RenderRightTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, con for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); src += width + 2 * (i % 2); } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); @@ -926,11 +972,12 @@ void RenderRightTriangleClipLeftAndVertical(std::uint8_t *dst, int dstPitch, con const auto width = Width - XStep * i; if (width <= clipLeft) break; - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); src += width + 2 * (i % 2); } } +template void RenderRightTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -941,7 +988,7 @@ void RenderRightTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, co const auto width = XStep * i; const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); + RenderLine(dst, src, width - skip, tbl, *mask); src += width + 2 * (i % 2); } src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom); @@ -950,42 +997,45 @@ void RenderRightTriangleClipRightAndVertical(std::uint8_t *dst, int dstPitch, co const auto width = Width - XStep * i; const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); + RenderLine(dst, src, width - skip, tbl, *mask); src += width + 2 * (i % 2); } } +template void RenderRightTriangle(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width) { if (clip.height == TriangleHeight) { - RenderRightTriangleFull(dst, dstPitch, src, mask, tbl); + RenderRightTriangleFull(dst, dstPitch, src, mask, tbl); } else { - RenderRightTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipVertical(dst, dstPitch, src, mask, tbl, clip); } } else if (clip.right == 0) { - RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); } else { - RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTriangleClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); } } +template void RenderLeftTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { dst += XStep * (LowerHeight - 1); for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep, --mask) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } dst += XStep; for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + RenderLine(dst, src, Width, tbl, *mask); src += Width; } } +template void RenderLeftTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -995,18 +1045,19 @@ void RenderLeftTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std: for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep, --mask) { src += 2 * (i % 2); const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width; } src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + RenderLine(dst, src, Width, tbl, *mask); src += Width; } } +template void RenderLeftTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -1020,18 +1071,19 @@ void RenderLeftTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, con const auto startX = Width - XStep * i; const auto skip = startX < clipLeft ? clipLeft - startX : 0; if (width > skip) - RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); + RenderLine(dst + skip, src + skip, width - skip, tbl, (*mask) << skip); src += width; } src += clipY.upperBottom * Width + clipLeft; dst += XStep + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); src += Width; } } +template void RenderLeftTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -1043,46 +1095,49 @@ void RenderLeftTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, co src += 2 * (i % 2); const auto width = XStep * i; if (width > clipRight) - RenderLine(dst, src, width - clipRight, tbl, *mask); + RenderLine(dst, src, width - clipRight, tbl, *mask); src += width; } src += clipY.upperBottom * Width; dst += XStep; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + RenderLine(dst, src, clip.width, tbl, *mask); src += Width; } } +template void RenderLeftTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderLeftTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderLeftTrapezoidFull(dst, dstPitch, src, mask, tbl); } else { - RenderLeftTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); } } else if (clip.right == 0) { - RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); } else { - RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderLeftTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); } } +template void RenderRightTrapezoidFull(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl) { for (auto i = 1; i <= LowerHeight; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } for (auto i = 1; i <= TrapezoidUpperHeight; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + RenderLine(dst, src, Width, tbl, *mask); src += Width; } } +template void RenderRightTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -1090,17 +1145,18 @@ void RenderRightTrapezoidClipVertical(std::uint8_t *dst, int dstPitch, const std src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom); for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; - RenderLine(dst, src, width, tbl, *mask); + RenderLine(dst, src, width, tbl, *mask); src += width + 2 * (i % 2); } src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, Width, tbl, *mask); + RenderLine(dst, src, Width, tbl, *mask); src += Width; } } +template void RenderRightTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -1110,17 +1166,18 @@ void RenderRightTrapezoidClipLeftAndVertical(std::uint8_t *dst, int dstPitch, co for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch, --mask) { const auto width = XStep * i; if (width > clipLeft) - RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); + RenderLine(dst, src + clipLeft, width - clipLeft, tbl, (*mask) << clipLeft); src += width + 2 * (i % 2); } src += clipY.upperBottom * Width + clipLeft; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); + RenderLine(dst, src, clip.width, tbl, (*mask) << clipLeft); src += Width; } } +template void RenderRightTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { const auto clipY = CalculateDiamondClipY(clip); @@ -1131,29 +1188,55 @@ void RenderRightTrapezoidClipRightAndVertical(std::uint8_t *dst, int dstPitch, c const auto width = XStep * i; const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0; if (width > skip) - RenderLine(dst, src, width - skip, tbl, *mask); + RenderLine(dst, src, width - skip, tbl, *mask); src += width + 2 * (i % 2); } src += clipY.upperBottom * Width; const auto upperMax = TrapezoidUpperHeight - clipY.upperTop; for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch, --mask) { - RenderLine(dst, src, clip.width, tbl, *mask); + RenderLine(dst, src, clip.width, tbl, *mask); src += Width; } } +template void RenderRightTrapezoid(std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) { if (clip.width == Width) { if (clip.height == Height) { - RenderRightTrapezoidFull(dst, dstPitch, src, mask, tbl); + RenderRightTrapezoidFull(dst, dstPitch, src, mask, tbl); } else { - RenderRightTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipVertical(dst, dstPitch, src, mask, tbl, clip); } } else if (clip.right == 0) { - RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipLeftAndVertical(dst, dstPitch, src, mask, tbl, clip); } else { - RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + RenderRightTrapezoidClipRightAndVertical(dst, dstPitch, src, mask, tbl, clip); + } +} + +template +void RenderTileType(TileType tile, std::uint8_t *dst, int dstPitch, const std::uint8_t *src, const std::uint32_t *mask, const std::uint8_t *tbl, Clip clip) +{ + switch (tile) { + case TileType::Square: + RenderSquare(dst, dstPitch, src, mask, tbl, clip); + break; + case TileType::TransparentSquare: + RenderTransparentSquare(dst, dstPitch, src, mask, tbl, clip); + break; + case TileType::LeftTriangle: + RenderLeftTriangle(dst, dstPitch, src, mask, tbl, clip); + break; + case TileType::RightTriangle: + RenderRightTriangle(dst, dstPitch, src, mask, tbl, clip); + break; + case TileType::LeftTrapezoid: + RenderLeftTrapezoid(dst, dstPitch, src, mask, tbl, clip); + break; + case TileType::RightTrapezoid: + RenderRightTrapezoid(dst, dstPitch, src, mask, tbl, clip); + break; } } @@ -1317,27 +1400,34 @@ void RenderTile(const CelOutputBuffer &out, int x, int y) const auto *src = reinterpret_cast(&pDungeonCels[SDL_SwapLE32(pFrameTable[level_cel_block & 0xFFF])]); std::uint8_t *dst = out.at(static_cast(x + clip.left), static_cast(y - clip.bottom)); const auto dstPitch = out.pitch(); - mask -= clip.bottom; - switch (tile) { - case TileType::Square: - RenderSquare(dst, dstPitch, src, mask, tbl, clip); - break; - case TileType::TransparentSquare: - RenderTransparentSquare(dst, dstPitch, src, mask, tbl, clip); - break; - case TileType::LeftTriangle: - RenderLeftTriangle(dst, dstPitch, src, mask, tbl, clip); - break; - case TileType::RightTriangle: - RenderRightTriangle(dst, dstPitch, src, mask, tbl, clip); - break; - case TileType::LeftTrapezoid: - RenderLeftTrapezoid(dst, dstPitch, src, mask, tbl, clip); - break; - case TileType::RightTrapezoid: - RenderRightTrapezoid(dst, dstPitch, src, mask, tbl, clip); - break; + if (mask == &SolidMask[TILE_HEIGHT - 1]) { + if (light_table_index == lightmax) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else if (light_table_index == 0) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } + } else { + mask -= clip.bottom; + if (sgOptions.Graphics.bBlendedTransparancy) { + if (light_table_index == lightmax) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else if (light_table_index == 0) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } + } else { + if (light_table_index == lightmax) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else if (light_table_index == 0) { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } else { + RenderTileType(tile, dst, dstPitch, src, mask, tbl, clip); + } + } } }