diff --git a/Source/engine.cpp b/Source/engine.cpp index e2550c75e..91aa49cf3 100644 --- a/Source/engine.cpp +++ b/Source/engine.cpp @@ -12,6 +12,8 @@ */ #include +#include +#include #include "engine/render/common_impl.h" #include "lighting.h" @@ -21,17 +23,69 @@ namespace devilution { namespace { -void DrawHalfTransparentBlendedRectTo(const Surface &out, int sx, int sy, int width, int height) +// Expects everything to be 4-byte aligned. +void DrawHalfTransparentAligned32BlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height) { - BYTE *pix = out.at(sx, sy); + assert(out.pitch() % 4 == 0); - for (int row = 0; row < height; row++) { - for (int col = 0; col < width; col++) { - *pix = paletteTransparencyLookup[0][*pix]; - pix++; + auto *pix = reinterpret_cast(out.at(static_cast(sx), static_cast(sy))); + assert(reinterpret_cast(pix) % 4 == 0); + + const uint16_t *lookupTable = paletteTransparencyLookupBlack16; + + const unsigned skipX = (out.pitch() - width) / 4; + width /= 4; + while (height-- > 0) { + for (unsigned i = 0; i < width; ++i, ++pix) { + const uint32_t v = *pix; +#if SDL_BYTEORDER == SDL_LIL_ENDIAN + *pix = lookupTable[v & 0xFFFF] | (lookupTable[(v >> 16) & 0xFFFF] << 16); +#else + *pix = lookupTable[(v >> 16) & 0xFFFF] | (lookupTable[v & 0xFFFF] << 16); +#endif } - pix += out.pitch() - width; + pix += skipX; + } +} + +void DrawHalfTransparentUnalignedBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height) +{ + uint8_t *pix = out.at(static_cast(sx), static_cast(sy)); + const uint8_t *lookupTable = paletteTransparencyLookup[0]; + const unsigned skipX = out.pitch() - width; + for (unsigned y = 0; y < height; ++y) { + for (unsigned x = 0; x < width; ++x, ++pix) { + *pix = lookupTable[*pix]; + } + pix += skipX; + } +} + +void DrawHalfTransparentBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height) +{ + // All SDL surfaces are 4-byte aligned and divisible by 4. + // However, our coordinates and widths may not be. + + // First, draw the leading unaligned part. + if (sx % 4 != 0) { + const unsigned w = 4 - sx % 4; + DrawHalfTransparentUnalignedBlendedRectTo(out, sx, sy, w, height); + sx += w; + width -= w; } + + if (static_cast(sx + width) == out.w()) { + // The pitch is 4-byte aligned, so we can simply extend the width to the pitch. + width = out.pitch() - sx; + } else if (width % 4 != 0) { + // Draw the trailing unaligned part. + const unsigned w = width % 4; + DrawHalfTransparentUnalignedBlendedRectTo(out, sx + (width / 4) * 4, sy, w, height); + width -= w; + } + + // Now everything is divisible by 4. Draw the aligned part. + DrawHalfTransparentAligned32BlendedRectTo(out, sx, sy, width, height); } void DrawHalfTransparentStippledRectTo(const Surface &out, int sx, int sy, int width, int height) diff --git a/Source/palette.cpp b/Source/palette.cpp index 6926979ff..99fdb4d85 100644 --- a/Source/palette.cpp +++ b/Source/palette.cpp @@ -19,6 +19,8 @@ SDL_Color system_palette[256]; SDL_Color orig_palette[256]; Uint8 paletteTransparencyLookup[256][256]; +uint16_t paletteTransparencyLookupBlack16[65536]; + namespace { /** Specifies whether the palette has max brightness. */ @@ -88,6 +90,17 @@ void GenerateBlendedLookupTable(SDL_Color *palette, int skipFrom, int skipTo, in paletteTransparencyLookup[i][j] = best; } } + + for (unsigned i = 0; i < 256; ++i) { + for (unsigned j = 0; j < 256; ++j) { +#if SDL_BYTEORDER == SDL_LIL_ENDIAN + const std::uint16_t index = i | (j << 8); +#else + const std::uint16_t index = j | (i << 8); +#endif + paletteTransparencyLookupBlack16[index] = paletteTransparencyLookup[0][i] | (paletteTransparencyLookup[0][j] << 8); + } + } } /** diff --git a/Source/palette.h b/Source/palette.h index 716806b2c..804d425f9 100644 --- a/Source/palette.h +++ b/Source/palette.h @@ -5,6 +5,8 @@ */ #pragma once +#include + #include "gendung.h" namespace devilution { @@ -35,6 +37,17 @@ extern SDL_Color orig_palette[256]; /** Lookup table for transparency */ extern Uint8 paletteTransparencyLookup[256][256]; +/** + * A lookup table from black for a pair of colors. + * + * For a pair of colors i and j, the index `i | (j << 8)` contains + * `paletteTransparencyLookup[0][i] | (paletteTransparencyLookup[0][j] << 8)`. + * + * On big-endian platforms, the indices are encoded as `j | (i << 8)`, while the + * value order remains the same. + */ +extern uint16_t paletteTransparencyLookupBlack16[65536]; + void palette_update(int first = 0, int ncolor = 256); void palette_init(); void LoadPalette(const char *pszFileName, bool blend = true);