Browse Source

Optimize `DrawHalfTransparentBlendedRectTo`

`DrawHalfTransparentBlendedRectTo` takes up a significant chunk of time
when rendering store and quest dialogs.

Optimize the function to process 4 pixels (one 32-bit word) at a time:
each word is blended with two 2-pixel table lookups and written back as
4 pixels at once.
pull/3455/head
Gleb Mazovetskiy 4 years ago committed by Anders Jenbo
parent
commit
909e450f5e
  1. 68
      Source/engine.cpp
  2. 13
      Source/palette.cpp
  3. 13
      Source/palette.h

68
Source/engine.cpp

@ -12,6 +12,8 @@
*/
#include <array>
#include <cassert>
#include <cstdint>
#include "engine/render/common_impl.h"
#include "lighting.h"
@ -21,17 +23,69 @@
namespace devilution {
namespace {
void DrawHalfTransparentBlendedRectTo(const Surface &out, int sx, int sy, int width, int height)
// Expects everything to be 4-byte aligned.
void DrawHalfTransparentAligned32BlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
{
BYTE *pix = out.at(sx, sy);
assert(out.pitch() % 4 == 0);
for (int row = 0; row < height; row++) {
for (int col = 0; col < width; col++) {
*pix = paletteTransparencyLookup[0][*pix];
pix++;
auto *pix = reinterpret_cast<uint32_t *>(out.at(static_cast<int>(sx), static_cast<int>(sy)));
assert(reinterpret_cast<intptr_t>(pix) % 4 == 0);
const uint16_t *lookupTable = paletteTransparencyLookupBlack16;
const unsigned skipX = (out.pitch() - width) / 4;
width /= 4;
while (height-- > 0) {
for (unsigned i = 0; i < width; ++i, ++pix) {
const uint32_t v = *pix;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
*pix = lookupTable[v & 0xFFFF] | (lookupTable[(v >> 16) & 0xFFFF] << 16);
#else
*pix = lookupTable[(v >> 16) & 0xFFFF] | (lookupTable[v & 0xFFFF] << 16);
#endif
}
pix += out.pitch() - width;
pix += skipX;
}
}
/**
 * Remaps every pixel of a rectangle through `paletteTransparencyLookup[0]`,
 * one byte at a time. Places no alignment requirements on its arguments.
 *
 * @param out Surface whose pixels are rewritten in place.
 * @param sx Left column of the rectangle.
 * @param sy Top row of the rectangle.
 * @param width Rectangle width in pixels.
 * @param height Rectangle height in pixels.
 */
void DrawHalfTransparentUnalignedBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
{
	uint8_t *cursor = out.at(static_cast<int>(sx), static_cast<int>(sy));
	const uint8_t *blendRow = paletteTransparencyLookup[0];
	// Distance from the end of one rectangle row to the start of the next.
	const unsigned rowGap = out.pitch() - width;
	for (unsigned rowsLeft = height; rowsLeft != 0; --rowsLeft) {
		uint8_t *const rowEnd = cursor + width;
		while (cursor != rowEnd) {
			*cursor = blendRow[*cursor];
			++cursor;
		}
		cursor += rowGap;
	}
}
/**
 * Remaps a rectangle of `out` through the half-transparency lookup tables.
 *
 * The rectangle is split into up to three column bands: a leading band that
 * advances `sx` to a multiple of 4, a trailing band with the `width % 4`
 * leftover columns, and a middle band whose start and width are multiples of 4.
 * The leading and trailing bands go through the byte-wise helper; the middle
 * band goes through the 32-bit helper.
 *
 * @param out Surface whose pixels are rewritten in place.
 * @param sx Left column of the rectangle.
 * @param sy Top row of the rectangle.
 * @param width Rectangle width in pixels.
 * @param height Rectangle height in pixels.
 */
void DrawHalfTransparentBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
{
	// The 32-bit helper expects a 4-byte aligned start and a width divisible by 4.
	// However, our coordinates and widths may not be.

	// First, draw the leading unaligned part.
	if (sx % 4 != 0) {
		// Clamp to the requested width: a rectangle narrower than the gap to the
		// next multiple of 4 must not be overdrawn, and `width -= w` below must
		// not wrap around.
		const unsigned gap = 4 - sx % 4;
		const unsigned w = gap < width ? gap : width;
		DrawHalfTransparentUnalignedBlendedRectTo(out, sx, sy, w, height);
		sx += w;
		width -= w;
	}

	// The leading part may already have covered the whole rectangle.
	if (width == 0)
		return;

	if (static_cast<int>(sx + width) == out.w()) {
		// The pitch is 4-byte aligned, so we can simply extend the width to the pitch.
		width = out.pitch() - sx;
	} else if (width % 4 != 0) {
		// Draw the trailing unaligned part.
		const unsigned w = width % 4;
		DrawHalfTransparentUnalignedBlendedRectTo(out, sx + (width / 4) * 4, sy, w, height);
		width -= w;
	}

	// Now everything is divisible by 4. Draw the aligned part.
	DrawHalfTransparentAligned32BlendedRectTo(out, sx, sy, width, height);
}
void DrawHalfTransparentStippledRectTo(const Surface &out, int sx, int sy, int width, int height)

13
Source/palette.cpp

@ -19,6 +19,8 @@ SDL_Color system_palette[256];
SDL_Color orig_palette[256];
Uint8 paletteTransparencyLookup[256][256];
uint16_t paletteTransparencyLookupBlack16[65536];
namespace {
/** Specifies whether the palette has max brightness. */
@ -88,6 +90,17 @@ void GenerateBlendedLookupTable(SDL_Color *palette, int skipFrom, int skipTo, in
paletteTransparencyLookup[i][j] = best;
}
}
for (unsigned i = 0; i < 256; ++i) {
for (unsigned j = 0; j < 256; ++j) {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
const std::uint16_t index = i | (j << 8);
#else
const std::uint16_t index = j | (i << 8);
#endif
paletteTransparencyLookupBlack16[index] = paletteTransparencyLookup[0][i] | (paletteTransparencyLookup[0][j] << 8);
}
}
}
/**

13
Source/palette.h

@ -5,6 +5,8 @@
*/
#pragma once
#include <cstdint>
#include "gendung.h"
namespace devilution {
@ -35,6 +37,17 @@ extern SDL_Color orig_palette[256];
/** Lookup table for transparency */
extern Uint8 paletteTransparencyLookup[256][256];
/**
* A lookup table from black for a pair of colors.
*
* For a pair of colors i and j, the index `i | (j << 8)` contains
* `paletteTransparencyLookup[0][i] | (paletteTransparencyLookup[0][j] << 8)`.
*
* On big-endian platforms, the indices are encoded as `j | (i << 8)`, while the
* value order remains the same.
*/
extern uint16_t paletteTransparencyLookupBlack16[65536];
void palette_update(int first = 0, int ncolor = 256);
void palette_init();
void LoadPalette(const char *pszFileName, bool blend = true);

Loading…
Cancel
Save