Browse Source

CelOutputBuffer: Recover lost performance

Makes `RenderLine` hotspot manipulate the buffer directly again.

FPS: 195 -> 275
pull/1171/head
Gleb Mazovetskiy 5 years ago committed by Anders Jenbo
parent
commit
ae22644d43
  1. 120
      Source/render.cpp
  2. 12
      defs.h

120
Source/render.cpp

@ -9,6 +9,8 @@ DEVILUTION_BEGIN_NAMESPACE
#define NO_OVERDRAW
namespace {
enum {
RT_SQUARE,
RT_TRANSPARENT,
@ -19,7 +21,7 @@ enum {
};
/** Fully transparent variant of WallMask. */
static DWORD WallMask_FullyTrasparent[TILE_HEIGHT] = {
const DWORD WallMask_FullyTrasparent[TILE_HEIGHT] = {
0x00000000,
0x00000000,
0x00000000,
@ -54,7 +56,7 @@ static DWORD WallMask_FullyTrasparent[TILE_HEIGHT] = {
0x00000000
};
/** Transparent variant of RightMask. */
static DWORD RightMask_Transparent[TILE_HEIGHT] = {
const DWORD RightMask_Transparent[TILE_HEIGHT] = {
0xC0000000,
0xF0000000,
0xFC000000,
@ -89,7 +91,7 @@ static DWORD RightMask_Transparent[TILE_HEIGHT] = {
0xFFFFFFFF
};
/** Transparent variant of LeftMask. */
static DWORD LeftMask_Transparent[TILE_HEIGHT] = {
const DWORD LeftMask_Transparent[TILE_HEIGHT] = {
0x00000003,
0x0000000F,
0x0000003F,
@ -124,7 +126,7 @@ static DWORD LeftMask_Transparent[TILE_HEIGHT] = {
0xFFFFFFFF
};
/** Specifies the draw masks used to render transparency of the right side of tiles. */
static DWORD RightMask[TILE_HEIGHT] = {
const DWORD RightMask[TILE_HEIGHT] = {
0xEAAAAAAA,
0xF5555555,
0xFEAAAAAA,
@ -159,7 +161,7 @@ static DWORD RightMask[TILE_HEIGHT] = {
0xFFFFFFFF
};
/** Specifies the draw masks used to render transparency of the left side of tiles. */
static DWORD LeftMask[TILE_HEIGHT] = {
const DWORD LeftMask[TILE_HEIGHT] = {
0xAAAAAAAB,
0x5555555F,
0xAAAAAABF,
@ -194,7 +196,7 @@ static DWORD LeftMask[TILE_HEIGHT] = {
0xFFFFFFFF
};
/** Specifies the draw masks used to render transparency of wall tiles. */
static DWORD WallMask[TILE_HEIGHT] = {
const DWORD WallMask[TILE_HEIGHT] = {
0xAAAAAAAA,
0x55555555,
0xAAAAAAAA,
@ -229,7 +231,7 @@ static DWORD WallMask[TILE_HEIGHT] = {
0x55555555
};
/** Fully opaque mask */
static DWORD SolidMask[TILE_HEIGHT] = {
const DWORD SolidMask[TILE_HEIGHT] = {
0xFFFFFFFF,
0xFFFFFFFF,
0xFFFFFFFF,
@ -264,7 +266,7 @@ static DWORD SolidMask[TILE_HEIGHT] = {
0xFFFFFFFF
};
/** Used to mask out the right half of the tile diamond and only render additional content */
static DWORD RightFoliageMask[TILE_HEIGHT] = {
const DWORD RightFoliageMask[TILE_HEIGHT] = {
0xFFFFFFFF,
0x3FFFFFFF,
0x0FFFFFFF,
@ -299,7 +301,7 @@ static DWORD RightFoliageMask[TILE_HEIGHT] = {
0x00000000,
};
/** Used to mask out the left half of the tile diamond and only render additional content */
static DWORD LeftFoliageMask[TILE_HEIGHT] = {
const DWORD LeftFoliageMask[TILE_HEIGHT] = {
0xFFFFFFFF,
0xFFFFFFFC,
0xFFFFFFF0,
@ -334,7 +336,7 @@ static DWORD LeftFoliageMask[TILE_HEIGHT] = {
0x00000000,
};
inline static int count_leading_zeros(DWORD mask)
inline int count_leading_zeros(DWORD mask)
{
// Note: This function assumes that the argument is not zero,
// which means there is at least one bit set.
@ -372,23 +374,16 @@ void foreach_set_bit(DWORD mask, const F &f)
}
}
inline static void RenderLine(CelOutputBuffer out, int &x, int y, BYTE **src, int n, BYTE *tbl, DWORD mask)
inline void DoRenderLine(BYTE *dst, BYTE *src, int n, BYTE *tbl, DWORD mask)
{
BYTE *dst = out.at(x, y);
#ifdef NO_OVERDRAW
if (y < SCREEN_Y || dst > out.end()) {
goto skip;
}
#endif
if (mask == 0xFFFFFFFF) { // Opaque line
if (light_table_index == lightmax) { // Complete darkness
memset(dst, 0, n);
} else if (light_table_index == 0) { // Fully lit
memcpy(dst, *src, n);
memcpy(dst, src, n);
} else { // Partially lit
for (int i = 0; i < n; i++) {
dst[i] = tbl[(*src)[i]];
dst[i] = tbl[src[i]];
}
}
} else {
@ -410,34 +405,42 @@ inline static void RenderLine(CelOutputBuffer out, int &x, int y, BYTE **src, in
} else if (light_table_index == 0) { // Fully lit
for (int i = 0; i < n; i++, mask <<= 1) {
if (mask & 0x80000000)
dst[i] = (*src)[i];
dst[i] = src[i];
else
dst[i] = paletteTransparencyLookup[dst[i]][(*src)[i]];
dst[i] = paletteTransparencyLookup[dst[i]][src[i]];
}
} else { // Partially lit
for (int i = 0; i < n; i++, mask <<= 1) {
if (mask & 0x80000000)
dst[i] = tbl[(*src)[i]];
dst[i] = tbl[src[i]];
else
dst[i] = paletteTransparencyLookup[dst[i]][tbl[(*src)[i]]];
dst[i] = paletteTransparencyLookup[dst[i]][tbl[src[i]]];
}
}
} else { // Stippled transparency
if (light_table_index == lightmax) { // Complete darkness
foreach_set_bit(mask, [=](int i) { dst[i] = 0; });
} else if (light_table_index == 0) { // Fully lit
foreach_set_bit(mask, [=](int i) { dst[i] = (*src)[i]; });
foreach_set_bit(mask, [=](int i) { dst[i] = src[i]; });
} else { // Partially lit
foreach_set_bit(mask, [=](int i) { dst[i] = tbl[(*src)[i]]; });
foreach_set_bit(mask, [=](int i) { dst[i] = tbl[src[i]]; });
}
}
}
}
skip:
DVL_ATTRIBUTE_ALWAYS_INLINE
inline void RenderLine(BYTE *dst_begin, BYTE *dst_end, BYTE **dst, BYTE **src, int n, BYTE *tbl, DWORD mask) {
#ifdef NO_OVERDRAW
if (*dst >= dst_begin && *dst <= dst_end)
#endif
DoRenderLine(*dst, *src, n, tbl, mask);
(*src) += n;
x += n;
(*dst) += n;
}
} // namespace
#if defined(__clang__) || defined(__GNUC__)
__attribute__((no_sanitize("shift-base")))
#endif
@ -447,7 +450,8 @@ void RenderTile(CelOutputBuffer out, int x, int y)
int i, j;
char c, v, tile;
BYTE *src, *tbl;
DWORD m, *mask, *pFrameTable;
DWORD m, *pFrameTable;
const DWORD *mask;
pFrameTable = (DWORD *)pDungeonCels;
@ -501,68 +505,72 @@ void RenderTile(CelOutputBuffer out, int x, int y)
}
#endif
BYTE *dst_begin = out.at(0, SCREEN_Y);
BYTE *dst_end = out.end();
BYTE *dst = out.at(x, y);
const int dst_pitch = out.pitch();
switch (tile) {
case RT_SQUARE:
for (i = TILE_HEIGHT; i != 0; --i, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2, tbl, *mask);
for (i = TILE_HEIGHT; i != 0; i--, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2, tbl, *mask);
}
break;
case RT_TRANSPARENT:
for (i = TILE_HEIGHT; i != 0; --i, --y, x -= TILE_WIDTH / 2, mask--) {
for (i = TILE_HEIGHT; i != 0; i--, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
m = *mask;
for (j = TILE_WIDTH / 2; j != 0; j -= v, v == TILE_WIDTH / 2 ? m = 0 : m <<= v) {
v = *src++;
if (v >= 0) {
RenderLine(out, x, y, &src, v, tbl, m);
RenderLine(dst_begin, dst_end, &dst, &src, v, tbl, m);
} else {
v = -v;
x += v;
dst += v;
}
}
}
break;
case RT_LTRIANGLE:
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, --y, x -= TILE_WIDTH / 2, mask--) {
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
src += i & 2;
x += i;
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
dst += i;
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
}
for (i = 2; i != TILE_WIDTH / 2; i += 2, --y, x -= TILE_WIDTH / 2, mask--) {
for (i = 2; i != TILE_WIDTH / 2; i += 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
src += i & 2;
x += i;
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
dst += i;
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
}
break;
case RT_RTRIANGLE:
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
src += i & 2;
x += i;
dst += i;
}
for (i = 2; i != TILE_HEIGHT; i += 2, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
for (i = 2; i != TILE_HEIGHT; i += 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
src += i & 2;
x += i;
dst += i;
}
break;
case RT_LTRAPEZOID:
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, --y, x -= TILE_WIDTH / 2, mask--) {
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
src += i & 2;
x += i;
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
dst += i;
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
}
for (i = TILE_HEIGHT / 2; i != 0; --i, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2, tbl, *mask);
for (i = TILE_HEIGHT / 2; i != 0; i--, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2, tbl, *mask);
}
break;
case RT_RTRAPEZOID:
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2 - i, tbl, *mask);
for (i = TILE_HEIGHT - 2; i >= 0; i -= 2, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2 - i, tbl, *mask);
src += i & 2;
x += i;
dst += i;
}
for (i = TILE_HEIGHT / 2; i != 0; i--, --y, x -= TILE_WIDTH / 2, mask--) {
RenderLine(out, x, y, &src, TILE_WIDTH / 2, tbl, *mask);
for (i = TILE_HEIGHT / 2; i != 0; i--, dst -= dst_pitch + TILE_WIDTH / 2, mask--) {
RenderLine(dst_begin, dst_end, &dst, &src, TILE_WIDTH / 2, tbl, *mask);
}
break;
}

12
defs.h

@ -200,3 +200,15 @@
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#endif
/**
 * DVL_HAVE_ATTRIBUTE(x) evaluates to the compiler's `__has_attribute(x)`
 * answer when that feature-test macro exists, and to 0 otherwise, so it is
 * always safe to use inside an `#if` expression.
 */
#if defined(__has_attribute)
#define DVL_HAVE_ATTRIBUTE(x) __has_attribute(x)
#else
#define DVL_HAVE_ATTRIBUTE(x) 0
#endif

/**
 * DVL_ATTRIBUTE_ALWAYS_INLINE expands to `__attribute__((always_inline))`
 * when the compiler reports support for the attribute, or when building with
 * GCC proper (not Clang) -- presumably to cover GCC releases that predate
 * `__has_attribute`. On every other compiler it expands to nothing.
 */
#if !DVL_HAVE_ATTRIBUTE(always_inline) && (!defined(__GNUC__) || defined(__clang__))
#define DVL_ATTRIBUTE_ALWAYS_INLINE
#else
#define DVL_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#endif

Loading…
Cancel
Save