Browse Source

Improvements to the `RenderLine()` function (#920)

* Improvements to the `RenderLine()` function
- Simplify by using indices instead of incrementing pointers
- Improve performance in the case where mask != -1 by only processing the bits that are set
pull/923/head
Arsène Pérard-Gayot 5 years ago committed by GitHub
parent
commit
8e1c61a6f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 66
      Source/render.cpp

66
Source/render.cpp

@ -133,54 +133,66 @@ static DWORD LeftFoliageMask[TILE_HEIGHT] = {
0xFFFFFFF0, 0xFFFFFFFC,
};
inline static void RenderLine(BYTE **dst, BYTE **src, int n, BYTE *tbl, DWORD mask)
{
/**
 * @brief Counts the clear bits that sit above the highest set bit of a 32-bit mask.
 *
 * @param mask Value to inspect. It must have at least one bit set.
 * @return Number of consecutive zero bits, counted downwards from bit 31.
 */
inline static int count_leading_zeros(DWORD mask)
{
#if defined(__GNUC__) || defined(__clang__)
	// Compiler intrinsic; its result is undefined for a zero argument,
	// hence the precondition above.
	return __builtin_clz(mask);
#else
	// Portable fallback: shift left until bit 31 is set, counting the shifts.
	// The loop relies on the non-zero precondition to terminate.
	int zeros = 0;
	while ((mask & 0x80000000) == 0) {
		mask <<= 1;
		zeros++;
	}
	return zeros;
#endif
}
/**
 * @brief Invokes a callback once for every set bit of a mask.
 *
 * Bits are visited from the most significant one downwards. The callback
 * receives the position of the bit counted from the top, so bit 31 is
 * reported as 0, bit 30 as 1, and so on.
 *
 * @param mask Bit mask to walk; a zero mask results in no calls.
 * @param f Callable taking the `int` position of each set bit.
 */
template <typename F>
void foreach_set_bit(DWORD mask, const F &f)
{
	int position = 0;
	while (mask != 0) {
		// Jump over the whole run of clear bits in one step.
		const int gap = count_leading_zeros(mask);
		position += gap;
		mask <<= gap;

		// Then consume the run of set bits that follows, one bit at a time.
		while (mask & 0x80000000) {
			f(position);
			position++;
			mask <<= 1;
		}
	}
}
/**
 * @brief Writes one horizontal run of up to n pixels and advances both cursors.
 *
 * Each pixel whose mask bit is set is written to the destination; pixels whose
 * mask bit is clear are left untouched. The most significant bit of the mask
 * controls pixel 0, the next bit pixel 1, and so on. What gets written depends
 * on the global light_table_index:
 *  - equal to lightmax: the value 0,
 *  - equal to 0: the source byte unchanged,
 *  - anything else: the source byte translated through tbl.
 *
 * On return *src and *dst have both been advanced by n, whether or not
 * anything was drawn.
 *
 * @param dst Pointer to the destination cursor; advanced by n.
 * @param src Pointer to the source cursor; advanced by n.
 * @param n Number of pixels in the run.
 * @param tbl Translation table indexed by source byte.
 * @param mask Per-pixel write mask; 0xFFFFFFFF selects every pixel.
 */
inline static void RenderLine(BYTE **dst, BYTE **src, int n, BYTE *tbl, DWORD mask)
{
#ifdef NO_OVERDRAW
	// Destination outside of the buffer bounds: draw nothing, only advance.
	if (*dst < gpBufStart || *dst > gpBufEnd) {
		goto skip;
	}
#endif

	if (mask == 0xFFFFFFFF) {
		// Every pixel is written, so whole-run operations can be used.
		if (light_table_index == lightmax) {
			memset(*dst, 0, n);
		} else if (light_table_index == 0) {
			memcpy(*dst, *src, n);
		} else {
			for (int i = 0; i < n; i++) {
				(*dst)[i] = tbl[(*src)[i]];
			}
		}
	} else {
		// The number of iterations is anyway limited by the size of the mask.
		// So we can limit it by ANDing the mask with another mask that only keeps
		// the bits of the first n pixels. This avoids testing i < n for every pixel.
		//
		// The clamp is written so that no shift count ever reaches the width of
		// DWORD, which would be undefined behaviour: n <= 0 selects no pixel at
		// all, and n >= maskBits keeps the mask as it is.
		const int maskBits = (int)(sizeof(DWORD) * CHAR_BIT);
		if (n <= 0) {
			mask = 0;
		} else if (n < maskBits) {
			mask &= ~(DWORD)0 << (maskBits - n);
		}

		if (light_table_index == lightmax) {
			foreach_set_bit(mask, [=](int i) { (*dst)[i] = 0; });
		} else if (light_table_index == 0) {
			foreach_set_bit(mask, [=](int i) { (*dst)[i] = (*src)[i]; });
		} else {
			foreach_set_bit(mask, [=](int i) { (*dst)[i] = tbl[(*src)[i]]; });
		}
	}

skip:
	// Single place where both cursors move past the run.
	(*src) += n;
	(*dst) += n;
}
#if defined(__clang__) || defined(__GNUC__)

Loading…
Cancel
Save