Browse Source

clx_render: Slightly optimize `GetSkipSize`

This gives a very slight FPS boost:
from 1140 to 1143 FPS on my machine, as measured by:

```bash
tools/linux_reduced_cpu_variance_run.sh tools/measure_timedemo_performance.py -n 5 --binary build-rel/devilutionx
```
pull/7330/head
Gleb Mazovetskiy 2 years ago
parent
commit
7940a10da9
  1. 19
      Source/engine/render/clx_render.cpp
  2. 30
      Source/utils/cl2_to_clx.cpp
  3. 28
      Source/utils/clx_decode.hpp

19
Source/engine/render/clx_render.cpp

@ -70,25 +70,6 @@ struct RenderSrc {
uint_fast16_t width; uint_fast16_t width;
}; };
// How far to advance after finishing a line: the number of lines to move
// and the x-offset at which to continue.
struct SkipSize {
	int_fast16_t wholeLines;
	int_fast16_t xOffset;
};

// Converts the width left on the current line into a `SkipSize`.
//
// A non-negative `remainingWidth` (expected to be exactly 0, i.e. a whole line
// was drawn) yields `{ 1, 0 }`. A negative `remainingWidth` means the line was
// overrun by `-remainingWidth`; the overrun is split into additional whole
// lines of `srcWidth` plus the leftover x-offset.
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT SkipSize GetSkipSize(int_fast16_t remainingWidth, int_fast16_t srcWidth)
{
	if (remainingWidth >= 0) {
		// No overrun: advance exactly one line and start at its beginning.
		return SkipSize { 1, 0 };
	}

	// `overrun` is how far past the end of the line the last command went.
	const auto overrun = -remainingWidth;
	const int_fast16_t extraLines = overrun / srcWidth;
	return SkipSize {
		static_cast<int_fast16_t>(1 + extraLines),
		static_cast<int_fast16_t>(overrun - srcWidth * extraLines)
	};
}
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT const uint8_t *SkipRestOfLineWithOverrun( DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT const uint8_t *SkipRestOfLineWithOverrun(
const uint8_t *src, int_fast16_t srcWidth, SkipSize &skipSize) const uint8_t *src, int_fast16_t srcWidth, SkipSize &skipSize)
{ {

30
Source/utils/cl2_to_clx.cpp

@ -11,24 +11,6 @@
namespace devilution { namespace devilution {
namespace {

// Number of bytes reserved in the output for each frame's header
// (see its use in `Cl2ToClx`).
constexpr size_t FrameHeaderSize = 10;

// An overrun expressed as whole lines plus the x-offset into the following line.
struct SkipSize {
	int_fast16_t wholeLines;
	int_fast16_t xOffset;
};

// Splits `overrun` into the number of complete lines of `srcWidth` it covers
// and the remainder, which becomes the x-offset.
SkipSize GetSkipSize(int_fast16_t overrun, int_fast16_t srcWidth)
{
	const int_fast16_t lines = overrun / srcWidth;
	const int_fast16_t offset = overrun % srcWidth;
	return SkipSize { lines, offset };
}

} // namespace
uint16_t Cl2ToClx(const uint8_t *data, size_t size, uint16_t Cl2ToClx(const uint8_t *data, size_t size,
PointerOrValue<uint16_t> widthOrWidths, std::vector<uint8_t> &clxData) PointerOrValue<uint16_t> widthOrWidths, std::vector<uint8_t> &clxData)
{ {
@ -74,6 +56,7 @@ uint16_t Cl2ToClx(const uint8_t *data, size_t size,
const uint16_t frameWidth = widthOrWidths.HoldsPointer() ? widthOrWidths.AsPointer()[frame - 1] : widthOrWidths.AsValue(); const uint16_t frameWidth = widthOrWidths.HoldsPointer() ? widthOrWidths.AsPointer()[frame - 1] : widthOrWidths.AsValue();
const size_t frameHeaderPos = clxData.size(); const size_t frameHeaderPos = clxData.size();
constexpr size_t FrameHeaderSize = 10;
clxData.resize(clxData.size() + FrameHeaderSize); clxData.resize(clxData.size() + FrameHeaderSize);
WriteLE16(&clxData[frameHeaderPos], FrameHeaderSize); WriteLE16(&clxData[frameHeaderPos], FrameHeaderSize);
WriteLE16(&clxData[frameHeaderPos + 2], frameWidth); WriteLE16(&clxData[frameHeaderPos + 2], frameWidth);
@ -110,14 +93,9 @@ uint16_t Cl2ToClx(const uint8_t *data, size_t size,
} }
} }
++frameHeight; const auto skipSize = GetSkipSize(remainingWidth, static_cast<int_fast16_t>(frameWidth));
if (remainingWidth < 0) { xOffset = skipSize.xOffset;
const auto skipSize = GetSkipSize(-remainingWidth, static_cast<int_fast16_t>(frameWidth)); frameHeight += skipSize.wholeLines;
xOffset = skipSize.xOffset;
frameHeight += skipSize.wholeLines;
} else {
xOffset = 0;
}
} }
if (!pixels.empty()) { if (!pixels.empty()) {
AppendClxPixelsOrFillRun(pixels.data(), pixels.size(), clxData); AppendClxPixelsOrFillRun(pixels.data(), pixels.size(), clxData);

28
Source/utils/clx_decode.hpp

@ -2,6 +2,9 @@
#include <cstdint> #include <cstdint>
#include "appfat.h"
#include "utils/attributes.h"
namespace devilution { namespace devilution {
[[nodiscard]] constexpr bool IsClxOpaque(uint8_t control) [[nodiscard]] constexpr bool IsClxOpaque(uint8_t control)
@ -27,4 +30,29 @@ namespace devilution {
return ClxFillEnd - control; return ClxFillEnd - control;
} }
// How far rendering must advance after a line: a number of lines and the
// x-offset at which to continue.
struct SkipSize {
	int_fast16_t wholeLines;
	int_fast16_t xOffset;
};

// Computes how many lines to advance and the resulting x-offset once the
// current line has been completed, including the case where a CLX command
// ran past the end of the line.
//
// Requires: remainingWidth <= 0.
DVL_ALWAYS_INLINE SkipSize GetSkipSize(int_fast16_t remainingWidth, int_fast16_t srcWidth)
{
	// `remainingWidth` is either 0 (the line was filled exactly) or negative,
	// in which case `-remainingWidth` is the amount of overrun.
	//
	// The exact-fill case is common. The general computation below would also
	// produce { 1, 0 } for it, but handling it up front was found to be faster.
	if (remainingWidth == 0) {
		return SkipSize { 1, 0 };
	}

	const auto overshoot = static_cast<uint_fast16_t>(-remainingWidth);
	// One line for the current line plus every full line covered by the overrun.
	const uint_fast16_t lineCount = overshoot / srcWidth + 1;
	const uint_fast16_t column = overshoot % srcWidth;
	return SkipSize { static_cast<int_fast16_t>(lineCount), static_cast<int_fast16_t>(column) };
}
} // namespace devilution } // namespace devilution

Loading…
Cancel
Save