Browse Source
Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```pull/8144/head
4 changed files with 280 additions and 229 deletions
@ -0,0 +1,217 @@
|
||||
#include "utils/palette_kd_tree.hpp" |
||||
|
||||
#include <array> |
||||
#include <cstddef> |
||||
#include <cstdint> |
||||
#include <span> |
||||
#include <string> |
||||
#include <utility> |
||||
|
||||
#ifdef USE_SDL1 |
||||
#include <SDL_video.h> |
||||
#else |
||||
#include <SDL_pixels.h> |
||||
#endif |
||||
|
||||
#include <fmt/format.h> |
||||
|
||||
#include "utils/static_vector.hpp" |
||||
#include "utils/str_cat.hpp" |
||||
|
||||
#if DEVILUTIONX_PRINT_PALETTE_BLENDING_TREE_GRAPHVIZ |
||||
#include <cstdio> |
||||
#endif |
||||
|
||||
namespace devilution { |
||||
namespace { |
||||
|
||||
template <size_t N> |
||||
uint8_t GetColorComponent(const SDL_Color &); |
||||
template <> |
||||
inline uint8_t GetColorComponent<0>(const SDL_Color &c) { return c.r; } |
||||
template <> |
||||
inline uint8_t GetColorComponent<1>(const SDL_Color &c) { return c.g; } |
||||
template <> |
||||
inline uint8_t GetColorComponent<2>(const SDL_Color &c) { return c.b; } |
||||
|
||||
template <size_t RemainingDepth> |
||||
[[nodiscard]] PaletteKdTreeNode<0> &LeafByIndex(PaletteKdTreeNode<RemainingDepth> &node, uint8_t index) |
||||
{ |
||||
if constexpr (RemainingDepth == 1) { |
||||
return node.child(index % 2 == 0); |
||||
} else { |
||||
return LeafByIndex(node.child(index % 2 == 0), index / 2); |
||||
} |
||||
} |
||||
|
||||
template <size_t RemainingDepth> |
||||
[[nodiscard]] uint8_t LeafIndexForColor(const PaletteKdTreeNode<RemainingDepth> &node, const SDL_Color &color, uint8_t result = 0) |
||||
{ |
||||
const bool isLeft = GetColorComponent<PaletteKdTreeNode<RemainingDepth>::Coord>(color) < node.pivot; |
||||
if constexpr (RemainingDepth == 1) { |
||||
return (2 * result) + (isLeft ? 0 : 1); |
||||
} else { |
||||
return (2 * LeafIndexForColor(node.child(isLeft), color, result)) + (isLeft ? 0 : 1); |
||||
} |
||||
} |
||||
|
||||
struct MedianInfo { |
||||
std::array<uint16_t, 256> counts = {}; |
||||
uint16_t numValues = 0; |
||||
}; |
||||
|
||||
[[nodiscard]] static uint8_t GetMedian(const MedianInfo &medianInfo) |
||||
{ |
||||
const std::span<const uint16_t, 256> counts = medianInfo.counts; |
||||
const uint_fast16_t numValues = medianInfo.numValues; |
||||
const auto medianTarget = static_cast<uint_fast16_t>((medianInfo.numValues + 1) / 2); |
||||
uint_fast16_t partialSum = 0; |
||||
uint_fast16_t i = 0; |
||||
for (; partialSum < medianTarget && partialSum != numValues; ++i) { |
||||
partialSum += counts[i]; |
||||
} |
||||
|
||||
// Special cases:
|
||||
// 1. If the elements are empty, this will return 0.
|
||||
// 2. If all the elements are the same, this will be `value + 1` (rolling over to 0 if value is 256).
|
||||
// This means all the elements will be on one side of the pivot (left unless the value is 255).
|
||||
return static_cast<uint8_t>(i); |
||||
} |
||||
|
||||
template <size_t RemainingDepth, size_t N> |
||||
void MaybeAddToSubdivisionForMedian( |
||||
const PaletteKdTreeNode<RemainingDepth> &node, |
||||
const SDL_Color palette[256], unsigned paletteIndex, |
||||
std::span<MedianInfo, N> medianInfos) |
||||
{ |
||||
const uint8_t color = GetColorComponent<PaletteKdTreeNode<RemainingDepth>::Coord>(palette[paletteIndex]); |
||||
if constexpr (N == 1) { |
||||
MedianInfo &medianInfo = medianInfos[0]; |
||||
++medianInfo.counts[color]; |
||||
++medianInfo.numValues; |
||||
} else { |
||||
const bool isLeft = color < node.pivot; |
||||
MaybeAddToSubdivisionForMedian(node.child(isLeft), |
||||
palette, |
||||
paletteIndex, |
||||
isLeft |
||||
? medianInfos.template subspan<0, N / 2>() |
||||
: medianInfos.template subspan<N / 2, N / 2>()); |
||||
} |
||||
} |
||||
|
||||
template <size_t RemainingDepth, size_t N> |
||||
void SetPivotsRecursively( |
||||
PaletteKdTreeNode<RemainingDepth> &node, |
||||
std::span<MedianInfo, N> medianInfos) |
||||
{ |
||||
if constexpr (N == 1) { |
||||
node.pivot = GetMedian(medianInfos[0]); |
||||
} else { |
||||
SetPivotsRecursively(node.left, medianInfos.template subspan<0, N / 2>()); |
||||
SetPivotsRecursively(node.right, medianInfos.template subspan<N / 2, N / 2>()); |
||||
} |
||||
} |
||||
|
||||
template <size_t TargetDepth> |
||||
void PopulatePivotsForTargetDepth(PaletteKdTreeNode<PaletteKdTreeDepth> &root, |
||||
const SDL_Color palette[256], int skipFrom, int skipTo) |
||||
{ |
||||
constexpr size_t NumSubdivisions = 1U << TargetDepth; |
||||
std::array<MedianInfo, NumSubdivisions> subdivisions = {}; |
||||
const std::span<MedianInfo, NumSubdivisions> subdivisionsSpan { subdivisions }; |
||||
for (int i = 0; i < 256; ++i) { |
||||
if (i >= skipFrom && i <= skipTo) continue; |
||||
MaybeAddToSubdivisionForMedian(root, palette, i, subdivisionsSpan); |
||||
} |
||||
SetPivotsRecursively(root, subdivisionsSpan); |
||||
} |
||||
|
||||
template <size_t... TargetDepths> |
||||
void PopulatePivotsImpl(PaletteKdTreeNode<PaletteKdTreeDepth> &root, |
||||
const SDL_Color palette[256], int skipFrom, int skipTo, std::index_sequence<TargetDepths...> intSeq) // NOLINT(misc-unused-parameters)
|
||||
{ |
||||
(PopulatePivotsForTargetDepth<TargetDepths>(root, palette, skipFrom, skipTo), ...); |
||||
} |
||||
|
||||
void PopulatePivots(PaletteKdTreeNode<PaletteKdTreeDepth> &root, |
||||
const SDL_Color palette[256], int skipFrom, int skipTo) |
||||
{ |
||||
PopulatePivotsImpl(root, palette, skipFrom, skipTo, std::make_index_sequence<PaletteKdTreeDepth> {}); |
||||
} |
||||
|
||||
} // namespace
|
||||
|
||||
PaletteKdTree::PaletteKdTree(const SDL_Color palette[256], int skipFrom, int skipTo) |
||||
{ |
||||
PopulatePivots(tree_, palette, skipFrom, skipTo); |
||||
StaticVector<uint8_t, 256> leafValues[NumLeaves]; |
||||
for (int i = 0; i < 256; ++i) { |
||||
if (i >= skipFrom && i <= skipTo) continue; |
||||
leafValues[LeafIndexForColor(tree_, palette[i])].emplace_back(i); |
||||
} |
||||
|
||||
size_t totalLen = 0; |
||||
for (uint8_t leafIndex = 0; leafIndex < NumLeaves; ++leafIndex) { |
||||
PaletteKdTreeNode<0> &leaf = LeafByIndex(tree_, leafIndex); |
||||
const std::span<const uint8_t> values = leafValues[leafIndex]; |
||||
if (values.empty()) { |
||||
leaf.valuesBegin = 1; |
||||
leaf.valuesEndInclusive = 0; |
||||
} else { |
||||
leaf.valuesBegin = static_cast<uint8_t>(totalLen); |
||||
leaf.valuesEndInclusive = static_cast<uint8_t>(totalLen - 1 + values.size()); |
||||
|
||||
for (size_t i = 0; i < values.size(); ++i) { |
||||
const uint8_t value = values[i]; |
||||
values_[totalLen + i] = std::make_pair(RGB { palette[value].r, palette[value].g, palette[value].b }, value); |
||||
} |
||||
totalLen += values.size(); |
||||
} |
||||
} |
||||
|
||||
#if DEVILUTIONX_PRINT_PALETTE_BLENDING_TREE_GRAPHVIZ |
||||
// To generate palette.dot.svg, run:
|
||||
// dot -O -Tsvg palette.dot
|
||||
FILE *out = std::fopen("palette.dot", "w"); |
||||
std::string dot = toGraphvizDot(); |
||||
std::fwrite(dot.data(), dot.size(), 1, out); |
||||
std::fclose(out); |
||||
#endif |
||||
} |
||||
|
||||
std::string PaletteKdTree::toGraphvizDot() const |
||||
{ |
||||
std::string dot = "graph palette_tree {\n rankdir=LR\n"; |
||||
tree_.toGraphvizDot(0, values_, dot); |
||||
dot.append("}\n"); |
||||
return dot; |
||||
} |
||||
|
||||
void PaletteKdTreeNode<0>::toGraphvizDot( |
||||
size_t id, std::span<const std::pair<PaletteKdTreeNode<0>::RGB, uint8_t>, 256> values, std::string &dot) const |
||||
{ |
||||
StrAppend(dot, " node_", id, R"( [shape=plain label=< |
||||
<table border="0" cellborder="0" cellspacing="0" cellpadding="2" style="ROUNDED"> |
||||
<tr>)"); |
||||
const std::pair<RGB, uint8_t> *const end = values.data() + valuesEndInclusive; |
||||
for (const std::pair<RGB, uint8_t> *it = values.data() + valuesBegin; it <= end; ++it) { |
||||
const auto &[rgb, paletteIndex] = *it; |
||||
char hexColor[6]; |
||||
fmt::format_to(hexColor, "{:02x}{:02x}{:02x}", rgb[0], rgb[1], rgb[2]); |
||||
StrAppend(dot, R"(<td balign="left" bgcolor="#)", std::string_view(hexColor, 6), "\">"); |
||||
const bool useWhiteText = rgb[0] + rgb[1] + rgb[2] < 350; |
||||
if (useWhiteText) StrAppend(dot, R"(<font color="white">)"); |
||||
StrAppend(dot, |
||||
static_cast<int>(rgb[0]), " ", |
||||
static_cast<int>(rgb[1]), " ", |
||||
static_cast<int>(rgb[2]), R"(<br/>)", |
||||
static_cast<int>(paletteIndex)); |
||||
if (useWhiteText) StrAppend(dot, "</font>"); |
||||
StrAppend(dot, "</td>"); |
||||
} |
||||
if (valuesBegin > valuesEndInclusive) StrAppend(dot, "<td></td>"); |
||||
StrAppend(dot, "</tr>\n </table>>]\n"); |
||||
} |
||||
|
||||
} // namespace devilution
|
||||
Loading…
Reference in new issue