You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

174 lines
5.2 KiB

#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <span>
#include <string>
#include <utility>
#ifdef USE_SDL3
#include <SDL3/SDL_pixels.h>
#elif defined(USE_SDL1)
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
#include <SDL_video.h>
#else
#include <SDL_pixels.h>
#endif
#include "utils/str_cat.hpp"
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
#define DEVILUTIONX_PRINT_PALETTE_BLENDING_TREE_GRAPHVIZ 0 // NOLINT(modernize-macro-to-enum)
namespace devilution {
/**
 * @brief Depth (number of levels) of the tree.
 *
 * `PaletteKdTreeNode` is instantiated once per level, counting the remaining
 * depth down to the leaf specialization at 0, and `PaletteKdTree::NumLeaves`
 * is derived from this value as `1U << PaletteKdTreeDepth`.
 */
constexpr size_t PaletteKdTreeDepth = 5;
/**
* @brief A node in the k-d tree.
*
* @tparam RemainingDepth distance to the leaf nodes.
*/
template <size_t RemainingDepth>
struct PaletteKdTreeNode {
using RGB = std::array<uint8_t, 3>;
static constexpr unsigned Coord = (PaletteKdTreeDepth - RemainingDepth) % 3;
PaletteKdTreeNode<RemainingDepth - 1> left;
PaletteKdTreeNode<RemainingDepth - 1> right;
uint8_t pivot;
[[nodiscard]] const PaletteKdTreeNode<RemainingDepth - 1> &child(bool isLeft) const
{
return isLeft ? left : right;
}
[[nodiscard]] PaletteKdTreeNode<RemainingDepth - 1> &child(bool isLeft)
{
return isLeft ? left : right;
}
[[maybe_unused]] void toGraphvizDot(size_t id, std::span<const std::pair<RGB, uint8_t>, 256> values, std::string &dot) const
{
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
StrAppend(dot, " node_", id, " [label=\"", "rgb"[Coord], ": ", pivot, "\"]\n");
const size_t leftId = (2 * id) + 1;
const size_t rightId = (2 * id) + 2;
left.toGraphvizDot(leftId, values, dot);
right.toGraphvizDot(rightId, values, dot);
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
StrAppend(dot, " node_", id, " -- node_", leftId,
"\n node_", id, " -- node_", rightId, "\n");
}
};
/**
* @brief A leaf node in the k-d tree.
*/
template <>
struct PaletteKdTreeNode</*RemainingDepth=*/0> {
using RGB = std::array<uint8_t, 3>;
// We use inclusive indices to allow for representing the full [0, 255] range.
// An empty node is represented as [1, 0].
uint8_t valuesBegin;
uint8_t valuesEndInclusive;
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
void toGraphvizDot(size_t id, std::span<const std::pair<RGB, uint8_t>, 256> values, std::string &dot) const;
};
/**
* @brief A kd-tree used to find the nearest neighbor in the color space.
*
* Each level splits the space in half by red, green, and blue respectively.
*/
class PaletteKdTree {
private:
using RGB = std::array<uint8_t, 3>;
static constexpr unsigned NumLeaves = 1U << PaletteKdTreeDepth;
public:
PaletteKdTree() = default;
/**
* @brief Constructs a PaletteKdTree
*
* The palette is used as points in the tree.
* Colors between skipFrom and skipTo (inclusive) are skipped.
*/
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
PaletteKdTree(const SDL_Color palette[256], int skipFrom, int skipTo);
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
/**
 * @brief Running result of a nearest-neighbor search.
 *
 * The members deliberately have no default initializers: `findNearestNeighbor`
 * seeds `bestDiff` itself before starting the traversal.
 */
struct VisitState {
	/** Best candidate found so far. */
	uint8_t best;

	/** Distance of `best` from the query; used to prune subtrees. */
	uint32_t bestDiff;
};
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
[[nodiscard]] uint8_t findNearestNeighbor(const RGB &rgb) const
{
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
VisitState visitState;
visitState.bestDiff = std::numeric_limits<uint32_t>::max();
findNearestNeighborVisit(tree_, rgb, visitState);
return visitState.best;
}
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
/**
 * @brief Returns a textual dump of the tree; judging by the name, in Graphviz DOT format.
 *
 * Only declared in this header.
 * NOTE(review): presumably a debugging aid used together with
 * DEVILUTIONX_PRINT_PALETTE_BLENDING_TREE_GRAPHVIZ — confirm against the definition.
 */
[[maybe_unused]] [[nodiscard]] std::string toGraphvizDot() const;
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
private:
/**
 * @brief Squared Euclidean distance between two colors.
 *
 * @param a first color, as {r, g, b}.
 * @param b second color, as {r, g, b}.
 * @return sum of the squared per-channel differences (at most 3 * 255 * 255).
 */
[[nodiscard]] static constexpr uint32_t getColorDistance(const std::array<uint8_t, 3> &a, const std::array<uint8_t, 3> &b)
{
	// The uint8_t operands are promoted to int before subtracting, so the
	// differences may be negative; squaring makes them non-negative again.
	const int diffr = a[0] - b[0];
	const int diffg = a[1] - b[1];
	const int diffb = a[2] - b[2];
	// Explicit cast, matching getColorDistanceToPlane, instead of relying on
	// an implicit int -> uint32_t conversion in the return statement.
	return static_cast<uint32_t>((diffr * diffr) + (diffg * diffg) + (diffb * diffb));
}
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
/**
 * @brief Squared distance between two coordinates along a single axis.
 *
 * @param x1 coordinate of one side (the caller passes the node's pivot).
 * @param x2 coordinate of the other side (the caller passes the query's coordinate).
 * @return `(x1 - x2)` squared.
 */
[[nodiscard]] static constexpr uint32_t getColorDistanceToPlane(int x1, int x2)
{
	// Our planes are axis-aligned, so a distance from a point to a plane
	// can be calculated based on just the axis coordinate.
	const int delta = x1 - x2;
	return static_cast<uint32_t>(delta * delta);
}
template <size_t RemainingDepth>
void findNearestNeighborVisit(const PaletteKdTreeNode<RemainingDepth> &node, const RGB &rgb,
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
VisitState &visitState) const
{
const uint8_t coord = rgb[PaletteKdTreeNode<RemainingDepth>::Coord];
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
findNearestNeighborVisit(node.child(coord < node.pivot), rgb, visitState);
// To see if we need to check a node's subtree, we compare the distance from the query
// to the current best candidate vs the distance to the edge of the half-space represented
// by the node.
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
if (getColorDistanceToPlane(node.pivot, coord) < visitState.bestDiff) {
findNearestNeighborVisit(node.child(coord >= node.pivot), rgb, visitState);
}
}
void findNearestNeighborVisit(const PaletteKdTreeNode<0> &node, const RGB &rgb,
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
VisitState &visitState) const
{
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
// Nodes are almost never empty.
// Separating the empty check from the loop makes this faster,
// probaly because of better branch prediction.
if (node.valuesBegin > node.valuesEndInclusive) return;
const std::pair<RGB, uint8_t> *it = values_.data() + node.valuesBegin;
const std::pair<RGB, uint8_t> *const end = values_.data() + node.valuesEndInclusive;
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
do {
const auto &[paletteColor, paletteIndex] = *it++;
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
const uint32_t diff = getColorDistance(paletteColor, rgb);
if (diff < visitState.bestDiff) {
visitState.best = paletteIndex;
visitState.bestDiff = diff;
}
Palette KD-tree: Fix compilation speed Tricks the compiler into skipping expensive `uninit var analysis` (`-Wmaybe-uninitialized`) by using a struct with state rather than separate variables for `best` / `bestDiff`. This has no performance impact. Also optimizes lookup a bit further and moves some code that does not need to be inlined to the cpp file. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ----------------------------------------------------------------------------------------------------------------------------------- BM_GenerateBlendedLookupTable_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_GenerateBlendedLookupTable_mean +0.0237 +0.0237 2092090 2141601 2091732 2141291 BM_GenerateBlendedLookupTable_median +0.0237 +0.0237 2092104 2141662 2091669 2141319 BM_GenerateBlendedLookupTable_stddev -0.6414 -0.5834 664 238 538 224 BM_GenerateBlendedLookupTable_cv -0.6497 -0.5930 0 0 0 0 BM_BuildTree_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_BuildTree_mean +0.0410 +0.0410 4495 4679 4494 4678 BM_BuildTree_median +0.0403 +0.0402 4494 4675 4493 4674 BM_BuildTree_stddev +0.9515 +0.9359 7 14 7 14 BM_BuildTree_cv +0.8746 +0.8596 0 0 0 0 BM_FindNearestNeighbor_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 BM_FindNearestNeighbor_mean -0.0399 -0.0398 1964257108 1885966812 1963954917 1885694336 BM_FindNearestNeighbor_median -0.0397 -0.0396 1963969748 1886074435 1963650984 1885803182 BM_FindNearestNeighbor_stddev -0.3380 -0.3443 1217360 805946 1225442 803469 BM_FindNearestNeighbor_cv -0.3105 -0.3171 0 0 0 0 OVERALL_GEOMEAN +0.0077 +0.0077 0 0 0 0 ```
8 months ago
} while (it <= end);
}
// The k-d tree node of depth `PaletteKdTreeDepth` (presumably the root that
// lookups start from -- confirm against the public lookup method).
PaletteKdTreeNode<PaletteKdTreeDepth> tree_;
// Storage for up to 256 (color, palette index) pairs. Leaf nodes address
// slices of this array through their [valuesBegin, valuesEndInclusive] ranges.
std::array<std::pair<RGB, uint8_t>, 256> values_;
};
} // namespace devilution