You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1107 lines
48 KiB

/**
* @file dun_render.cpp
*
* Implementation of functionality for rendering the level tiles.
*/
// Debugging variables
// #define DEBUG_STR
// #define DEBUG_RENDER_COLOR
// #define DEBUG_RENDER_OFFSET_X 5
// #define DEBUG_RENDER_OFFSET_Y 5
#include "engine/render/dun_render.hpp"
#include <SDL_endian.h>
#include <climits>
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
#include <cstddef>
#include <cstdint>
#include "engine/render/blit_impl.hpp"
#include "levels/dun_tile.hpp"
#include "lighting.h"
#include "options.h"
#include "utils/attributes.h"
#ifdef DEBUG_STR
#include "engine/render/text_render.hpp"
#endif
#if defined(DEBUG_STR) || defined(DUN_RENDER_STATS)
#include "utils/str_cat.hpp"
#endif
namespace devilution {
namespace {
/** Width of a tile rendering primitive. */
constexpr int_fast16_t Width = DunFrameWidth;
/** Height of a tile rendering primitive (except triangles). */
constexpr int_fast16_t Height = DunFrameHeight;
/** Height of the lower triangle of a triangular or a trapezoid tile. */
constexpr int_fast16_t LowerHeight = DunFrameHeight / 2;
/** Height of the upper triangle of a triangular tile. */
constexpr int_fast16_t TriangleUpperHeight = DunFrameHeight / 2 - 1;
/** Height of the upper rectangle of a trapezoid tile. */
constexpr int_fast16_t TrapezoidUpperHeight = DunFrameHeight / 2;
/**
 * Height of a triangular tile, taken from `DunFrameTriangleHeight`.
 * NOTE(review): presumably equal to `LowerHeight + TriangleUpperHeight` -- confirm against the header.
 */
constexpr int_fast16_t TriangleHeight = DunFrameTriangleHeight;
/** For triangles, for each pixel drawn vertically, this many pixels are drawn horizontally. */
constexpr int_fast16_t XStep = 2;
#ifdef DEBUG_STR
/**
 * Returns the debug label for a tile type together with the text alignment
 * flags to draw it with. Tile types without a dedicated label yield an empty
 * string and default flags.
 */
std::pair<std::string_view, UiFlags> GetTileDebugStr(TileType tile)
{
	if (tile == TileType::Square)
		return { "S", UiFlags::AlignCenter | UiFlags::VerticalCenter };
	if (tile == TileType::TransparentSquare)
		return { "T", UiFlags::AlignCenter | UiFlags::VerticalCenter };
	if (tile == TileType::LeftTriangle)
		return { "<", UiFlags::AlignRight | UiFlags::VerticalCenter };
	if (tile == TileType::RightTriangle)
		return { ">", UiFlags::VerticalCenter };
	if (tile == TileType::LeftTrapezoid)
		return { "\\", UiFlags::AlignCenter };
	if (tile == TileType::RightTrapezoid)
		return { "/", UiFlags::AlignCenter };
	return { "", {} };
}
#endif
#ifdef DEBUG_RENDER_COLOR
// Palette index that the debug line renderers draw instead of the tile's own pixels.
// Only compiled when DEBUG_RENDER_COLOR is defined.
// NOTE(review): presumably assigned from GetTileDebugColor() by the tile renderer -- the writer is not visible here.
int DBGCOLOR = 0;
/**
 * Maps a tile type to the palette index used to tint it in
 * DEBUG_RENDER_COLOR builds. Unlisted tile types map to 0.
 */
int GetTileDebugColor(TileType tile)
{
	int color = 0;
	switch (tile) {
	case TileType::Square:
		color = PAL16_YELLOW + 5;
		break;
	case TileType::TransparentSquare:
		color = PAL16_ORANGE + 5;
		break;
	case TileType::LeftTriangle:
		color = PAL16_GRAY + 5;
		break;
	case TileType::RightTriangle:
		color = PAL16_BEIGE;
		break;
	case TileType::LeftTrapezoid:
		color = PAL16_RED + 5;
		break;
	case TileType::RightTrapezoid:
		color = PAL16_BLUE + 5;
		break;
	default:
		break;
	}
	return color;
}
#endif // DEBUG_RENDER_COLOR
// How many pixels to increment the transparent (Left) or opaque (Right)
// prefix width after each line (drawing bottom-to-top).
// The primary template applies to every mask without a specialization below and is 0.
template <MaskType Mask>
constexpr int8_t PrefixIncrement = 0;
// Left mask: the prefix grows by 2 pixels per line.
template <>
constexpr int8_t PrefixIncrement<MaskType::Left> = 2;
// Right mask: the prefix shrinks by 2 pixels per line.
template <>
constexpr int8_t PrefixIncrement<MaskType::Right> = -2;
// Initial value for the prefix, i.e. the prefix width on the bottom-most line.
// Masks whose prefix does not shrink (`PrefixIncrement >= 0`) start at -32,
// masks whose prefix shrinks start at 64.
// Declared `constexpr` (like `PrefixIncrement`) so that it is a true compile-time
// constant rather than a mutable namespace-scope variable.
template <MaskType Mask>
constexpr int8_t InitialPrefix = (PrefixIncrement<Mask> >= 0) ? -32 : 64;
// Computes the prefix for line `y` (line 0 is the bottom-most line):
// the starting prefix advanced by `y` per-line increments.
template <MaskType Mask>
DVL_ALWAYS_INLINE int8_t InitPrefix(int8_t y)
{
	const int accumulated = PrefixIncrement<Mask> * y;
	return InitialPrefix<Mask> + accumulated;
}
/** Lighting mode that selects which line-rendering specialization is used. */
enum class LightType : uint8_t {
	/** Pixels are filled with palette index 0; the source pixels are not read. */
	FullyDark = 0,
	/** Source pixels are mapped through a light table before being written. */
	PartiallyLit = 1,
	/** Source pixels are written unchanged. */
	FullyLit = 2,
};
/**
 * Renders `n` opaque pixels of one line. Declared here and defined only through
 * the explicit specializations for each `LightType`.
 *
 * @param dst Destination pixels.
 * @param src Source pixels (unused by the FullyDark specialization).
 * @param n Number of pixels to render.
 * @param tbl Light table (used only by the PartiallyLit specialization).
 */
template <LightType Light>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl);
/** Fully dark: fills `n` destination pixels with palette index 0; `src` and `tbl` are ignored. */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::FullyDark>(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
BlitFillDirect(dst, n, 0);
}
/**
 * Fully lit: writes `n` source pixels to `dst` without a light table.
 * In DEBUG_RENDER_COLOR builds the line is filled with `DBGCOLOR` instead.
 */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::FullyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
#ifdef DEBUG_RENDER_COLOR
	BlitFillDirect(dst, n, DBGCOLOR);
#else
	BlitPixelsDirect(dst, src, n);
#endif
}
/**
 * Partially lit: writes `n` source pixels to `dst`, mapping them through `tbl`.
 * In DEBUG_RENDER_COLOR builds the line is filled with `tbl[DBGCOLOR]` instead.
 */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::PartiallyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
{
#ifdef DEBUG_RENDER_COLOR
	BlitFillDirect(dst, n, tbl[DBGCOLOR]);
#else
	BlitPixelsWithMap(dst, src, n, tbl);
#endif
}
#ifndef DEBUG_RENDER_COLOR
/**
 * Renders `n` pixels of one line blended with the existing destination pixels.
 * Declared here and defined only through the explicit specializations for each `LightType`.
 *
 * @param dst Destination pixels (read and written).
 * @param src Source pixels (unused by the FullyDark specialization).
 * @param n Number of pixels to render.
 * @param tbl Light table (used only by the PartiallyLit specialization).
 */
template <LightType Light>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl);
/** Fully dark: blends palette index 0 over `n` destination pixels; `src` and `tbl` are ignored. */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::FullyDark>(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
BlitFillBlended(dst, n, 0);
}
/** Fully lit: blends `n` source pixels over the destination without a light table. */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::FullyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
BlitPixelsBlended(dst, src, n);
}
/** Partially lit: blends `n` source pixels over the destination, passing the light table `tbl` to the blitter. */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::PartiallyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
{
BlitPixelsBlendedWithMap(dst, src, n, tbl);
}
#else // DEBUG_RENDER_COLOR
/**
 * DEBUG_RENDER_COLOR variant used for every `LightType`: blends the debug color
 * `tbl[DBGCOLOR + 4]` over `n` destination pixels via `paletteTransparencyLookup`.
 * The source pixels are not read.
 */
template <LightType Light>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
{
	size_t remaining = n;
	for (uint8_t *pixel = dst; remaining != 0; ++pixel, --remaining) {
		*pixel = paletteTransparencyLookup[*pixel][tbl[DBGCOLOR + 4]];
	}
}
#endif
/**
 * Renders `width` pixels of one line either fully opaque or fully blended,
 * selected at compile time by `Transparent`.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentOrOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl)
{
	if constexpr (!Transparent) {
		RenderLineOpaque<Light>(dst, src, width, tbl);
	} else {
		RenderLineTransparent<Light>(dst, src, width, tbl);
	}
}
/**
 * Renders one line that is split into a prefix of `prefixWidth` pixels and a
 * suffix of `width - prefixWidth` pixels. With `OpaqueFirst` the prefix is opaque
 * and the suffix blended; otherwise the prefix is blended and the suffix opaque.
 */
template <LightType Light, bool OpaqueFirst>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentAndOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t prefixWidth, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl)
{
	uint8_t *const suffixDst = dst + prefixWidth;
	const uint8_t *const suffixSrc = src + prefixWidth;
	const uint_fast8_t suffixWidth = width - prefixWidth;
	if constexpr (!OpaqueFirst) {
		RenderLineTransparent<Light>(dst, src, prefixWidth, tbl);
		RenderLineOpaque<Light>(suffixDst, suffixSrc, suffixWidth, tbl);
	} else {
		RenderLineOpaque<Light>(dst, src, prefixWidth, tbl);
		RenderLineTransparent<Light>(suffixDst, suffixSrc, suffixWidth, tbl);
	}
}
/**
 * Renders `n` pixels of one line according to the mask.
 *
 * Solid and Transparent masks render the whole run opaque or blended.
 * For the remaining masks, `prefix` is the width of the leading part of the run
 * that is blended (Left) or opaque (Right); the rest of the run uses the other mode.
 *
 * @param prefix Prefix width relative to the start of this run; may be negative
 *               or exceed `n`, in which case the whole run uses a single mode.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl, int8_t prefix)
{
	if constexpr (Mask == MaskType::Solid || Mask == MaskType::Transparent) {
		RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, src, n, tbl);
	} else {
		// The prefix is effectively clamped to (0, n]: the two whole-run cases are handled
		// separately so that the line renderers are never called with a width of 0.
		const bool prefixCoversRun = prefix >= static_cast<int8_t>(n);
		if (prefixCoversRun) {
			// The whole run is prefix: opaque for Right, blended otherwise.
			RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask != MaskType::Right>(dst, src, n, tbl);
		} else if (prefix <= 0) {
			// The whole run is past the prefix: opaque for Left, blended otherwise.
			RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask != MaskType::Left>(dst, src, n, tbl);
		} else {
			RenderLineTransparentAndOpaque<Light, /*OpaqueFirst=*/Mask == MaskType::Right>(dst, src, prefix, n, tbl);
		}
	}
}
/** Clipping of a tile against the output surface, as computed by CalculateClip(). */
struct Clip {
// Number of rows cut off at the top of the tile.
int_fast16_t top;
// Number of rows cut off at the bottom of the tile.
int_fast16_t bottom;
// Number of columns cut off on the left of the tile.
int_fast16_t left;
// Number of columns cut off on the right of the tile.
int_fast16_t right;
// Tile width that remains after removing `left` and `right`.
int_fast16_t width;
// Tile height that remains after removing `top` and `bottom`.
int_fast16_t height;
};
/**
 * Computes how much of a `w` x `h` tile falls outside of `out`.
 *
 * `y` is treated as the bottom-most row of the tile (the tile spans rows
 * `y - h + 1` to `y`) and `x` as its left-most column.
 *
 * @return The clipped amounts on each side and the remaining width and height.
 */
DVL_ALWAYS_INLINE Clip CalculateClip(int_fast16_t x, int_fast16_t y, int_fast16_t w, int_fast16_t h, const Surface &out)
{
	Clip result;

	// Vertical clipping.
	result.top = 0;
	if (y + 1 < h)
		result.top = h - (y + 1);
	result.bottom = 0;
	if (y + 1 > out.h())
		result.bottom = (y + 1) - out.h();

	// Horizontal clipping.
	result.left = 0;
	if (x < 0)
		result.left = -x;
	result.right = 0;
	if (x + w > out.w())
		result.right = x + w - out.w();

	// What remains visible.
	result.width = w - result.left - result.right;
	result.height = h - result.top - result.bottom;
	return result;
}
/**
 * Returns whether `tbl` is the fully dark light table.
 * The check compares `tbl` against `FullyDarkLightTable` itself; table contents are not inspected.
 */
DVL_ALWAYS_INLINE bool IsFullyDark(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyDarkLightTable;
}
/**
 * Returns whether `tbl` is the fully lit light table.
 * The check compares `tbl` against `FullyLitLightTable` itself; table contents are not inspected.
 */
DVL_ALWAYS_INLINE bool IsFullyLit(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyLitLightTable;
}
/**
 * Renders an unclipped square tile: `Height` rows of `Width` pixels.
 * Source rows are consumed in order while the destination moves back by
 * `dstPitch` after every row.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	for (auto row = 0; row < Height; ++row) {
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, Width, tbl);
		src += Width;
		dst -= dstPitch;
	}
}
/**
 * Renders the visible part of a square tile that is clipped by the output surface.
 *
 * @param dst Destination of the first visible row; moved back by `dstPitch` after each row.
 * @param dstPitch Distance between consecutive destination rows.
 * @param src Start of the tile's pixel data, stored as rows of `Width` pixels.
 * @param tbl Light table passed through to the line renderer.
 * @param clip Clipping of the tile; `clip.bottom` rows and `clip.left` columns of
 *             the source are skipped, and `clip.height` rows of `clip.width` pixels are drawn.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareClipped(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	// Each source row is `Width` pixels long (see the `src += Width` stride below), so
	// skipping the clipped bottom rows must advance by `Width` per row, not `Height`.
	src += clip.bottom * Width + clip.left;
	for (auto i = 0; i < clip.height; ++i, dst -= dstPitch) {
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, clip.width, tbl);
		src += Width;
	}
}
/**
 * Renders a square tile, using the unclipped fast path when the whole
 * `Width` x `Height` tile is visible and the clipped path otherwise.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquare(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const bool isClipped = clip.width != Width || clip.height != Height;
	if (isClipped) {
		RenderSquareClipped<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		return;
	}
	RenderSquareFull<Light, Transparent>(dst, dstPitch, src, tbl);
}
/**
 * Renders `height` rows of a run-length encoded square tile that is not clipped horizontally.
 *
 * Each row of `src` is a sequence of runs covering `Width` pixels. A run starts with a
 * control byte read as `int8_t`: a positive value `v` is followed by `v` pixels that are
 * rendered; any other value skips `-v` destination pixels and carries no pixel data.
 *
 * @param height Number of rows to render; the caller must guarantee 16 <= height <= 32.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, unsigned height)
{
int8_t prefix = InitialPrefix<Mask>;
// Hints for the optimizer about the possible row counts.
DVL_ASSUME(height >= 16);
DVL_ASSUME(height <= 32);
// After each row `dst` has advanced by `Width`; undo that and step one row back by `dstPitch`.
for (unsigned i = 0; i < height; ++i, dst -= dstPitch + Width) {
// Pixels of the current row that still have to be covered by runs.
uint_fast8_t drawWidth = Width;
while (drawWidth > 0) {
auto v = static_cast<int8_t>(*src++);
if (v > 0) {
// `Width - drawWidth` pixels of this row are already done, so the prefix is
// made relative to the start of this run.
RenderLine<Light, Mask>(dst, src, v, tbl, prefix - (Width - drawWidth));
src += v;
} else {
// Skip run: no pixel data follows, only the destination advances.
v = -v;
}
dst += v;
drawWidth -= v;
}
prefix += PrefixIncrement<Mask>;
}
}
/**
 * Renders the visible part of a run-length encoded square tile that is clipped.
 *
 * The source uses the same encoding as in RenderTransparentSquareFull: per row, runs that
 * cover `Width` pixels, each starting with a control byte read as `int8_t`; a positive
 * value `v` is followed by `v` pixels, any other value skips `-v` pixels.
 * Because rows have a variable encoded size, clipped rows and clipped parts of a row
 * are skipped by decoding them.
 */
template <LightType Light, MaskType Mask>
// NOLINTNEXTLINE(readability-function-cognitive-complexity): Actually complex and has to be fast.
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
// Advances `src` past runs until `remainingWidth` pixels of the current row are consumed.
const auto skipRestOfTheLine = [&src](int_fast16_t remainingWidth) {
while (remainingWidth > 0) {
const auto v = static_cast<int8_t>(*src++);
if (v > 0) {
src += v;
remainingWidth -= v;
} else {
remainingWidth -= -v;
}
}
// Runs are expected to end exactly at the row boundary.
assert(remainingWidth == 0);
};
// Skip the bottom clipped lines.
for (auto i = 0; i < clip.bottom; ++i) {
skipRestOfTheLine(Width);
}
// Start with the prefix of the first visible row.
int8_t prefix = InitPrefix<Mask>(clip.bottom);
// After each row `dst` has advanced by `clip.width`; undo that and step one row back by `dstPitch`.
for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width) {
auto drawWidth = clip.width;
// Skip initial src if clipping on the left.
// Handles overshoot, i.e. when the RLE segment goes into the unclipped area.
auto remainingLeftClip = clip.left;
while (remainingLeftClip > 0) {
auto v = static_cast<int8_t>(*src++);
if (v > 0) {
if (v > remainingLeftClip) {
// The run straddles the left clip edge: draw only its visible tail.
const auto overshoot = v - remainingLeftClip;
// NOTE(review): the prefix offset here is `Width - remainingLeftClip`, whereas the visible
// tail starts at column `clip.left` of the row -- confirm this offset is intended.
RenderLine<Light, Mask>(dst, src + remainingLeftClip, overshoot, tbl, prefix - (Width - remainingLeftClip));
dst += overshoot;
drawWidth -= overshoot;
}
src += v;
} else {
v = -v;
if (v > remainingLeftClip) {
// A skip run straddles the left clip edge: advance over its visible tail.
const auto overshoot = v - remainingLeftClip;
dst += overshoot;
drawWidth -= overshoot;
}
}
remainingLeftClip -= v;
}
// Draw the non-clipped segment
while (drawWidth > 0) {
auto v = static_cast<int8_t>(*src++);
if (v > 0) {
if (v > drawWidth) {
// The run extends past the right clip edge: draw only the visible part.
// `drawWidth` becomes negative; its magnitude is the part of the run that
// already lies inside the right clip.
RenderLine<Light, Mask>(dst, src, drawWidth, tbl, prefix - (Width - drawWidth));
src += v;
dst += drawWidth;
drawWidth -= v;
break;
}
// NOTE(review): `Width - drawWidth` equals the run's column within the full row only
// when `clip.right == 0` (drawWidth starts at `clip.width`) -- confirm for right-clipped tiles.
RenderLine<Light, Mask>(dst, src, v, tbl, prefix - (Width - drawWidth));
src += v;
} else {
v = -v;
if (v > drawWidth) {
// The skip run extends past the right clip edge.
dst += drawWidth;
drawWidth -= v;
break;
}
}
dst += v;
drawWidth -= v;
}
// Skip the rest of src line if clipping on the right
assert(drawWidth <= 0);
// `drawWidth` (<= 0) accounts for pixels of the last run that were already consumed.
skipRestOfTheLine(clip.right + drawWidth);
prefix += PrefixIncrement<Mask>;
}
}
/**
 * Renders a run-length encoded square tile. The fast path is used when the tile
 * is not clipped horizontally or vertically; `clip.height` is then the number
 * of rows to draw. Otherwise the clipped path is used.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquare(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const bool isClipped = clip.width != Width || clip.bottom != 0 || clip.top != 0;
	if (isClipped) {
		RenderTransparentSquareClipped<Light, Mask>(dst, dstPitch, src, tbl, clip);
		return;
	}
	RenderTransparentSquareFull<Light, Mask>(dst, dstPitch, src, tbl, clip.height);
}
/** Vertical clip for the lower and upper triangles of a diamond tile (L/RTRIANGLE).*/
struct DiamondClipY {
// Rows clipped from the bottom of the lower triangle.
int_fast16_t lowerBottom;
// Rows clipped from the top of the lower triangle.
int_fast16_t lowerTop;
// Rows clipped from the bottom of the upper triangle.
int_fast16_t upperBottom;
// Rows clipped from the top of the upper triangle.
int_fast16_t upperTop;
};
/**
 * Splits the vertical clip of a diamond tile into separate clips for its lower and upper triangles.
 *
 * - If the bottom clip exceeds `LowerHeight`, the lower triangle's bottom clip is `LowerHeight`
 *   and the excess becomes the upper triangle's bottom clip.
 * - Otherwise, if the top clip exceeds `UpperHeight`, the upper triangle's top clip is
 *   `UpperHeight` and the excess becomes the lower triangle's top clip.
 * - Otherwise, the top clip applies to the upper triangle and the bottom clip to the lower one.
 *
 * All fields not mentioned above are zero.
 *
 * @tparam UpperHeight Height of the upper triangle.
 * @param clip Clip of the whole tile; only `bottom` and `top` are read.
 * @return The per-triangle vertical clip.
 */
template <int_fast16_t UpperHeight = TriangleUpperHeight>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT DiamondClipY CalculateDiamondClipY(const Clip &clip)
{
	// Start from an all-zero clip; each case below only sets the non-zero fields.
	DiamondClipY clipY {};
	if (clip.bottom > LowerHeight) {
		clipY.lowerBottom = LowerHeight;
		clipY.upperBottom = clip.bottom - LowerHeight;
		return clipY;
	}
	if (clip.top > UpperHeight) {
		clipY.upperTop = UpperHeight;
		clipY.lowerTop = clip.top - UpperHeight;
		return clipY;
	}
	clipY.upperTop = clip.top;
	clipY.lowerBottom = clip.bottom;
	return clipY;
}
/**
 * Returns `XStep * numLines * (numLines + 1) / 2`, i.e. the sum of
 * `XStep * 1 + XStep * 2 + ... + XStep * numLines`.
 *
 * Presumably the number of source bytes taken by the first `numLines` lines
 * of the lower triangle, where each line is `XStep` wider than the previous one.
 *
 * @param numLines Number of lines to skip.
 * @return The computed source offset.
 */
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT std::size_t CalculateTriangleSourceSkipLowerBottom(int_fast16_t numLines)
{
	const auto twiceTheSkip = XStep * numLines * (numLines + 1);
	return twiceTheSkip / 2;
}
/**
 * Returns `2 * TriangleUpperHeight * numLines - numLines * (numLines - 1)`, which equals
 * the sum of `2 * (TriangleUpperHeight - i)` for `i` in `[0, numLines)`.
 *
 * Presumably the number of source bytes taken by the first `numLines` lines
 * of the upper triangle, where each line is 2 narrower than the previous one.
 *
 * @param numLines Number of lines to skip.
 * @return The computed source offset.
 */
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT std::size_t CalculateTriangleSourceSkipUpperBottom(int_fast16_t numLines)
{
	const auto ifAllLinesWereWidest = 2 * TriangleUpperHeight * numLines;
	const auto totalNarrowing = numLines * (numLines - 1);
	return ifAllLinesWereWidest - totalNarrowing;
}
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
/**
 * Renders the 16 lines of a lower triangle, one `RenderLineTransparentOrOpaque` call per line.
 *
 * Line `k` (0-based) is drawn at `dst - k * dstLineOffset` and is `2 * (k + 1)` pixels wide
 * (2, 4, ..., 32). Its source data starts at the sum of the widths of the preceding lines
 * (0, 2, 6, ..., 240).
 *
 * The calls are deliberately unrolled by hand instead of being written as a loop,
 * so that the result does not depend on the compiler unrolling it.
 *
 * @param dst Destination of the first line. Taken by reference: on return it has been
 *            moved back by `16 * dstLineOffset`.
 * @param dstLineOffset Distance subtracted from `dst` to get from one line to the next.
 * @param src Source data. Taken by reference: on return it has been advanced by 272,
 *            the total of all line widths (2 + 4 + ... + 32).
 * @param tbl Forwarded to every line call (presumably the light table -- confirm against callers).
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleLower(uint8_t *DVL_RESTRICT &dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
// Arguments per call: destination of the line, source of the line, line width, table.
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 0 * dstLineOffset, src + 0, 2, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 1 * dstLineOffset, src + 2, 4, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 2 * dstLineOffset, src + 6, 6, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 3 * dstLineOffset, src + 12, 8, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 4 * dstLineOffset, src + 20, 10, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 5 * dstLineOffset, src + 30, 12, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 6 * dstLineOffset, src + 42, 14, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 7 * dstLineOffset, src + 56, 16, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 8 * dstLineOffset, src + 72, 18, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 9 * dstLineOffset, src + 90, 20, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 10 * dstLineOffset, src + 110, 22, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 11 * dstLineOffset, src + 132, 24, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 12 * dstLineOffset, src + 156, 26, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 13 * dstLineOffset, src + 182, 28, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 14 * dstLineOffset, src + 210, 30, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 15 * dstLineOffset, src + 240, 32, tbl);
// Leave both cursors just past the triangle: 272 = 240 + 32 source bytes, 16 destination lines.
src += 272;
dst -= 16 * dstLineOffset;
}
/**
 * Specialization of `RenderTriangleLower` for opaque, fully dark rendering.
 *
 * Neither the source data nor `tbl` is read: each of the `LowerHeight` lines is
 * produced by `BlitFillDirect(dst, width, 0)`, with the width starting at `XStep`
 * and growing by `XStep` per line.
 *
 * @param dst Destination of the first line. Taken by reference: moved back by
 *            `dstLineOffset` after every line.
 * @param dstLineOffset Distance subtracted from `dst` to get from one line to the next.
 * @param src Source data. Taken by reference: advanced by 272 so that it ends up
 *            past the triangle even though nothing is read from it.
 * @param tbl Unused.
 */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleLower<LightType::FullyDark, /*Transparent=*/false>(uint8_t *DVL_RESTRICT &dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT &src, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
	// The source is never read here, only skipped.
	src += 272;

	unsigned lineWidth = XStep;
	unsigned linesDrawn = 0;
	while (linesDrawn < LowerHeight) {
		BlitFillDirect(dst, lineWidth, 0);
		dst -= dstLineOffset;
		lineWidth += XStep;
		++linesDrawn;
	}
}
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
/**
 * Renders the 15 lines of the upper part of a triangle, one call per line.
 *
 * Line k (k = 0..14) is drawn at `dst - k * dstLineOffset` with a width of `30 - 2 * k`.
 * The lines are read back-to-back from `src`: each source offset below is the sum of the
 * widths of all previous lines (240 source bytes are consumed in total).
 *
 * @param dst destination pointer for the first (widest) line
 * @param dstLineOffset distance subtracted from `dst` for each subsequent line
 * @param src source data for the 15 lines, stored contiguously
 * @param tbl forwarded unchanged to RenderLineTransparentOrOpaque
 *            (presumably the light translation table -- TODO confirm)
 *
 * NOTE: the calls are deliberately written out by hand instead of relying on the compiler
 * to unroll a loop. Benchmark before turning this back into a loop.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleUpper(uint8_t *DVL_RESTRICT dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
// Arguments per line: (destination of line k, source offset of line k, line width, tbl).
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 0 * dstLineOffset, src + 0, 30, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 1 * dstLineOffset, src + 30, 28, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 2 * dstLineOffset, src + 58, 26, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 3 * dstLineOffset, src + 84, 24, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 4 * dstLineOffset, src + 108, 22, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 5 * dstLineOffset, src + 130, 20, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 6 * dstLineOffset, src + 150, 18, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 7 * dstLineOffset, src + 168, 16, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 8 * dstLineOffset, src + 184, 14, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 9 * dstLineOffset, src + 198, 12, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 10 * dstLineOffset, src + 210, 10, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 11 * dstLineOffset, src + 220, 8, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 12 * dstLineOffset, src + 228, 6, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 13 * dstLineOffset, src + 234, 4, tbl);
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 14 * dstLineOffset, src + 238, 2, tbl);
}
/**
 * Specialization of RenderTriangleUpper for fully dark, non-transparent tiles.
 *
 * Neither `src` nor `tbl` is read: every line is produced by a `BlitFillDirect(dst, width, 0)`
 * call instead. The first line is `Width - XStep` wide; each of the `TriangleUpperHeight`
 * lines that follows is `XStep` narrower and starts `dstLineOffset` earlier in `dst`.
 */
template <>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleUpper<LightType::FullyDark, /*Transparent=*/false>(uint8_t *DVL_RESTRICT dst, ptrdiff_t dstLineOffset, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
{
	unsigned lineWidth = Width - XStep;
	// Per-line stepping of the destination and the width lives in the loop header.
	for (unsigned line = 0; line < TriangleUpperHeight; ++line, dst -= dstLineOffset, lineWidth -= XStep) {
		BlitFillDirect(dst, lineWidth, 0);
	}
}
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLower(uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
dst += XStep * (LowerHeight - 1);
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
RenderTriangleLower<Light, Transparent>(dst, dstPitch + XStep, src, tbl);
}
/**
 * Renders the lower part of a left triangle with vertical clipping only.
 *
 * Lines are numbered from 1; line `i` is `XStep * i` wide. The lines from
 * `1 + clipY.lowerBottom` through `LowerHeight - clipY.lowerTop` are drawn; the source data
 * of the lines clipped at the bottom is skipped via CalculateTriangleSourceSkipLowerBottom.
 *
 * `dst` and `src` are taken by reference and are left advanced past the rendered lines.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipVertical(const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	// Each line starts `dstPitch + XStep` before the previous one.
	const auto lineStep = dstPitch + XStep;
	const auto lastLine = LowerHeight - clipY.lowerTop;

	dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);

	auto line = 1 + clipY.lowerBottom;
	while (line <= lastLine) {
		const auto lineWidth = XStep * line;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth, tbl);
		src += lineWidth;
		dst -= lineStep;
		++line;
	}
}
/**
 * Renders the lower part of a left triangle, clipped on the left and vertically.
 *
 * Line `i` is `XStep * i` wide and begins at x = `Width - XStep * i`. Whatever part of a
 * line lies before `clipLeft` is dropped from both the destination and the source; a line
 * that is entirely clipped is not drawn, but its source data is still consumed.
 *
 * `dst` and `src` are taken by reference and are left advanced past the processed lines.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	const auto lastLine = LowerHeight - clipY.lowerTop;

	// The destination is already shifted by `clipLeft`, hence the subtraction here.
	dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft;
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);

	for (auto line = 1 + clipY.lowerBottom; line <= lastLine; ++line, dst -= dstPitch + XStep) {
		const auto lineWidth = XStep * line;
		const auto lineStartX = Width - lineWidth;
		// Number of leading elements of this line that fall left of the clip boundary.
		const auto clipped = lineStartX < clipLeft ? clipLeft - lineStartX : 0;
		if (clipped < lineWidth) {
			RenderLineTransparentOrOpaque<Light, Transparent>(dst + clipped, src + clipped, lineWidth - clipped, tbl);
		}
		src += lineWidth;
	}
}
/**
 * Draws the lower part of a left triangle with both right-edge and vertical clipping.
 *
 * Each remaining row is shortened by `clipRight` pixels at its end; rows that are
 * not wider than `clipRight` are skipped. `dst` and `src` are references and are
 * advanced as rows are consumed.
 *
 * @param clipRight number of columns clipped on the right
 * @param clipY vertical clip amounts; only `lowerBottom` and `lowerTop` are read here
 * @param dst output position, updated in place
 * @param dstPitch value subtracted from `dst` (together with XStep) after every row
 * @param src input position, updated in place
 * @param tbl forwarded unchanged to the line renderer
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipRightAndVertical(int_fast16_t clipRight, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	// Presumably skips the source data of the rows clipped at the bottom (helper not visible here).
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
	// Move to where the first row that is still drawn begins.
	dst += XStep * (LowerHeight - clipY.lowerBottom - 1);

	const auto lastRow = LowerHeight - clipY.lowerTop;
	auto row = 1 + clipY.lowerBottom;
	while (row <= lastRow) {
		const auto lineWidth = XStep * row;
		// Only rows wider than the clipped region have anything visible.
		if (clipRight < lineWidth) {
			RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth - clipRight, tbl);
		}
		// The whole source row is consumed regardless of how much of it was drawn.
		src += lineWidth;
		dst -= dstPitch + XStep;
		++row;
	}
}
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
RenderLeftTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
dst += 2 * XStep;
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
RenderTriangleUpper<Light, Transparent>(dst, dstPitch - XStep, src, tbl);
}
/**
 * Draws a left triangle that is clipped vertically only.
 *
 * The lower part is delegated to RenderLeftTriangleLowerClipVertical, which
 * advances the local `dst` and `src`; the remaining rows of the upper part are
 * then drawn here.
 *
 * @param dst output position of the triangle
 * @param dstPitch pitch of the output buffer
 * @param src input pixel data
 * @param tbl forwarded unchanged to the line renderer
 * @param clip clip description, converted to per-part amounts by CalculateDiamondClipY
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);

	// Lower part; leaves dst/src positioned after its last row.
	RenderLeftTriangleLowerClipVertical<Light, Transparent>(clipY, dst, dstPitch, src, tbl);

	// Presumably skips the source data of upper rows clipped at the bottom (helper not visible here).
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
	dst += 2 * XStep + XStep * clipY.upperBottom;

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		// Upper rows get narrower by XStep per row.
		const auto lineWidth = Width - XStep * row;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth, tbl);
		src += lineWidth;
		// The next row starts one pitch earlier and XStep further to the right.
		dst -= dstPitch - XStep;
		++row;
	}
}
/**
 * Draws a left triangle that is clipped on the left and vertically.
 *
 * The lower part is delegated to RenderLeftTriangleLowerClipLeftAndVertical,
 * which advances the local `dst` and `src`; the remaining rows of the upper
 * part are then drawn here with their leading clipped pixels dropped.
 *
 * @param dst output position of the triangle
 * @param dstPitch pitch of the output buffer
 * @param src input pixel data
 * @param tbl forwarded unchanged to the line renderer
 * @param clip clip description; `clip.left` is read directly, the rest via CalculateDiamondClipY
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);
	const int_fast16_t clipLeft = clip.left;

	// Lower part; leaves dst/src positioned after its last row.
	RenderLeftTriangleLowerClipLeftAndVertical<Light, Transparent>(clipLeft, clipY, dst, dstPitch, src, tbl);

	// Presumably skips the source data of upper rows clipped at the bottom (helper not visible here).
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
	dst += 2 * XStep + XStep * clipY.upperBottom;

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		const auto lineWidth = Width - XStep * row;
		// Column at which this row begins within the tile.
		const auto lineStartX = XStep * row;
		// Leading pixels of this row that lie inside the clipped-off region.
		const auto hidden = lineStartX >= clipLeft ? 0 : clipLeft - lineStartX;
		// Unlike the lower part, the renderer is always invoked; a fully hidden row is passed a width of 0.
		const auto visible = lineWidth > hidden ? lineWidth - hidden : 0;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst + hidden, src + hidden, visible, tbl);
		src += lineWidth;
		dst -= dstPitch - XStep;
		++row;
	}
}
/**
 * Draws a left triangle that is clipped on the right and vertically.
 *
 * The lower part is delegated to RenderLeftTriangleLowerClipRightAndVertical,
 * which advances the local `dst` and `src`; the remaining rows of the upper
 * part are then drawn here, each shortened by `clip.right` pixels.
 *
 * @param dst output position of the triangle
 * @param dstPitch pitch of the output buffer
 * @param src input pixel data
 * @param tbl forwarded unchanged to the line renderer
 * @param clip clip description; `clip.right` is read directly, the rest via CalculateDiamondClipY
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);
	const int_fast16_t clipRight = clip.right;

	// Lower part; leaves dst/src positioned after its last row.
	RenderLeftTriangleLowerClipRightAndVertical<Light, Transparent>(clipRight, clipY, dst, dstPitch, src, tbl);

	// Presumably skips the source data of upper rows clipped at the bottom (helper not visible here).
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
	dst += 2 * XStep + XStep * clipY.upperBottom;

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		const auto lineWidth = Width - XStep * row;
		// Rows only get narrower from here (assuming XStep is positive), so the
		// first fully clipped row ends the drawing.
		if (lineWidth <= clipRight)
			break;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth - clipRight, tbl);
		src += lineWidth;
		dst -= dstPitch - XStep;
		++row;
	}
}
/**
 * Entry point for drawing a left triangle; picks the renderer that matches `clip`.
 *
 * - `clip.width != Width` and `clip.right == 0`: left + vertical clipping variant.
 * - `clip.width != Width` otherwise: right + vertical clipping variant.
 * - Full width and `clip.height == TriangleHeight`: variant without clip handling.
 * - Full width otherwise: vertical-only clipping variant.
 *
 * @param dst output position of the triangle
 * @param dstPitch pitch of the output buffer
 * @param src input pixel data
 * @param tbl forwarded unchanged to the selected renderer
 * @param clip clip description used to select the renderer
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangle(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	// Horizontally clipped: choose by which side is cut.
	if (clip.width != Width) {
		if (clip.right == 0) {
			RenderLeftTriangleClipLeftAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		} else {
			RenderLeftTriangleClipRightAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		}
		return;
	}

	// Full width and full height: nothing is clipped.
	if (clip.height == TriangleHeight) {
		RenderLeftTriangleFull<Light, Transparent>(dst, dstPitch, src, tbl);
		return;
	}

	// Full width, reduced height.
	RenderLeftTriangleClipVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
}
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLower(uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
RenderTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
}
/**
 * Draws the lower part of a right triangle tile with a vertical clip applied.
 *
 * Rows `1 + clipY.lowerBottom` through `LowerHeight - clipY.lowerTop` are drawn;
 * row `i` is `XStep * i` pixels wide and every row starts at the same destination column.
 *
 * @param clipY    Vertical clip; `lowerBottom` and `lowerTop` are read.
 * @param dst      Destination pixel pointer (in/out); moved back by `dstPitch` after each row.
 * @param dstPitch Destination pitch.
 * @param src      Source pixel pointer (in/out); advanced past the skipped and the drawn rows.
 * @param tbl      Table forwarded unchanged to the line renderer.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipVertical(const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	// Jump over the source pixels of the rows clipped at the bottom.
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);

	const auto lastRow = LowerHeight - clipY.lowerTop;
	auto row = 1 + clipY.lowerBottom;
	while (row <= lastRow) {
		const auto rowWidth = XStep * row;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, rowWidth, tbl);
		src += rowWidth;
		dst -= dstPitch;
		++row;
	}
}
/**
 * Draws the lower part of a right triangle tile with a left clip and a vertical clip applied.
 *
 * Row `i` is `XStep * i` source pixels wide. The first `clipLeft` pixels of each row are
 * skipped in the source; rows that are not wider than `clipLeft` are not drawn at all.
 *
 * @param clipLeft Number of pixels clipped from the left of every row.
 * @param clipY    Vertical clip; `lowerBottom` and `lowerTop` are read.
 * @param dst      Destination pixel pointer (in/out); moved back by `dstPitch` after each row,
 *                 whether or not the row was drawn.
 * @param dstPitch Destination pitch.
 * @param src      Source pixel pointer (in/out); advanced by the full row width after each row.
 * @param tbl      Table forwarded unchanged to the line renderer.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	// Jump over the source pixels of the rows clipped at the bottom.
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);

	const auto lastRow = LowerHeight - clipY.lowerTop;
	auto row = 1 + clipY.lowerBottom;
	while (row <= lastRow) {
		const auto rowWidth = XStep * row;
		if (rowWidth > clipLeft) {
			// Only the part of the row to the right of the clip is drawn.
			RenderLineTransparentOrOpaque<Light, Transparent>(dst, src + clipLeft, rowWidth - clipLeft, tbl);
		}
		src += rowWidth;
		dst -= dstPitch;
		++row;
	}
}
/**
 * Draws the lower part of a right triangle tile with a right clip and a vertical clip applied.
 *
 * Row `i` is `XStep * i` source pixels wide. A row is shortened only when `clipRight` exceeds
 * `Width - rowWidth`; the excess is removed from the end of the row. Rows that are not wider
 * than the amount to remove are not drawn.
 *
 * @param clipRight Number of pixels clipped on the right, measured against the full `Width`.
 * @param clipY     Vertical clip; `lowerBottom` and `lowerTop` are read.
 * @param dst       Destination pixel pointer (in/out); moved back by `dstPitch` after each row,
 *                  whether or not the row was drawn.
 * @param dstPitch  Destination pitch.
 * @param src       Source pixel pointer (in/out); advanced by the full row width after each row.
 * @param tbl       Table forwarded unchanged to the line renderer.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipRightAndVertical(int_fast16_t clipRight, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
{
	// Jump over the source pixels of the rows clipped at the bottom.
	src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);

	const auto lastRow = LowerHeight - clipY.lowerTop;
	auto row = 1 + clipY.lowerBottom;
	while (row <= lastRow) {
		const auto rowWidth = XStep * row;
		// Pixels to drop from the end of this row; zero while the row ends before the clip.
		const auto trimmed = Width - rowWidth < clipRight ? clipRight - (Width - rowWidth) : 0;
		if (rowWidth > trimmed) {
			RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, rowWidth - trimmed, tbl);
		}
		src += rowWidth;
		dst -= dstPitch;
		++row;
	}
}
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
RenderRightTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
dun_render: Unroll triangle loops Rather than relying on the compiler to do it, which doesn't always happen, we do it by hand. Previously, very slightly different versions of the code could result in those loops not being unrolled (such as in the current master). I've run the benchmark like this: ```bash BASELINE=dun-benchmark BENCHMARK=dun_render_benchmark git checkout "$BASELINE" tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK" git checkout - tools/build_and_run_benchmark.py --no-run "$BENCHMARK" tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \ "build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \ --benchmark_repetitions=10 ``` Benchmark results are below. The `FullyLit` calls are ~55% faster. The `PartiallyLit` calls are ~40% faster. The `FullyDark` version is twice as slow, which is surprising. I have a separate idea about eliminating most of the `FullyDark` calls entirely. ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------- DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879 DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752 
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383 DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533 DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864 DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747 DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001 
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538 DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938 DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865 DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888 DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767 DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917 DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107 DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261 DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev 
-0.0833 -0.0833 324195 297188 324156 297143 DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363 DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362 DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882 DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880 
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743 DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830 DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555 DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150 DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528 
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680 DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293 DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776 DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956 DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0 DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390 DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388 DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587 DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0 DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317 DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921 DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090 DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858 DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709 DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 
-0.0605 633282 594964 633184 594903 DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0 DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383 DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637 DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231 DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0 DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053 DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178 DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642 DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932 DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885 DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0 
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139 DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112 DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580 DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 
0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885 DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843 DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934 DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998 DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 
0.0046 0.0046 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902 DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346 DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0 OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0 ```
2 years ago
RenderTriangleUpper<Light, Transparent>(dst, dstPitch, src, tbl);
}
/**
 * @brief Renders a right triangle when only the vertical extent is clipped.
 *
 * The lower part is delegated to `RenderRightTriangleLowerClipVertical`. The upper
 * part is drawn here row by row; every row is `XStep` pixels narrower than the
 * previous one and `dst` is moved back by `dstPitch` after each row.
 *
 * NOTE(review): this presumably relies on the lower-half helper advancing `dst`/`src`
 * by reference -- confirm against its definition.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);
	RenderRightTriangleLowerClipVertical<Light, Transparent>(clipY, dst, dstPitch, src, tbl);

	// Skip the source pixels of the upper rows that are clipped at the bottom.
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		const auto lineWidth = Width - XStep * row;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth, tbl);
		src += lineWidth;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Renders a right triangle that is clipped on the left (and possibly vertically).
 *
 * The lower part is delegated to `RenderRightTriangleLowerClipLeftAndVertical`.
 * For each upper row the first `clip.left` source pixels are skipped; rendering
 * stops at the first row that is not wider than the left clip.
 *
 * NOTE(review): this presumably relies on the lower-half helper advancing `dst`/`src`
 * by reference -- confirm against its definition.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);
	const int_fast16_t clipLeft = clip.left;
	RenderRightTriangleLowerClipLeftAndVertical<Light, Transparent>(clipLeft, clipY, dst, dstPitch, src, tbl);

	// Skip the source pixels of the upper rows that are clipped at the bottom.
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		const auto lineWidth = Width - XStep * row;
		// Rows only get narrower, so once one is fully clipped there is nothing left to draw.
		if (lineWidth <= clipLeft) {
			break;
		}
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src + clipLeft, lineWidth - clipLeft, tbl);
		src += lineWidth;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Renders a right triangle that is clipped on the right (and possibly vertically).
 *
 * The lower part is delegated to `RenderRightTriangleLowerClipRightAndVertical`.
 * For each upper row, the part of the right clip not already covered by the row's
 * own shrinkage (`Width - rowWidth`) is trimmed from the end of the row. A row that
 * is trimmed away entirely is still passed to the line renderer with width 0.
 *
 * NOTE(review): this presumably relies on the lower-half helper advancing `dst`/`src`
 * by reference -- confirm against its definition.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	const DiamondClipY clipY = CalculateDiamondClipY(clip);
	const int_fast16_t clipRight = clip.right;
	RenderRightTriangleLowerClipRightAndVertical<Light, Transparent>(clipRight, clipY, dst, dstPitch, src, tbl);

	// Skip the source pixels of the upper rows that are clipped at the bottom.
	src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);

	const auto lastRow = TriangleUpperHeight - clipY.upperTop;
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		const auto lineWidth = Width - XStep * row;
		// Number of pixels to drop from the right end of this row.
		const auto trim = Width - lineWidth < clipRight ? clipRight - (Width - lineWidth) : 0;
		RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, lineWidth > trim ? lineWidth - trim : 0, tbl);
		src += lineWidth;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Picks the right-triangle renderer that matches the given clip.
 *
 * - Full width and full height: unclipped renderer.
 * - Full width, reduced height: vertical-only clipping.
 * - Reduced width with `clip.right == 0`: left (and vertical) clipping.
 * - Otherwise: right (and vertical) clipping.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangle(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (clip.width != Width) {
		// Horizontally clipped.
		if (clip.right == 0) {
			RenderRightTriangleClipLeftAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		} else {
			RenderRightTriangleClipRightAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		}
		return;
	}

	if (clip.height == TriangleHeight) {
		RenderRightTriangleFull<Light, Transparent>(dst, dstPitch, src, tbl);
	} else {
		RenderRightTriangleClipVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
	}
}
/**
 * @brief Renders the unclipped upper (rectangular) half of a trapezoid tile.
 *
 * Draws `TrapezoidUpperHeight` rows of `Width` pixels, moving `dst` back by
 * `dstPitch` after every row.
 *
 * For `MaskType::Left` / `MaskType::Right` every row after the first is split into a
 * transparent and an opaque part whose boundary (`prefixWidth`) moves by
 * `PrefixIncrement<Mask>` per row. For the other mask types every row is rendered
 * fully transparent or fully opaque.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalf(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	const uint8_t *srcEnd = src + Width * TrapezoidUpperHeight;
	if constexpr (Mask != MaskType::Left && Mask != MaskType::Right) {
		// Mask == MaskType::Solid || Mask == MaskType::Transparent
		do {
			RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, src, Width, tbl);
			src += Width;
			dst -= dstPitch;
		} while (src != srcEnd);
	} else {
		// The first line is always fully opaque.
		// We handle it specially to avoid calling the blitter with width=0.
		RenderLineOpaque<Light>(dst, src, Width, tbl);
		src += Width;
		dst -= dstPitch;

		// Start from 32 when the increment is negative, from 0 otherwise,
		// already advanced by one step because the first row was drawn above.
		uint8_t prefixWidth = (PrefixIncrement<Mask> < 0 ? 32 : 0) + PrefixIncrement<Mask>;
		do {
			RenderLineTransparentAndOpaque<Light, /*OpaqueFirst=*/Mask == MaskType::Right>(dst, src, prefixWidth, Width, tbl);
			prefixWidth += PrefixIncrement<Mask>;
			src += Width;
			dst -= dstPitch;
		} while (src != srcEnd);
	}
}
/**
 * @brief Renders the upper half of a trapezoid when only the vertical extent is clipped.
 *
 * Draws full-width rows from row `1 + clipY.upperBottom` up to
 * `TrapezoidUpperHeight - clipY.upperTop`. The mask prefix starts at
 * `InitPrefix<Mask>(clip.bottom)` and changes by `PrefixIncrement<Mask>` per row.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	const auto lastRow = TrapezoidUpperHeight - clipY.upperTop;
	int8_t maskPrefix = InitPrefix<Mask>(clip.bottom);
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		RenderLine<Light, Mask>(dst, src, Width, tbl, maskPrefix);
		src += Width;
		maskPrefix += PrefixIncrement<Mask>;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Renders the upper half of a trapezoid that is clipped on the left (and possibly vertically).
 *
 * Each row is `clip.width` pixels wide, while `src` still advances by the full
 * `Width` per row. The mask prefix passed to the line renderer is reduced by
 * `clip.left`.
 *
 * NOTE(review): the caller is expected to have already offset `src`/`dst` by
 * `clip.left` -- the visible trapezoid callers do so.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipLeftAndVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	const auto lastRow = TrapezoidUpperHeight - clipY.upperTop;
	int8_t maskPrefix = InitPrefix<Mask>(clip.bottom);
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		RenderLine<Light, Mask>(dst, src, clip.width, tbl, maskPrefix - clip.left);
		src += Width;
		maskPrefix += PrefixIncrement<Mask>;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Renders the upper half of a trapezoid that is clipped on the right (and possibly vertically).
 *
 * Each row is `clip.width` pixels wide, while `src` still advances by the full
 * `Width` per row. The mask prefix starts at `InitPrefix<Mask>(clip.bottom)` and
 * changes by `PrefixIncrement<Mask>` per row.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipRightAndVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	const auto lastRow = TrapezoidUpperHeight - clipY.upperTop;
	int8_t maskPrefix = InitPrefix<Mask>(clip.bottom);
	auto row = 1 + clipY.upperBottom;
	while (row <= lastRow) {
		RenderLine<Light, Mask>(dst, src, clip.width, tbl, maskPrefix);
		src += Width;
		maskPrefix += PrefixIncrement<Mask>;
		dst -= dstPitch;
		++row;
	}
}
/**
 * @brief Renders an unclipped left trapezoid: a lower left triangle followed by the upper half.
 *
 * The lower triangle is rendered as fully transparent only for `MaskType::Transparent`.
 * The upper half starts `XStep` pixels to the right of where the triangle renderer left `dst`.
 *
 * NOTE(review): this presumably relies on `RenderLeftTriangleLower` advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	RenderLeftTriangleLower<Light, LowerTransparent>(dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalf<Light, Mask>(dst + XStep, dstPitch, src, tbl);
}
/**
 * @brief Renders a left trapezoid when only the vertical extent is clipped.
 *
 * After the lower triangle, the upper half is rendered starting `XStep` pixels to
 * the right, with `clipY.upperBottom` full source rows skipped.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderLeftTriangleLowerClipVertical<Light, LowerTransparent>(clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipVertical<Light, Mask>(clip, clipY, dst + XStep, dstPitch, src + clipY.upperBottom * Width, tbl);
}
/**
 * @brief Renders a left trapezoid that is clipped on the left (and possibly vertically).
 *
 * After the lower triangle, the upper half is rendered with `src` advanced past
 * `clipY.upperBottom` full rows plus `clip.left` pixels, and `dst` advanced by
 * `XStep + clip.left`.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderLeftTriangleLowerClipLeftAndVertical<Light, LowerTransparent>(clip.left, clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipLeftAndVertical<Light, Mask>(
	    clip, clipY, dst + (XStep + clip.left), dstPitch, src + (clipY.upperBottom * Width + clip.left), tbl);
}
/**
 * @brief Renders a left trapezoid that is clipped on the right (and possibly vertically).
 *
 * After the lower triangle, the upper half is rendered starting `XStep` pixels to
 * the right, with `clipY.upperBottom` full source rows skipped.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderLeftTriangleLowerClipRightAndVertical<Light, LowerTransparent>(clip.right, clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipRightAndVertical<Light, Mask>(clip, clipY, dst + XStep, dstPitch, src + clipY.upperBottom * Width, tbl);
}
/**
 * @brief Picks the left-trapezoid renderer that matches the given clip.
 *
 * - Full width and full height: unclipped renderer.
 * - Full width, reduced height: vertical-only clipping.
 * - Reduced width with `clip.right == 0`: left (and vertical) clipping.
 * - Otherwise: right (and vertical) clipping.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (clip.width != Width) {
		// Horizontally clipped.
		if (clip.right == 0) {
			RenderLeftTrapezoidClipLeftAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
		} else {
			RenderLeftTrapezoidClipRightAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
		}
		return;
	}

	if (clip.height == Height) {
		RenderLeftTrapezoidFull<Light, Mask>(dst, dstPitch, src, tbl);
	} else {
		RenderLeftTrapezoidClipVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
	}
}
/**
 * @brief Renders an unclipped right trapezoid: a lower right triangle followed by the upper half.
 *
 * The lower triangle is rendered as fully transparent only for `MaskType::Transparent`.
 *
 * NOTE(review): this presumably relies on `RenderRightTriangleLower` advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	RenderRightTriangleLower<Light, LowerTransparent>(dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalf<Light, Mask>(dst, dstPitch, src, tbl);
}
/**
 * @brief Renders a right trapezoid when only the vertical extent is clipped.
 *
 * After the lower triangle, the upper half is rendered with `clipY.upperBottom`
 * full source rows skipped.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderRightTriangleLowerClipVertical<Light, LowerTransparent>(clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipVertical<Light, Mask>(clip, clipY, dst, dstPitch, src + clipY.upperBottom * Width, tbl);
}
/**
 * @brief Renders a right trapezoid that is clipped on the left (and possibly vertically).
 *
 * After the lower triangle, the upper half is rendered with `src` advanced past
 * `clipY.upperBottom` full rows plus `clip.left` pixels. `dst` is not offset here.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderRightTriangleLowerClipLeftAndVertical<Light, LowerTransparent>(clip.left, clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipLeftAndVertical<Light, Mask>(
	    clip, clipY, dst, dstPitch, src + (clipY.upperBottom * Width + clip.left), tbl);
}
/**
 * @brief Renders a right trapezoid that is clipped on the right (and possibly vertically).
 *
 * After the lower triangle, the upper half is rendered with `clipY.upperBottom`
 * full source rows skipped.
 *
 * NOTE(review): this presumably relies on the lower-triangle helper advancing
 * `dst`/`src` by reference -- confirm against its definition.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr bool LowerTransparent = Mask == MaskType::Transparent;
	const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
	RenderRightTriangleLowerClipRightAndVertical<Light, LowerTransparent>(clip.right, clipY, dst, dstPitch, src, tbl);
	RenderTrapezoidUpperHalfClipRightAndVertical<Light, Mask>(clip, clipY, dst, dstPitch, src + clipY.upperBottom * Width, tbl);
}
/**
 * @brief Picks the right-trapezoid renderer that matches the given clip.
 *
 * - Full width and full height: unclipped renderer.
 * - Full width, reduced height: vertical-only clipping.
 * - Reduced width with `clip.right == 0`: left (and vertical) clipping.
 * - Otherwise: right (and vertical) clipping.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (clip.width != Width) {
		// Horizontally clipped.
		if (clip.right == 0) {
			RenderRightTrapezoidClipLeftAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
		} else {
			RenderRightTrapezoidClipRightAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
		}
		return;
	}

	if (clip.height == Height) {
		RenderRightTrapezoidFull<Light, Mask>(dst, dstPitch, src, tbl);
	} else {
		RenderRightTrapezoidClipVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
	}
}
/**
 * @brief Dispatches to the renderer for the given tile type.
 *
 * Renderers that are parameterized on a `MaskType` receive `MaskType::Transparent`
 * when `Transparent` is true and `MaskType::Solid` otherwise. A `tile` value that
 * matches none of the cases renders nothing.
 */
template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	constexpr MaskType TileMask = Transparent ? MaskType::Transparent : MaskType::Solid;

	switch (tile) {
	case TileType::Square:
		RenderSquare<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		return;
	case TileType::TransparentSquare:
		RenderTransparentSquare<Light, TileMask>(dst, dstPitch, src, tbl, clip);
		return;
	case TileType::LeftTriangle:
		RenderLeftTriangle<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		return;
	case TileType::RightTriangle:
		RenderRightTriangle<Light, Transparent>(dst, dstPitch, src, tbl, clip);
		return;
	case TileType::LeftTrapezoid:
		RenderLeftTrapezoid<Light, TileMask>(dst, dstPitch, src, tbl, clip);
		return;
	case TileType::RightTrapezoid:
		RenderRightTrapezoid<Light, TileMask>(dst, dstPitch, src, tbl, clip);
		return;
	}
}
/**
 * @brief Renders a `TransparentSquare` or `LeftTrapezoid` tile with the given mask.
 *
 * Any other tile type is reported through `app_fatal`.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (tile == TileType::TransparentSquare) {
		RenderTransparentSquare<Light, Mask>(dst, dstPitch, src, tbl, clip);
	} else if (tile == TileType::LeftTrapezoid) {
		RenderLeftTrapezoid<Light, Mask>(dst, dstPitch, src, tbl, clip);
	} else {
		app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles");
	}
}
/**
 * @brief Renders a `TransparentSquare` or `RightTrapezoid` tile with the given mask.
 *
 * Any other tile type is reported through `app_fatal`.
 */
template <LightType Light, MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	switch (tile) {
	case TileType::TransparentSquare:
		RenderTransparentSquare<Light, Mask>(dst, dstPitch, src, tbl, clip);
		break;
	case TileType::RightTrapezoid:
		RenderRightTrapezoid<Light, Mask>(dst, dstPitch, src, tbl, clip);
		break;
	default:
		// This variant accepts RightTrapezoid, so the diagnostic must name it
		// rather than the LeftTrapezoid accepted by the sibling function.
		app_fatal("Given mask can only be applied to TransparentSquare or RightTrapezoid tiles");
	}
}
/**
 * @brief Chooses the `LightType` specialization for a left-masked tile from the light table.
 *
 * `IsFullyDark(tbl)` is checked first, then `IsFullyLit(tbl)`; every other table
 * is rendered as `LightType::PartiallyLit`.
 */
template <MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquareDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (IsFullyDark(tbl)) {
		RenderLeftTrapezoidOrTransparentSquare<LightType::FullyDark, Mask>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	if (IsFullyLit(tbl)) {
		RenderLeftTrapezoidOrTransparentSquare<LightType::FullyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	RenderLeftTrapezoidOrTransparentSquare<LightType::PartiallyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
}
/**
 * @brief Chooses the `LightType` specialization for a right-masked tile from the light table.
 *
 * `IsFullyDark(tbl)` is checked first, then `IsFullyLit(tbl)`; every other table
 * is rendered as `LightType::PartiallyLit`.
 */
template <MaskType Mask>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquareDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (IsFullyDark(tbl)) {
		RenderRightTrapezoidOrTransparentSquare<LightType::FullyDark, Mask>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	if (IsFullyLit(tbl)) {
		RenderRightTrapezoidOrTransparentSquare<LightType::FullyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	RenderRightTrapezoidOrTransparentSquare<LightType::PartiallyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
}
/**
 * @brief Chooses the `LightType` specialization of `RenderTileType` from the light table.
 *
 * `IsFullyDark(tbl)` is checked first, then `IsFullyLit(tbl)`; every other table
 * is rendered as `LightType::PartiallyLit`.
 */
template <bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
	if (IsFullyDark(tbl)) {
		RenderTileType<LightType::FullyDark, Transparent>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	if (IsFullyLit(tbl)) {
		RenderTileType<LightType::FullyLit, Transparent>(tile, dst, dstPitch, src, tbl, clip);
		return;
	}
	RenderTileType<LightType::PartiallyLit, Transparent>(tile, dst, dstPitch, src, tbl, clip);
}
} // namespace
#ifdef DUN_RENDER_STATS
/** @brief Per-(tile type, mask type) counters; `RenderTileFrame` increments the matching entry for each tile it draws when `DUN_RENDER_STATS` is defined. */
ankerl::unordered_dense::map<DunRenderType, size_t, DunRenderTypeHash> DunRenderStats;
/**
 * @brief Returns the enumerator name of `tileType`.
 * @return The name as a string view, or "???" for a value that is not listed.
 */
std::string_view TileTypeToString(TileType tileType)
{
	switch (tileType) {
	case TileType::Square:
		return "Square";
	case TileType::TransparentSquare:
		return "TransparentSquare";
	case TileType::LeftTriangle:
		return "LeftTriangle";
	case TileType::RightTriangle:
		return "RightTriangle";
	case TileType::LeftTrapezoid:
		return "LeftTrapezoid";
	case TileType::RightTrapezoid:
		return "RightTrapezoid";
	default:
		return "???";
	}
}
/**
 * @brief Returns the enumerator name of `maskType`.
 * @return The name as a string view, or "???" for a value that is not listed.
 */
std::string_view MaskTypeToString(MaskType maskType)
{
	switch (maskType) {
	case MaskType::Solid:
		return "Solid";
	case MaskType::Transparent:
		return "Transparent";
	case MaskType::Right:
		return "Right";
	case MaskType::Left:
		return "Left";
	case MaskType::RightFoliage:
		return "RightFoliage";
	case MaskType::LeftFoliage:
		return "LeftFoliage";
	default:
		return "???";
	}
}
#endif
/**
 * @brief Renders one dungeon tile frame onto `out`, clipped to the surface.
 *
 * @param out Target surface.
 * @param position Position passed to `CalculateClip` and used to locate the first destination pixel.
 * @param tile Shape of the tile, selects the renderer.
 * @param src Source pixel data of the frame.
 * @param height Frame height passed to `CalculateClip`.
 * @param maskType Selects solid, transparent, or left/right masked rendering.
 * @param tbl Light table; also used to choose the fully dark / fully lit / partially lit path.
 *
 * Returns without drawing when the clipped width or height is not positive.
 * A `maskType` other than Solid, Transparent, Left or Right draws nothing.
 */
DVL_ATTRIBUTE_HOT void RenderTileFrame(const Surface &out, const Point &position, TileType tile, const uint8_t *src, int_fast16_t height,
    MaskType maskType, const uint8_t *tbl)
{
	// `position` is a const reference, so the optional debug offsets are applied
	// to local copies rather than to the argument itself.
	auto x = position.x;
	auto y = position.y;
#ifdef DEBUG_RENDER_OFFSET_X
	x += DEBUG_RENDER_OFFSET_X;
#endif
#ifdef DEBUG_RENDER_OFFSET_Y
	y += DEBUG_RENDER_OFFSET_Y;
#endif
	const Clip clip = CalculateClip(x, y, DunFrameWidth, height, out);
	if (clip.width <= 0 || clip.height <= 0) return;

	// First destination pixel: left edge after clipping, bottom row after clipping.
	uint8_t *dst = out.at(static_cast<int>(x + clip.left), static_cast<int>(y - clip.bottom));
	const uint16_t dstPitch = out.pitch();

#ifdef DUN_RENDER_STATS
	++DunRenderStats[DunRenderType { tile, maskType }];
#endif

	switch (maskType) {
	case MaskType::Solid:
		RenderTileDispatch</*Transparent=*/false>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Transparent:
		RenderTileDispatch</*Transparent=*/true>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Left:
		RenderLeftTrapezoidOrTransparentSquareDispatch<MaskType::Left>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Right:
		RenderRightTrapezoidOrTransparentSquareDispatch<MaskType::Right>(tile, dst, dstPitch, src, tbl, clip);
		break;
	}

#ifdef DEBUG_STR
	const auto [debugStr, flags] = GetTileDebugStr(tile);
	DrawString(out, debugStr, Rectangle { Point { x + 2, y - 29 }, Size { 28, 28 } }, { .flags = flags });
#endif
}
/**
 * @brief Draws a tile with the fully dark light type at (`sx`, `sy`).
 *
 * The tile is drawn as two triangles: a left triangle at `sx` and a right triangle
 * at `sx + Width`. Each half is clipped against `out` separately and skipped when
 * its clipped width is not positive. Nothing is drawn when the clipped height is
 * not positive. No source pixels or light table are passed to the renderers.
 */
void world_draw_black_tile(const Surface &out, int sx, int sy)
{
#ifdef DEBUG_RENDER_OFFSET_X
	sx += DEBUG_RENDER_OFFSET_X;
#endif
#ifdef DEBUG_RENDER_OFFSET_Y
	sy += DEBUG_RENDER_OFFSET_Y;
#endif
	const Clip clipLeft = CalculateClip(sx, sy, Width, TriangleHeight, out);
	if (clipLeft.height <= 0) {
		return;
	}

	// The right half starts one tile width further right; it shares the vertical
	// clip with the left half and only the horizontal clip is recomputed.
	const auto rightX = sx + Width;
	Clip clipRight;
	clipRight.top = clipLeft.top;
	clipRight.bottom = clipLeft.bottom;
	clipRight.height = clipLeft.height;
	clipRight.left = rightX < 0 ? -rightX : 0;
	clipRight.right = rightX + Width > out.w() ? rightX + Width - out.w() : 0;
	clipRight.width = Width - clipRight.left - clipRight.right;

	const uint16_t dstPitch = out.pitch();

	if (clipLeft.width > 0) {
		uint8_t *leftDst = out.at(static_cast<int>(sx + clipLeft.left), static_cast<int>(sy - clipLeft.bottom));
		RenderLeftTriangle<LightType::FullyDark, /*Transparent=*/false>(leftDst, dstPitch, nullptr, nullptr, clipLeft);
	}
	if (clipRight.width > 0) {
		uint8_t *rightDst = out.at(static_cast<int>(rightX + clipRight.left), static_cast<int>(sy - clipRight.bottom));
		RenderRightTriangle<LightType::FullyDark, /*Transparent=*/false>(rightDst, dstPitch, nullptr, nullptr, clipRight);
	}
}
} // namespace devilution