|
|
|
|
/**
|
|
|
|
|
* @file dun_render.cpp
|
|
|
|
|
*
|
|
|
|
|
* Implementation of functionality for rendering the level tiles.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// Debugging variables
|
|
|
|
|
// #define DEBUG_STR
|
|
|
|
|
// #define DEBUG_RENDER_COLOR
|
|
|
|
|
// #define DEBUG_RENDER_OFFSET_X 5
|
|
|
|
|
// #define DEBUG_RENDER_OFFSET_Y 5
|
|
|
|
|
|
|
|
|
|
#include "engine/render/dun_render.hpp"
|
|
|
|
|
|
|
|
|
|
#include <SDL_endian.h>
|
|
|
|
|
|
|
|
|
|
#include <climits>
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The `FullyDark` version is twice as slow, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
#include <cstddef>
|
|
|
|
|
#include <cstdint>
|
|
|
|
|
|
|
|
|
|
#include "engine/render/blit_impl.hpp"
|
|
|
|
|
#include "levels/dun_tile.hpp"
|
|
|
|
|
#include "lighting.h"
|
|
|
|
|
#include "options.h"
|
|
|
|
|
#include "utils/attributes.h"
|
|
|
|
|
#ifdef DEBUG_STR
|
|
|
|
|
#include "engine/render/text_render.hpp"
|
|
|
|
|
#endif
|
|
|
|
|
#if defined(DEBUG_STR) || defined(DUN_RENDER_STATS)
|
|
|
|
|
#include "utils/str_cat.hpp"
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
namespace devilution {
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
/** Width of a tile rendering primitive. */
|
|
|
|
|
constexpr int_fast16_t Width = DunFrameWidth;
|
|
|
|
|
|
|
|
|
|
/** Height of a tile rendering primitive (except triangles). */
|
|
|
|
|
constexpr int_fast16_t Height = DunFrameHeight;
|
|
|
|
|
|
|
|
|
|
/** Height of the lower triangle of a triangular or a trapezoid tile. */
|
|
|
|
|
constexpr int_fast16_t LowerHeight = DunFrameHeight / 2;
|
|
|
|
|
|
|
|
|
|
/** Height of the upper triangle of a triangular tile. */
|
|
|
|
|
constexpr int_fast16_t TriangleUpperHeight = DunFrameHeight / 2 - 1;
|
|
|
|
|
|
|
|
|
|
/** Height of the upper rectangle of a trapezoid tile. */
|
|
|
|
|
constexpr int_fast16_t TrapezoidUpperHeight = DunFrameHeight / 2;
|
|
|
|
|
|
Simplify foliage rendering
During `ReencodeDungeonCels`, extracts floor tile foliage into
a triangle with the floor frame and a separate 16-px tall `TransparentSquare`.
This means that the floor frames are now always triangles and
the foliage can be rendered directly without masking.
Dungeon graphics sizes:
Map | Frames | Foliage frames | Byte size | Before PR | After PR
-----|--------|---------------:|-----------:|----------:|----------:
Town | 3,803 | 41 | 2,317,832 | 2,242,056 | 2,242,190
L1 | 1,119 | 11 | 738,836 | 721,604 | 721,110
L4 | 1,091 | 6 | 603,140 | 584,500 | 584,242
RG99 binary size reduced by ~4 KiB: 2,426,380 bytes -> 2,421,388 bytes
2 years ago
|
|
|
/** Total height of a triangle tile, as defined by `DunFrameTriangleHeight`. */
constexpr int_fast16_t TriangleHeight = DunFrameTriangleHeight;

/** Horizontal pixels added to a triangle line for every pixel stepped vertically. */
constexpr int_fast16_t XStep = 2;
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_STR
/**
 * Maps a tile type to the debug label used for it and the text alignment
 * flags that go with that label.
 *
 * @param tile Tile type to describe.
 * @return Label/flags pair; an empty label with no flags for any other tile type.
 */
std::pair<std::string_view, UiFlags> GetTileDebugStr(TileType tile)
{
	switch (tile) {
	case TileType::Square:
		return { "S", UiFlags::AlignCenter | UiFlags::VerticalCenter };
	case TileType::TransparentSquare:
		return { "T", UiFlags::AlignCenter | UiFlags::VerticalCenter };
	case TileType::LeftTriangle:
		return { "<", UiFlags::AlignRight | UiFlags::VerticalCenter };
	case TileType::RightTriangle:
		return { ">", UiFlags::VerticalCenter };
	case TileType::LeftTrapezoid:
		return { "\\", UiFlags::AlignCenter };
	case TileType::RightTrapezoid:
		return { "/", UiFlags::AlignCenter };
	default:
		return { "", {} };
	}
}
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_RENDER_COLOR
/** Palette index that the debug-colour line renderers draw with instead of the tile's pixels. */
int DBGCOLOR = 0;

/**
 * Picks the debug palette colour associated with a tile type.
 *
 * @param tile Tile type to colour.
 * @return Palette index for the tile type, or 0 for any other tile type.
 */
int GetTileDebugColor(TileType tile)
{
	switch (tile) {
	case TileType::Square:
		return PAL16_YELLOW + 5;
	case TileType::TransparentSquare:
		return PAL16_ORANGE + 5;
	case TileType::LeftTriangle:
		return PAL16_GRAY + 5;
	case TileType::RightTriangle:
		return PAL16_BEIGE;
	case TileType::LeftTrapezoid:
		return PAL16_RED + 5;
	case TileType::RightTrapezoid:
		return PAL16_BLUE + 5;
	default:
		return 0;
	}
}
#endif // DEBUG_RENDER_COLOR
|
|
|
|
|
|
|
|
|
|
// How many pixels to increment the transparent (Left) or opaque (Right)
|
|
|
|
|
// prefix width after each line (drawing bottom-to-top).
|
|
|
|
|
template <MaskType Mask>
|
|
|
|
|
constexpr int8_t PrefixIncrement = 0;
|
|
|
|
|
template <>
|
|
|
|
|
constexpr int8_t PrefixIncrement<MaskType::Left> = 2;
|
|
|
|
|
template <>
|
|
|
|
|
constexpr int8_t PrefixIncrement<MaskType::Right> = -2;
|
|
|
|
|
|
|
|
|
|
// Initial value for the prefix.
|
|
|
|
|
template <MaskType Mask>
|
|
|
|
|
int8_t InitialPrefix = PrefixIncrement<Mask> >= 0 ? -32 : 64;
|
|
|
|
|
|
|
|
|
|
// The initial value for the prefix at y-th line (counting from the bottom).
|
|
|
|
|
template <MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE int8_t InitPrefix(int8_t y)
|
|
|
|
|
{
|
|
|
|
|
return InitialPrefix<Mask> + PrefixIncrement<Mask> * y;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Lighting level of a tile; selects which line renderer specialization is used. */
enum class LightType : uint8_t {
	/** Rendered through the `FullyDark` specializations, which ignore the source pixels. */
	FullyDark = 0,
	/** Rendered through the `PartiallyLit` specializations, which use the light table. */
	PartiallyLit = 1,
	/** Rendered through the `FullyLit` specializations, which ignore the light table. */
	FullyLit = 2,
};
|
|
|
|
|
|
|
|
|
|
template <LightType Light>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl);
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::FullyDark>(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
BlitFillDirect(dst, n, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::FullyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
#ifndef DEBUG_RENDER_COLOR
|
|
|
|
|
BlitPixelsDirect(dst, src, n);
|
|
|
|
|
#else
|
|
|
|
|
BlitFillDirect(dst, n, DBGCOLOR);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineOpaque<LightType::PartiallyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
#ifndef DEBUG_RENDER_COLOR
|
|
|
|
|
BlitPixelsWithMap(dst, src, n, tbl);
|
|
|
|
|
#else
|
|
|
|
|
BlitFillDirect(dst, n, tbl[DBGCOLOR]);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifndef DEBUG_RENDER_COLOR
|
|
|
|
|
template <LightType Light>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl);
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::FullyDark>(uint8_t *DVL_RESTRICT dst, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
BlitFillBlended(dst, n, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::FullyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
BlitPixelsBlended(dst, src, n);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent<LightType::PartiallyLit>(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
BlitPixelsBlendedWithMap(dst, src, n, tbl);
|
|
|
|
|
}
|
|
|
|
|
#else // DEBUG_RENDER_COLOR
|
|
|
|
|
template <LightType Light>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparent(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
|
dst[i] = paletteTransparencyLookup[dst[i]][tbl[DBGCOLOR + 4]];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentOrOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (Transparent) {
|
|
|
|
|
RenderLineTransparent<Light>(dst, src, width, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLineOpaque<Light>(dst, src, width, tbl);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool OpaqueFirst>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLineTransparentAndOpaque(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t prefixWidth, uint_fast8_t width, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (OpaqueFirst) {
|
|
|
|
|
RenderLineOpaque<Light>(dst, src, prefixWidth, tbl);
|
|
|
|
|
RenderLineTransparent<Light>(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLineTransparent<Light>(dst, src, prefixWidth, tbl);
|
|
|
|
|
RenderLineOpaque<Light>(dst + prefixWidth, src + prefixWidth, width - prefixWidth, tbl);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLine(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, uint_fast8_t n, const uint8_t *DVL_RESTRICT tbl, int8_t prefix)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (Mask == MaskType::Solid || Mask == MaskType::Transparent) {
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, src, n, tbl);
|
|
|
|
|
} else if (prefix >= static_cast<int8_t>(n)) {
|
|
|
|
|
// We std::clamp the prefix to (0, n] and avoid calling `RenderLineTransparent/Opaque` with width=0.
|
|
|
|
|
if constexpr (Mask == MaskType::Right) {
|
|
|
|
|
RenderLineOpaque<Light>(dst, src, n, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLineTransparent<Light>(dst, src, n, tbl);
|
|
|
|
|
}
|
|
|
|
|
} else if (prefix <= 0) {
|
|
|
|
|
if constexpr (Mask == MaskType::Left) {
|
|
|
|
|
RenderLineOpaque<Light>(dst, src, n, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLineTransparent<Light>(dst, src, n, tbl);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
RenderLineTransparentAndOpaque<Light, /*OpaqueFirst=*/Mask == MaskType::Right>(dst, src, prefix, n, tbl);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Describes how much of a sprite is cut off on each side and what remains, as computed by `CalculateClip`. */
struct Clip {
	/** Number of rows cut off at the top. */
	int_fast16_t top;
	/** Number of rows cut off at the bottom. */
	int_fast16_t bottom;
	/** Number of columns cut off on the left. */
	int_fast16_t left;
	/** Number of columns cut off on the right. */
	int_fast16_t right;
	/** Columns remaining after removing `left` and `right`. */
	int_fast16_t width;
	/** Rows remaining after removing `top` and `bottom`. */
	int_fast16_t height;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Computes how a `w` x `h` sprite has to be clipped to fit `out`.
 *
 * `y` is the row of the sprite's bottom line, so the sprite covers the rows
 * `y - h + 1` to `y`; `x` is its left-most column.
 *
 * @param x Left-most column of the sprite.
 * @param y Bottom-most row of the sprite.
 * @param w Sprite width.
 * @param h Sprite height.
 * @param out Target surface supplying the bounds via `w()` and `h()`.
 * @return Rows/columns cut off on each side plus the remaining width and height.
 */
DVL_ALWAYS_INLINE Clip CalculateClip(int_fast16_t x, int_fast16_t y, int_fast16_t w, int_fast16_t h, const Surface &out)
{
	Clip result {};

	// Rows that would end up above row 0.
	if (y + 1 < h)
		result.top = h - (y + 1);

	// Rows that would end up below the last surface row.
	if (y + 1 > out.h())
		result.bottom = (y + 1) - out.h();

	// Columns left of column 0.
	if (x < 0)
		result.left = -x;

	// Columns past the last surface column.
	if (x + w > out.w())
		result.right = x + w - out.w();

	result.width = w - result.left - result.right;
	result.height = h - result.top - result.bottom;
	return result;
}
|
|
|
|
|
|
|
|
|
|
/** Returns whether `tbl` is the `FullyDarkLightTable` itself (address comparison, not content). */
DVL_ALWAYS_INLINE bool IsFullyDark(const uint8_t *DVL_RESTRICT tbl)
{
	const bool sameTable = (FullyDarkLightTable == tbl);
	return sameTable;
}
|
|
|
|
|
|
|
|
|
|
/** Returns whether `tbl` is the `FullyLitLightTable` itself (address comparison, not content). */
DVL_ALWAYS_INLINE bool IsFullyLit(const uint8_t *DVL_RESTRICT tbl)
{
	const bool sameTable = (FullyLitLightTable == tbl);
	return sameTable;
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
for (auto i = 0; i < Height; ++i, dst -= dstPitch) {
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, Width, tbl);
|
|
|
|
|
src += Width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareClipped(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
src += clip.bottom * Height + clip.left;
|
|
|
|
|
for (auto i = 0; i < clip.height; ++i, dst -= dstPitch) {
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, clip.width, tbl);
|
|
|
|
|
src += Width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquare(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (clip.width == Width && clip.height == Height) {
|
|
|
|
|
RenderSquareFull<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderSquareClipped<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
Simplify foliage rendering
During `ReencodeDungeonCels`, extracts floor tile foliage into
a triangle with the floor frame and a separate 16-px tall `TransparentSquare`.
This means that the floor frames are now always triangles and
the foliage can be rendered directly without masking.
Dungeon graphics sizes:
Map | Frames | Foliage frames | Byte size | Before PR | After PR
-----|--------|---------------:|-----------:|----------:|----------:
Town | 3,803 | 41 | 2,317,832 | 2,242,056 | 2,242,190
L1 | 1,119 | 11 | 738,836 | 721,604 | 721,110
L4 | 1,091 | 6 | 603,140 | 584,500 | 584,242
RG99 binary size reduced by ~4 KiB: 2,426,380 bytes -> 2,421,388 bytes
2 years ago
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, unsigned height)
|
|
|
|
|
{
|
|
|
|
|
int8_t prefix = InitialPrefix<Mask>;
|
Simplify foliage rendering
During `ReencodeDungeonCels`, extracts floor tile foliage into
a triangle with the floor frame and a separate 16-px tall `TransparentSquare`.
This means that the floor frames are now always triangles and
the foliage can be rendered directly without masking.
Dungeon graphics sizes:
Map | Frames | Foliage frames | Byte size | Before PR | After PR
-----|--------|---------------:|-----------:|----------:|----------:
Town | 3,803 | 41 | 2,317,832 | 2,242,056 | 2,242,190
L1 | 1,119 | 11 | 738,836 | 721,604 | 721,110
L4 | 1,091 | 6 | 603,140 | 584,500 | 584,242
RG99 binary size reduced by ~4 KiB: 2,426,380 bytes -> 2,421,388 bytes
2 years ago
|
|
|
DVL_ASSUME(height >= 16);
|
|
|
|
|
DVL_ASSUME(height <= 32);
|
|
|
|
|
for (unsigned i = 0; i < height; ++i, dst -= dstPitch + Width) {
|
|
|
|
|
uint_fast8_t drawWidth = Width;
|
|
|
|
|
while (drawWidth > 0) {
|
|
|
|
|
auto v = static_cast<int8_t>(*src++);
|
|
|
|
|
if (v > 0) {
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, v, tbl, prefix - (Width - drawWidth));
|
|
|
|
|
src += v;
|
|
|
|
|
} else {
|
|
|
|
|
v = -v;
|
|
|
|
|
}
|
|
|
|
|
dst += v;
|
|
|
|
|
drawWidth -= v;
|
|
|
|
|
}
|
|
|
|
|
prefix += PrefixIncrement<Mask>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
// NOLINTNEXTLINE(readability-function-cognitive-complexity): Actually complex and has to be fast.
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquareClipped(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const auto skipRestOfTheLine = [&src](int_fast16_t remainingWidth) {
|
|
|
|
|
while (remainingWidth > 0) {
|
|
|
|
|
const auto v = static_cast<int8_t>(*src++);
|
|
|
|
|
if (v > 0) {
|
|
|
|
|
src += v;
|
|
|
|
|
remainingWidth -= v;
|
|
|
|
|
} else {
|
|
|
|
|
remainingWidth -= -v;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert(remainingWidth == 0);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Skip the bottom clipped lines.
|
|
|
|
|
for (auto i = 0; i < clip.bottom; ++i) {
|
|
|
|
|
skipRestOfTheLine(Width);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int8_t prefix = InitPrefix<Mask>(clip.bottom);
|
|
|
|
|
for (auto i = 0; i < clip.height; ++i, dst -= dstPitch + clip.width) {
|
|
|
|
|
auto drawWidth = clip.width;
|
|
|
|
|
|
|
|
|
|
// Skip initial src if clipping on the left.
|
|
|
|
|
// Handles overshoot, i.e. when the RLE segment goes into the unclipped area.
|
|
|
|
|
auto remainingLeftClip = clip.left;
|
|
|
|
|
while (remainingLeftClip > 0) {
|
|
|
|
|
auto v = static_cast<int8_t>(*src++);
|
|
|
|
|
if (v > 0) {
|
|
|
|
|
if (v > remainingLeftClip) {
|
|
|
|
|
const auto overshoot = v - remainingLeftClip;
|
|
|
|
|
RenderLine<Light, Mask>(dst, src + remainingLeftClip, overshoot, tbl, prefix - (Width - remainingLeftClip));
|
|
|
|
|
dst += overshoot;
|
|
|
|
|
drawWidth -= overshoot;
|
|
|
|
|
}
|
|
|
|
|
src += v;
|
|
|
|
|
} else {
|
|
|
|
|
v = -v;
|
|
|
|
|
if (v > remainingLeftClip) {
|
|
|
|
|
const auto overshoot = v - remainingLeftClip;
|
|
|
|
|
dst += overshoot;
|
|
|
|
|
drawWidth -= overshoot;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
remainingLeftClip -= v;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Draw the non-clipped segment
|
|
|
|
|
while (drawWidth > 0) {
|
|
|
|
|
auto v = static_cast<int8_t>(*src++);
|
|
|
|
|
if (v > 0) {
|
|
|
|
|
if (v > drawWidth) {
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, drawWidth, tbl, prefix - (Width - drawWidth));
|
|
|
|
|
src += v;
|
|
|
|
|
dst += drawWidth;
|
|
|
|
|
drawWidth -= v;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, v, tbl, prefix - (Width - drawWidth));
|
|
|
|
|
src += v;
|
|
|
|
|
} else {
|
|
|
|
|
v = -v;
|
|
|
|
|
if (v > drawWidth) {
|
|
|
|
|
dst += drawWidth;
|
|
|
|
|
drawWidth -= v;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
dst += v;
|
|
|
|
|
drawWidth -= v;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Skip the rest of src line if clipping on the right
|
|
|
|
|
assert(drawWidth <= 0);
|
|
|
|
|
skipRestOfTheLine(clip.right + drawWidth);
|
|
|
|
|
prefix += PrefixIncrement<Mask>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTransparentSquare(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
Simplify foliage rendering
During `ReencodeDungeonCels`, extracts floor tile foliage into
a triangle with the floor frame and a separate 16-px tall `TransparentSquare`.
This means that the floor frames are now always triangles and
the foliage can be rendered directly without masking.
Dungeon graphics sizes:
Map | Frames | Foliage frames | Byte size | Before PR | After PR
-----|--------|---------------:|-----------:|----------:|----------:
Town | 3,803 | 41 | 2,317,832 | 2,242,056 | 2,242,190
L1 | 1,119 | 11 | 738,836 | 721,604 | 721,110
L4 | 1,091 | 6 | 603,140 | 584,500 | 584,242
RG99 binary size reduced by ~4 KiB: 2,426,380 bytes -> 2,421,388 bytes
2 years ago
|
|
|
if (clip.width == Width && clip.bottom == 0 && clip.top == 0) {
|
|
|
|
|
RenderTransparentSquareFull<Light, Mask>(dst, dstPitch, src, tbl, clip.height);
|
|
|
|
|
} else {
|
|
|
|
|
RenderTransparentSquareClipped<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Vertical clip of a diamond tile (L/RTRIANGLE), split between its
 * lower and upper triangles.
 */
struct DiamondClipY {
	/** Lines clipped from the bottom of the lower triangle. */
	int_fast16_t lowerBottom;
	/** Lines clipped from the top of the lower triangle. */
	int_fast16_t lowerTop;
	/** Lines clipped from the bottom of the upper triangle. */
	int_fast16_t upperBottom;
	/** Lines clipped from the top of the upper triangle. */
	int_fast16_t upperTop;
};
|
|
|
|
|
|
|
|
|
|
template <int_fast16_t UpperHeight = TriangleUpperHeight>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT DiamondClipY CalculateDiamondClipY(const Clip &clip)
|
|
|
|
|
{
|
|
|
|
|
DiamondClipY result;
|
|
|
|
|
if (clip.bottom > LowerHeight) {
|
|
|
|
|
result.lowerBottom = LowerHeight;
|
|
|
|
|
result.upperBottom = clip.bottom - LowerHeight;
|
|
|
|
|
result.lowerTop = result.upperTop = 0;
|
|
|
|
|
} else if (clip.top > UpperHeight) {
|
|
|
|
|
result.upperTop = UpperHeight;
|
|
|
|
|
result.lowerTop = clip.top - UpperHeight;
|
|
|
|
|
result.upperBottom = result.lowerBottom = 0;
|
|
|
|
|
} else {
|
|
|
|
|
result.upperTop = clip.top;
|
|
|
|
|
result.lowerBottom = clip.bottom;
|
|
|
|
|
result.lowerTop = result.upperBottom = 0;
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the number of source bytes taken by the bottom `numLines` lines
 * of a lower triangle.
 *
 * The result is `XStep * (1 + 2 + ... + numLines)`, i.e. line `k` (1-based)
 * contributes `XStep * k` bytes.
 */
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT std::size_t CalculateTriangleSourceSkipLowerBottom(int_fast16_t numLines)
{
	// Twice the arithmetic series; halved on return.
	const auto twiceTotal = XStep * numLines * (numLines + 1);
	return twiceTotal / 2;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the number of source bytes taken by the bottom `numLines` lines
 * of an upper triangle.
 *
 * The result is `numLines` lines of `2 * TriangleUpperHeight` bytes each,
 * reduced by `numLines * (numLines - 1)`; this matches lines that start at
 * `2 * TriangleUpperHeight` bytes and shrink by 2 bytes per line.
 */
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT std::size_t CalculateTriangleSourceSkipUpperBottom(int_fast16_t numLines)
{
	const auto withoutShrinking = 2 * TriangleUpperHeight * numLines;
	const auto shrinkTotal = numLines * (numLines - 1);
	return withoutShrinking - shrinkTotal;
}
|
|
|
|
|
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The `FullyDark` version is twice as slow, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleLower(uint8_t *DVL_RESTRICT &dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 0 * dstLineOffset, src + 0, 2, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 1 * dstLineOffset, src + 2, 4, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 2 * dstLineOffset, src + 6, 6, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 3 * dstLineOffset, src + 12, 8, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 4 * dstLineOffset, src + 20, 10, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 5 * dstLineOffset, src + 30, 12, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 6 * dstLineOffset, src + 42, 14, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 7 * dstLineOffset, src + 56, 16, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 8 * dstLineOffset, src + 72, 18, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 9 * dstLineOffset, src + 90, 20, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 10 * dstLineOffset, src + 110, 22, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 11 * dstLineOffset, src + 132, 24, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 12 * dstLineOffset, src + 156, 26, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 13 * dstLineOffset, src + 182, 28, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 14 * dstLineOffset, src + 210, 30, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 15 * dstLineOffset, src + 240, 32, tbl);
|
|
|
|
|
src += 272;
|
|
|
|
|
dst -= 16 * dstLineOffset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleLower<LightType::FullyDark, /*Transparent=*/false>(uint8_t *DVL_RESTRICT &dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT &src, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
unsigned width = XStep;
|
|
|
|
|
for (unsigned i = 0; i < LowerHeight; ++i) {
|
|
|
|
|
BlitFillDirect(dst, width, 0);
|
|
|
|
|
dst -= dstLineOffset;
|
|
|
|
|
width += XStep;
|
|
|
|
|
}
|
|
|
|
|
src += 272;
|
|
|
|
|
}
|
|
|
|
|
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The solid (non-transparent) `FullyDark` triangle and trapezoid calls are roughly 1.7-3x slower, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleUpper(uint8_t *DVL_RESTRICT dst, ptrdiff_t dstLineOffset, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 0 * dstLineOffset, src + 0, 30, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 1 * dstLineOffset, src + 30, 28, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 2 * dstLineOffset, src + 58, 26, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 3 * dstLineOffset, src + 84, 24, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 4 * dstLineOffset, src + 108, 22, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 5 * dstLineOffset, src + 130, 20, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 6 * dstLineOffset, src + 150, 18, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 7 * dstLineOffset, src + 168, 16, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 8 * dstLineOffset, src + 184, 14, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 9 * dstLineOffset, src + 198, 12, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 10 * dstLineOffset, src + 210, 10, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 11 * dstLineOffset, src + 220, 8, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 12 * dstLineOffset, src + 228, 6, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 13 * dstLineOffset, src + 234, 4, tbl);
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst - 14 * dstLineOffset, src + 238, 2, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTriangleUpper<LightType::FullyDark, /*Transparent=*/false>(uint8_t *DVL_RESTRICT dst, ptrdiff_t dstLineOffset, [[maybe_unused]] const uint8_t *DVL_RESTRICT src, [[maybe_unused]] const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
unsigned width = Width - XStep;
|
|
|
|
|
for (unsigned i = 0; i < TriangleUpperHeight; ++i) {
|
|
|
|
|
BlitFillDirect(dst, width, 0);
|
|
|
|
|
dst -= dstLineOffset;
|
|
|
|
|
width -= XStep;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLower(uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
dst += XStep * (LowerHeight - 1);
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The solid (non-transparent) `FullyDark` triangle and trapezoid calls are roughly 1.7-3x slower, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
RenderTriangleLower<Light, Transparent>(dst, dstPitch + XStep, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipVertical(const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
dst += XStep * (LowerHeight - clipY.lowerBottom - 1) - clipLeft;
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
const auto startX = Width - XStep * i;
|
|
|
|
|
const auto skip = startX < clipLeft ? clipLeft - startX : 0;
|
|
|
|
|
if (width > skip)
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst + skip, src + skip, width - skip, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleLowerClipRightAndVertical(int_fast16_t clipRight, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
if (width > clipRight)
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width - clipRight, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderLeftTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
dst += 2 * XStep;
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The `FullyDark` version is twice as slow, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
RenderTriangleUpper<Light, Transparent>(dst, dstPitch - XStep, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
RenderLeftTriangleLowerClipVertical<Light, Transparent>(clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
dst += 2 * XStep + XStep * clipY.upperBottom;
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
const int_fast16_t clipLeft = clip.left;
|
|
|
|
|
RenderLeftTriangleLowerClipLeftAndVertical<Light, Transparent>(clipLeft, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
dst += 2 * XStep + XStep * clipY.upperBottom;
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
const auto startX = XStep * i;
|
|
|
|
|
const auto skip = startX < clipLeft ? clipLeft - startX : 0;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst + skip, src + skip, width > skip ? width - skip : 0, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
const int_fast16_t clipRight = clip.right;
|
|
|
|
|
RenderLeftTriangleLowerClipRightAndVertical<Light, Transparent>(clipRight, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
dst += 2 * XStep + XStep * clipY.upperBottom;
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
if (width <= clipRight)
|
|
|
|
|
break;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width - clipRight, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTriangle(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (clip.width == Width) {
|
|
|
|
|
if (clip.height == TriangleHeight) {
|
|
|
|
|
RenderLeftTriangleFull<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLeftTriangleClipVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
} else if (clip.right == 0) {
|
|
|
|
|
RenderLeftTriangleClipLeftAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLeftTriangleClipRightAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLower(uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The `FullyDark` version is twice as slow, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
RenderTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipVertical(const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipLeftAndVertical(int_fast16_t clipLeft, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
if (width > clipLeft)
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src + clipLeft, width - clipLeft, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleLowerClipRightAndVertical(int_fast16_t clipRight, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT &dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT &src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
src += CalculateTriangleSourceSkipLowerBottom(clipY.lowerBottom);
|
|
|
|
|
const auto lowerMax = LowerHeight - clipY.lowerTop;
|
|
|
|
|
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = XStep * i;
|
|
|
|
|
const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0;
|
|
|
|
|
if (width > skip)
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width - skip, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderRightTriangleLower<Light, Transparent>(dst, dstPitch, src, tbl);
|
dun_render: Unroll triangle loops
Rather than relying on the compiler to do it, which doesn't always
happen, we do it by hand.
Previously, very slightly different versions of the code could result in
those loops not being unrolled (such as in the current master).
I've run the benchmark like this:
```bash
BASELINE=dun-benchmark
BENCHMARK=dun_render_benchmark
git checkout "$BASELINE"
tools/build_and_run_benchmark.py -B "build-reld-${BASELINE}" --no-run "$BENCHMARK"
git checkout -
tools/build_and_run_benchmark.py --no-run "$BENCHMARK"
tools/linux_reduced_cpu_variance_run.sh ~/google-benchmark/tools/compare.py -a benchmarks \
"build-reld-${BASELINE}/${BENCHMARK}" "build-reld/${BENCHMARK}" \
--benchmark_repetitions=10
```
Benchmark results are below.
The `FullyLit` calls are ~55% faster.
The `PartiallyLit` calls are ~40% faster.
The `FullyDark` version is twice as slow, which is surprising.
I have a separate idea about eliminating most of the `FullyDark` calls
entirely.
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-------------------------------------------------------------------------------------------------------------------------------------------------------------------
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_mean -0.5634 -0.5634 188036 82095 188014 82080
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_median -0.5601 -0.5601 186905 82218 186884 82202
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_stddev -0.6274 -0.6274 61414 22883 61402 22879
DunRenderBenchmark/LeftTriangle_Solid_FullyLit_cv -0.1465 -0.1465 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_pvalue 0.0013 0.0013 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_mean +0.9781 +0.9781 238021 470823 237988 470757
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_median +0.9733 +0.9733 238086 469812 238050 469752
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_stddev +0.9791 +0.9790 74987 148403 74978 148383
DunRenderBenchmark/LeftTriangle_Solid_FullyDark_cv +0.0005 +0.0005 0 0 0 0
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_pvalue 0.0113 0.0113 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_mean -0.3891 -0.3891 984208 601272 984080 601180
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_median -0.3931 -0.3931 980791 595272 980682 595188
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_stddev -0.3950 -0.3950 316631 191562 316575 191533
DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit_cv -0.0097 -0.0096 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_pvalue 0.6776 0.6776 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_mean -0.0802 -0.0802 995379 915591 995236 915450
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_median -0.0816 -0.0817 994452 913256 994299 913112
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_stddev -0.0916 -0.0915 319114 289898 319065 289864
DunRenderBenchmark/LeftTriangle_Transparent_FullyLit_cv -0.0124 -0.0123 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_mean -0.3875 -0.3875 1001347 613375 1001222 613282
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_median -0.3830 -0.3830 998801 616295 998628 616194
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_stddev -0.3901 -0.3901 320998 195778 320963 195747
DunRenderBenchmark/LeftTriangle_Transparent_FullyDark_cv -0.0043 -0.0043 0 0 0 0
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_mean +0.0046 +0.0046 1014824 1019541 1014707 1019386
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_median +0.0032 +0.0032 1015895 1019161 1015801 1019001
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_stddev +0.0037 +0.0036 322403 323590 322370 323538
DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit_cv -0.0010 -0.0010 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyLit_pvalue 0.0006 0.0006 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyLit_mean -0.5523 -0.5524 176362 78951 176340 78938
DunRenderBenchmark/RightTriangle_Solid_FullyLit_median -0.5514 -0.5514 175837 78875 175814 78865
DunRenderBenchmark/RightTriangle_Solid_FullyLit_stddev -0.5884 -0.5884 55614 22891 55607 22888
DunRenderBenchmark/RightTriangle_Solid_FullyLit_cv -0.0805 -0.0805 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_FullyDark_mean +1.9896 +1.9896 161841 483845 161819 483767
DunRenderBenchmark/RightTriangle_Solid_FullyDark_median +1.9924 +1.9924 161742 483991 161715 483917
DunRenderBenchmark/RightTriangle_Solid_FullyDark_stddev +2.0012 +2.0010 51356 154129 51351 154107
DunRenderBenchmark/RightTriangle_Solid_FullyDark_cv +0.0039 +0.0038 0 0 0 0
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_mean -0.4247 -0.4247 1046216 601892 1046042 601809
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_median -0.4249 -0.4249 1053545 605870 1053349 605785
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_stddev -0.4174 -0.4174 330051 192291 330001 192261
DunRenderBenchmark/RightTriangle_Solid_PartiallyLit_cv +0.0127 +0.0127 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_pvalue 0.3847 0.3847 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_mean -0.1217 -0.1217 1050082 922274 1049928 922136
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_median -0.1226 -0.1226 1047556 919087 1047400 918953
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_stddev -0.0833 -0.0833 324195 297188 324156 297143
DunRenderBenchmark/RightTriangle_Transparent_FullyLit_cv +0.0437 +0.0437 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_mean -0.4253 -0.4253 1047974 602234 1047816 602137
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_median -0.4267 -0.4267 1051985 603146 1051802 603042
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_stddev -0.4276 -0.4276 334355 191387 334304 191363
DunRenderBenchmark/RightTriangle_Transparent_FullyDark_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_pvalue 0.7913 0.7913 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_mean -0.0137 -0.0138 1059054 1044492 1058910 1044325
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_median -0.0178 -0.0179 1058596 1039738 1058508 1039571
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_stddev +0.0031 +0.0031 337359 338411 337309 338362
DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit_cv +0.0171 +0.0171 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_pvalue 0.9097 0.9097 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_mean +0.0011 +0.0012 1493697 1495394 1493444 1495169
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_median -0.0017 -0.0017 1496838 1494238 1496604 1494029
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_stddev +0.0021 +0.0020 530878 531975 530805 531882
DunRenderBenchmark/TransparentSquare_Solid_FullyLit_cv +0.0009 +0.0009 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_mean -0.0204 -0.0204 1433093 1403907 1432881 1403668
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_median -0.0183 -0.0183 1429205 1403083 1428989 1402880
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_stddev -0.0096 -0.0097 505699 500858 505631 500743
DunRenderBenchmark/TransparentSquare_Solid_FullyDark_cv +0.0110 +0.0109 0 0 0 0
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_pvalue 0.0539 0.0539 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_mean -0.3176 -0.3176 2382675 1626018 2382333 1625760
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_median -0.3198 -0.3198 2403842 1635148 2403483 1634913
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_stddev -0.3148 -0.3149 827423 566929 827319 566830
DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit_cv +0.0040 +0.0040 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_pvalue 0.5708 0.5708 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_mean -0.0873 -0.0873 2175867 1985983 2175589 1985685
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_median -0.0631 -0.0631 2157339 2021156 2157036 2020874
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_stddev -0.1511 -0.1511 793554 673670 793451 673555
DunRenderBenchmark/TransparentSquare_Transparent_FullyLit_cv -0.0699 -0.0699 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_pvalue 0.0757 0.0757 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_mean -0.2913 -0.2913 2168904 1537149 2168665 1536932
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_median -0.2906 -0.2906 2169825 1539371 2169553 1539149
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_stddev -0.2952 -0.2952 773577 545208 773499 545150
DunRenderBenchmark/TransparentSquare_Transparent_FullyDark_cv -0.0055 -0.0055 0 0 0 0
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_mean -0.0228 -0.0227 2215243 2164785 2214801 2164528
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_median -0.0105 -0.0105 2206971 2183841 2206650 2183583
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_stddev -0.0493 -0.0492 794968 755773 794792 755680
DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit_cv -0.0271 -0.0271 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyLit_pvalue 0.4274 0.4274 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyLit_mean -0.1018 -0.1018 106111 95306 106093 95293
DunRenderBenchmark/Square_Solid_FullyLit_median -0.1039 -0.1039 106890 95786 106879 95776
DunRenderBenchmark/Square_Solid_FullyLit_stddev -0.1415 -0.1414 31405 26959 31394 26956
DunRenderBenchmark/Square_Solid_FullyLit_cv -0.0442 -0.0440 0 0 0 0
DunRenderBenchmark/Square_Solid_FullyDark_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_FullyDark_mean -0.0624 -0.0623 76148 71400 76137 71390
DunRenderBenchmark/Square_Solid_FullyDark_median -0.0533 -0.0533 75419 71398 75409 71388
DunRenderBenchmark/Square_Solid_FullyDark_stddev -0.1158 -0.1158 23287 20590 23284 20587
DunRenderBenchmark/Square_Solid_FullyDark_cv -0.0570 -0.0570 0 0 0 0
DunRenderBenchmark/Square_Solid_PartiallyLit_pvalue 0.0140 0.0140 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Solid_PartiallyLit_mean -0.4191 -0.4191 1856509 1078451 1856141 1078317
DunRenderBenchmark/Square_Solid_PartiallyLit_median -0.4220 -0.4220 1865303 1078075 1864830 1077921
DunRenderBenchmark/Square_Solid_PartiallyLit_stddev -0.4194 -0.4193 661581 384134 661390 384090
DunRenderBenchmark/Square_Solid_PartiallyLit_cv -0.0005 -0.0004 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyLit_mean -0.0599 -0.0599 1777629 1671062 1777373 1670858
DunRenderBenchmark/Square_Transparent_FullyLit_median -0.0599 -0.0599 1777353 1670891 1777080 1670709
DunRenderBenchmark/Square_Transparent_FullyLit_stddev -0.0605 -0.0605 633282 594964 633184 594903
DunRenderBenchmark/Square_Transparent_FullyLit_cv -0.0006 -0.0006 0 0 0 0
DunRenderBenchmark/Square_Transparent_FullyDark_pvalue 0.0211 0.0211 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_FullyDark_mean -0.3763 -0.3762 1772392 1105501 1772116 1105383
DunRenderBenchmark/Square_Transparent_FullyDark_median -0.3766 -0.3766 1772282 1104753 1772089 1104637
DunRenderBenchmark/Square_Transparent_FullyDark_stddev -0.4402 -0.4402 631014 353260 630951 353231
DunRenderBenchmark/Square_Transparent_FullyDark_cv -0.1025 -0.1025 0 0 0 0
DunRenderBenchmark/Square_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/Square_Transparent_PartiallyLit_mean -0.0272 -0.0271 1970857 1917285 1970523 1917053
DunRenderBenchmark/Square_Transparent_PartiallyLit_median -0.0268 -0.0267 1970258 1917452 1969801 1917178
DunRenderBenchmark/Square_Transparent_PartiallyLit_stddev -0.0294 -0.0293 703415 682714 703280 682642
DunRenderBenchmark/Square_Transparent_PartiallyLit_cv -0.0023 -0.0023 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_pvalue 0.0004 0.0004 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_mean -0.5359 -0.5359 59253 27500 59242 27497
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_median -0.5336 -0.5336 58982 27509 58975 27506
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_stddev -0.5374 -0.5373 17150 7933 17144 7932
DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit_cv -0.0034 -0.0032 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_mean +0.7013 +0.7013 56973 96926 56963 96913
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_median +0.6907 +0.6908 57326 96921 57317 96909
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_stddev +0.8879 +0.8880 16361 30888 16358 30885
DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark_cv +0.1097 +0.1098 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_mean -0.3988 -0.3988 511666 307603 511589 307567
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_median -0.3993 -0.3993 511913 307523 511851 307491
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_stddev -0.3971 -0.3971 162799 98150 162774 98139
DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit_cv +0.0028 +0.0029 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_pvalue 0.6232 0.6232 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_mean -0.0820 -0.0820 514358 472170 514290 472112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_median -0.0821 -0.0821 514335 472090 514264 472018
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_stddev -0.0828 -0.0828 163969 150389 163949 150372
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit_cv -0.0009 -0.0009 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_pvalue 0.0058 0.0058 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_mean -0.4180 -0.4180 512755 298434 512692 298398
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_median -0.4187 -0.4187 513116 298290 513049 298258
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_stddev -0.4181 -0.4181 163460 95123 163439 95112
DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark_cv -0.0001 -0.0001 0 0 0 0
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_mean +0.0149 +0.0149 544744 552873 544676 552807
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_median +0.0167 +0.0167 544823 553899 544761 553834
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_stddev +0.0110 +0.0110 173683 175592 173672 175580
DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit_cv -0.0039 -0.0039 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_pvalue 0.0010 0.0010 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_mean -0.4654 -0.4654 51804 27693 51798 27690
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_median -0.4648 -0.4648 51787 27718 51779 27713
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_stddev -0.4702 -0.4702 14885 7886 14884 7885
DunRenderBenchmark/RightTrapezoid_Solid_FullyLit_cv -0.0090 -0.0090 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_mean +1.3529 +1.3529 43904 103301 43898 103287
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_median +1.3371 +1.3372 44199 103297 44192 103285
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_stddev +1.4168 +1.4167 12350 29848 12349 29843
DunRenderBenchmark/RightTrapezoid_Solid_FullyDark_cv +0.0272 +0.0271 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_pvalue 0.0091 0.0091 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_mean -0.3977 -0.3977 493458 297225 493385 297188
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_median -0.3973 -0.3973 493440 297377 493364 297342
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_stddev -0.3965 -0.3965 157323 94947 157300 94934
DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit_cv +0.0020 +0.0020 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_mean -0.0668 -0.0668 501015 467552 500947 467496
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_median -0.0661 -0.0661 500672 467557 500607 467504
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_stddev -0.0641 -0.0642 159233 149019 159215 148998
DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit_cv +0.0028 +0.0028 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_pvalue 0.0046 0.0046 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_mean -0.4295 -0.4295 494445 282076 494382 282043
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_median -0.4300 -0.4300 494497 281875 494434 281841
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_stddev -0.4287 -0.4287 157383 89911 157370 89902
DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark_cv +0.0014 +0.0014 0 0 0 0
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_pvalue 0.7337 0.7337 U Test, Repetitions: 10 vs 10
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_mean -0.0574 -0.0573 556031 524113 555924 524044
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_median -0.0758 -0.0757 565940 523059 565802 522984
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_stddev -0.1094 -0.1093 186798 166360 186758 166346
DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit_cv -0.0552 -0.0551 0 0 0 0
OVERALL_GEOMEAN -0.1671 -0.1671 0 0 0 0
```
2 years ago
|
|
|
RenderTriangleUpper<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
RenderRightTriangleLowerClipVertical<Light, Transparent>(clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
const int_fast16_t clipLeft = clip.left;
|
|
|
|
|
RenderRightTriangleLowerClipLeftAndVertical<Light, Transparent>(clipLeft, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
if (width <= clipLeft)
|
|
|
|
|
break;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src + clipLeft, width - clipLeft, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangleClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY(clip);
|
|
|
|
|
const int_fast16_t clipRight = clip.right;
|
|
|
|
|
RenderRightTriangleLowerClipRightAndVertical<Light, Transparent>(clipRight, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += CalculateTriangleSourceSkipUpperBottom(clipY.upperBottom);
|
|
|
|
|
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
const auto width = Width - XStep * i;
|
|
|
|
|
const auto skip = Width - width < clipRight ? clipRight - (Width - width) : 0;
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, Transparent>(dst, src, width > skip ? width - skip : 0, tbl);
|
|
|
|
|
src += width;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTriangle(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (clip.width == Width) {
|
|
|
|
|
if (clip.height == TriangleHeight) {
|
|
|
|
|
RenderRightTriangleFull<Light, Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderRightTriangleClipVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
} else if (clip.right == 0) {
|
|
|
|
|
RenderRightTriangleClipLeftAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderRightTriangleClipRightAndVertical<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalf(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
if constexpr (Mask == MaskType::Left || Mask == MaskType::Right) {
|
|
|
|
|
// The first line is always fully opaque.
|
|
|
|
|
// We handle it specially to avoid calling the blitter with width=0.
|
|
|
|
|
const uint8_t *srcEnd = src + Width * TrapezoidUpperHeight;
|
|
|
|
|
RenderLineOpaque<Light>(dst, src, Width, tbl);
|
|
|
|
|
src += Width;
|
|
|
|
|
dst -= dstPitch;
|
|
|
|
|
uint8_t prefixWidth = (PrefixIncrement<Mask> < 0 ? 32 : 0) + PrefixIncrement<Mask>;
|
|
|
|
|
do {
|
|
|
|
|
RenderLineTransparentAndOpaque<Light, /*OpaqueFirst=*/Mask == MaskType::Right>(dst, src, prefixWidth, Width, tbl);
|
|
|
|
|
prefixWidth += PrefixIncrement<Mask>;
|
|
|
|
|
src += Width;
|
|
|
|
|
dst -= dstPitch;
|
|
|
|
|
} while (src != srcEnd);
|
|
|
|
|
} else { // Mask == MaskType::Solid || Mask == MaskType::Transparent
|
|
|
|
|
const uint8_t *srcEnd = src + Width * TrapezoidUpperHeight;
|
|
|
|
|
do {
|
|
|
|
|
RenderLineTransparentOrOpaque<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, src, Width, tbl);
|
|
|
|
|
src += Width;
|
|
|
|
|
dst -= dstPitch;
|
|
|
|
|
} while (src != srcEnd);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
const auto upperMax = TrapezoidUpperHeight - clipY.upperTop;
|
|
|
|
|
int8_t prefix = InitPrefix<Mask>(clip.bottom);
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, Width, tbl, prefix);
|
|
|
|
|
src += Width;
|
|
|
|
|
prefix += PrefixIncrement<Mask>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipLeftAndVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
const auto upperMax = TrapezoidUpperHeight - clipY.upperTop;
|
|
|
|
|
int8_t prefix = InitPrefix<Mask>(clip.bottom);
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, clip.width, tbl, prefix - clip.left);
|
|
|
|
|
src += Width;
|
|
|
|
|
prefix += PrefixIncrement<Mask>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTrapezoidUpperHalfClipRightAndVertical(const Clip &clip, const DiamondClipY &clipY, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
const auto upperMax = TrapezoidUpperHeight - clipY.upperTop;
|
|
|
|
|
int8_t prefix = InitPrefix<Mask>(clip.bottom);
|
|
|
|
|
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch) {
|
|
|
|
|
RenderLine<Light, Mask>(dst, src, clip.width, tbl, prefix);
|
|
|
|
|
src += Width;
|
|
|
|
|
prefix += PrefixIncrement<Mask>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderLeftTriangleLower<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
dst += XStep;
|
|
|
|
|
RenderTrapezoidUpperHalf<Light, Mask>(dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderLeftTriangleLowerClipVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width;
|
|
|
|
|
dst += XStep;
|
|
|
|
|
RenderTrapezoidUpperHalfClipVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderLeftTriangleLowerClipLeftAndVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clip.left, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width + clip.left;
|
|
|
|
|
dst += XStep + clip.left;
|
|
|
|
|
RenderTrapezoidUpperHalfClipLeftAndVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderLeftTriangleLowerClipRightAndVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clip.right, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width;
|
|
|
|
|
dst += XStep;
|
|
|
|
|
RenderTrapezoidUpperHalfClipRightAndVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoid(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (clip.width == Width) {
|
|
|
|
|
if (clip.height == Height) {
|
|
|
|
|
RenderLeftTrapezoidFull<Light, Mask>(dst, dstPitch, src, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLeftTrapezoidClipVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
} else if (clip.right == 0) {
|
|
|
|
|
RenderLeftTrapezoidClipLeftAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLeftTrapezoidClipRightAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
|
|
|
|
|
{
|
|
|
|
|
RenderRightTriangleLower<Light, /*Transparent=*/Mask == MaskType::Transparent>(dst, dstPitch, src, tbl);
|
|
|
|
|
RenderTrapezoidUpperHalf<Light, Mask>(dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderRightTriangleLowerClipVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width;
|
|
|
|
|
RenderTrapezoidUpperHalfClipVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderRightTriangleLowerClipLeftAndVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clip.left, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width + clip.left;
|
|
|
|
|
RenderTrapezoidUpperHalfClipLeftAndVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
const DiamondClipY clipY = CalculateDiamondClipY<TrapezoidUpperHeight>(clip);
|
|
|
|
|
RenderRightTriangleLowerClipRightAndVertical<Light, /*Transparent=*/Mask == MaskType::Transparent>(clip.right, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
src += clipY.upperBottom * Width;
|
|
|
|
|
RenderTrapezoidUpperHalfClipRightAndVertical<Light, Mask>(clip, clipY, dst, dstPitch, src, tbl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (clip.width == Width) {
|
|
|
|
|
if (clip.height == Height) {
|
|
|
|
|
RenderRightTrapezoidFull<Light, Mask>(dst, dstPitch, src, tbl);
|
|
|
|
|
} else {
|
|
|
|
|
RenderRightTrapezoidClipVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
} else if (clip.right == 0) {
|
|
|
|
|
RenderRightTrapezoidClipLeftAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderRightTrapezoidClipRightAndVertical<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
switch (tile) {
|
|
|
|
|
case TileType::Square:
|
|
|
|
|
RenderSquare<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::TransparentSquare:
|
|
|
|
|
RenderTransparentSquare<Light, Transparent ? MaskType::Transparent : MaskType::Solid>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::LeftTriangle:
|
|
|
|
|
RenderLeftTriangle<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::RightTriangle:
|
|
|
|
|
RenderRightTriangle<Light, Transparent>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::LeftTrapezoid:
|
|
|
|
|
RenderLeftTrapezoid<Light, Transparent ? MaskType::Transparent : MaskType::Solid>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::RightTrapezoid:
|
|
|
|
|
RenderRightTrapezoid<Light, Transparent ? MaskType::Transparent : MaskType::Solid>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
switch (tile) {
|
|
|
|
|
case TileType::TransparentSquare:
|
|
|
|
|
RenderTransparentSquare<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::LeftTrapezoid:
|
|
|
|
|
RenderLeftTrapezoid<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <LightType Light, MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquare(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
switch (tile) {
|
|
|
|
|
case TileType::TransparentSquare:
|
|
|
|
|
RenderTransparentSquare<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
case TileType::RightTrapezoid:
|
|
|
|
|
RenderRightTrapezoid<Light, Mask>(dst, dstPitch, src, tbl, clip);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
app_fatal("Given mask can only be applied to TransparentSquare or LeftTrapezoid tiles");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderLeftTrapezoidOrTransparentSquareDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (IsFullyDark(tbl)) {
|
|
|
|
|
RenderLeftTrapezoidOrTransparentSquare<LightType::FullyDark, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else if (IsFullyLit(tbl)) {
|
|
|
|
|
RenderLeftTrapezoidOrTransparentSquare<LightType::FullyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderLeftTrapezoidOrTransparentSquare<LightType::PartiallyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <MaskType Mask>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquareDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (IsFullyDark(tbl)) {
|
|
|
|
|
RenderRightTrapezoidOrTransparentSquare<LightType::FullyDark, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else if (IsFullyLit(tbl)) {
|
|
|
|
|
RenderRightTrapezoidOrTransparentSquare<LightType::FullyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderRightTrapezoidOrTransparentSquare<LightType::PartiallyLit, Mask>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <bool Transparent>
|
|
|
|
|
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
|
|
|
|
|
{
|
|
|
|
|
if (IsFullyDark(tbl)) {
|
|
|
|
|
RenderTileType<LightType::FullyDark, Transparent>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else if (IsFullyLit(tbl)) {
|
|
|
|
|
RenderTileType<LightType::FullyLit, Transparent>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
} else {
|
|
|
|
|
RenderTileType<LightType::PartiallyLit, Transparent>(tile, dst, dstPitch, src, tbl, clip);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
#ifdef DUN_RENDER_STATS
|
|
|
|
|
// Render counters keyed by (tile type, mask type). `RenderTileFrame` increments
// the matching entry for every frame that is not entirely clipped away.
// Only compiled when DUN_RENDER_STATS is defined.
ankerl::unordered_dense::map<DunRenderType, size_t, DunRenderTypeHash> DunRenderStats;
|
|
|
|
|
|
|
|
|
|
std::string_view TileTypeToString(TileType tileType)
|
|
|
|
|
{
|
|
|
|
|
// clang-format off
|
|
|
|
|
switch (tileType) {
|
|
|
|
|
case TileType::Square: return "Square";
|
|
|
|
|
case TileType::TransparentSquare: return "TransparentSquare";
|
|
|
|
|
case TileType::LeftTriangle: return "LeftTriangle";
|
|
|
|
|
case TileType::RightTriangle: return "RightTriangle";
|
|
|
|
|
case TileType::LeftTrapezoid: return "LeftTrapezoid";
|
|
|
|
|
case TileType::RightTrapezoid: return "RightTrapezoid";
|
|
|
|
|
default: return "???";
|
|
|
|
|
}
|
|
|
|
|
// clang-format on
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string_view MaskTypeToString(MaskType maskType)
|
|
|
|
|
{
|
|
|
|
|
// clang-format off
|
|
|
|
|
switch (maskType) {
|
|
|
|
|
case MaskType::Solid: return "Solid";
|
|
|
|
|
case MaskType::Transparent: return "Transparent";
|
|
|
|
|
case MaskType::Right: return "Right";
|
|
|
|
|
case MaskType::Left: return "Left";
|
|
|
|
|
case MaskType::RightFoliage: return "RightFoliage";
|
|
|
|
|
case MaskType::LeftFoliage: return "LeftFoliage";
|
|
|
|
|
default: return "???";
|
|
|
|
|
}
|
|
|
|
|
// clang-format on
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
Simplify foliage rendering
During `ReencodeDungeonCels`, extracts floor tile foliage into
a triangle with the floor frame and a separate 16-px tall `TransparentSquare`.
This means that the floor frames are now always triangles and
the foliage can be rendered directly without masking.
Dungeon graphics sizes:
Map | Frames | Foliage frames | Byte size | Before PR | After PR
-----|--------|---------------:|-----------:|----------:|----------:
Town | 3,803 | 41 | 2,317,832 | 2,242,056 | 2,242,190
L1 | 1,119 | 11 | 738,836 | 721,604 | 721,110
L4 | 1,091 | 6 | 603,140 | 584,500 | 584,242
RG99 binary size reduced by ~4 KiB: 2,426,380 bytes -> 2,421,388 bytes
2 years ago
|
|
|
/**
 * Renders a single dungeon tile frame onto `out`.
 *
 * The frame is clipped against the output surface first; nothing is drawn when
 * the clipped area is empty. The renderer is then chosen by `maskType`:
 * `Solid` and `Transparent` accept every tile type, while `Left` and `Right`
 * are restricted to the tile types accepted by the respective dispatcher.
 *
 * @param out Output surface.
 * @param position Position of the frame on `out`; the destination row is
 *        `position.y - clip.bottom`, so this presumably refers to the bottom-left
 *        corner of the frame (confirm against callers).
 * @param tile Tile type of the frame.
 * @param src Frame pixel data.
 * @param height Height of the frame in pixels, used for clipping.
 * @param maskType Mask to apply when rendering.
 * @param tbl Light table; it selects the fully dark / fully lit / partially lit code path.
 */
DVL_ATTRIBUTE_HOT void RenderTileFrame(const Surface &out, const Point &position, TileType tile, const uint8_t *src, int_fast16_t height,
    MaskType maskType, const uint8_t *tbl)
{
	// Work on local copies of the coordinates: `position` is a const reference,
	// so the optional debug offsets cannot be applied to it in place.
	auto x = position.x;
	auto y = position.y;
#ifdef DEBUG_RENDER_OFFSET_X
	x += DEBUG_RENDER_OFFSET_X;
#endif
#ifdef DEBUG_RENDER_OFFSET_Y
	y += DEBUG_RENDER_OFFSET_Y;
#endif

	const Clip clip = CalculateClip(x, y, DunFrameWidth, height, out);
	if (clip.width <= 0 || clip.height <= 0) return;

	uint8_t *dst = out.at(static_cast<int>(x + clip.left), static_cast<int>(y - clip.bottom));
	const uint16_t dstPitch = out.pitch();

#ifdef DUN_RENDER_STATS
	++DunRenderStats[DunRenderType { tile, maskType }];
#endif

	switch (maskType) {
	case MaskType::Solid:
		RenderTileDispatch</*Transparent=*/false>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Transparent:
		RenderTileDispatch</*Transparent=*/true>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Left:
		RenderLeftTrapezoidOrTransparentSquareDispatch<MaskType::Left>(tile, dst, dstPitch, src, tbl, clip);
		break;
	case MaskType::Right:
		RenderRightTrapezoidOrTransparentSquareDispatch<MaskType::Right>(tile, dst, dstPitch, src, tbl, clip);
		break;
	}

#ifdef DEBUG_STR
	const auto [debugStr, flags] = GetTileDebugStr(tile);
	DrawString(out, debugStr, Rectangle { Point { x + 2, y - 29 }, Size { 28, 28 } }, { .flags = flags });
#endif
}
|
|
|
|
|
|
|
|
|
|
void world_draw_black_tile(const Surface &out, int sx, int sy)
|
|
|
|
|
{
|
|
|
|
|
#ifdef DEBUG_RENDER_OFFSET_X
|
|
|
|
|
sx += DEBUG_RENDER_OFFSET_X;
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef DEBUG_RENDER_OFFSET_Y
|
|
|
|
|
sy += DEBUG_RENDER_OFFSET_Y;
|
|
|
|
|
#endif
|
|
|
|
|
const Clip clipLeft = CalculateClip(sx, sy, Width, TriangleHeight, out);
|
|
|
|
|
if (clipLeft.height <= 0) return;
|
|
|
|
|
Clip clipRight;
|
|
|
|
|
clipRight.top = clipLeft.top;
|
|
|
|
|
clipRight.bottom = clipLeft.bottom;
|
|
|
|
|
clipRight.left = (sx + Width) < 0 ? -(sx + Width) : 0;
|
|
|
|
|
clipRight.right = sx + Width + Width > out.w() ? sx + Width + Width - out.w() : 0;
|
|
|
|
|
clipRight.width = Width - clipRight.left - clipRight.right;
|
|
|
|
|
clipRight.height = clipLeft.height;
|
|
|
|
|
|
|
|
|
|
const uint16_t dstPitch = out.pitch();
|
|
|
|
|
if (clipLeft.width > 0) {
|
|
|
|
|
uint8_t *dst = out.at(static_cast<int>(sx + clipLeft.left), static_cast<int>(sy - clipLeft.bottom));
|
|
|
|
|
RenderLeftTriangle<LightType::FullyDark, /*Transparent=*/false>(dst, dstPitch, nullptr, nullptr, clipLeft);
|
|
|
|
|
}
|
|
|
|
|
if (clipRight.width > 0) {
|
|
|
|
|
uint8_t *dst = out.at(static_cast<int>(sx + Width + clipRight.left), static_cast<int>(sy - clipRight.bottom));
|
|
|
|
|
RenderRightTriangle<LightType::FullyDark, /*Transparent=*/false>(dst, dstPitch, nullptr, nullptr, clipRight);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace devilution
|