CPU profiling with gperf

Documentation and scripts for profiling with gperf
2 years ago · 356d3aaad4
4 changed files with 172 additions and 0 deletions
--- a/docs/benchmarking.md
+++ b/docs/benchmarking.md
@ -0,0 +1,34 @@
+# Benchmarking
+
+On Linux, use the `tools/linux_reduced_cpu_variance_run.sh` wrapper
+to [reduce CPU variance](https://google.github.io/benchmark/reducing_variance.html).
+
+Timedemo:
+
+```bash
+tools/linux_reduced_cpu_variance_run.sh tools/measure_timedemo_performance.py -n 5 --binary build-rel/devilutionx
+```
+
+Individual benchmarks (built when `BUILD_TESTING` is `ON`):
+
+```bash
+tools/build_and_run_benchmark.py clx_render_benchmark
+```
+
+You can pass arguments to the benchmark binary with `--`, e.g.:
+
+```bash
+tools/build_and_run_benchmark.py clx_render_benchmark -- --benchmark_repetitions=5
+```
+
+The `tools/build_and_run_benchmark.py` script is roughly equivalent to:
+
+```bash
+{ [ -d build-reld ] || cmake -S. -Bbuild-reld -DCMAKE_BUILD_TYPE=RelWithDebInfo; } && \
+cmake --build build-reld --target clx_render_benchmark && \
+tools/linux_reduced_cpu_variance_run.sh build-reld/clx_render_benchmark
+```
+
+See `tools/build_and_run_benchmark.py --help` for more information.
+
+You can also [profile](profiling-linux.md) your benchmarks.
--- a/docs/profiling-linux.md
+++ b/docs/profiling-linux.md
@ -18,6 +18,34 @@ You may also want to install debugging symbols for SDL2:
 sudo apt install libsdl2-dev-dbgsym
 ```

+gperftools by default only comes with a basic visualizer.
+[pprof](https://github.com/google/pprof), also from Google, is a more fully-featured profile visualizer
+that provides an interactive web server with a flame graph, source annotation, etc.
+
+To install pprof, run:
+
+```bash
+go install github.com/google/pprof@latest
+```
+
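+## CPU profiling with gperftools
+
+Configure a build directory with gperftools enabled (with `--gperf`, `tools/build_and_run_benchmark.py` uses `build-gperf` by default and only configures it itself if it does not exist yet):
+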
+```bash
+cmake -S. -Bbuild-gperf -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGPERF=ON -DBUILD_TESTING=ON
+```
+
+Timedemo:
+
+```bash
+tools/build_and_run_benchmark.py --gperf devilutionx -- --diablo --spawn --lang en --demo 0 --timedemo
+```
+
+Individual benchmarks (built when `BUILD_TESTING` is `ON`):
+
+```bash
+tools/build_and_run_benchmark.py --gperf clx_render_benchmark
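+```
+
+With `--gperf`, the script writes the CPU profile to `/tmp/out.profile` and then serves it in pprof's web interface at `localhost:1337` (change the port with `--port`).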
+
 ## Heap profiling with gperftools

 Heap profiling produces a graph of all heap allocations that are alive between two points
--- a/tools/build_and_run_benchmark.py
+++ b/tools/build_and_run_benchmark.py
@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+import subprocess
+import argparse
+import os
+import sys
+import pathlib
+import shlex
+import platform
+
+# Path of the CPU profile: handed to the benchmark through the `CPUPROFILE`
+# environment variable and passed to pprof afterwards.
+_PROFILE = "/tmp/out.profile"
+
+
+def run(*args: list[str], env: dict[str, str] | None = None):
+    print(
+        "+",
+        *(map(shlex.quote, [f"{k}={v}" for k, v in env.items()] if env else [])),
+        *map(shlex.quote, args),
+        file=sys.stderr,
+    )
+    full_env = None
+    if env:
+        full_env = os.environ.copy()
+        for k, v in env.items():
+            full_env[k] = v
+    subprocess.run(args, stdout=sys.stdout, stderr=sys.stderr, check=True, env=full_env)
+
+
+def nproc():
+    return len(os.sched_getaffinity(0))
+
+
+def maybe_create_build_dir(dir: str, args: list[str]):
+    if os.path.isdir(dir):
+        return
+    print("Creating build directory at ", dir, file=sys.stderr)
+    run("cmake", "-S.", f"-B{dir}", "-DCMAKE_BUILD_TYPE=RelWithDebInfo", *args)
+
+
+def build_target(dir: str, target: str):
+    run("cmake", "--build", dir, "-j", str(nproc()), "--target", target)
+
+
+def run_benchmark(dir: str, target: str, benchmark_args: list[str], gperf: bool):
+    args = []
+    if platform.system() == "Linux":
+        args.append("tools/linux_reduced_cpu_variance_run.sh")
+    env = None
+    if gperf:
+        env: dict[str, str] = {"CPUPROFILE": _PROFILE}
+        if not "CPUPROFILE_FREQUENCY" in env:
+            env["CPUPROFILE_FREQUENCY"] = "1000"
+    run(*args, f"{dir}/{target}", *benchmark_args, env=env)
+
+
+def run_pprof(dir: str, target: str, port: int):
+    run("pprof", f"--http=localhost:{port}", f"{dir}/{target}", _PROFILE)
+
+
+def main():
+    os.chdir(pathlib.Path(__file__).resolve().parent.parent)
+    parser = argparse.ArgumentParser(description="Builds and runs a benchmark")
+    parser.add_argument("-B", "--build", help="build directory")
+    parser.add_argument(
+        "--gperf", action=argparse.BooleanOptionalAction, help="profile with gperftools"
+    )
+    parser.add_argument("--port", type=int, default=1337, help="pprof server port")
+    parser.add_argument("target", help="benchmark target")
+    parser.add_argument(
+        "benchmark_args",
+        nargs="*",
+        help="arguments passed to the benchmark binary",
+    )
+    args = parser.parse_args()
+    build = args.build
+    if not build:
+        build = "build-gperf" if args.gperf else "build-reld"
+    configure_args = []
+    if args.gperf:
+        configure_args.append("-DGPERF=ON")
+    try:
+        maybe_create_build_dir(build, configure_args)
+        build_target(build, args.target)
+        run_benchmark(build, args.target, args.benchmark_args, args.gperf)
+        if args.gperf:
+            run_pprof(build, args.target, args.port)
+    except subprocess.CalledProcessError as e:
+        print("Error:", e.cmd[0], "failed", file=sys.stderr)
+        return e.returncode
+    except KeyboardInterrupt as e:
+        return 1
+
+
+main()
--- a/tools/linux_reduced_cpu_variance_run.sh
+++ b/tools/linux_reduced_cpu_variance_run.sh
@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Runs the given command pinned to CPU 0 with the `performance` governor and
+# CPU frequency boost disabled, then switches the governor to `ondemand` and
+# re-enables boost.
+# See https://google.github.io/benchmark/reducing_variance.html
+#
+# Usage: tools/linux_reduced_cpu_variance_run.sh <command> [args...]
+# Exits with the exit status of <command>.
+set -x
+
+sudo cpupower frequency-set --governor performance 1>/dev/null
+echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost 1>/dev/null
+
+taskset -c 0 "$@"
+# Capture the command's status *before* `set +x` runs, because `set +x` would
+# overwrite `$?` with its own status. The group's stderr is discarded so that
+# neither statement shows up in the trace.
+{ result=$?; set +x; } 2> /dev/null
+set -x
+
+sudo cpupower frequency-set --governor ondemand 1>/dev/null
+echo 1 | sudo tee /sys/devices/system/cpu/cpufreq/boost 1>/dev/null
+
+exit "$result"