From 78ae277d287c449a8ee5c0311dfe7c6ed29d8f8b Mon Sep 17 00:00:00 2001 From: krcroft Date: Sat, 28 Mar 2020 13:12:31 -0700 Subject: [PATCH] Expand use and support for LTO and FDO builds Adds LTO to the CI build for Linux, which brings it as close as possible to the planned formal release, which will additionally use FDO. Adds some helper scripts to work with FDO files. Improves the build notes for how to create and use FDO files. --- .github/scripts/build-autofdo.sh | 64 +++++ .github/scripts/fetch-and-merge-afdo.sh | 30 +++ .github/scripts/fetch-and-merge-profraw.sh | 30 +++ .github/workflows/linux.yml | 10 +- .gitignore | 7 + scripts/automator/build/clang-defaults | 9 +- scripts/automator/build/gcc-defaults | 10 +- scripts/automator/main.sh | 5 +- scripts/build.md | 299 +++++++++++++++++---- 9 files changed, 393 insertions(+), 71 deletions(-) create mode 100755 .github/scripts/build-autofdo.sh create mode 100755 .github/scripts/fetch-and-merge-afdo.sh create mode 100755 .github/scripts/fetch-and-merge-profraw.sh diff --git a/.github/scripts/build-autofdo.sh b/.github/scripts/build-autofdo.sh new file mode 100755 index 00000000..e6dd9a15 --- /dev/null +++ b/.github/scripts/build-autofdo.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# Copyright (c) 2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# Builds and installs the AutoFDO package from +# https://github.com/google/autofdo +# +# AutoFDO is used to convert sample profiling data (collected using perf +# or ocperf.py from a Linux kernel built with last branch +# record (LBR) tracing support running on an Intel CPU with LBR +# support) to GCC-specific AutoFDO records or LLVM's raw profile +# records. +# +# Pre-requisites: autoconf automake git libelf-dev libssl-dev pkg-config +# If building for LLVM, clang and llvm-dev are needed.
+# +# Usage: build-autofdo.sh [LLVM version] +# Examples: ./build-autofdo.sh +# ./build-autofdo.sh 10 +# +# Where the optional [LLVM version] allows building with LLVM support +# for the provided LLVM version. + +set -euo pipefail + +rootdir="$(pwd)" +prefix="$rootdir/afdo" + +# Clone the repo +if [[ ! -d autofdo ]]; then + git clone --depth 1 --recursive https://github.com/google/autofdo.git +fi + +# Enter and sync the repo (if it already exists) +pushd autofdo +git pull + +# Initialize auto-tools +aclocal -I . +autoheader +autoconf +automake --add-missing -c + +# Configure with the specified LLVM version if provided +if [[ -n "${1:-}" ]]; then + ver="$1" + withllvm="--with-llvm=$(command -v llvm-config-"$ver")" +fi + +# Configure +flags="-Os -DNDEBUG -pipe" +./configure CFLAGS="$flags" CXXFLAGS="$flags" --prefix="$prefix" ${withllvm:+"$withllvm"} + +# Build and install +# Note: make cannot be run in parallel because the sub-projects +# need to be configured serially with respect to each other +make +make install +popd + +# Strip the binaries +cd "$prefix/bin" +strip ./* diff --git a/.github/scripts/fetch-and-merge-afdo.sh b/.github/scripts/fetch-and-merge-afdo.sh new file mode 100755 index 00000000..c8ac8ae4 --- /dev/null +++ b/.github/scripts/fetch-and-merge-afdo.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Copyright (c) 2019-2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# A helper script that fetches, converts, and merges kernel sample +# (.prof) files (collected during prior DOSBox testing) into a single +# GCC-compatible AutoFDO record that can be used to optimize builds.
+ +# Dependencies: +# - zstd +# - autofdo + +set -euo pipefail + +# Tarball containing profile records +PROFILES="https://gitlab.com/luxtorpeda/dosbox-tests/-/raw/master/archives/profiles.tar.zst" +BINARY="tests/dosbox" + +# Move to our repo root +cd "$(git rev-parse --show-toplevel)" + +# Fetch and unpack the profiles +wget "${PROFILES}" -O - | zstd -d | tar -x + +# Convert and merge the profiles +find . -name '*.prof' -print0 \ + | xargs -0 -P "$(nproc)" -I {} \ + create_gcov --binary="${BINARY}" --profile="{}" -gcov="{}".afdo -gcov_version=1 +profile_merger -gcov_version=1 -output_file=current.afdo tests/*/*.afdo diff --git a/.github/scripts/fetch-and-merge-profraw.sh b/.github/scripts/fetch-and-merge-profraw.sh new file mode 100755 index 00000000..7a1de773 --- /dev/null +++ b/.github/scripts/fetch-and-merge-profraw.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Copyright (c) 2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# A helper script that fetches, converts, and merges kernel sample +# (.prof) files (collected during prior DOSBox testing) into a single +# LLVM-compatible Raw Profile record that can be used to optimize builds. + +# Dependencies: +# - zstd +# - autofdo + +set -euo pipefail + +# The tarball containing one or more profile records +PROFILES="https://gitlab.com/luxtorpeda/dosbox-tests/-/raw/master/archives/profiles.tar.zst" +BINARY="tests/dosbox" + +# Move to our repo root +cd "$(git rev-parse --show-toplevel)" + +# Fetch and unpack the profiles +wget "${PROFILES}" -O - | zstd -d | tar -x + +# Convert and merge the profiles +find .
-name '*.prof' -print0 \ + | xargs -0 -P "$(nproc)" -I {} \ + create_llvm_prof --binary="${BINARY}" --profile="{}" --out="{}".profraw +llvm-profdata-9 merge -sample -output=current.profraw tests/*/*.profraw diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c1a108cf..fac45a69 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -96,11 +96,17 @@ jobs: echo ::set-env name=VERSION::$VERSION - name: Build env: - FLAGS: -O3 -DNDEBUG -pipe + AR: gcc-ar + CC: ccache gcc + CXX: ccache g++ + LD: gcc + RANLIB: gcc-ranlib + FLAGS: -O3 -flto -ffunction-sections -fdata-sections -DNDEBUG -pipe + LINKFLAGS: -Wl,--as-needed run: | set -x ./autogen.sh - ./configure CC="ccache gcc" CXX="ccache g++" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" --disable-screenshots + ./configure CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" LDFLAGS="$FLAGS $LINKFLAGS -flto=$(nproc)" --disable-screenshots make -j "$(nproc)" strip src/dosbox - name: Package diff --git a/.gitignore b/.gitignore index a92d820e..ed530663 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,10 @@ suppress_base.json # macOS directory prefs .DS_Store + +# Profiling data +*.prof +*.profraw +*.profraw.imports +*.afdo +*.afdo.imports diff --git a/scripts/automator/build/clang-defaults b/scripts/automator/build/clang-defaults index 1356f071..c9c37cc4 100644 --- a/scripts/automator/build/clang-defaults +++ b/scripts/automator/build/clang-defaults @@ -3,10 +3,10 @@ cc="${ccache}clang${postfix}" cxx="${ccache}clang++${postfix}" # Flag additions -TYPES+=(debug warnmore profile) -cflags_release=("${cflags[@]}" -Os) +TYPES+=(debug warnmore pgotrain) +cflags_release=("${cflags[@]}" -O3) cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) -cflags_profile=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) +cflags_pgotrain=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) cflags_warnmore=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused -Woverloaded-virtual 
-Wpedantic -Wconversion -Wsign-conversion -Wdouble-promotion -Wformat=2) @@ -19,5 +19,4 @@ fi # Modifier additions MODIFIERS=(fdo) -ldflags_fdo=(-fprofile-instr-generate) -cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-}") +cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-${repo_root}/current.profraw}") diff --git a/scripts/automator/build/gcc-defaults b/scripts/automator/build/gcc-defaults index 3a491b65..8422ad35 100644 --- a/scripts/automator/build/gcc-defaults +++ b/scripts/automator/build/gcc-defaults @@ -6,18 +6,20 @@ ld="gcc${postfix}" ranlib="gcc-ranlib${postfix}" # Flag additions -TYPES+=(debug warnmore profile) +TYPES+=(debug warnmore pgotrain fdotrain) cflags+=(-fstack-protector -fdiagnostics-color=auto) cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) -cflags_release=("${cflags[@]}" -Ofast -ffunction-sections -fdata-sections) -cflags_profile=("${cflags_debug[@]}" -pg) +cflags_release=("${cflags[@]}" -DNDEBUG -O3 -ffunction-sections -fdata-sections) +cflags_pgotrain=("${cflags_debug[@]}" -pg) cflags_warnmore=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion -Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2 -Wlogical-op -Wmisleading-indentation -Wnull-dereference -Wshadow -Wunused) cxxonly_warnmore=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast) +cflags_fdotrain=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer) # Modifier additions MODIFIERS=(fdo) -cflags_fdo=("-fauto-profile=${FDO_FILE:-}") +# Override the prior optimization flag because O2 does better w/ feedback +cflags_fdo=("-O2 -fauto-profile=${FDO_FILE:-${repo_root}/current.afdo}") diff --git a/scripts/automator/main.sh b/scripts/automator/main.sh index 95150cc2..4087e9b6 100644 --- a/scripts/automator/main.sh +++ b/scripts/automator/main.sh @@ -109,7 +109,10 @@ function perform_steps() { } function main() { - cd "$(dirname "$0")/automator" + cd "$(dirname "$0")"/..
+ export repo_root + repo_root="$PWD" + cd scripts/automator if [[ -z "${data_dir:-}" ]]; then data_dir="$(basename "$0" '.sh')"; fi parse_args "$@" construct_environment diff --git a/scripts/build.md b/scripts/build.md index 1743b66f..b05a4838 100644 --- a/scripts/build.md +++ b/scripts/build.md @@ -1,14 +1,11 @@ # DOSBox Build Script -A script that builds DOSBox with your choice of compiler, release types, and -additional optimization options on MacOS, Linux, and Windows. +This script builds DOSBox with your choice of compiler, release type, and +additional options. It runs on MacOS, Linux, and Windows. -If this is the first time you are attempting to build DOSBox, then you need to -first install the development tools and DOSBox's development packages prior to -building. To help in this regard, the **list-build-dependencies.sh** script -prints a list of packages that you can use to install these dependencies. - -Use of both scripts is described below. +If this is your first time building DOSBox, then you will need to install +DOSBox's development tools and dependencies, which is included in the notes +below. ## Requirements @@ -26,6 +23,7 @@ Use of both scripts is described below. 1. Download and install Chocolatey: 1. Open a console and run Cholocatey's command line interface (CLI) to install msys2 and git: + `choco install msys2 git --no-progress` ``` text @@ -50,22 +48,25 @@ Use of both scripts is described below. 1. Clone and enter the repository's directory: 1. `git clone https://github.com/dreamer/dosbox-staging.git` 1. `cd dosbox-staging` - 1. Run all subsequent steps while residing in the repo's directory. + + Be sure to run all subsequent steps below while inside the repo's directory. -1. (🏁 first-time-only) Install the build tools and package dependencies: - `./scripts/list-build-dependencies.sh -p msys2 | xargs pacman -S --noconfirm` +1. (🏁 first-time-only) Install the build tools and runtime dependencies: -1. 
Launch the build script with default settings: - `./scripts/build/run.sh --bin-path /mingw64/bin` + `./scripts/list-build-dependencies.sh -p msys2 | xargs pacman -S + --noconfirm` -## MacOS Installation and Usage +1. Launch the build script with default settings: + + `./scripts/build.sh -c gcc -t release --bin-path /mingw64/bin` -Builds on Mac can be performed with Clang or GCC. +## macOS Installation and Usage -If you only plan on only building with Clang, then follow the Brew -installation steps. If you're interested in building with GCC, then either -Brew or MacPorts will work. Both can be installed without conflicting with -eachother. +Builds on macOS can be performed with Clang or GCC. + +If you plan on building only with Clang, then follow the Brew installation +steps. If you're interested in building with GCC, then Brew or MacPorts will +work, and both can be installed without conflicting with each other. Before installing either, the Xcode tools need to be installed and the license agreed to: @@ -85,7 +86,8 @@ agreed to: ### MacPorts Installation -1. Build and install MacPorts along with DOSBox dependencies with the following sequence: +1. Build and install MacPorts along with DOSBox dependencies with the following + sequence: ``` shell git clone --quiet --depth=1 https://github.com/macports/macports-base.git @@ -101,59 +103,238 @@ agreed to: ### Build DOSBox (common for all of the above) -1. Clone the repository: `git clone https://github.com/dreamer/dosbox-staging.git` +1. Clone the repository: `git clone + https://github.com/dreamer/dosbox-staging.git` 1. Change directories into the repo: `cd dosbox-staging` 1.
Build DOSBox: -- Clang: `./scripts/build.sh --compiler clang --bin-path /usr/local/bin` - - GCC (brew): `./scripts/build.sh --compiler-version 9 --bin-path /usr/local/bin` - - GCC (macports): `./scripts/build.sh --compiler-version mp-9 --bin-path /opt/local/bin` +- Clang: `./scripts/build.sh --compiler clang -t release --bin-path + /usr/local/bin` +- GCC (brew): `./scripts/build.sh --compiler-version 9 -t release --bin-path + /usr/local/bin` +- GCC (macports): `./scripts/build.sh --compiler-version mp-9 -t release + --bin-path /opt/local/bin` ## Linux Installation -1. (🏁 first-time-only) Install dependencies based on your package manager; apt - in this example: - `sudo apt install -y $(./scripts/list-build-dependencies.sh -p apt)` - For other supported package managers, run: - `./scripts/list-build-dependencies.sh --help` - 1. Install git: `sudo apt install -y git` -1. Clone the repository: `git clone https://github.com/dreamer/dosbox-staging.git` +1. Clone the repository: `git clone + https://github.com/dreamer/dosbox-staging.git` 1. Change directories into the repo: `cd dosbox-staging` -1. Build DOSBox: `./scripts/build.sh` +1. (🏁 first-time-only) Install dependencies based on your package manager; apt + in this example: `sudo apt install -y $(./scripts/list-build-dependencies.sh + -p apt)` For other supported package managers, run: + `./scripts/list-build-dependencies.sh --help` +1. 
Build DOSBox: + +- Clang: `./scripts/build.sh --compiler clang -t release -v 9` +- GCC (default version): `./scripts/build.sh -c gcc -t release` +- GCC (specific version, e.g. 9): `./scripts/build.sh -c gcc -v 9 -t release` ## Additional Tips -The compiler, version, and bit-depth can be selected by passing the following common -options to the **list-build-dependencies.sh** and **build.sh** scripts: +### Compiler variations -- `--compiler clang`, to use CLang instead of GCC -- `--compiler-version 8`, to use a specific version of compiler - (if available in your package manager) +The compiler, version, and bit-depth can be selected by passing the following +common options to the **list-build-dependencies.sh** and **build.sh** scripts: + +- `--compiler clang` or `-c clang` to use Clang instead of GCC +- `--compiler-version 8` or `-v <version>` to specify a particular version of + compiler (if available in your package manager) - `--bit-depth 32`, to build a 32-bit binary instead of 64-bit -After building, your `dosbox` or `dosbox.exe` binary will reside inside `./dosbox-staging/src/`. +### Release types -Build flags you might be interested in: - -- `--lto`, perform optimizations across the entire object space instead of - per-file (Only available on Mac and Linux) -- `--release debug`, to build a binary containing debug symbols +Build release types include: +- **release**, optimizes the binary and disables some checks, such as + assertions. +- **debug**, adds debug symbols and disables optimizations for ideal debugging.
- You can run the resulting binary in the GNU debugger: `gdb /path/to/ - dosbox`, followed by `start mygame.bat` -- `--release profile`, to generate performance statistics - - Instructions are provided after the build completes, which describe how to - generate and process the profiling data -- `--release `, to build a binary that performs dynamic - code-analysis at runtime (Linux and macOS) - - see `./scripts/build.sh --help` for a list of sanitizer-types that are - available - - Run your binary like normal and it will generate output describing - problematic behavior - - Some sanitizers accept runtime options via an environment variables, - such as `ASAN_OPTIONS`, described here: - + dosbox`, followed by `run mygame.bat` +- **pgotrain**, adds Profile Guided Optimization (PGO) tracking instrumentation + to the compiled binary. + + This allows the recording of profile statistics that can be used to compile a + PGO-optimized binary. Note that PGO optimization is different from + Automatic Feedback Directed Optimization (AutoFDO) mentioned below. -If you want to run multiple back-to-back builds from the same directory with -different settings then add the `--clean` flag to ensure previous objects and -binaries are removed. + After compiling your PGO binary, the build script presents instructions + describing how to generate and use the profiling data. + +- **warnmore**, displays additional helpful C and C++ warnings for developers. +- **fdotrain**, adds tracing symbols used to generate AutoFDO sampling data. +- **$SANITIZER TYPE**, builds a binary instrumented with code to catch issues at + runtime that relate to the type of sanitizer being used. For example: memory + leaks, threading issues, and so on. This is for Linux and macOS only. + + - see `./scripts/build.sh --help` for a list of sanitizer-types that are + available.
+ - Run your binary like normal and it will generate output describing + problematic behavior + - Some sanitizers accept runtime options via environment variables, + such as `ASAN_OPTIONS`, described here: + + +### Build Results, Rebuilding, and Cleaning + +After building, your `dosbox` or `dosbox.exe` binary will reside inside +`./dosbox-staging/src/`. + +The build script records the prior build type and will clean if needed between +builds. To manually remove all intermediate object files and ephemeral +auto-tools outputs, run `make distclean`. + +To additionally remove all files except for the repository files, use `git +clean -fdx`. + +### CCache + +The build script will make use of ccache, which saves compiled objects for +potential re-use in future builds (hence the name, "cache") to speed up build +times. If you performed the one-time installation step above, then you will +already have ccache installed. + +Simply having `ccache` in your path is sufficient to use it; you do not +need to invasively symlink `/usr/bin/gcc` -> `ccache`. + +The build script enables ccache's object compression, which significantly +reduces the size of the cache. It will also display cache statistics after each +build. To see more details, run `ccache -s`. + +To learn more about ccache run `ccache -h`, and read +https://ccache.dev/manual/latest.html + +### Optimization Modifiers + +The following modifier flags can be added when building a **release** type: + +- `-m lto`, optimize the entire object space instead of per-file (Only + available on Mac and Linux) + +- `-m fdo`, performs feedback directed optimizations (FDO) using an AutoFDO + data set. Export the `FDO_FILE` variable with the full path to your merged + FDO dataset.
For example: + + - GCC: `export FDO_FILE=/full/path/to/current.afdo` and then build with: + + `./scripts/build.sh -c gcc -t release -m fdo -m lto` + + - Clang: `export FDO_FILE=/full/path/to/current.profraw`, and then build + with: + + `./scripts/build.sh -c clang -t release -m fdo -m lto` + + The section below describes how to collect an AutoFDO dataset for GCC and + Clang. + +### Recording Sampling Data used in AutoFDO builds + +Prerequisites: + +- An **Intel processor** that supports the last branch record (LBR) instruction. +- A Linux **kernel** built with Branch Profiling tracers enabled: + + ``` + CONFIG_PM_TRACE=y + CONFIG_TRACE_BRANCH_PROFILING=y + CONFIG_BRANCH_TRACER=y + ``` + + These can be enabled directly in your kernel's `.config` file or using `make + menuconfig` via the following menu options: + 1. `Kernel hacking --->` + 1. `[*] Tracers --->` + 1. `Branch Profiling (Trace likely/unlikely profiler)` + 1. `(X) Trace likely/unlikely profiler` + +- The **AutoFDO** software package. It may be available via your package + manager or built from sources (https://github.com/google/autofdo). + + - **Note about compiler versions** the autofdo binaries need to be compiled + with the exact version of the compiler that will later be used to compile + our final optimized version of dosbox-staging. + + So for example, if you install autofdo via package-manager, then it will be + valid for the default version of gcc and clang also installed by your + package manager. Whereas if you plan to build with `gcc-<version>`, then + you will need to compile autofdo from sources using `gcc-<version>` by + pointing the `CC` and `CXX` environment variables to the newer gcc + binaries. + + - **Note about clang** If you plan to compile with a version of clang newer + than your package manager's default version, then you will need to compile + autofdo from source and configure it with the corresponding version of + `llvm-config`.
For example, if I want to build with clang-10, then I would + configure autofdo with: `./configure --with-llvm=/usr/bin/llvm-config-10`. + + - The included `.github/scripts/build-autofdo.sh` script can be used to build + and install autofdo, for example: + + - default GCC: + + `sudo .github/scripts/build-autofdo.sh` + - newer GCC: + + ``` + export CC=/usr/bin/gcc-9 + export CXX=/usr/bin/g++-9 + sudo .github/scripts/build-autofdo.sh + ``` + + - Clang version 10: + + `sudo .github/scripts/build-autofdo.sh 10` + +- The **pmu-tools** software package, which can be downloaded from + https://github.com/andikleen/pmu-tools. This is a collection of python + scripts used to assist in capturing sampling data. + +Procedures: + +1. Ensure the custom Linux Kernel supporting LBR tracing is running. + +1. Build `dosbox-staging` from source using the `fdotrain` target: + `./scripts/build.sh -c gcc -t fdotrain` + +1. Record kernel sample profiles while running dosbox-staging: + + `/path/to/pmu-tools/ocperf.py record -F max -o "samples-1.prof" -b -e + br_inst_retired.near_taken:pp -- /path/to/fdo-trained/dosbox ARGS` + + Where `samples-1.prof` is the file that will be filled with samples. + + Repeat this for multiple training runs, each time saving the output to a new + `-o samples-N.prof` file. Ideally you want to exercise all code paths in + DOSBox (core types, video cards, video modes, sound cards, and audio + codecs). + +1. Convert your sample profiles into compiler-specific records using tools + provided in the `autofdo` package: + + For GCC, run: + - `create_gcov --binary=/path/to/fdo-trained/dosbox + --profile=samples-1.prof -gcov=samples-1.afdo -gcov_version=1` + + ... for each `.prof` file, creating a corresponding `.afdo` file. + + - At this point, you now have an `.afdo` file for each `.prof` file.
Merge + the `.afdo`s into a single `current.afdo` file with: + + `profile_merger -gcov_version=1 -output_file=current.afdo *.afdo` + + For Clang, run: + + - `create_llvm_prof --binary=/path/to/fdo-trained/dosbox + --profile=samples-1.prof --out=samples-1.profraw` + + ... for each `*.prof` file, creating a corresponding `.profraw` file. + + - At this point, you now have a `.profraw` file for each `.prof` file. Merge + them into a single `current.profraw` file with: + + `llvm-profdata-<version> merge -sample -output=current.profraw *.profraw` + +You can now use your merged `.afdo` or `.profraw` file to build with the `-m +fdo` modifier by placing your `current.afdo/.profraw` file in the repo's root +directory, or by pointing to it using the FDO_FILE environment variable, and launch +the build with `./scripts/build.sh -c <compiler> -t release -m lto -m fdo`.