diff --git a/.github/scripts/build-autofdo.sh b/.github/scripts/build-autofdo.sh new file mode 100755 index 00000000..e6dd9a15 --- /dev/null +++ b/.github/scripts/build-autofdo.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# Copyright (c) 2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# Builds and installs the AutoFDO package from +# https://github.com/google/autofdo +# +# AutoFDO is used to convert sample profiling data (collected using perf +# or ocperf.py from a Linux kernel built with with last branch +# record (LBR) tracing support running on an Intel CPU with LBR +# support) to GCC-specific AutoFDO records or LLVM's raw profile +# records. +# +# Pre-requisites: autoconf automake git libelf-dev libssl-dev pkg-config +# If building for LLVM, clang and llvm-dev are needed. +# +# Usage: build-autofdo.sh [LLVM version] +# Examples: ./build-autofdo.sh +# ./build-autofdo.sh 10 +# +# Where the optional [LLVM version] allows building with LLVM support +# for the provided LLVM version. + +set -euo pipefail + +rootdir="$(pwd)" +prefix="$rootdir/afdo" + +# Clone the repo +if [[ ! -d autofdo ]]; then + git clone --depth 1 --recursive https://github.com/google/autofdo.git +fi + +# Enter and sync the repo (if it already exists) +pushd autofdo +git pull + +# Initialize auto-tools +aclocal -I . 
+autoheader +autoconf +automake --add-missing -c + +# Configure with the specified LLVM version if provided +if [[ -n "${1:-}" ]]; then + ver="$1" + withllvm="--with-llvm=$(command -v llvm-config-"$ver")" +fi + +# Configure +flags="-Os -DNDEBUG -pipe" +./configure CFLAGS="$flags" CXXFLAGS="$flags" --prefix="$prefix" "${withllvm:-}" + +# Build and install +# Note: make cannot be run in parallel because the sub-projects' +# need to be configured serially with respect to eachother +make +make install +popd + +# Strip the binaries +cd "$prefix/bin" +strip ./* diff --git a/.github/scripts/fetch-and-merge-afdo.sh b/.github/scripts/fetch-and-merge-afdo.sh new file mode 100755 index 00000000..c8ac8ae4 --- /dev/null +++ b/.github/scripts/fetch-and-merge-afdo.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Copyright (c) 2019-2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# A helper script that fetches, converts, and merges kernel sample +# (.prof) files (collected during prior DOSBox testing) into a single +# GCC-compatible AutoFDO record that can be used to optimize builds. + +# Depedencies: +# - zstd +# - autofdo + +set -euo pipefail + +# Tarball containing profile records +PROFILES="https://gitlab.com/luxtorpeda/dosbox-tests/-/raw/master/archives/profiles.tar.zst" +BINARY="tests/dosbox" + +# Move to our repo root +cd "$(git rev-parse --show-toplevel)" + +# Fetch and unpack the profiles +wget "${PROFILES}" -O - | zstd -d | tar -x + +# Convert and merge the profiles +find . 
-name '*.prof' -print0 \ + | xargs -0 -P "$(nproc)" -I {} \ + create_gcov --binary="${BINARY}" --profile="{}" -gcov="{}".afdo -gcov_version=1 +profile_merger -gcov_version=1 -output_file=current.afdo tests/*/*.afdo diff --git a/.github/scripts/fetch-and-merge-profraw.sh b/.github/scripts/fetch-and-merge-profraw.sh new file mode 100755 index 00000000..7a1de773 --- /dev/null +++ b/.github/scripts/fetch-and-merge-profraw.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Copyright (c) 2020 Kevin R Croft +# SPDX-License-Identifier: GPL-2.0-or-later + +# A helper script that fetches, converts, and merges kernel sample +# (.prof) files (collected during prior DOSBox testing) into a single +# LLVM-compatible Raw Profile record that can be used to optimize builds. + +# Depedencies: +# - zstd +# - autofdo + +set -euo pipefail + +# The tarball containing one or more profile records +PROFILES="https://gitlab.com/luxtorpeda/dosbox-tests/-/raw/master/archives/profiles.tar.zst" +BINARY="tests/dosbox" + +# Move to our repo root +cd "$(git rev-parse --show-toplevel)" + +# Fetch and unpack the profiles +wget "${PROFILES}" -O - | zstd -d | tar -x + +# Convert and merge the profiles +find . 
-name '*.prof' -print0 \ + | xargs -0 -P "$(nproc)" -I {} \ + create_llvm_prof --binary="${BINARY}" --profile="{}" --out="{}".profraw +llvm-profdata-9 merge -sample -output=current.profraw tests/*/*.profraw diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index c1a108cf..fac45a69 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -96,11 +96,17 @@ jobs: echo ::set-env name=VERSION::$VERSION - name: Build env: - FLAGS: -O3 -DNDEBUG -pipe + AR: gcc-ar + CC: ccache gcc + CXX: ccache g++ + LD: gcc + RANLIB: gcc-ranlib + FLAGS: -O3 -flto -ffunction-sections -fdata-sections -DNDEBUG -pipe + LINKFLAGS: -Wl,--as-needed run: | set -x ./autogen.sh - ./configure CC="ccache gcc" CXX="ccache g++" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" --disable-screenshots + ./configure CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" LDFLAGS="$FLAGS $LINKFLAGS -flto=$(nproc)" --disable-screenshots make -j "$(nproc)" strip src/dosbox - name: Package diff --git a/.gitignore b/.gitignore index a92d820e..ed530663 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,10 @@ suppress_base.json # macOS directory prefs .DS_Store + +# Profiling data +*.prof +*.profraw +*.profraw.imports +*.afdo +*.afdo.imports diff --git a/scripts/automator/build/clang-defaults b/scripts/automator/build/clang-defaults index 1356f071..c9c37cc4 100644 --- a/scripts/automator/build/clang-defaults +++ b/scripts/automator/build/clang-defaults @@ -3,10 +3,10 @@ cc="${ccache}clang${postfix}" cxx="${ccache}clang++${postfix}" # Flag additions -TYPES+=(debug warnmore profile) -cflags_release=("${cflags[@]}" -Os) +TYPES+=(debug warnmore pgotrain) +cflags_release=("${cflags[@]}" -O3) cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) -cflags_profile=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) +cflags_pgotrain=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) cflags_warnmore=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused -Woverloaded-virtual 
-Wpedantic -Wconversion -Wsign-conversion -Wdouble-promotion -Wformat=2) @@ -19,5 +19,4 @@ fi # Modifier additions MODIFIERS=(fdo) -ldflags_fdo=(-fprofile-instr-generate) -cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-}") +cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-${repo_root}/current.afdo}") diff --git a/scripts/automator/build/gcc-defaults b/scripts/automator/build/gcc-defaults index 3a491b65..8422ad35 100644 --- a/scripts/automator/build/gcc-defaults +++ b/scripts/automator/build/gcc-defaults @@ -6,18 +6,20 @@ ld="gcc${postfix}" ranlib="gcc-ranlib${postfix}" # Flag additions -TYPES+=(debug warnmore profile) +TYPES+=(debug warnmore pgotrain fdotrain) cflags+=(-fstack-protector -fdiagnostics-color=auto) cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) -cflags_release=("${cflags[@]}" -Ofast -ffunction-sections -fdata-sections) -cflags_profile=("${cflags_debug[@]}" -pg) +cflags_release=("${cflags[@]}" -DNDEBUG -O3 -ffunction-sections -fdata-sections) +cflags_pgotrain=("${cflags_debug[@]}" -pg) cflags_warnmore=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion -Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2 -Wlogical-op -Wmisleading-indentation -Wnull-dereference -Wshadow -Wunused) cxxonly_warnmore=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast) +cflags_fdotrain=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer) # Modifier additions MODIFIERS=(fdo) -cflags_fdo=("-fauto-profile=${FDO_FILE:-}") +# Override the prior optimization flag because O2 does better w/ feedback +cflags_fdo=("-O2 -fauto-profile=${FDO_FILE:-${repo_root}/current.afdo}") diff --git a/scripts/automator/main.sh b/scripts/automator/main.sh index 95150cc2..4087e9b6 100644 --- a/scripts/automator/main.sh +++ b/scripts/automator/main.sh @@ -109,7 +109,10 @@ function perform_steps() { } function main() { - cd "$(dirname "$0")/automator" + cd "$(dirname "$0")"/.. 
+ export repo_root + repo_root="$PWD" + cd scripts/automator if [[ -z "${data_dir:-}" ]]; then data_dir="$(basename "$0" '.sh')"; fi parse_args "$@" construct_environment diff --git a/scripts/build.md b/scripts/build.md index 1743b66f..b05a4838 100644 --- a/scripts/build.md +++ b/scripts/build.md @@ -1,14 +1,11 @@ # DOSBox Build Script -A script that builds DOSBox with your choice of compiler, release types, and -additional optimization options on MacOS, Linux, and Windows. +This script builds DOSBox with your choice of compiler, release type, and +additional options. It runs on MacOS, Linux, and Windows. -If this is the first time you are attempting to build DOSBox, then you need to -first install the development tools and DOSBox's development packages prior to -building. To help in this regard, the **list-build-dependencies.sh** script -prints a list of packages that you can use to install these dependencies. - -Use of both scripts is described below. +If this is your first time building DOSBox, then you will need to install +DOSBox's development tools and dependencies, which is included in the notes +below. ## Requirements @@ -26,6 +23,7 @@ Use of both scripts is described below. 1. Download and install Chocolatey: 1. Open a console and run Cholocatey's command line interface (CLI) to install msys2 and git: + `choco install msys2 git --no-progress` ``` text @@ -50,22 +48,25 @@ Use of both scripts is described below. 1. Clone and enter the repository's directory: 1. `git clone https://github.com/dreamer/dosbox-staging.git` 1. `cd dosbox-staging` - 1. Run all subsequent steps while residing in the repo's directory. + + Be sure to run all subsequent steps below while inside the repo's directory. -1. (🏁 first-time-only) Install the build tools and package dependencies: - `./scripts/list-build-dependencies.sh -p msys2 | xargs pacman -S --noconfirm` +1. (🏁 first-time-only) Install the build tools and runtime dependencies: -1. 
Launch the build script with default settings: - `./scripts/build/run.sh --bin-path /mingw64/bin` + `./scripts/list-build-dependencies.sh -p msys2 | xargs pacman -S + --noconfirm` -## MacOS Installation and Usage +1. Launch the build script with default settings: + + `./scripts/build/run.sh -c gcc -t release --bin-path /mingw64/bin` -Builds on Mac can be performed with Clang or GCC. +## macOS Installation and Usage -If you only plan on only building with Clang, then follow the Brew -installation steps. If you're interested in building with GCC, then either -Brew or MacPorts will work. Both can be installed without conflicting with -eachother. +Builds on macOS can be performed with Clang or GCC. + +If you only plan on only building with Clang, then follow the Brew installation +steps. If you're interested in building with GCC, then Brew or MacPorts will +work, and both can be installed without conflicting with each other. Before installing either, the Xcode tools need to be installed and the license agreed to: @@ -85,7 +86,8 @@ agreed to: ### MacPorts Installation -1. Build and install MacPorts along with DOSBox dependencies with the following sequence: +1. Build and install MacPorts along with DOSBox dependencies with the following + sequence: ``` shell git clone --quiet --depth=1 https://github.com/macports/macports-base.git @@ -101,59 +103,238 @@ agreed to: ### Build DOSBox (common for all of the above) -1. Clone the repository: `git clone https://github.com/dreamer/dosbox-staging.git` +1. Clone the repository: `git clone + https://github.com/dreamer/dosbox-staging.git` 1. Change directories into the repo: `cd dosbox-staging` 1. 
Build DOSBox: -- Clang: `./scripts/build.sh --compiler clang --bin-path /usr/local/bin` - - GCC (brew): `./scripts/build.sh --compiler-version 9 --bin-path /usr/local/bin` - - GCC (macports): `./scripts/build.sh --compiler-version mp-9 --bin-path /opt/local/bin` +- Clang: `./scripts/build.sh --compiler clang -t release --bin-path + /usr/local/bin` +- GCC (brew): `./scripts/build.sh --compiler-version 9 -t release --bin-path + /usr/local/bin` +- GCC (macports): `./scripts/build.sh --compiler-version mp-9 -t release + --bin-path /opt/local/bin` ## Linux Installation -1. (🏁 first-time-only) Install dependencies based on your package manager; apt - in this example: - `sudo apt install -y $(./scripts/list-build-dependencies.sh -p apt)` - For other supported package managers, run: - `./scripts/list-build-dependencies.sh --help` - 1. Install git: `sudo apt install -y git` -1. Clone the repository: `git clone https://github.com/dreamer/dosbox-staging.git` +1. Clone the repository: `git clone + https://github.com/dreamer/dosbox-staging.git` 1. Change directories into the repo: `cd dosbox-staging` -1. Build DOSBox: `./scripts/build.sh` +1. (🏁 first-time-only) Install dependencies based on your package manager; apt + in this example: `sudo apt install -y $(./scripts/list-build-dependencies.sh + -p apt)` For other supported package managers, run: + `./scripts/list-build-dependencies.sh --help` +1. 
Build DOSBox: + +- Clang: `./scripts/build.sh --compiler clang -t release -v 9` +- GCC (default version): `./scripts/build.sh -c gcc -t release` +- GCC (specific version, e.g. 9): `./scripts/build.sh -c gcc -v 9 -t release` ## Additional Tips -The compiler, version, and bit-depth can be selected by passing the following common -options to the **list-build-dependencies.sh** and **build.sh** scripts: +### Compiler variations -- `--compiler clang`, to use CLang instead of GCC -- `--compiler-version 8`, to use a specific version of compiler - (if available in your package manager) +The compiler, version, and bit-depth can be selected by passing the following +common options to the **list-build-dependencies.sh** and **build.sh** scripts: + +- `--compiler clang` or `-c clang` to use Clang instead of GCC +- `--compiler-version 8` or `-v <version>` to specify a particular version of + compiler (if available in your package manager) - `--bit-depth 32`, to build a 32-bit binary instead of 64-bit -After building, your `dosbox` or `dosbox.exe` binary will reside inside `./dosbox-staging/src/`. +### Release types -Build flags you might be interested in: - -- `--lto`, perform optimizations across the entire object space instead of - per-file (Only available on Mac and Linux) -- `--release debug`, to build a binary containing debug symbols +Build release types include: +- **release**, optimizes the binary and disables some checks, such as + assertions. +- **debug**, adds debug symbols and disables optimizations for ideal debugging. 
- You can run the resulting binary in the GNU debugger: `gdb /path/to/ - dosbox`, followed by `start mygame.bat` -- `--release profile`, to generate performance statistics - - Instructions are provided after the build completes, which describe how to - generate and process the profiling data -- `--release `, to build a binary that performs dynamic - code-analysis at runtime (Linux and macOS) - - see `./scripts/build.sh --help` for a list of sanitizer-types that are - available - - Run your binary like normal and it will generate output describing - problematic behavior - - Some sanitizers accept runtime options via an environment variables, - such as `ASAN_OPTIONS`, described here: - + dosbox`, followed by `run mygame.bat` +- **pgotrain** adds Profile Guided Optimization (PGO) tracking instrumentation + to the compiled binary. + + This allows the recording of profile statistics that can be used to compile a + PGO-optimized binary. Note that PGO optimization is different from + Automatic Feedback Directed Optimization (AutoFDO) mentioned below. + + After compiling your PGO binary, the build script presents instructions + describing how to generate and use the profiling data. + +- **warnmore**, displays additional helpful C and C++ warnings for developers. +- **fdotrain**, adds tracing symbols used to generate AutoFDO sampling data. +- **$SANITIZER TYPE**, builds a binary instrumented with code to catch issues at + runtime that relate to the type of sanitizer being used. For example: memory + leaks, threading issues, and so on. This is for Linux and macOS only. + + - see `./scripts/build.sh --help` for a list of sanitizer-types that are + available. 
+ - Run your binary like normal and it will generate output describing + problematic behavior + - Some sanitizers accept runtime options via environment variables, + such as `ASAN_OPTIONS`, described here: + + +### Build Results, Rebuilding, and Cleaning + +After building, your `dosbox` or `dosbox.exe` binary will reside inside +`./dosbox-staging/src/`. + +The build script records the prior build type and will clean if needed between +builds. To manually remove all intermediate object files and ephemeral +auto-tools outputs, run `make distclean`. + +To additionally remove all files except for the repository files, use `git +clean -fdx`. + +### CCache + +The build script will make use of ccache, which saves compiled objects for +potential re-use in future builds (hence the name, "cache") to speed up build +times. If you performed the one-time installation step above, then you will +already have ccache installed. + +Simply having `ccache` in your path is sufficient to use it; you do not +need to invasively symlink `/usr/bin/gcc` -> `ccache`. + +The build script enables ccache's object compression, which significantly +reduces the size of the cache. It will also display cache statistics after each +build. To see more details, run `ccache -s`. + +To learn more about ccache run `ccache -h`, and read +https://ccache.dev/manual/latest.html + +### Optimization Modifiers + +The following modifier flags can be added when building a **release** type: + +- `-m lto`, optimize the entire object space instead of per-file (Only + available on Mac and Linux) + +- `-m fdo`, performs feedback directed optimizations (FDO) using an AutoFDO + data set. Export the `FDO_FILE` variable with the full path to your merged + FDO dataset. 
For example: + + - GCC: `export FDO_FILE=/full/path/to/current.afdo` and then build with: + + `./scripts/build.sh -c gcc -t release -m fdo -m lto` + + - Clang: `export FDO_FILE=/full/path/to/current.profraw`, and then build + with: + + `./scripts/build.sh -c clang -t release -m fdo -m lto` + + The section below describes how to collect an AutoFDO dataset for GCC and + Clang. + +### Recording Sampling Data used in AutoFDO builds + +Prerequisites: + +- An **Intel processor** that supports the last branch record (LBR) instruction. +- A Linux **kernel** built with Branch Profiling tracers enabled: + + ``` + CONFIG_PM_TRACE=y + CONFIG_TRACE_BRANCH_PROFILING=y + CONFIG_BRANCH_TRACER=y + ``` + + These can be enabled directly in your kernel's `.config` file or using `make + menuconfig` via the following menu options: + 1. `Kernel hacking --->` + 1. `[*] Tracers --->` + 1. `Branch Profiling (Trace likely/unlikely profiler)` + 1. `(X) Trace likely/unlikely profiler` + +- The **AutoFDO** software package. It may be available via your package + manager or built from sources (https://github.com/google/autofdo). + + - **Note about compiler versions** the autofdo binaries need to be compiled + with the exact version of the compiler that will later be used to compile + our final optimized version of dosbox-staging. + + So for example, if you install autofdo via package-manager, then it will be + valid for the default version of gcc and clang also installed by your + package manager. Whereas if you plan to build with `gcc-<version>`, then + you will need to compile autofdo from sources using `gcc-<version>` by + pointing the `CC` and `CXX` environment variables to the newer gcc + binaries. + + - **Note about clang** If you plan to compile with a version of clang newer + than your package manager's default version, then you will need to compile + autofdo from source and configure it with the corresponding version of + `llvm-config`. 
For example, if I want to build with clang-10, then I would + configure autofdo with: `./configure --with-llvm=/usr/bin/llvm-config-10`. + + - The included `.github/scripts/build-autofdo.sh` script can be used to build + and install autofdo, for example: + + - default GCC: + + `sudo .github/scripts/build-autofdo.sh` + - newer GCC: + + ``` + export CC=/usr/bin/gcc-9 + export CXX=/usr/bin/g++-9 + sudo .github/scripts/build-autofdo.sh + ``` + + - Clang version 10: + + `sudo .github/scripts/build-autofdo.sh 10` + +- The **pmu-tools** software package, which can be downloaded from + https://github.com/andikleen/pmu-tools. This is a collection of Python + scripts used to assist in capturing sampling data. + +Procedures: + +1. Ensure the custom Linux Kernel supporting LBR tracing is running. + +1. Build `dosbox-staging` from source using the `fdotrain` target: + `./scripts/build.sh -c gcc -t fdotrain` + +1. Record kernel sample profiles while running dosbox-staging: + + `/path/to/pmu-tools/ocperf.py record -F max -o "samples-1.prof" -b -e + br_inst_retired.near_taken:pp -- /path/to/fdo-trained/dosbox ARGS` + + Where `samples-1.prof` is the file that will be filled with samples. + + Repeat this for multiple training runs, each time saving the output to a new + `-o samples-N.prof` file. Ideally you want to exercise all code paths in + DOSBox (core types, video cards, video modes, sound cards, and audio + codecs). + +1. Convert your sample profiles into compiler-specific records using tools + provided in the `autofdo` package: + + For GCC, run: + - `create_gcov --binary=/path/to/fdo-trained/dosbox + --profile=samples-1.prof -gcov=samples-1.afdo -gcov_version=1` + + ... for each `.prof` file, creating a corresponding `.afdo` file. + + - At this point, you now have an `.afdo` file for each `.prof` file. 
Merge + the `.afdo`s into a single `current.afdo` file with: + + `profile_merger -gcov_version=1 -output_file=current.afdo *.afdo` + + For Clang, run: + + - `create_llvm_prof --binary=/path/to/fdo-trained/dosbox + --profile=samples-1.prof --out=samples-1.profraw` + + ... for each `*.prof` file, creating a corresponding `.profraw` file. + + - At this point, you now have a `.profraw` file for each `.prof` file. Merge + them into a single `current.profraw` file with: + + `llvm-profdata-<version> merge -sample -output=current.profraw *.profraw` + +You can now use your merged `.afdo` or `.profraw` file to build with the `-m +fdo` modifier by placing your `current.afdo/.profraw` file in the repo's root +directory, or by pointing to it using the FDO_FILE environment variable, and launch +the build with `./scripts/build.sh -c <compiler> -t release -m lto -m fdo`.