From de3958fd74acbea10db4c1863460c0f2eb2389f1 Mon Sep 17 00:00:00 2001 From: krcroft Date: Mon, 6 Apr 2020 21:08:35 -0700 Subject: [PATCH] Add an 'optinfo' build target The 'optinfo' build target asks GCC to print optimizations that could not be performed to local 'missed.txt' files; these will appear in each respective subdirectory having source files. Both GCC and Clang will now print verbose vectorization information during the build process, often describing why vectorization cannot be performed. This commit also enables basic instruction and math vectorization for both the 'release' and 'optinfo' targets. This includes making use of altivec instructions (available on all powerpc processors), and at a minimum sse4.2 on all x86_64 processors (circa-2008+ AMD and Intel CPUs). Vectorization is also re-enabled for GCC FDO builds, which would otherwise be disabled when we switch to -O2 optimizations. --- .gitignore | 1 + scripts/automator/build/clang-defaults | 25 ++++++++++------- scripts/automator/build/compiler-defaults | 4 --- scripts/automator/build/gcc-darwin_x86_64 | 5 ++++ scripts/automator/build/gcc-defaults | 31 ++++++++++++++-------- scripts/automator/build/gcc-linux_x86_64 | 5 ++++ scripts/automator/build/gcc-msys_nt_x86_64 | 5 ++++ 7 files changed, 51 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index ed530663..0e39916b 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,7 @@ make.log .previous_build .current_build *.defines +missed.txt # Visual Studio .vs diff --git a/scripts/automator/build/clang-defaults b/scripts/automator/build/clang-defaults index c9c37cc4..d9e111a4 100644 --- a/scripts/automator/build/clang-defaults +++ b/scripts/automator/build/clang-defaults @@ -2,21 +2,26 @@ cc="${ccache}clang${postfix}" cxx="${ccache}clang++${postfix}" -# Flag additions -TYPES+=(debug warnmore pgotrain) -cflags_release=("${cflags[@]}" -O3) -cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) 
-cflags_pgotrain=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) -cflags_warnmore=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused - -Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion - -Wdouble-promotion -Wformat=2) -cxxonly_warnmore=(-Wnon-virtual-dtor -Woverloaded-virtual) - # Colorize output only for interactive shells if [[ $- == *i* ]]; then cflags+=(-fcolor-diagnostics) fi +# Release-type additions +TYPES+=(release debug warnmore pgotrain optinfo) + +# Note: -fno-math-errno allows better optimization of C/C++ math library functions +cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-math-errno -fno-strict-aliasing) + +cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer) +cflags_pgotrain+=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping) +cflags_warnmore+=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused + -Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion + -Wdouble-promotion -Wformat=2) +cxxonly_warnmore+=(-Wnon-virtual-dtor -Woverloaded-virtual) +cflags_optinfo+=("${cflags_release[@]}" -Rpass-analysis=loop-vectorize + -gline-tables-only -gcolumn-info) + # Modifier additions MODIFIERS=(fdo) cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-${repo_root}/current.afdo}") diff --git a/scripts/automator/build/compiler-defaults b/scripts/automator/build/compiler-defaults index caab2271..9af67cfd 100644 --- a/scripts/automator/build/compiler-defaults +++ b/scripts/automator/build/compiler-defaults @@ -10,10 +10,6 @@ cxxonly=("") ldflags=("") libs=("") -# Builds for all compilers -TYPES=(release) -cflags_release=("${cflags[@]}" -DNDEBUG) - # Use ccache if it's available if command -v ccache &> /dev/null; then ccache="ccache " diff --git a/scripts/automator/build/gcc-darwin_x86_64 b/scripts/automator/build/gcc-darwin_x86_64 index bc62edb0..2fd3ed0f 100644 --- a/scripts/automator/build/gcc-darwin_x86_64 +++ b/scripts/automator/build/gcc-darwin_x86_64 @@ -2,6 +2,11 @@ ar="ar" 
ranlib="ranlib" +# The oldest processor supported by macOS is nehalem, so we can safely +# enable its full instruction set given all subsequent processors support it. +cflags_release+=(-march=nehalem) +cflags_optinfo+=(-march=nehalem) + # Build additions TYPES+=(asan uasan usan tsan) cflags_asan=("${cflags_debug[@]}" -fsanitize=address) diff --git a/scripts/automator/build/gcc-defaults b/scripts/automator/build/gcc-defaults index 8422ad35..d85e1110 100644 --- a/scripts/automator/build/gcc-defaults +++ b/scripts/automator/build/gcc-defaults @@ -5,19 +5,28 @@ cxx="${ccache}g++${postfix}" ld="gcc${postfix}" ranlib="gcc-ranlib${postfix}" -# Flag additions -TYPES+=(debug warnmore pgotrain fdotrain) +# Release-type additions +TYPES+=(release debug warnmore pgotrain fdotrain optinfo) cflags+=(-fstack-protector -fdiagnostics-color=auto) -cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer) -cflags_release=("${cflags[@]}" -DNDEBUG -O3 -ffunction-sections -fdata-sections) -cflags_pgotrain=("${cflags_debug[@]}" -pg) -cflags_warnmore=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion - -Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2 - -Wlogical-op -Wmisleading-indentation -Wnull-dereference - -Wshadow -Wunused) -cxxonly_warnmore=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast) -cflags_fdotrain=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer) +cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer) + +# Note: associative-math is needed for vectorization of floating point +# calculations, which also relies on no-signed-zeros and +# no-trapping-math. 
+cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-strict-aliasing + -fno-signed-zeros -fno-trapping-math -fassociative-math + -frename-registers -ffunction-sections -fdata-sections) + +cflags_pgotrain+=("${cflags_debug[@]}" -pg -ftree-vectorize) +cflags_warnmore+=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion + -Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2 + -Wlogical-op -Wmisleading-indentation -Wnull-dereference + -Wshadow -Wunused) +cxxonly_warnmore+=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast) +cflags_fdotrain+=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer) +cflags_optinfo+=("${cflags_release[@]}" -fopt-info-missed + -ftree-vectorizer-verbose=6) # Modifier additions MODIFIERS=(fdo) diff --git a/scripts/automator/build/gcc-linux_x86_64 b/scripts/automator/build/gcc-linux_x86_64 index 41c2c97b..2bac8d51 100644 --- a/scripts/automator/build/gcc-linux_x86_64 +++ b/scripts/automator/build/gcc-linux_x86_64 @@ -1,6 +1,11 @@ # Tool additions ldflags+=(-Wl,--as-needed) +# Enable math vectorizations using instructions available in circa-2008+ CPUs +x86_math=(-mfpmath=sse -msse4.2) +cflags_release+=("${x86_math[@]}") +cflags_optinfo+=("${x86_math[@]}") + # Build additions TYPES+=(asan uasan usan tsan) cflags_asan=("${cflags_debug[@]}" -fsanitize=address) diff --git a/scripts/automator/build/gcc-msys_nt_x86_64 b/scripts/automator/build/gcc-msys_nt_x86_64 index 0a9df17d..b737fa47 100644 --- a/scripts/automator/build/gcc-msys_nt_x86_64 +++ b/scripts/automator/build/gcc-msys_nt_x86_64 @@ -1,3 +1,8 @@ +# Enable math vectorizations using instructions available in circa-2008+ CPUs +x86_math=(-mfpmath=sse -msse4.2) +cflags_release+=("${x86_math[@]}") +cflags_optinfo+=("${x86_math[@]}") + # Flag additions ldflags+=(-Wl,--as-needed -static-libgcc -static-libstdc++)