1
0
Fork 0

Add an 'optinfo' build target

The 'optinfo' build target asks GCC to print optimizations that
could not be performed to local 'missed.txt' files; these will
appear in each respective subdirectory having source files.

Both GCC and Clang will now print verbose vectorization information
during the build process, often describing why vectorization
cannot be performed.

This commit also enables basic instruction and math vectorization
for both the 'release' and 'optinfo' targets. This includes making
use of altivec instructions (available on all powerpc processors),
and at a minimum sse4.2 on all x86_64 processors (circa-2008+ AMD
and Intel CPUs).

Vectorization is also re-enabled for GCC FDO builds, where it would
otherwise be disabled when we switch to -O2 optimizations.
This commit is contained in:
krcroft 2020-04-06 21:08:35 -07:00 committed by Patryk Obara
parent c7287e116e
commit de3958fd74
7 changed files with 51 additions and 25 deletions

1
.gitignore vendored
View file

@ -58,6 +58,7 @@ make.log
.previous_build
.current_build
*.defines
missed.txt
# Visual Studio
.vs

View file

@ -2,21 +2,26 @@
# Clang toolchain settings: compiler commands, per-build-type flag arrays,
# and build modifiers.
# NOTE(review): this hunk is rendered without +/- markers; the plain '='
# assignments below look like the pre-change lines and the '+=' assignments
# like the post-change lines -- confirm against the raw diff.
# Compiler commands: prefixed by ${ccache} and suffixed by ${postfix},
# both of which are set outside this hunk.
cc="${ccache}clang${postfix}"
cxx="${ccache}clang++${postfix}"
# Flag additions
TYPES+=(debug warnmore pgotrain)
cflags_release=("${cflags[@]}" -O3)
cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer)
cflags_pgotrain=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping)
cflags_warnmore=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused
-Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion
-Wdouble-promotion -Wformat=2)
cxxonly_warnmore=(-Wnon-virtual-dtor -Woverloaded-virtual)
# Colorize output only for interactive shells
# ($- holds the current shell option letters; 'i' marks an interactive shell)
if [[ $- == *i* ]]; then
cflags+=(-fcolor-diagnostics)
fi
# Release-type additions
TYPES+=(release debug warnmore pgotrain optinfo)
# release: base flags with assertions disabled (-DNDEBUG) and -O3.
# Note: -fno-math-errno allows better optimization of C/C++ math library functions
cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-math-errno -fno-strict-aliasing)
# debug: base flags with debug info, keeping frame pointers.
cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer)
# pgotrain: debug flags plus profile-generation and coverage-mapping
# instrumentation.
cflags_pgotrain+=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping)
# warnmore: debug flags plus an extended warning set; the cxxonly_ array
# carries the warnings listed for C++ only.
cflags_warnmore+=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused
-Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion
-Wdouble-promotion -Wformat=2)
cxxonly_warnmore+=(-Wnon-virtual-dtor -Woverloaded-virtual)
# optinfo: release flags plus loop-vectorizer analysis remarks. The line-table
# and column debug info is presumably there so the remarks can reference
# source locations -- confirm against the Clang remark documentation.
cflags_optinfo+=("${cflags_release[@]}" -Rpass-analysis=loop-vectorize
-gline-tables-only -gcolumn-info)
# Modifier additions
MODIFIERS=(fdo)
# fdo: sample-profile input file; falls back to current.afdo under
# ${repo_root} when FDO_FILE is unset or empty.
cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-${repo_root}/current.afdo}")

View file

@ -10,10 +10,6 @@ cxxonly=("")
ldflags=("")
libs=("")
# Builds for all compilers
TYPES=(release)
cflags_release=("${cflags[@]}" -DNDEBUG)
# Use ccache if it's available
if command -v ccache &> /dev/null; then
ccache="ccache "

View file

@ -2,6 +2,11 @@
# Archiver tools used on macOS.
ar="ar"
ranlib="ranlib"
# The oldest processor supported by macOS is nehalem, so we can safely
# enable its full instruction set given that all subsequent processors
# support it.
cflags_release+=(-march=nehalem)
cflags_optinfo+=(-march=nehalem)
# Build additions
TYPES+=(asan uasan usan tsan)
# asan: debug flags plus AddressSanitizer instrumentation.
cflags_asan=("${cflags_debug[@]}" -fsanitize=address)

View file

@ -5,19 +5,28 @@ cxx="${ccache}g++${postfix}"
# GCC toolchain settings: linker/ranlib commands, per-build-type flag arrays,
# and build modifiers.
# NOTE(review): this hunk is rendered without +/- markers; the plain '='
# assignments below look like the pre-change lines and the '+=' assignments
# like the post-change lines -- confirm against the raw diff.
ld="gcc${postfix}"
ranlib="gcc-ranlib${postfix}"
# Flag additions
TYPES+=(debug warnmore pgotrain fdotrain)
# Release-type additions
TYPES+=(release debug warnmore pgotrain fdotrain optinfo)
# Flags shared by every GCC build type.
cflags+=(-fstack-protector -fdiagnostics-color=auto)
cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer)
cflags_release=("${cflags[@]}" -DNDEBUG -O3 -ffunction-sections -fdata-sections)
cflags_pgotrain=("${cflags_debug[@]}" -pg)
cflags_warnmore=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion
-Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2
-Wlogical-op -Wmisleading-indentation -Wnull-dereference
-Wshadow -Wunused)
cxxonly_warnmore=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast)
cflags_fdotrain=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer)
# debug: base flags with debug info, keeping frame pointers.
cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer)
# release: base flags with assertions disabled (-DNDEBUG), -O3, relaxed
# floating-point semantics, and per-function/per-data sections.
# Note: associative-math is needed for vectorization of floating point
# calculations, which also relies on no-signed-zeros and
# no-trapping-math.
cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-strict-aliasing
-fno-signed-zeros -fno-trapping-math -fassociative-math
-frename-registers -ffunction-sections -fdata-sections)
# pgotrain: debug flags plus -pg profiling instrumentation, with tree
# vectorization explicitly requested.
cflags_pgotrain+=("${cflags_debug[@]}" -pg -ftree-vectorize)
# warnmore: debug flags plus an extended warning set; the cxxonly_ array
# carries the warnings listed for C++ only.
cflags_warnmore+=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion
-Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2
-Wlogical-op -Wmisleading-indentation -Wnull-dereference
-Wshadow -Wunused)
cxxonly_warnmore+=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast)
# fdotrain: base flags with assertions disabled, minimal debug info (-g1),
# and frame pointers kept.
cflags_fdotrain+=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer)
# optinfo: release flags plus GCC's report of missed optimizations.
# NOTE(review): -fopt-info-missed is given no '=file' argument here, while
# .gitignore lists missed.txt -- confirm where the report is expected to land.
# NOTE(review): -ftree-vectorizer-verbose appears to be documented by GCC as
# deprecated in favour of the -fopt-info family -- confirm for the GCC
# versions this build supports.
cflags_optinfo+=("${cflags_release[@]}" -fopt-info-missed
-ftree-vectorizer-verbose=6)
# Modifier additions
MODIFIERS=(fdo)

View file

@ -1,6 +1,11 @@
# Tool additions
ldflags+=(-Wl,--as-needed)
# Enable math vectorizations using instructions available in circa-2008+ CPUs
# (SSE floating-point math with the SSE4.2 instruction set as the baseline)
x86_math=(-mfpmath=sse -msse4.2)
cflags_release+=("${x86_math[@]}")
cflags_optinfo+=("${x86_math[@]}")
# Build additions
TYPES+=(asan uasan usan tsan)
# asan: debug flags plus AddressSanitizer instrumentation.
cflags_asan=("${cflags_debug[@]}" -fsanitize=address)

View file

@ -1,3 +1,8 @@
# Enable math vectorizations using instructions available in circa-2008+ CPUs
# (SSE floating-point math with the SSE4.2 instruction set as the baseline)
x86_math=(-mfpmath=sse -msse4.2)
cflags_release+=("${x86_math[@]}")
cflags_optinfo+=("${x86_math[@]}")
# Flag additions
# Link only needed libraries and statically link the GCC/C++ runtimes.
ldflags+=(-Wl,--as-needed -static-libgcc -static-libstdc++)