Add an 'optinfo' build target
The 'optinfo' build target asks GCC to print optimizations that could not be performed to local 'missed.txt' files; these will appear in each respective subdirectory having source files. Both GCC and Clang will now print verbose vectorization information during the build process, often describing why vectorization cannot be performed. This commit also enables basic instruction and math vectorization for both the 'release' and 'optinfo' targets. This includes making use of altivec instructions (available on all powerpc processors), and at a minimum sse4.2 on all x86_64 processors (circa-2008+ AMD and Intel CPUs). Vectorization is also re-enabled for GCC FDO builds, which would otherwise be disabled when we switch to -O2 optimizations.
This commit is contained in:
parent
c7287e116e
commit
de3958fd74
7 changed files with 51 additions and 25 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -58,6 +58,7 @@ make.log
|
|||
.previous_build
|
||||
.current_build
|
||||
*.defines
|
||||
missed.txt
|
||||
|
||||
# Visual Studio
|
||||
.vs
|
||||
|
|
|
@ -2,21 +2,26 @@
|
|||
cc="${ccache}clang${postfix}"
|
||||
cxx="${ccache}clang++${postfix}"
|
||||
|
||||
# Flag additions
|
||||
TYPES+=(debug warnmore pgotrain)
|
||||
cflags_release=("${cflags[@]}" -O3)
|
||||
cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer)
|
||||
cflags_pgotrain=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping)
|
||||
cflags_warnmore=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused
|
||||
-Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion
|
||||
-Wdouble-promotion -Wformat=2)
|
||||
cxxonly_warnmore=(-Wnon-virtual-dtor -Woverloaded-virtual)
|
||||
|
||||
# Colorize output only for interactive shells
|
||||
if [[ $- == *i* ]]; then
|
||||
cflags+=(-fcolor-diagnostics)
|
||||
fi
|
||||
|
||||
# Release-type additions
|
||||
TYPES+=(release debug warnmore pgotrain optinfo)
|
||||
|
||||
# Note: -fno-math-errno allows better optimization of C/C++ math library functions
|
||||
cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-math-errno -fno-strict-aliasing)
|
||||
|
||||
cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer)
|
||||
cflags_pgotrain+=("${cflags_debug[@]}" -fprofile-instr-generate -fcoverage-mapping)
|
||||
cflags_warnmore+=("${cflags_debug[@]}" -Wextra -Wshadow -Wcast-align -Wunused
|
||||
-Woverloaded-virtual -Wpedantic -Wconversion -Wsign-conversion
|
||||
-Wdouble-promotion -Wformat=2)
|
||||
cxxonly_warnmore+=(-Wnon-virtual-dtor -Woverloaded-virtual)
|
||||
cflags_optinfo+=("${cflags_release[@]}" -Rpass-analysis=loop-vectorize
|
||||
-gline-tables-only -gcolumn-info)
|
||||
|
||||
# Modifier additions
|
||||
MODIFIERS=(fdo)
|
||||
cflags_fdo=("-fprofile-sample-use=${FDO_FILE:-${repo_root}/current.afdo}")
|
||||
|
|
|
@ -10,10 +10,6 @@ cxxonly=("")
|
|||
ldflags=("")
|
||||
libs=("")
|
||||
|
||||
# Builds for all compilers
|
||||
TYPES=(release)
|
||||
cflags_release=("${cflags[@]}" -DNDEBUG)
|
||||
|
||||
# Use ccache if it's available
|
||||
if command -v ccache &> /dev/null; then
|
||||
ccache="ccache "
|
||||
|
|
|
@ -2,6 +2,11 @@
|
|||
ar="ar"
|
||||
ranlib="ranlib"
|
||||
|
||||
# The oldest processor supported by macOS is nehalem, so we can safely
|
||||
# enable its full instruction set, given that all subsequent processors support it.
|
||||
cflags_release+=(-march=nehalem)
|
||||
cflags_optinfo+=(-march=nehalem)
|
||||
|
||||
# Build additions
|
||||
TYPES+=(asan uasan usan tsan)
|
||||
cflags_asan=("${cflags_debug[@]}" -fsanitize=address)
|
||||
|
|
|
@ -5,19 +5,28 @@ cxx="${ccache}g++${postfix}"
|
|||
ld="gcc${postfix}"
|
||||
ranlib="gcc-ranlib${postfix}"
|
||||
|
||||
# Flag additions
|
||||
TYPES+=(debug warnmore pgotrain fdotrain)
|
||||
# Release-type additions
|
||||
TYPES+=(release debug warnmore pgotrain fdotrain optinfo)
|
||||
|
||||
cflags+=(-fstack-protector -fdiagnostics-color=auto)
|
||||
cflags_debug=("${cflags[@]}" -g -fno-omit-frame-pointer)
|
||||
cflags_release=("${cflags[@]}" -DNDEBUG -O3 -ffunction-sections -fdata-sections)
|
||||
cflags_pgotrain=("${cflags_debug[@]}" -pg)
|
||||
cflags_warnmore=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion
|
||||
-Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2
|
||||
-Wlogical-op -Wmisleading-indentation -Wnull-dereference
|
||||
-Wshadow -Wunused)
|
||||
cxxonly_warnmore=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast)
|
||||
cflags_fdotrain=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer)
|
||||
cflags_debug+=("${cflags[@]}" -g -fno-omit-frame-pointer)
|
||||
|
||||
# Note: associative-math is needed for vectorization of floating point
|
||||
# calculations, which also relies on no-signed-zeros and
|
||||
# no-trapping-math.
|
||||
cflags_release+=("${cflags[@]}" -DNDEBUG -O3 -fno-strict-aliasing
|
||||
-fno-signed-zeros -fno-trapping-math -fassociative-math
|
||||
-frename-registers -ffunction-sections -fdata-sections)
|
||||
|
||||
cflags_pgotrain+=("${cflags_debug[@]}" -pg -ftree-vectorize)
|
||||
cflags_warnmore+=("${cflags_debug[@]}" -pedantic -Wcast-align -Wdouble-promotion
|
||||
-Wduplicated-branches -Wduplicated-cond -Wextra -Wformat=2
|
||||
-Wlogical-op -Wmisleading-indentation -Wnull-dereference
|
||||
-Wshadow -Wunused)
|
||||
cxxonly_warnmore+=(-Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wuseless-cast)
|
||||
cflags_fdotrain+=("${cflags[@]}" -DNDEBUG -g1 -fno-omit-frame-pointer)
|
||||
cflags_optinfo+=("${cflags_release[@]}" -fopt-info-missed
|
||||
-ftree-vectorizer-verbose=6)
|
||||
|
||||
# Modifier additions
|
||||
MODIFIERS=(fdo)
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
# Tool additions
|
||||
ldflags+=(-Wl,--as-needed)
|
||||
|
||||
# Enable math vectorizations using instructions available in circa-2008+ CPUs
|
||||
x86_math=(-mfpmath=sse -msse4.2)
|
||||
cflags_release+=("${x86_math[@]}")
|
||||
cflags_optinfo+=("${x86_math[@]}")
|
||||
|
||||
# Build additions
|
||||
TYPES+=(asan uasan usan tsan)
|
||||
cflags_asan=("${cflags_debug[@]}" -fsanitize=address)
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
# Enable math vectorizations using instructions available in circa-2008+ CPUs
|
||||
x86_math=(-mfpmath=sse -msse4.2)
|
||||
cflags_release+=("${x86_math[@]}")
|
||||
cflags_optinfo+=("${x86_math[@]}")
|
||||
|
||||
# Flag additions
|
||||
ldflags+=(-Wl,--as-needed -static-libgcc -static-libstdc++)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue