1
0
Fork 0

Enable NEON SIMD for corresponding ARM platforms

This commit is contained in:
kcgen 2020-04-26 14:54:10 -07:00 committed by Patryk Obara
parent 2f4830830e
commit ec43dabba4
5 changed files with 60 additions and 5 deletions

View file

@ -1,3 +1,14 @@
# GCC flags for generically identified AArch64
# Note: Advanced SIMD (aka NEON) is mandatory for AArch64 and implied
cflags_release+=(-mcpu=native -mstrict-align)
# Note: NEON SIMD instructions for floating-point operations are
# not generated by GCC's auto-vectorization pass unless
# -funsafe-math-optimizations is also specified. This is because
# NEON hardware does not fully implement the IEEE 754 standard for
# some floating-point arithmetic operations: specifically,
# "denormal" values are treated as zero, so in these corner cases
# the use of NEON instructions may lead to a loss of precision.
# For our purposes, we expect to perform normal calculations and
# thus accept this risk for release builds.
cflags_release+=(-mcpu=native -funsafe-math-optimizations -mstrict-align)

View file

@ -1,2 +1,13 @@
# GCC flags for generically identified ARMv7 MALI SBCs
cflags_release+=(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard)
# Note: NEON SIMD instructions for floating-point operations are
# not generated by GCC's auto-vectorization pass unless
# -funsafe-math-optimizations is also specified. This is because
# NEON hardware does not fully implement the IEEE 754 standard for
# some floating-point arithmetic operations: specifically,
# "denormal" values are treated as zero, so in these corner cases
# the use of NEON instructions may lead to a loss of precision.
# For our purposes, we expect to perform normal calculations and
# thus accept this risk for release builds.
cflags_release+=(-march=armv7-a -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)

View file

@ -1,2 +1,13 @@
# GCC flags specific to the Raspberry Pi 2 series of SBC
cflags_release+=(-mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard)
# Note: NEON SIMD instructions for floating-point operations are
# not generated by GCC's auto-vectorization pass unless
# -funsafe-math-optimizations is also specified. This is because
# NEON hardware does not fully implement the IEEE 754 standard for
# some floating-point arithmetic operations: specifically,
# "denormal" values are treated as zero, so in these corner cases
# the use of NEON instructions may lead to a loss of precision.
# For our purposes, we expect to perform normal calculations and
# thus accept this risk for release builds.
cflags_release+=(-mcpu=cortex-a7 -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)

View file

@ -1,4 +1,15 @@
# GCC flags specific to the Raspberry Pi 3 series of SBC
cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53
# Note: NEON SIMD instructions for floating-point operations are
# not generated by GCC's auto-vectorization pass unless
# -funsafe-math-optimizations is also specified. This is because
# NEON hardware does not fully implement the IEEE 754 standard for
# some floating-point arithmetic operations: specifically,
# "denormal" values are treated as zero, so in these corner cases
# the use of NEON instructions may lead to a loss of precision.
# For our purposes, we expect to perform normal calculations and
# thus accept this risk for release builds.
cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 -funsafe-math-optimizations
-mfpu=neon-fp-armv8 -mfloat-abi=hard)

View file

@ -1,3 +1,14 @@
# GCC flags specific to the Raspberry Pi 4 series of SBC
cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72
# Note: NEON SIMD instructions for floating-point operations are
# not generated by GCC's auto-vectorization pass unless
# -funsafe-math-optimizations is also specified. This is because
# NEON hardware does not fully implement the IEEE 754 standard for
# some floating-point arithmetic operations: specifically,
# "denormal" values are treated as zero, so in these corner cases
# the use of NEON instructions may lead to a loss of precision.
# For our purposes, we expect to perform normal calculations and
# thus accept this risk for release builds.
cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 -funsafe-math-optimizations
-mfpu=neon-fp-armv8 -mfloat-abi=hard)