Enable NEON SIMD for corresponding ARM platforms

2020-04-26 14:54:10 -07:00 · 2020-04-26 14:54:10 -07:00 · ec43dabba4
commit ec43dabba4
parent 2f4830830e
5 changed files with 60 additions and 5 deletions
--- a/scripts/automator/build/gcc-linux_aarch64
+++ b/scripts/automator/build/gcc-linux_aarch64
@@ -1,3 +1,14 @@
 # GCC flags for generically identified AArch64
 # Note: Advanced SIMD (aka NEON) is mandatory for AArch64 and implied
-cflags_release+=(-mcpu=native -mstrict-align)
+
+# Note: GCC documents that NEON floating-point instructions are not
+# generated by its auto-vectorization pass unless
+# -funsafe-math-optimizations is also specified, because NEON hardware
+# does not fully implement the IEEE 754 standard ("denormal" values
+# are treated as zero). That caveat is written for the 32-bit ARM
+# -mfpu=neon* options; AArch64 Advanced SIMD is believed to be
+# IEEE 754 compliant, so the flag may be unnecessary here (TODO confirm).
+# For our purposes, we expect to perform normal calculations and
+# thus accept the reduced-precision risk for release builds.
+
+cflags_release+=(-mcpu=native -funsafe-math-optimizations -mstrict-align)
--- a/scripts/automator/build/gcc-linux_armv7_mali
+++ b/scripts/automator/build/gcc-linux_armv7_mali
@@ -1,2 +1,13 @@
 # GCC flags for generically identified ARMv7 MALI SBCs
-cflags_release+=(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard)
+
+# Note: NEON SIMD instructions for floating-point operations are
+# not generated by GCC's auto-vectorization pass unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# floating-point arithmetic: in particular, "denormal" values are
+# treated as zero, so in these corner cases the use of NEON
+# instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv7-a -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)
--- a/scripts/automator/build/gcc-linux_rpi2
+++ b/scripts/automator/build/gcc-linux_rpi2
@@ -1,2 +1,13 @@
 # GCC flags specific to the Raspberry Pi 2 series of SBC
-cflags_release+=(-mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard)
+
+# Note: NEON SIMD instructions for floating-point operations are
+# not generated by GCC's auto-vectorization pass unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# floating-point arithmetic: in particular, "denormal" values are
+# treated as zero, so in these corner cases the use of NEON
+# instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-mcpu=cortex-a7 -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)
--- a/scripts/automator/build/gcc-linux_rpi3
+++ b/scripts/automator/build/gcc-linux_rpi3
@@ -1,4 +1,15 @@
 # GCC flags specific to the Raspberry Pi 3 series of SBC
-cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 
+
+# Note: NEON SIMD instructions for floating-point operations are
+# not generated by GCC's auto-vectorization pass unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# floating-point arithmetic: in particular, "denormal" values are
+# treated as zero, so in these corner cases the use of NEON
+# instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 -funsafe-math-optimizations
                 -mfpu=neon-fp-armv8 -mfloat-abi=hard)

--- a/scripts/automator/build/gcc-linux_rpi4
+++ b/scripts/automator/build/gcc-linux_rpi4
@@ -1,3 +1,14 @@
 # GCC flags specific to the Raspberry Pi 4 series of SBC
-cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 
+
+# Note: NEON SIMD instructions for floating-point operations are
+# not generated by GCC's auto-vectorization pass unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# floating-point arithmetic: in particular, "denormal" values are
+# treated as zero, so in these corner cases the use of NEON
+# instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 -funsafe-math-optimizations
                 -mfpu=neon-fp-armv8 -mfloat-abi=hard)