From ec43dabba4d3f192d77f366d4adf9469bcd52cf9 Mon Sep 17 00:00:00 2001
From: kcgen <1557255+kcgen@users.noreply.github.com>
Date: Sun, 26 Apr 2020 14:54:10 -0700
Subject: [PATCH] Enable NEON SIMD for corresponding ARM platforms

---
 scripts/automator/build/gcc-linux_aarch64    | 13 ++++++++++++-
 scripts/automator/build/gcc-linux_armv7_mali | 13 ++++++++++++-
 scripts/automator/build/gcc-linux_rpi2       | 13 ++++++++++++-
 scripts/automator/build/gcc-linux_rpi3       | 13 ++++++++++++-
 scripts/automator/build/gcc-linux_rpi4       | 13 ++++++++++++-
 5 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/scripts/automator/build/gcc-linux_aarch64 b/scripts/automator/build/gcc-linux_aarch64
index 7cb2cd47..48eb4f87 100644
--- a/scripts/automator/build/gcc-linux_aarch64
+++ b/scripts/automator/build/gcc-linux_aarch64
@@ -1,3 +1,14 @@
 # GCC flags for generically identified AArch64
 # Note: Advanced SIMD (aka NEON) is mandatory for AArch64 and implied
-cflags_release+=(-mcpu=native -mstrict-align)
+
+# Note: on 32-bit ARM, GCC's auto-vectorization pass does not emit
+# NEON instructions for floating-point operations unless
+# -funsafe-math-optimizations is specified, because AArch32 NEON
+# treats "denormal" values as zero and so is not fully IEEE 754
+# compliant. AArch64 Advanced SIMD does not have that limitation,
+# so here the flag chiefly permits reassociation (for example,
+# vectorized floating-point reductions) at some cost in precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-mcpu=native -funsafe-math-optimizations -mstrict-align)
diff --git a/scripts/automator/build/gcc-linux_armv7_mali b/scripts/automator/build/gcc-linux_armv7_mali
index ed7ba8c2..12511902 100644
--- a/scripts/automator/build/gcc-linux_armv7_mali
+++ b/scripts/automator/build/gcc-linux_armv7_mali
@@ -1,2 +1,13 @@
 # GCC flags for generically identified ARMv7 MALI SBCs
-cflags_release+=(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard)
+
+# Note: GCC's auto-vectorization pass does not generate NEON SIMD
+# instructions for floating-point operations unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# some floating-point arithmetic operations; specifically,
+# "denormal" values are treated as zero, so in these corner cases
+# the use of NEON instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv7-a -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)
diff --git a/scripts/automator/build/gcc-linux_rpi2 b/scripts/automator/build/gcc-linux_rpi2
index 2db2ac81..c16bcd70 100644
--- a/scripts/automator/build/gcc-linux_rpi2
+++ b/scripts/automator/build/gcc-linux_rpi2
@@ -1,2 +1,13 @@
 # GCC flags specific to the Raspberry Pi 2 series of SBC
-cflags_release+=(-mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard)
+
+# Note: GCC's auto-vectorization pass does not generate NEON SIMD
+# instructions for floating-point operations unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# some floating-point arithmetic operations; specifically,
+# "denormal" values are treated as zero, so in these corner cases
+# the use of NEON instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-mcpu=cortex-a7 -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard)
diff --git a/scripts/automator/build/gcc-linux_rpi3 b/scripts/automator/build/gcc-linux_rpi3
index 54698406..3e697ee1 100644
--- a/scripts/automator/build/gcc-linux_rpi3
+++ b/scripts/automator/build/gcc-linux_rpi3
@@ -1,4 +1,15 @@
 # GCC flags specific to the Raspberry Pi 3 series of SBC
-cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 
+
+# Note: GCC's auto-vectorization pass does not generate NEON SIMD
+# instructions for floating-point operations unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# some floating-point arithmetic operations; specifically,
+# "denormal" values are treated as zero, so in these corner cases
+# the use of NEON instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 -funsafe-math-optimizations
                  -mfpu=neon-fp-armv8 -mfloat-abi=hard)
 
diff --git a/scripts/automator/build/gcc-linux_rpi4 b/scripts/automator/build/gcc-linux_rpi4
index bff6f080..8f651bcf 100644
--- a/scripts/automator/build/gcc-linux_rpi4
+++ b/scripts/automator/build/gcc-linux_rpi4
@@ -1,3 +1,14 @@
 # GCC flags specific to the Raspberry Pi 4 series of SBC
-cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 
+
+# Note: GCC's auto-vectorization pass does not generate NEON SIMD
+# instructions for floating-point operations unless
+# -funsafe-math-optimizations is also specified. This is because
+# NEON hardware does not fully implement the IEEE 754 standard for
+# some floating-point arithmetic operations; specifically,
+# "denormal" values are treated as zero, so in these corner cases
+# the use of NEON instructions may lead to a loss of precision.
+# For our purposes, we expect to perform normal calculations and
+# thus accept this risk for release builds.
+
+cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 -funsafe-math-optimizations
                  -mfpu=neon-fp-armv8 -mfloat-abi=hard)