From ec43dabba4d3f192d77f366d4adf9469bcd52cf9 Mon Sep 17 00:00:00 2001 From: kcgen <1557255+kcgen@users.noreply.github.com> Date: Sun, 26 Apr 2020 14:54:10 -0700 Subject: [PATCH] Enable NEON SIMD for corresponding ARM platforms --- scripts/automator/build/gcc-linux_aarch64 | 13 ++++++++++++- scripts/automator/build/gcc-linux_armv7_mali | 13 ++++++++++++- scripts/automator/build/gcc-linux_rpi2 | 13 ++++++++++++- scripts/automator/build/gcc-linux_rpi3 | 13 ++++++++++++- scripts/automator/build/gcc-linux_rpi4 | 13 ++++++++++++- 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/scripts/automator/build/gcc-linux_aarch64 b/scripts/automator/build/gcc-linux_aarch64 index 7cb2cd47..48eb4f87 100644 --- a/scripts/automator/build/gcc-linux_aarch64 +++ b/scripts/automator/build/gcc-linux_aarch64 @@ -1,3 +1,14 @@ # GCC flags for generically identified AArch64 # Note: Advanced SIMD (aka NEON) is mandatory for AArch64 and implied -cflags_release+=(-mcpu=native -mstrict-align) + +# Note: NEON SIMD instructions for floating-point operations are +# not generated by GCC's auto-vectorization pass unless +# -funsafe-math-optimizations is also specified. This is because +# NEON hardware does not fully implement the IEEE 754 standard for +# some floating-point arithmetic operations; specifically, +# "denormal" values are treated as zero, so in these corner cases +# the use of NEON instructions may lead to a loss of precision. +# For our purposes, we expect to perform normal calculations and +# thus accept this risk for release builds. 
+ +cflags_release+=(-mcpu=native -funsafe-math-optimizations -mstrict-align) diff --git a/scripts/automator/build/gcc-linux_armv7_mali b/scripts/automator/build/gcc-linux_armv7_mali index ed7ba8c2..12511902 100644 --- a/scripts/automator/build/gcc-linux_armv7_mali +++ b/scripts/automator/build/gcc-linux_armv7_mali @@ -1,2 +1,13 @@ # GCC flags for generically identified ARMv7 MALI SBCs -cflags_release+=(-march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard) + +# Note: NEON SIMD instructions for floating-point operations are +# not generated by GCC's auto-vectorization pass unless +# -funsafe-math-optimizations is also specified. This is because +# NEON hardware does not fully implement the IEEE 754 standard for +# some floating-point arithmetic operations; specifically, +# "denormal" values are treated as zero, so in these corner cases +# the use of NEON instructions may lead to a loss of precision. +# For our purposes, we expect to perform normal calculations and +# thus accept this risk for release builds. + +cflags_release+=(-march=armv7-a -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard) diff --git a/scripts/automator/build/gcc-linux_rpi2 b/scripts/automator/build/gcc-linux_rpi2 index 2db2ac81..c16bcd70 100644 --- a/scripts/automator/build/gcc-linux_rpi2 +++ b/scripts/automator/build/gcc-linux_rpi2 @@ -1,2 +1,13 @@ # GCC flags specific to the Raspberry Pi 2 series of SBC -cflags_release+=(-mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard) + +# Note: NEON SIMD instructions for floating-point operations are +# not generated by GCC's auto-vectorization pass unless +# -funsafe-math-optimizations is also specified. This is because +# NEON hardware does not fully implement the IEEE 754 standard for +# some floating-point arithmetic operations; specifically, +# "denormal" values are treated as zero, so in these corner cases +# the use of NEON instructions may lead to a loss of precision. 
+# For our purposes, we expect to perform normal calculations and +# thus accept this risk for release builds. + +cflags_release+=(-mcpu=cortex-a7 -funsafe-math-optimizations -mfpu=neon-vfpv4 -mfloat-abi=hard) diff --git a/scripts/automator/build/gcc-linux_rpi3 b/scripts/automator/build/gcc-linux_rpi3 index 54698406..3e697ee1 100644 --- a/scripts/automator/build/gcc-linux_rpi3 +++ b/scripts/automator/build/gcc-linux_rpi3 @@ -1,4 +1,15 @@ # GCC flags specific to the Raspberry Pi 3 series of SBC -cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 + +# Note: NEON SIMD instructions for floating-point operations are +# not generated by GCC's auto-vectorization pass unless +# -funsafe-math-optimizations is also specified. This is because +# NEON hardware does not fully implement the IEEE 754 standard for +# some floating-point arithmetic operations; specifically, +# "denormal" values are treated as zero, so in these corner cases +# the use of NEON instructions may lead to a loss of precision. +# For our purposes, we expect to perform normal calculations and +# thus accept this risk for release builds. + +cflags_release+=(-march=armv8-a+crc -mtune=cortex-a53 -funsafe-math-optimizations -mfpu=neon-fp-armv8 -mfloat-abi=hard) diff --git a/scripts/automator/build/gcc-linux_rpi4 b/scripts/automator/build/gcc-linux_rpi4 index bff6f080..8f651bcf 100644 --- a/scripts/automator/build/gcc-linux_rpi4 +++ b/scripts/automator/build/gcc-linux_rpi4 @@ -1,3 +1,14 @@ # GCC flags specific to the Raspberry Pi 4 series of SBC -cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 + +# Note: NEON SIMD instructions for floating-point operations are +# not generated by GCC's auto-vectorization pass unless +# -funsafe-math-optimizations is also specified. 
This is because +# NEON hardware does not fully implement the IEEE 754 standard for +# some floating-point arithmetic operations; specifically, +# "denormal" values are treated as zero, so in these corner cases +# the use of NEON instructions may lead to a loss of precision. +# For our purposes, we expect to perform normal calculations and +# thus accept this risk for release builds. + +cflags_release+=(-march=armv8-a+crc -mtune=cortex-a72 -funsafe-math-optimizations -mfpu=neon-fp-armv8 -mfloat-abi=hard)