From 95ca0ad63126c5bc70ada1eaf7856506c1a19d60 Mon Sep 17 00:00:00 2001
From: Mike Oliphant
Date: Fri, 24 Mar 2023 11:00:51 -0700
Subject: [PATCH] Add floating point denormal code

---
Review notes (not part of the commit message):
- architecture.hpp: _M_IX86_FP > 1 is now tested before > 0 (the SSE2 branch
  was unreachable), <cfenv> is included on every architecture because
  src/nam_lv2.cpp uses std::fenv_t, and __has_builtin is only evaluated when
  the compiler defines it.
- The two include targets in architecture.hpp were missing from this copy of
  the patch; they are restored as <cfenv> and <immintrin.h>, inferred from the
  calls that need them (std::fesetenv, _mm_setcsr). Confirm against upstream.
- Context lines in src/nam_lv2.cpp (two #include lines and the static_cast
  template argument) are incomplete in this copy and could not be recovered;
  repair them before applying.
- The blob hash on the architecture.hpp index line predates these changes.

 CMakeLists.txt                 |   1 +
 deps/denormal/architecture.hpp | 112 +++++++++++++++++++++++++++++++++
 src/nam_lv2.cpp                |  10 ++++
 src/nam_plugin.cpp             |   6 +-
 4 files changed, 126 insertions(+), 3 deletions(-)
 create mode 100644 deps/denormal/architecture.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6e18eda..ab6a9ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ include_directories(SYSTEM deps/eigen)
 include_directories(SYSTEM deps/lv2/include)
 include_directories(SYSTEM deps/NeuralAmpModelerCore/NAM)
 include_directories(SYSTEM deps/json)
+include_directories(SYSTEM deps/denormal)
 
 add_subdirectory(src)
 
diff --git a/deps/denormal/architecture.hpp b/deps/denormal/architecture.hpp
new file mode 100644
index 0000000..53be234
--- /dev/null
+++ b/deps/denormal/architecture.hpp
@@ -0,0 +1,112 @@
+// From https://github.com/Dougal-s/Aether
+
+#ifndef ARCHITECTURE_HPP
+#define ARCHITECTURE_HPP
+
+// check cpu architecture
+
+#if /* x86_64 */ \
+	/* clang & gcc */ defined(__x86_64__) || \
+	/* msvc */ defined(_M_AMD64) \
+
+	#define ARCH_X86
+	#define ARCH_X86_64
+
+#elif /* i386 */ \
+	/* clang & gcc */ defined(__i386__) || \
+	/* msvc */ defined(_M_IX86) \
+
+	#define ARCH_X86
+	#define ARCH_I386
+
+#elif /* Arm64 */ \
+	/* clang & gcc */ defined(__aarch64__) || \
+	/* msvc */ defined(_M_ARM64) \
+
+	#define ARCH_ARM
+	#define ARCH_ARM64
+
+#elif /* Arm */ \
+	/* clang & gcc */ defined(__arm__) || \
+	/* msvc */ defined(_M_ARM) \
+
+	#define ARCH_ARM
+	#define ARCH_ARM32
+
+#else
+	#define ARCH_UNKNOWN
+#endif
+
+
+// check cpu extensions
+
+/* clang & gcc */
+#ifdef __SSE__
+	#define ARCH_EXT_SSE
+#endif
+
+#ifdef __SSE2__
+	#define ARCH_EXT_SSE2
+#endif
+
+#ifdef __SSE3__
+	#define ARCH_EXT_SSE3
+#endif
+
+/* msvc */
+#if defined(ARCH_X86_64)
+	#define ARCH_EXT_SSE
+	#define ARCH_EXT_SSE2
+
+	// msvc doesn't seem to have anything for sse3 so I am just assuming
+	// it is supported
+	#define ARCH_EXT_SSE3
+#elif defined(ARCH_I386)
+	// _M_IX86_FP is 1 for /arch:SSE and 2 for /arch:SSE2 or above, so the
+	// higher level has to be tested first or its branch is unreachable
+	#if defined(_M_IX86_FP) && _M_IX86_FP > 1
+		#define ARCH_EXT_SSE3
+		#define ARCH_EXT_SSE2
+		#define ARCH_EXT_SSE
+	#elif defined(_M_IX86_FP) && _M_IX86_FP > 0
+		#define ARCH_EXT_SSE
+	#endif
+#endif
+
+
+// misc functions
+
+// <cfenv> is needed on every architecture: callers save and restore the
+// floating point environment around disable_denormals()
+#include <cfenv>
+
+#ifdef ARCH_EXT_SSE
+	#ifndef FE_DFL_DISABLE_SSE_DENORMS_ENV
+		#include <immintrin.h>
+	#endif
+#endif
+
+// Puts the FPU into flush-to-zero mode so that denormal (subnormal) floats
+// are treated as zero. Does nothing on targets where no supported
+// mechanism is detected.
+inline void disable_denormals() noexcept {
+
+	#if defined(ARCH_EXT_SSE)
+		#ifdef FE_DFL_DISABLE_SSE_DENORMS_ENV
+			std::fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
+		#else
+			// MXCSR: 0x8000 = flush-to-zero, 0x0040 = denormals-are-zero
+			_mm_setcsr(_mm_getcsr() | 0x8040);
+		#endif
+	#elif defined(ARCH_ARM) && defined(__has_builtin)
+		// __has_builtin is a compiler extension; only use it in an #if when
+		// the compiler provides it, otherwise the directive is ill-formed
+		#if __has_builtin(__builtin_arm_set_fpscr) && __has_builtin(__builtin_arm_get_fpscr)
+			// FPSCR bit 24 is the flush-to-zero (FZ) control bit
+			__builtin_arm_set_fpscr(__builtin_arm_get_fpscr() | (1 << 24));
+		#endif
+	#endif
+
+}
+
+#endif
diff --git a/src/nam_lv2.cpp b/src/nam_lv2.cpp
index 6deda30..f01e6c8 100644
--- a/src/nam_lv2.cpp
+++ b/src/nam_lv2.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 
+#include "architecture.hpp"
+
 #include "nam_plugin.h"
 
 // LV2 Functions
@@ -41,7 +43,15 @@ static void activate(LV2_Handle) {}
 
 static void run(LV2_Handle instance, uint32_t n_samples)
 {
+	// Disable floating point denormals
+	std::fenv_t fe_state;
+	std::feholdexcept(&fe_state);
+	disable_denormals();
+
 	static_cast(instance)->process(n_samples);
+
+	// restore previous floating point state
+	std::feupdateenv(&fe_state);
 }
 
 static void deactivate(LV2_Handle) {}
diff --git a/src/nam_plugin.cpp b/src/nam_plugin.cpp
index dd8b445..0b3567d 100644
--- a/src/nam_plugin.cpp
+++ b/src/nam_plugin.cpp
@@ -155,8 +155,8 @@ namespace NAM {
 		if (dblData.size() != n_samples)
 			dblData.resize(n_samples);
 
-		float inputLevel = pow(10, *(ports.input_level) * 0.05);
-		float outputLevel = pow(10, *(ports.output_level) * 0.05);
+		float inputLevel = powf(10, *(ports.input_level) * 0.05f);
+		float outputLevel = powf(10, *(ports.output_level) * 0.05f);
 
 		for (unsigned int i = 0; i < n_samples; i++)
 		{
@@ -176,7 +176,7 @@ namespace NAM {
 
 		for (unsigned int i = 0; i < n_samples; i++)
 		{
-			ports.audio_out[i] = dblData[i] * outputLevel;
+			ports.audio_out[i] = static_cast<float>(dblData[i] * outputLevel);
 		}
 	}
 }