Initial commit

Mike Oliphant
2023-03-08 17:19:08 -08:00
parent 63d499cff8
commit 6f2f7921cc
17 changed files with 25126 additions and 0 deletions
+35
@@ -0,0 +1,35 @@
cmake_minimum_required(VERSION 3.10)
project(NeuralAmpModeler VERSION 0.0.1)
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED OFF)
set(CMAKE_CXX_EXTENSIONS OFF)
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
include_directories(SYSTEM /usr/local/include)
elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux")
elseif (CMAKE_SYSTEM_NAME STREQUAL "Windows")
add_compile_definitions(NOMINMAX WIN32_LEAN_AND_MEAN)
else()
message(FATAL_ERROR "Unrecognized Platform!")
endif()
include_directories(SYSTEM eigen)
include_directories(SYSTEM lv2/include)
add_subdirectory(src)
# create neural_amp_modeler.lv2
add_custom_target(copy_binaries ALL
${CMAKE_COMMAND} -E copy "$<TARGET_FILE:neural_amp_modeler>" neural_amp_modeler.lv2/
DEPENDS neural_amp_modeler
)
configure_file(resources/manifest.ttl.in neural_amp_modeler.lv2/manifest.ttl)
configure_file(resources/neural_amp_modeler.ttl.in neural_amp_modeler.lv2/neural_amp_modeler.ttl)
+4
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
+7
@@ -0,0 +1,7 @@
@prefix lv2: <http://lv2plug.in/ns/lv2core#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
<http://github.com/mikeoliphant/neural-amp-modeler-lv2>
a lv2:Plugin;
lv2:binary <neural_amp_modeler@CMAKE_SHARED_MODULE_SUFFIX@>;
rdfs:seeAlso <neural_amp_modeler.ttl>.
+97
@@ -0,0 +1,97 @@
@prefix atom: <http://lv2plug.in/ns/ext/atom#>.
@prefix doap: <http://usefulinc.com/ns/doap#>.
@prefix foaf: <http://xmlns.com/foaf/0.1/>.
@prefix lv2: <http://lv2plug.in/ns/lv2core#>.
@prefix props: <http://lv2plug.in/ns/ext/port-props#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rsz: <http://lv2plug.in/ns/ext/resize-port#>.
@prefix ui: <http://lv2plug.in/ns/extensions/ui#>.
@prefix units: <http://lv2plug.in/ns/extensions/units#>.
@prefix urid: <http://lv2plug.in/ns/ext/urid#>.
@prefix param: <http://lv2plug.in/ns/ext/parameters#>.
@prefix pg: <http://lv2plug.in/ns/ext/port-groups#>.
<http://github.com/mikeoliphant>
a foaf:Person;
foaf:name "Mike Oliphant";
foaf:homepage <http://github.com/mikeoliphant>.
<http://github.com/mikeoliphant/neural-amp-modeler-lv2>
a doap:Project;
doap:maintainer <http://github.com/mikeoliphant>;
doap:name "Neural Amp Modeler".
<http://github.com/mikeoliphant/neural-amp-modeler-lv2#input>
a pg:MonoGroup, pg:InputGroup;
lv2:symbol "input".
<http://github.com/mikeoliphant/neural-amp-modeler-lv2#output>
a pg:MonoGroup, pg:OutputGroup;
lv2:symbol "output";
pg:source <http://github.com/mikeoliphant/neural-amp-modeler-lv2#input>.
<http://github.com/mikeoliphant/neural-amp-modeler-lv2>
a lv2:Plugin, lv2:AmplifierPlugin;
doap:name "Neural Amp Modeler";
lv2:project <http://github.com/mikeoliphant/neural-amp-modeler-lv2>;
lv2:minorVersion @PROJECT_VERSION_MINOR@;
lv2:microVersion @PROJECT_VERSION_PATCH@;
doap:license <http://opensource.org/licenses/MIT>;
lv2:requiredFeature urid:map;
lv2:optionalFeature lv2:hardRTCapable;
rdfs:comment "An LV2 implementation of Neural Amp Modeler";
pg:mainInput <http://github.com/mikeoliphant/neural-amp-modeler-lv2#input>;
pg:mainOutput <http://github.com/mikeoliphant/neural-amp-modeler-lv2#output>;
# Control Ports
lv2:port [
a lv2:InputPort, atom:AtomPort;
atom:bufferType atom:Sequence;
lv2:designation lv2:control ;
lv2:index 0;
lv2:symbol "control";
lv2:name "control";
rdfs:comment "UI -> DSP communication"
], [
a lv2:OutputPort, atom:AtomPort;
atom:bufferType atom:Sequence;
lv2:designation lv2:control ;
lv2:index 1;
lv2:symbol "notify";
lv2:name "Notify";
# amount of data sent in a single 8192 sample process block
rsz:minimumSize 131428;
rdfs:comment "DSP -> UI communication"
], [
a lv2:InputPort, lv2:AudioPort;
lv2:index 2;
lv2:symbol "input";
lv2:name "Input";
pg:group <http://github.com/mikeoliphant/neural-amp-modeler-lv2#input>;
lv2:designation pg:left
], [
a lv2:OutputPort, lv2:AudioPort;
lv2:index 3;
lv2:symbol "output";
lv2:name "Output";
pg:group <http://github.com/mikeoliphant/neural-amp-modeler-lv2#output>;
lv2:designation pg:left
];
# Mixer
lv2:port [
a lv2:InputPort, lv2:ControlPort;
lv2:designation param:wetDryRatio;
lv2:index 4;
lv2:symbol "mix";
lv2:name "Mix";
rdfs:comment "dry/wet ratio";
lv2:default 100.0;
lv2:minimum 0.0;
lv2:maximum 100.0;
units:unit units:pc
].
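Note that the lv2:index values above must line up with the member order of NAM::Plugin::Ports (defined in nam_plugin.hpp later in this commit), because connect_port() in nam_lv2.cpp indexes into that struct as a flat array of pointers. A sketch of the assumed mapping:

// Index -> member mapping assumed by connect_port() in nam_lv2.cpp:
struct Ports {
  const LV2_Atom_Sequence* control; // lv2:index 0
  LV2_Atom_Sequence* notify;        // lv2:index 1
  const float* audio_in;            // lv2:index 2 ("input")
  float* audio_out;                 // lv2:index 3 ("output")
};
// lv2:index 4 ("mix") has no struct member yet and is ignored by connect_port().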
+65
@@ -0,0 +1,65 @@
add_library(neural_amp_modeler MODULE
nam_lv2.cpp
nam_plugin.cpp
nam_plugin.hpp
dsp.h
dsp.cpp
get_dsp.cpp
util.cpp
util.h
wavenet.cpp
wavenet.h
json.hpp
)
target_compile_features(neural_amp_modeler PUBLIC cxx_std_17)
set_target_properties(neural_amp_modeler
PROPERTIES
CXX_VISIBILITY_PRESET hidden
INTERPROCEDURAL_OPTIMIZATION TRUE
PREFIX ""
)
# Compile Options
option(FORCE_DISABLE_DENORMALS "Disable denormal numbers before processing" ON)
target_compile_definitions(neural_amp_modeler
PRIVATE
"$<$<CONFIG:RELEASE>:NDEBUG>"
"$<$<BOOL:${FORCE_DISABLE_DENORMALS}>:FORCE_DISABLE_DENORMALS>"
)
# Architecture
if (
FORCE_DISABLE_DENORMALS
AND CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(i386)|(i686)|(AMD64)"
)
if (MSVC)
target_compile_options(neural_amp_modeler PRIVATE /arch:SSE2)
else()
target_compile_options(neural_amp_modeler PRIVATE -msse3)
endif()
endif()
# Platform
if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
target_compile_definitions(neural_amp_modeler PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN)
endif()
if (MSVC)
target_compile_options(neural_amp_modeler PRIVATE
"$<$<CONFIG:DEBUG>:/W4>"
"$<$<CONFIG:RELEASE>:/O2>"
)
else()
target_compile_options(neural_amp_modeler PRIVATE
-Wall -Wextra -Wpedantic -Wshadow -Wstrict-aliasing
-Wunreachable-code -Wdouble-promotion -Weffc++ -Wconversion
-Wsign-conversion
"$<$<CONFIG:DEBUG>:-Og;-ggdb;-Werror>"
"$<$<CONFIG:RELEASE>:-Ofast>"
)
endif()
+100
@@ -0,0 +1,100 @@
#ifndef ARCHITECTURE_HPP
#define ARCHITECTURE_HPP
// check cpu architecture
#if /* x86_64 */ \
/* clang & gcc */ defined(__x86_64__) || \
/* msvc */ defined(_M_AMD64)
#define ARCH_X86
#define ARCH_X86_64
#elif /* i386 */ \
/* clang & gcc */ defined(__i386__) || \
/* msvc */ defined(_M_IX86)
#define ARCH_X86
#define ARCH_I386
#elif /* Arm64 */ \
/* clang & gcc */ defined(__aarch64__) || \
/* msvc */ defined(_M_ARM64)
#define ARCH_ARM
#define ARCH_ARM64
#elif /* Arm */ \
/* clang & gcc */ defined(__arm__) || \
/* msvc */ defined(_M_ARM)
#define ARCH_ARM
#define ARCH_ARM32
#else
#define ARCH_UNKNOWN
#endif
// check cpu extensions
/* clang & gcc */
#ifdef __SSE__
#define ARCH_EXT_SSE
#endif
#ifdef __SSE2__
#define ARCH_EXT_SSE2
#endif
#ifdef __SSE3__
#define ARCH_EXT_SSE3
#endif
/* msvc */
#if defined(ARCH_X86_64)
#define ARCH_EXT_SSE
#define ARCH_EXT_SSE2
// MSVC doesn't define a feature-test macro for SSE3, so assume it is
// supported on x86_64.
#define ARCH_EXT_SSE3
#elif defined(ARCH_I386)
// _M_IX86_FP: 1 = /arch:SSE, 2 = /arch:SSE2 (or higher).
// Check the stricter level first, since 2 also satisfies "> 0".
#if _M_IX86_FP > 1
#define ARCH_EXT_SSE3
#define ARCH_EXT_SSE2
#define ARCH_EXT_SSE
#elif _M_IX86_FP > 0
#define ARCH_EXT_SSE
#endif
#endif
// misc functions
#ifdef ARCH_EXT_SSE
#include <cfenv>
#ifndef FE_DFL_DISABLE_SSE_DENORMS_ENV
#include <immintrin.h>
#endif
#endif
inline void disable_denormals() noexcept {
#if defined(ARCH_EXT_SSE)
#ifdef FE_DFL_DISABLE_SSE_DENORMS_ENV
std::fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
#else
_mm_setcsr(_mm_getcsr() | 0x8040); // set FTZ (bit 15) and DAZ (bit 6) in MXCSR
#endif
#elif defined(ARCH_ARM)
#if defined(__has_builtin) // not all compilers provide __has_builtin
#if __has_builtin(__builtin_arm_set_fpscr) && __has_builtin(__builtin_arm_get_fpscr)
__builtin_arm_set_fpscr(__builtin_arm_get_fpscr() | (1 << 24)); // FPSCR FZ (flush-to-zero)
#endif
#endif
#endif
}
#endif
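For context, the plugin's run() callback (nam_lv2.cpp, later in this commit) brackets each audio block with a save/disable/restore sequence around disable_denormals(). A minimal sketch of that pattern; the function name here is illustrative:

#include <cfenv>

void process_block_with_denormals_off() {
  std::fenv_t fe_state;
  std::feholdexcept(&fe_state); // save the current floating-point environment
  disable_denormals();          // FTZ/DAZ on SSE, FZ on Arm
  // ... per-block DSP work goes here ...
  std::feupdateenv(&fe_state);  // restore the saved environment
}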
+596
@@ -0,0 +1,596 @@
#include <algorithm> // std::max_element
#include <cmath> // pow, tanh, expf
#include <filesystem>
#include <fstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "dsp.h"
#include "json.hpp"
#include "util.h"
//#define tanh_impl_ std::tanh
#define tanh_impl_ fast_tanh_
constexpr auto _INPUT_BUFFER_SAFETY_FACTOR = 32;
DSP::DSP() { this->_stale_params = true; }
void DSP::process(const NAMSample *input, NAMSample *output,
const int num_channels, const int num_frames,
const double input_gain, const double output_gain,
const std::unordered_map<std::string, double> &params) {
this->_get_params_(params);
this->_apply_input_level_(input, num_channels, num_frames, input_gain);
this->_ensure_core_dsp_output_ready_();
this->_process_core_();
this->_apply_output_level_(output, num_channels, num_frames, output_gain);
}
void DSP::finalize_(const int num_frames) {}
void DSP::_get_params_(
const std::unordered_map<std::string, double> &input_params) {
this->_stale_params = false;
for (auto it = input_params.begin(); it != input_params.end(); ++it) {
const std::string key = util::lowercase(it->first);
const double value = it->second;
if (this->_params.find(key) == this->_params.end()) // Not contained
this->_stale_params = true;
else if (this->_params[key] != value) // Contained but new value
this->_stale_params = true;
this->_params[key] = value;
}
}
void DSP::_apply_input_level_(const NAMSample *input, const int num_channels,
const int num_frames, const double gain) {
// Must match exactly; we're going to use the size of _input_post_gain later
// for num_frames.
if (this->_input_post_gain.size() != num_frames)
this->_input_post_gain.resize(num_frames);
// MONO ONLY: the input pointer carries a single channel.
for (int i = 0; i < num_frames; i++)
this->_input_post_gain[i] = float(gain * input[i]);
}
void DSP::_ensure_core_dsp_output_ready_() {
if (this->_core_dsp_output.size() < this->_input_post_gain.size())
this->_core_dsp_output.resize(this->_input_post_gain.size());
}
void DSP::_process_core_() {
// Default implementation is the null operation
for (int i = 0; i < this->_input_post_gain.size(); i++)
this->_core_dsp_output[i] = this->_input_post_gain[i];
}
void DSP::_apply_output_level_(NAMSample *output, const int num_channels,
const int num_frames, const double gain) {
for (int c = 0; c < num_channels; c++)
for (int s = 0; s < num_frames; s++)
output[s] = NAMSample(gain * this->_core_dsp_output[s]);
}
// Buffer =====================================================================
Buffer::Buffer(const int receptive_field) : DSP() {
this->_set_receptive_field(receptive_field);
}
void Buffer::_set_receptive_field(const int new_receptive_field) {
this->_set_receptive_field(new_receptive_field,
_INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field);
};
void Buffer::_set_receptive_field(const int new_receptive_field,
const int input_buffer_size) {
this->_receptive_field = new_receptive_field;
this->_input_buffer.resize(input_buffer_size);
this->_reset_input_buffer();
}
void Buffer::_update_buffers_() {
const long int num_frames = this->_input_post_gain.size();
// Make sure that the buffer is big enough for the receptive field and the
// frames needed!
{
const long minimum_input_buffer_size =
(long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
if (this->_input_buffer.size() < minimum_input_buffer_size) {
long new_buffer_size = 2;
while (new_buffer_size < minimum_input_buffer_size)
new_buffer_size *= 2;
this->_input_buffer.resize(new_buffer_size);
}
}
// If we'd run off the end of the input buffer, then we need to move the data
// back to the start of the buffer and start again.
if (this->_input_buffer_offset + num_frames > this->_input_buffer.size())
this->_rewind_buffers_();
// Put the new samples into the input buffer
for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
this->_input_buffer[i] = this->_input_post_gain[j];
// And resize the output buffer:
this->_output_buffer.resize(num_frames);
}
void Buffer::_rewind_buffers_() {
// Copy the last receptive_field samples back to the start of the buffer.
// (Strictly RF-1 would do, since at least one new sample is inbound; see below.)
for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field;
i < this->_receptive_field; i++, j++)
this->_input_buffer[i] = this->_input_buffer[j];
// And reset the offset.
// Even though we could be stingy about that one sample that we won't be using
// (because a new set is incoming) it's probably not worth the
// hyper-optimization and liable for bugs. And the code looks way tidier this
// way.
this->_input_buffer_offset = this->_receptive_field;
}
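// Illustrative trace of the scheme above (added for exposition): suppose
// receptive_field = 4 and the input buffer holds N samples. The offset starts
// at 4 and advances by num_frames after each block (see finalize_ below).
// Once offset + num_frames would exceed N, _rewind_buffers_() copies the last
// receptive_field samples [offset - 4, offset) back to [0, 4) and resets the
// offset to 4, preserving exactly the history the receptive field needs.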
void Buffer::_reset_input_buffer() {
this->_input_buffer_offset = this->_receptive_field;
}
void Buffer::finalize_(const int num_frames) {
this->DSP::finalize_(num_frames);
this->_input_buffer_offset += num_frames;
}
// Linear =====================================================================
Linear::Linear(const int receptive_field, const bool _bias,
const std::vector<float> &params)
: Buffer(receptive_field) {
if (params.size() != (receptive_field + (_bias ? 1 : 0)))
throw std::runtime_error("Params vector does not match expected size based "
"on architecture parameters");
this->_weight.resize(this->_receptive_field);
// Pass in reverse order so that dot products work out of the box.
for (int i = 0; i < this->_receptive_field; i++)
this->_weight(i) = params[receptive_field - 1 - i];
this->_bias = _bias ? params[receptive_field] : (float)0.0;
}
void Linear::_process_core_() {
this->Buffer::_update_buffers_();
// Main computation!
for (long i = 0; i < this->_input_post_gain.size(); i++) {
const long offset =
this->_input_buffer_offset - this->_weight.size() + i + 1;
auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset],
this->_receptive_field);
this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input);
}
}
// NN modules =================================================================
void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = i_start; i < i_end; i++)
x(i, j) = x(i, j) < (float)0.0 ? (float)0.0 : x(i, j);
}
void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end) {
relu_(x, 0, x.rows(), j_start, j_end);
}
void relu_(Eigen::MatrixXf &x) { relu_(x, 0, x.rows(), 0, x.cols()); }
void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = i_start; i < i_end; i++)
x(i, j) = 1.0f / (1.0f + expf(-x(i, j)));
}
inline float fast_tanh_(const float x)
{
const float ax = fabsf(x);
const float x2 = x * x;
return(x * (2.45550750702956f + 2.45550750702956f * ax +
(0.893229853513558f + 0.821226666969744f * ax) * x2) /
(2.44506634652299f + (2.44506634652299f + x2) *
fabsf(x + 0.814642734961073f * x * ax)));
}
void tanh_(Eigen::MatrixXf& x)
{
float *ptr = x.data();
long size = x.rows() * x.cols();
for (long pos = 0; pos < size; pos++)
{
ptr[pos] = tanh_impl_(ptr[pos]);
}
}
void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = i_start; i < i_end; i++)
x(i, j) = tanh_impl_(x(i, j));
}
void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end) {
tanh_(x, 0, x.rows(), j_start, j_end);
}
void Conv1D::set_params_(std::vector<float>::iterator &params) {
if (this->_weight.size() > 0) {
const long out_channels = this->_weight[0].rows();
const long in_channels = this->_weight[0].cols();
// Crazy ordering because that's how it gets flattened:
// (out_channel, in_channel, kernel position), out slowest.
for (auto i = 0; i < out_channels; i++)
for (auto j = 0; j < in_channels; j++)
for (auto k = 0; k < this->_weight.size(); k++)
this->_weight[k](i, j) = *(params++);
}
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(params++);
}
void Conv1D::set_size_(const int in_channels, const int out_channels,
const int kernel_size, const bool do_bias,
const int _dilation) {
this->_weight.resize(kernel_size);
for (int i = 0; i < this->_weight.size(); i++)
this->_weight[i].resize(out_channels,
in_channels); // y = Ax, input array (C,L)
if (do_bias)
this->_bias.resize(out_channels);
else
this->_bias.resize(0);
this->_dilation = _dilation;
}
void Conv1D::set_size_and_params_(const int in_channels, const int out_channels,
const int kernel_size, const int _dilation,
const bool do_bias,
std::vector<float>::iterator &params) {
this->set_size_(in_channels, out_channels, kernel_size, do_bias, _dilation);
this->set_params_(params);
}
void Conv1D::process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
const long i_start, const long ncols,
const long j_start) const {
// This is the clever part ;)
for (long k = 0; k < this->_weight.size(); k++) {
const long offset = this->_dilation * (k + 1 - this->_weight.size());
if (k == 0)
output.middleCols(j_start, ncols).noalias() =
this->_weight[k] * input.middleCols(i_start + offset, ncols);
else
output.middleCols(j_start, ncols).noalias() +=
this->_weight[k] * input.middleCols(i_start + offset, ncols);
}
if (this->_bias.size() > 0)
output.middleCols(j_start, ncols).colwise() += this->_bias;
}
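// Worked example of the loop above (added for exposition): with
// kernel_size = 2 and dilation d, offset is -d for k = 0 and 0 for k = 1,
// so for each output column j,
//   output.col(j) = _weight[0] * input.col(i - d)
//                 + _weight[1] * input.col(i) + _bias
// i.e. a causal dilated convolution evaluated as one matrix product per
// kernel tap across all ncols frames at once.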
long Conv1D::get_num_params() const {
long num_params = this->_bias.size();
for (long i = 0; i < this->_weight.size(); i++)
num_params += this->_weight[i].size();
return num_params;
}
Conv1x1::Conv1x1(const int in_channels, const int out_channels,
const bool _bias) {
this->_weight.resize(out_channels, in_channels);
this->_do_bias = _bias;
if (_bias)
this->_bias.resize(out_channels);
}
void Conv1x1::set_params_(std::vector<float>::iterator &params) {
for (int i = 0; i < this->_weight.rows(); i++)
for (int j = 0; j < this->_weight.cols(); j++)
this->_weight(i, j) = *(params++);
if (this->_do_bias)
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(params++);
}
Eigen::MatrixXf Conv1x1::process(const Eigen::MatrixXf &input) const {
if (this->_do_bias)
return (this->_weight * input).colwise() + this->_bias;
else
return this->_weight * input;
}
// ConvNet ====================================================================
convnet::BatchNorm::BatchNorm(const int dim,
std::vector<float>::iterator &params) {
// Extract from param buffer
Eigen::VectorXf running_mean(dim);
Eigen::VectorXf running_var(dim);
Eigen::VectorXf _weight(dim);
Eigen::VectorXf _bias(dim);
for (int i = 0; i < dim; i++)
running_mean(i) = *(params++);
for (int i = 0; i < dim; i++)
running_var(i) = *(params++);
for (int i = 0; i < dim; i++)
_weight(i) = *(params++);
for (int i = 0; i < dim; i++)
_bias(i) = *(params++);
float eps = *(params++);
// Convert to scale & loc
this->scale.resize(dim);
this->loc.resize(dim);
for (int i = 0; i < dim; i++)
this->scale(i) = _weight(i) / sqrt(eps + running_var(i));
this->loc = _bias - this->scale.cwiseProduct(running_mean);
}
void convnet::BatchNorm::process_(Eigen::MatrixXf &x, const long i_start,
const long i_end) const {
// todo using colwise?
// #speed but conv probably dominates
for (auto i = i_start; i < i_end; i++) {
x.col(i) = x.col(i).cwiseProduct(this->scale);
x.col(i) += this->loc;
}
}
void convnet::ConvNetBlock::set_params_(const int in_channels,
const int out_channels,
const int _dilation,
const bool batchnorm,
const std::string activation,
std::vector<float>::iterator &params) {
this->_batchnorm = batchnorm;
// HACK: kernel size hard-coded to 2
this->conv.set_size_and_params_(in_channels, out_channels, 2, _dilation,
!batchnorm, params);
if (this->_batchnorm)
this->batchnorm = BatchNorm(out_channels, params);
this->activation = activation;
}
void convnet::ConvNetBlock::process_(const Eigen::MatrixXf &input,
Eigen::MatrixXf &output,
const long i_start,
const long i_end) const {
const long ncols = i_end - i_start;
this->conv.process_(input, output, i_start, ncols, i_start);
if (this->_batchnorm)
this->batchnorm.process_(output, i_start, i_end);
if (this->activation == "Tanh")
tanh_cols_(output, i_start, i_end);
else if (this->activation == "ReLU")
relu_(output, i_start, i_end);
else
throw std::runtime_error("Unrecognized activation");
}
long convnet::ConvNetBlock::get_out_channels() const {
return this->conv.get_out_channels();
}
convnet::_Head::_Head(const int channels,
std::vector<float>::iterator &params) {
this->_weight.resize(channels);
for (int i = 0; i < channels; i++)
this->_weight[i] = *(params++);
this->_bias = *(params++);
}
void convnet::_Head::process_(const Eigen::MatrixXf &input,
Eigen::VectorXf &output, const long i_start,
const long i_end) const {
const long length = i_end - i_start;
output.resize(length);
for (long i = 0, j = i_start; i < length; i++, j++)
output(i) = this->_bias + input.col(j).dot(this->_weight);
}
convnet::ConvNet::ConvNet(const int channels, const std::vector<int> &dilations,
const bool batchnorm, const std::string activation,
std::vector<float> &params)
: Buffer(*std::max_element(dilations.begin(), dilations.end())) {
this->_verify_params(channels, dilations, batchnorm, params.size());
this->_blocks.resize(dilations.size());
std::vector<float>::iterator it = params.begin();
for (int i = 0; i < dilations.size(); i++)
this->_blocks[i].set_params_(i == 0 ? 1 : channels, channels, dilations[i],
batchnorm, activation, it);
this->_block_vals.resize(this->_blocks.size() + 1);
this->_head = _Head(channels, it);
if (it != params.end())
throw std::runtime_error(
"Didn't touch all the params when initializing ConvNet");
this->_reset_anti_pop_();
}
void convnet::ConvNet::_process_core_() {
this->_update_buffers_();
// Main computation!
const long i_start = this->_input_buffer_offset;
const long num_frames = this->_input_post_gain.size();
const long i_end = i_start + num_frames;
// TODO one unnecessary copy :/ #speed
for (auto i = i_start; i < i_end; i++)
this->_block_vals[0](0, i) = this->_input_buffer[i];
for (auto i = 0; i < this->_blocks.size(); i++)
this->_blocks[i].process_(this->_block_vals[i], this->_block_vals[i + 1],
i_start, i_end);
// TODO clean up this allocation
this->_head.process_(this->_block_vals[this->_blocks.size()],
this->_head_output, i_start, i_end);
// Copy to required output array (TODO tighten this up)
for (int s = 0; s < num_frames; s++)
this->_core_dsp_output[s] = this->_head_output(s);
// Apply anti-pop
this->_anti_pop_();
}
void convnet::ConvNet::_verify_params(const int channels,
const std::vector<int> &dilations,
const bool batchnorm,
const size_t actual_params) {
// TODO
}
void convnet::ConvNet::_update_buffers_() {
this->Buffer::_update_buffers_();
const long buffer_size = this->_input_buffer.size();
this->_block_vals[0].resize(1, buffer_size);
for (long i = 1; i < this->_block_vals.size(); i++)
this->_block_vals[i].resize(this->_blocks[i - 1].get_out_channels(),
buffer_size);
}
void convnet::ConvNet::_rewind_buffers_() {
// Need to rewind the block vals first because Buffer::rewind_buffers()
// resets the offset index
// The last _block_vals is the output of the last block and doesn't need to be
// rewound.
for (long k = 0; k < this->_block_vals.size() - 1; k++) {
// We actually don't need to pull back a lot...just as far as the first
// input sample would grab from dilation
const long _dilation = this->_blocks[k].conv.get_dilation();
for (long i = this->_receptive_field - _dilation,
j = this->_input_buffer_offset - _dilation;
j < this->_input_buffer_offset; i++, j++)
for (long r = 0; r < this->_block_vals[k].rows(); r++)
this->_block_vals[k](r, i) = this->_block_vals[k](r, j);
}
// Now we can do the rest of the rewind
this->Buffer::_rewind_buffers_();
}
void convnet::ConvNet::_anti_pop_() {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
return;
const float slope = 1.0f / float(this->_anti_pop_ramp);
for (int i = 0; i < this->_core_dsp_output.size(); i++) {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
break;
const float gain =
std::max(slope * float(this->_anti_pop_countdown), float(0.0));
this->_core_dsp_output[i] *= gain;
this->_anti_pop_countdown++;
}
}
void convnet::ConvNet::_reset_anti_pop_() {
// You need the "real" receptive field, not the buffers.
long receptive_field = 1;
for (int i = 0; i < this->_blocks.size(); i++)
receptive_field += this->_blocks[i].conv.get_dilation();
this->_anti_pop_countdown = -receptive_field;
}
// ============================================================================
// Implementation of Version 2 interface
dsp::DSP::DSP() : mOutputPointers(nullptr), mOutputPointersSize(0) {}
dsp::DSP::~DSP() { this->_DeallocateOutputPointers(); };
void dsp::DSP::_AllocateOutputPointers(const size_t numChannels) {
if (this->mOutputPointers != nullptr)
throw std::runtime_error(
"Tried to re-allocate over non-null mOutputPointers");
this->mOutputPointers = new float *[numChannels];
if (this->mOutputPointers == nullptr)
throw std::runtime_error("Failed to allocate pointer to output buffer!\n");
this->mOutputPointersSize = numChannels;
}
void dsp::DSP::_DeallocateOutputPointers() {
  if (this->mOutputPointers != nullptr) {
    delete[] this->mOutputPointers;
    this->mOutputPointers = nullptr;
  }
  this->mOutputPointersSize = 0;
}
float **dsp::DSP::_GetPointers() {
for (auto c = 0; c < this->_GetNumChannels(); c++)
this->mOutputPointers[c] = this->mOutputs[c].data();
return this->mOutputPointers;
}
void dsp::DSP::_PrepareBuffers(const size_t numChannels,
const size_t numFrames) {
const size_t oldFrames = this->_GetNumFrames();
const size_t oldChannels = this->_GetNumChannels();
const bool resizeChannels = oldChannels != numChannels;
const bool resizeFrames = resizeChannels || (oldFrames != numFrames);
if (resizeChannels) {
this->mOutputs.resize(numChannels);
this->_ResizePointers(numChannels);
}
if (resizeFrames)
for (auto c = 0; c < numChannels; c++)
this->mOutputs[c].resize(numFrames);
}
void dsp::DSP::_ResizePointers(const size_t numChannels) {
if (this->mOutputPointersSize == numChannels)
return;
this->_DeallocateOutputPointers();
this->_AllocateOutputPointers(numChannels);
}
dsp::History::History() : DSP(), mHistoryRequired(0), mHistoryIndex(0) {}
void dsp::History::_AdvanceHistoryIndex(const size_t bufferSize) {
this->mHistoryIndex += bufferSize;
}
void dsp::History::_EnsureHistorySize(const size_t bufferSize) {
const size_t repeatSize = std::max(bufferSize, this->mHistoryRequired);
const size_t requiredHistoryArraySize =
10 * repeatSize; // Just so we don't spend too much time copying back.
if (this->mHistory.size() < requiredHistoryArraySize) {
this->mHistory.resize(requiredHistoryArraySize);
std::fill(this->mHistory.begin(), this->mHistory.end(), 0.0f);
this->mHistoryIndex = this->mHistoryRequired; // Guaranteed to be less than
// requiredHistoryArraySize
}
}
void dsp::History::_RewindHistory() {
// TODO memcpy? Should be fine w/ history array being >2x the history length.
for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired;
i < this->mHistoryRequired; i++, j++)
this->mHistory[i] = this->mHistory[j];
this->mHistoryIndex = this->mHistoryRequired;
}
void dsp::History::_UpdateHistory(float **inputs,
const size_t numChannels,
const size_t numFrames) {
this->_EnsureHistorySize(numFrames);
if (numChannels < 1)
throw std::runtime_error("Zero channels?");
if (this->mHistoryIndex + numFrames >= this->mHistory.size())
this->_RewindHistory();
// Grabs channel 1, drops channel 2.
for (size_t i = 0, j = this->mHistoryIndex; i < numFrames; i++, j++)
// Convert down to float here.
this->mHistory[j] = (float)inputs[0][i];
}
+400
@@ -0,0 +1,400 @@
#pragma once
#include <filesystem>
#include <iterator>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <Eigen/Dense>
enum EArchitectures {
kLinear = 0,
kConvNet,
kLSTM,
kCatLSTM,
kWaveNet,
kCatWaveNet,
kNumModels
};
#define NAMSample float
// Class for providing params from the plugin to the DSP module
// For now, we'll work with doubles. Later, we'll add other types.
class DSPParam {
public:
const char *name;
const double val;
};
// And the params shall be provided as a std::vector<DSPParam>.
class DSP {
public:
DSP();
// process() does all of the processing required to take the `input` array
// and fill in the required values on `output`.
// To do this:
// 1. The parameters from the plugin (I/O levels and any other parametric
// inputs) are retrieved.
// 2. The input level is applied
// 3. The core DSP algorithm is run (This is what should probably be
// overridden in subclasses).
// 4. The output level is applied and the result stored to `output`.
virtual void process(const NAMSample *input, NAMSample *output,
const int num_channels, const int num_frames,
const double input_gain, const double output_gain,
const std::unordered_map<std::string, double> &params);
// Anything to take care of before next buffer comes in.
// For example:
// * Move the buffer index forward
// * Does NOT say that params aren't stale; that's the job of the routine
// that actually uses them, which varies depending on the particulars of the
// DSP subclass implementation.
virtual void finalize_(const int num_frames);
protected:
// Parameters (aka "knobs")
std::unordered_map<std::string, double> _params;
// If the params have changed since the last buffer was processed:
bool _stale_params;
// Where to store the samples after applying input gain
std::vector<float> _input_post_gain;
// Location for the output of the core DSP algorithm.
std::vector<float> _core_dsp_output;
// Methods
// Copy the parameters to the DSP module.
// If anything has changed, then set this->_stale_params to true.
// (TODO use "listener" approach)
void
_get_params_(const std::unordered_map<std::string, double> &input_params);
// Apply the input gain
// Result populates this->_input_post_gain
void _apply_input_level_(const NAMSample *input, const int num_channels,
const int num_frames, const double gain);
// i.e. ensure the size is correct.
void _ensure_core_dsp_output_ready_();
// The core of your DSP algorithm.
// Access the inputs in this->_input_post_gain
// Place the outputs in this->_core_dsp_output
virtual void _process_core_();
// Copy this->_core_dsp_output to output and apply the output volume
void _apply_output_level_(NAMSample *output, const int num_channels,
const int num_frames, const double gain);
};
// Class where an input buffer is kept so that long-time effects can be
// captured. (e.g. conv nets or impulse responses, where we need history that's
// longer than the sample buffer that's coming in.)
class Buffer : public DSP {
public:
Buffer(const int receptive_field);
void finalize_(const int num_frames) override;
protected:
// Input buffer
const int _input_buffer_channels = 1; // Mono
int _receptive_field;
// First location where we add new samples from the input
long _input_buffer_offset;
std::vector<float> _input_buffer;
std::vector<float> _output_buffer;
void _set_receptive_field(const int new_receptive_field,
const int input_buffer_size);
void _set_receptive_field(const int new_receptive_field);
void _reset_input_buffer();
// Use this->_input_post_gain
virtual void _update_buffers_();
virtual void _rewind_buffers_();
};
// Basic linear model (an IR!)
class Linear : public Buffer {
public:
Linear(const int receptive_field, const bool _bias,
const std::vector<float> &params);
void _process_core_() override;
protected:
Eigen::VectorXf _weight;
float _bias;
};
// NN modules =================================================================
// Activations
// In-place ReLU on (N,M) array
void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end);
// Subset of the columns
void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end);
void relu_(Eigen::MatrixXf &x);
// In-place sigmoid
void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end);
void sigmoid_(Eigen::MatrixXf &x);
// In-place Tanh on (N,M) array
void tanh_(Eigen::MatrixXf& x);
void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end);
// Subset of the columns
void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end);
class Conv1D {
public:
Conv1D() { this->_dilation = 1; };
void set_params_(std::vector<float>::iterator &params);
void set_size_(const int in_channels, const int out_channels,
const int kernel_size, const bool do_bias,
const int _dilation);
void set_size_and_params_(const int in_channels, const int out_channels,
const int kernel_size, const int _dilation,
const bool do_bias,
std::vector<float>::iterator &params);
// Process from input to output.
// Reads `ncols` columns of input starting at i_start;
// writes `ncols` columns of output starting at j_start.
void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
const long i_start, const long ncols, const long j_start) const;
long get_in_channels() const {
return this->_weight.size() > 0 ? this->_weight[0].cols() : 0;
};
long get_kernel_size() const { return this->_weight.size(); };
long get_num_params() const;
long get_out_channels() const {
return this->_weight.size() > 0 ? this->_weight[0].rows() : 0;
};
int get_dilation() const { return this->_dilation; };
private:
// Gonna wing this...
// conv[kernel](cout, cin)
std::vector<Eigen::MatrixXf> _weight;
Eigen::VectorXf _bias;
int _dilation;
};
// Really just a linear layer
class Conv1x1 {
public:
Conv1x1(const int in_channels, const int out_channels, const bool _bias);
void set_params_(std::vector<float>::iterator &params);
// :param input: (N,Cin) or (Cin,)
// :return: (N,Cout) or (Cout,), respectively
Eigen::MatrixXf process(const Eigen::MatrixXf &input) const;
long get_out_channels() const { return this->_weight.rows(); };
private:
Eigen::MatrixXf _weight;
Eigen::VectorXf _bias;
bool _do_bias;
};
// ConvNet ====================================================================
namespace convnet {
// Custom Conv that avoids re-computing on pieces of the input and trusts
// that the corresponding outputs are where they need to be.
// Beware: this is clever!
// Batch normalization
// In prod mode, so really just an elementwise affine layer.
class BatchNorm {
public:
BatchNorm(){};
BatchNorm(const int dim, std::vector<float>::iterator &params);
void process_(Eigen::MatrixXf &input, const long i_start,
const long i_end) const;
private:
// TODO simplify to just ax+b
// y = (x-m)/sqrt(v+eps) * w + bias
// y = ax+b
// a = w / sqrt(v+eps)
// b = bias - a * m
Eigen::VectorXf scale;
Eigen::VectorXf loc;
};
class ConvNetBlock {
public:
ConvNetBlock() { this->_batchnorm = false; };
void set_params_(const int in_channels, const int out_channels,
const int _dilation, const bool batchnorm,
const std::string activation,
std::vector<float>::iterator &params);
void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
const long i_start, const long i_end) const;
long get_out_channels() const;
Conv1D conv;
private:
BatchNorm batchnorm;
bool _batchnorm;
std::string activation;
};
class _Head {
public:
_Head() { this->_bias = (float)0.0; };
_Head(const int channels, std::vector<float>::iterator &params);
void process_(const Eigen::MatrixXf &input, Eigen::VectorXf &output,
const long i_start, const long i_end) const;
private:
Eigen::VectorXf _weight;
float _bias;
};
class ConvNet : public Buffer {
public:
ConvNet(const int channels, const std::vector<int> &dilations,
const bool batchnorm, const std::string activation,
std::vector<float> &params);
protected:
std::vector<ConvNetBlock> _blocks;
std::vector<Eigen::MatrixXf> _block_vals;
Eigen::VectorXf _head_output;
_Head _head;
void _verify_params(const int channels, const std::vector<int> &dilations,
const bool batchnorm, const size_t actual_params);
void _update_buffers_() override;
void _rewind_buffers_() override;
void _process_core_() override;
// The net starts with random parameters inside; we need to wait for a full
// receptive field to pass through before we can count on the output being
// ok. This implements a gentle "ramp-up" so that there's no "pop" at the
// start.
long _anti_pop_countdown;
const long _anti_pop_ramp = 100;
void _anti_pop_();
void _reset_anti_pop_();
};
}; // namespace convnet
// Utilities ==================================================================
// Implemented in get_dsp.cpp
// Verify that the config that we are building our model from is supported by
// this plugin version.
void verify_config_version(const std::string version);
// Takes the model file and uses it to instantiate an instance of DSP.
std::unique_ptr<DSP> get_dsp(const std::filesystem::path model_file);
// Legacy loader for directory-type DSPs
std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path dirname);
// Hard-coded model:
std::unique_ptr<DSP> get_hard_dsp();
// Version 2 DSP abstraction ==================================================
namespace dsp {
class Params {};
class DSP {
public:
DSP();
~DSP();
// The main interface for processing audio.
// The incoming audio is given as a raw pointer-to-pointers.
// The indexing is [channel][frame].
// The output shall be a pointer-to-pointers of matching size.
// This object instance will own the data referenced by the pointers and be
// responsible for its allocation and deallocation.
virtual float **Process(float **inputs,
const size_t numChannels,
const size_t numFrames) = 0;
// Update the parameters of the DSP object according to the provided params.
// Not declaring a pure virtual bc there's no concrete definition that can
// use Params.
// But, use this name :)
// virtual void SetParams(Params* params) = 0;
protected:
// Methods
// Allocate mOutputPointers.
// Assumes it's already null (Use _DeallocateOutputPointers()).
void _AllocateOutputPointers(const size_t numChannels);
// Ensure mOutputPointers is freed.
void _DeallocateOutputPointers();
size_t _GetNumChannels() const { return this->mOutputs.size(); };
size_t _GetNumFrames() const {
return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0;
}
// Return a pointer-to-pointers for the DSP's output buffers (all channels)
// Assumes that ._PrepareBuffers() was called recently enough.
float **_GetPointers();
// Resize mOutputs to (numChannels, numFrames) and ensure that the raw
// pointers are also keeping up.
virtual void _PrepareBuffers(const size_t numChannels,
const size_t numFrames);
// Resize the pointer-to-pointers for the vector-of-vectors.
void _ResizePointers(const size_t numChannels);
// Attributes
// The output array into which the DSP module's calculations will be written.
// Pointers to this member's data will be returned by .Process(), and the
// std::vector will ensure proper allocation.
std::vector<std::vector<float>> mOutputs;
// A pointer to pointers of which copies will be given out as the output of
// .Process(). This object will ensure proper allocation and deallocation of
// the first level; The second level points to .data() from mOutputs.
float **mOutputPointers;
size_t mOutputPointersSize;
};
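// Illustrative only (added for exposition, not part of the commit): a
// minimal subclass showing the Process() contract together with the
// protected helpers above. `PassThrough` is a hypothetical name.
class PassThrough : public DSP {
public:
  float **Process(float **inputs, const size_t numChannels,
                  const size_t numFrames) override {
    this->_PrepareBuffers(numChannels, numFrames); // size mOutputs + pointers
    for (size_t c = 0; c < numChannels; c++)
      for (size_t f = 0; f < numFrames; f++)
        this->mOutputs[c][f] = inputs[c][f]; // copy into owned buffers
    return this->_GetPointers(); // pointers into this object's mOutputs
  }
};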
// A class where a longer buffer of history is needed to correctly calculate
// the DSP algorithm (e.g. algorithms involving convolution).
//
// Hacky stuff:
// * Mono
// * Single-precision floats.
class History : public DSP {
public:
History();
protected:
// Called at the end of the DSP, advance the history index to the next open
// spot. Does not ensure that it's at a valid address.
void _AdvanceHistoryIndex(const size_t bufferSize);
// Drop the new samples into the history array.
// Manages history array size
void _UpdateHistory(float **inputs, const size_t numChannels,
const size_t numFrames);
// The history array that's used for DSP calculations.
std::vector<float> mHistory;
// How many samples previous are required.
// Zero means that no history is required--only the current sample.
size_t mHistoryRequired;
// Location of the first sample in the current buffer.
// Shall always be in the range [mHistoryRequired, mHistory.size()).
size_t mHistoryIndex;
private:
// Make sure that the history array is long enough.
void _EnsureHistorySize(const size_t bufferSize);
// Copy the end of the history back to the front and reset mHistoryIndex.
void _RewindHistory();
};
}; // namespace dsp
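As a concrete illustration of the _process_core_() contract documented above (read from _input_post_gain, write to _core_dsp_output), a trivial version-1 subclass might look like this; HardClip is a hypothetical name, not part of the commit:

#include <algorithm> // std::clamp

class HardClip : public DSP {
protected:
  void _process_core_() override {
    // Inputs are already gain-staged into _input_post_gain; outputs belong
    // in _core_dsp_output, which the base class sizes before calling us.
    for (size_t i = 0; i < this->_input_post_gain.size(); i++)
      this->_core_dsp_output[i] =
          std::clamp(this->_input_post_gain[i], -1.0f, 1.0f);
  }
};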
+117
@@ -0,0 +1,117 @@
#include <fstream>
#include <sstream> // std::stringstream (error messages)
#include <unordered_set>
#include "json.hpp"
#include "dsp.h"
//#include "HardCodedModel.h"
//#include "lstm.h"
#include "wavenet.h"
void verify_config_version(const std::string version) {
const std::unordered_set<std::string> supported_versions({"0.5.0"});
if (supported_versions.find(version) == supported_versions.end()) {
std::stringstream ss;
ss << "Model config is an unsupported version " << version
<< ". Try either converting the model to a more recent version, or "
"update your version of the NAM plugin.";
throw std::runtime_error(ss.str());
}
}
std::vector<float> _get_weights(nlohmann::json const &j,
const std::filesystem::path config_path) {
if (j.find("weights") != j.end()) {
auto weight_list = j["weights"];
std::vector<float> weights;
for (auto it = weight_list.begin(); it != weight_list.end(); ++it)
weights.push_back(*it);
return weights;
} else
throw std::runtime_error("Corrupted model file is missing weights.");
}
std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path model_dir) {
auto config_filename = model_dir / std::filesystem::path("config.json");
return get_dsp(config_filename);
}
std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
if (!std::filesystem::exists(config_filename))
throw std::runtime_error("Config JSON doesn't exist!\n");
std::ifstream i(config_filename);
nlohmann::json j;
i >> j;
verify_config_version(j["version"]);
auto architecture = j["architecture"];
nlohmann::json config = j["config"];
std::vector<float> params = _get_weights(j, config_filename);
//if (architecture == "Linear") {
// const int receptive_field = config["receptive_field"];
// const bool _bias = config["bias"];
// return std::make_unique<Linear>(receptive_field, _bias, params);
//} else if (architecture == "ConvNet") {
// const int channels = config["channels"];
// const bool batchnorm = config["batchnorm"];
// std::vector<int> dilations;
// for (int i = 0; i < config["dilations"].size(); i++)
// dilations.push_back(config["dilations"][i]);
// const std::string activation = config["activation"];
// return std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm,
// activation, params);
//} else if (architecture == "LSTM") {
// const int num_layers = config["num_layers"];
// const int input_size = config["input_size"];
// const int hidden_size = config["hidden_size"];
// auto json = nlohmann::json{};
// return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
// params, json);
//} else if (architecture == "CatLSTM") {
// const int num_layers = config["num_layers"];
// const int input_size = config["input_size"];
// const int hidden_size = config["hidden_size"];
// return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
// params, config["parametric"]);
//} else
if (architecture == "WaveNet" || architecture == "CatWaveNet") {
std::vector<wavenet::LayerArrayParams> layer_array_params;
for (int i = 0; i < config["layers"].size(); i++) {
nlohmann::json layer_config = config["layers"][i];
std::vector<int> dilations;
for (int j = 0; j < layer_config["dilations"].size(); j++)
dilations.push_back(layer_config["dilations"][j]);
layer_array_params.push_back(wavenet::LayerArrayParams(
layer_config["input_size"], layer_config["condition_size"],
layer_config["head_size"], layer_config["channels"],
layer_config["kernel_size"], dilations, layer_config["activation"],
layer_config["gated"], layer_config["head_bias"]));
}
const bool with_head = config["head"] == NULL;
const float head_scale = config["head_scale"];
// Solves compilation issue on macOS Error: No matching constructor for
// initialization of 'wavenet::WaveNet' Solution from
// https://stackoverflow.com/a/73956681/3768284
auto parametric_json =
architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
return std::make_unique<wavenet::WaveNet>(
layer_array_params, head_scale, with_head, parametric_json, params);
} else {
throw std::runtime_error("Unrecognized architecture");
}
}
//std::unique_ptr<DSP> get_hard_dsp() {
// // Values are defined in HardCodedModel.h
// verify_config_version(std::string(PYTHON_MODEL_VERSION));
//
// // Uncomment the line that corresponds to the model type that you're using.
//
// // return std::make_unique<convnet::ConvNet>(CHANNELS, DILATIONS, BATCHNORM,
// // ACTIVATION, PARAMS); return
// // std::make_unique<wavenet::WaveNet>(LAYER_ARRAY_PARAMS, HEAD_SCALE,
// // WITH_HEAD, PARAMETRIC, PARAMS);
// return std::make_unique<lstm::LSTM>(NUM_LAYERS, INPUT_SIZE, HIDDEN_SIZE,
// PARAMS, PARAMETRIC);
//}
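For reference, the keys read by get_dsp() above imply a model file shaped roughly like the following; this is reconstructed from the parsing code, and the concrete values are illustrative only:

// {
//   "version": "0.5.0",
//   "architecture": "WaveNet",            // or "CatWaveNet"
//   "config": {
//     "head": null,
//     "head_scale": 0.02,
//     "layers": [
//       { "input_size": 1, "condition_size": 1, "head_size": 8,
//         "channels": 16, "kernel_size": 3, "dilations": [1, 2, 4],
//         "activation": "Tanh", "gated": false, "head_bias": false }
//     ],
//     "parametric": { ... }               // CatWaveNet only
//   },
//   "weights": [ ... flat list of floats, consumed in declaration order ... ]
// }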
+22875
File diff suppressed because it is too large
+103
@@ -0,0 +1,103 @@
#include <cfenv>
#include <cstddef>
#include <cstdint>
#include <string>
#include <memory>
// LV2
#include <lv2/core/lv2.h>
#include <lv2/urid/urid.h>
#include <lv2/log/log.h>
#include <lv2/log/logger.h>
#include "nam_plugin.hpp"
#ifdef FORCE_DISABLE_DENORMALS
#include "architecture.hpp"
#endif
// LV2 Functions
static LV2_Handle instantiate(
const LV2_Descriptor*,
double rate,
const char*,
const LV2_Feature* const* features
) {
LV2_URID_Map* map = nullptr;
LV2_Log_Logger logger = {};
for (size_t i = 0; features[i]; ++i) {
if (std::string(features[i]->URI) == std::string(LV2_URID__map))
map = static_cast<LV2_URID_Map*>(features[i]->data);
else if (std::string(features[i]->URI) == std::string(LV2_LOG__log))
logger.log = static_cast<LV2_Log_Log*>(features[i]->data);
}
lv2_log_logger_set_map(&logger, map);
if (!map) {
lv2_log_error(&logger, "Missing required feature: `%s`", LV2_URID__map);
return nullptr;
}
try {
auto nam = std::make_unique<NAM::Plugin>(static_cast<float>(rate));
nam->map_uris(map);
return static_cast<LV2_Handle>(nam.release());
} catch(const std::exception& e) {
lv2_log_error(&logger, "Failed to instantiate plugin: %s", e.what());
return nullptr;
}
}
static void connect_port(LV2_Handle instance, uint32_t port, void* data) {
  auto nam = static_cast<NAM::Plugin*>(instance);
  constexpr uint32_t misc_port_cnt = sizeof(nam->ports)/sizeof(void*);
  // Ports beyond the Ports struct (currently the "mix" control) are ignored.
  if (port < misc_port_cnt)
    *(reinterpret_cast<void**>(&nam->ports) + port) = data;
}
static void activate(LV2_Handle) {}
static void run(LV2_Handle instance, uint32_t n_samples) {
#ifdef FORCE_DISABLE_DENORMALS
std::fenv_t fe_state;
std::feholdexcept(&fe_state);
disable_denormals();
#endif
static_cast<NAM::Plugin*>(instance)->process(n_samples);
#ifdef FORCE_DISABLE_DENORMALS
// restore previous floating point state
std::feupdateenv(&fe_state);
#endif
}
static void deactivate(LV2_Handle) {}
static void cleanup(LV2_Handle instance) {
delete static_cast<NAM::Plugin*>(instance);
}
static const void* extension_data(const char*) { return nullptr; }
static const LV2_Descriptor descriptor = {
NAM::Plugin::URI.data(),
instantiate,
connect_port,
activate,
run,
deactivate,
cleanup,
extension_data
};
LV2_SYMBOL_EXPORT const LV2_Descriptor* lv2_descriptor(uint32_t index) {
return index == 0 ? &descriptor : nullptr;
}
+34
View File
@@ -0,0 +1,34 @@
#include <algorithm>
#include <cmath>
#include <utility>
// Lv2
#include <lv2/atom/util.h>
#include "nam_plugin.hpp"
namespace NAM {
Plugin::Plugin(float rate) : m_rate(rate)
{
// FIXME: hard-coded development model path; the model to load should
// eventually come from the host.
namModel = get_dsp("C:\\Users\\oliph\\AppData\\Roaming\\GuitarSim\\NAM\\JCM2000Crunch.nam");
}
void Plugin::map_uris(LV2_URID_Map* map) noexcept {
lv2_atom_forge_init(&atom_forge, map);
uris.atom_Object = map->map(map->handle, LV2_ATOM__Object);
uris.atom_Float = map->map(map->handle, LV2_ATOM__Float);
}
void Plugin::process(uint32_t n_samples) noexcept {
if (ports.control) {
LV2_ATOM_SEQUENCE_FOREACH(ports.control, event) {
if (event->body.type == uris.atom_Object) {
  // TODO: handle parameter-change objects; `obj` is scaffolding for now.
  const auto obj = reinterpret_cast<LV2_Atom_Object*>(&event->body);
  (void)obj;
}
}
}
namModel->process(ports.audio_in, ports.audio_out, 1, n_samples, 1.0, 1.0, mNAMParams);
namModel->finalize_(n_samples);
}
}
+61
@@ -0,0 +1,61 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <random>
#include <string_view>
// LV2
#include <lv2/atom/atom.h>
#include <lv2/urid/urid.h>
#include <lv2/atom/forge.h>
#include "dsp.h"
namespace NAM {
class Plugin {
public:
static constexpr std::string_view URI = "http://github.com/mikeoliphant/neural-amp-modeler-lv2";
struct Ports {
const LV2_Atom_Sequence* control;
LV2_Atom_Sequence* notify;
const float* audio_in;
float* audio_out;
};
Ports ports = {};
std::unique_ptr<::DSP> namModel;
std::unordered_map<std::string, double> mNAMParams =
{
{"Input", 0.0},
{"Output", 0.0}
};
/*
Member Functions
*/
Plugin(float rate);
~Plugin() = default;
void map_uris(LV2_URID_Map* map) noexcept;
void process(uint32_t n_samples) noexcept;
private:
struct URIs {
LV2_URID atom_Object;
LV2_URID atom_Float;
};
URIs uris = {};
LV2_Atom_Forge atom_forge = {};
float m_rate;
};
}
+11
@@ -0,0 +1,11 @@
#include <algorithm>
#include <cctype>
#include "util.h"
std::string util::lowercase(const std::string &s) {
std::string out(s);
std::transform(s.begin(), s.end(), out.begin(),
[](unsigned char c) { return std::tolower(c); });
return out;
}
+9
@@ -0,0 +1,9 @@
#pragma once
// Utilities
#include <string>
namespace util {
std::string lowercase(const std::string &s);
}; // namespace util
+400
View File
@@ -0,0 +1,400 @@
#include <algorithm>
#include <cassert> // assert (used in _Head)
#include <cmath>
#include <iostream>
#include <sstream> // std::stringstream (error messages)
#include <Eigen/Dense>
#include "wavenet.h"
wavenet::_DilatedConv::_DilatedConv(const int in_channels,
const int out_channels,
const int kernel_size, const int bias,
const int dilation) {
this->set_size_(in_channels, out_channels, kernel_size, bias, dilation);
}
void wavenet::_Layer::set_params_(std::vector<float>::iterator &params) {
this->_conv.set_params_(params);
this->_input_mixin.set_params_(params);
this->_1x1.set_params_(params);
}
void wavenet::_Layer::process_(const Eigen::MatrixXf &input,
const Eigen::MatrixXf &condition,
Eigen::MatrixXf &head_input,
Eigen::MatrixXf &output, const long i_start,
const long j_start) {
const long ncols = condition.cols();
const long channels = this->get_channels();
// Input dilated conv
this->_conv.process_(input, this->_z, i_start, ncols, 0);
// Mix-in condition
this->_z.noalias() += this->_input_mixin.process(condition);
if (this->_activation == "Tanh")
tanh_(this->_z);
else if (this->_activation == "ReLU")
relu_(this->_z, 0, channels, 0, this->_z.cols());
else
throw std::runtime_error("Unrecognized activation.");
if (this->_gated) {
sigmoid_(this->_z, channels, 2 * channels, 0, this->_z.cols());
this->_z.topRows(channels).array() *= this->_z.bottomRows(channels).array();
// this->_z.topRows(channels) = this->_z.topRows(channels).cwiseProduct(
// this->_z.bottomRows(channels)
// );
}
head_input.noalias() += this->_z.topRows(channels);
output.middleCols(j_start, ncols).noalias() =
input.middleCols(i_start, ncols) +
this->_1x1.process(this->_z.topRows(channels));
}
void wavenet::_Layer::set_num_frames_(const long num_frames) {
this->_z.resize(this->_conv.get_out_channels(), num_frames);
}
// LayerArray =================================================================
#define LAYER_ARRAY_BUFFER_SIZE 65536
wavenet::_LayerArray::_LayerArray(const int input_size,
const int condition_size, const int head_size,
const int channels, const int kernel_size,
const std::vector<int> &dilations,
const std::string activation,
const bool gated, const bool head_bias)
: _rechannel(input_size, channels, false),
_head_rechannel(channels, head_size, head_bias) {
for (int i = 0; i < dilations.size(); i++)
this->_layers.push_back(_Layer(condition_size, channels, kernel_size,
dilations[i], activation, gated));
const long receptive_field = this->_get_receptive_field();
for (int i = 0; i < dilations.size(); i++) {
this->_layer_buffers.push_back(Eigen::MatrixXf(
channels, LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1));
this->_layer_buffers[i].setZero();
}
this->_buffer_start = this->_get_receptive_field() - 1;
}
void wavenet::_LayerArray::advance_buffers_(const int num_frames) {
this->_buffer_start += num_frames;
}
long wavenet::_LayerArray::get_receptive_field() const {
long result = 0;
for (int i = 0; i < this->_layers.size(); i++)
result += this->_layers[i].get_dilation() *
(this->_layers[i].get_kernel_size() - 1);
return result;
}
void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) {
// Example:
// _buffer_start = 0
// num_frames = 64
// buffer_size = 64
// -> this will write on indices 0 through 63, inclusive.
// -> No illegal writes.
// -> no rewind needed.
if (this->_buffer_start + num_frames > this->_get_buffer_size())
this->_rewind_buffers_();
}
void wavenet::_LayerArray::process_(const Eigen::MatrixXf &layer_inputs,
const Eigen::MatrixXf &condition,
Eigen::MatrixXf &head_inputs,
Eigen::MatrixXf &layer_outputs,
Eigen::MatrixXf &head_outputs) {
this->_layer_buffers[0].middleCols(this->_buffer_start, layer_inputs.cols()) =
this->_rechannel.process(layer_inputs);
const long last_layer = this->_layers.size() - 1;
for (auto i = 0; i < this->_layers.size(); i++) {
this->_layers[i].process_(
this->_layer_buffers[i], condition, head_inputs,
i == last_layer ? layer_outputs : this->_layer_buffers[i + 1],
this->_buffer_start, i == last_layer ? 0 : this->_buffer_start);
}
head_outputs = this->_head_rechannel.process(head_inputs);
}
void wavenet::_LayerArray::set_num_frames_(const long num_frames) {
// Wavenet checks for unchanged num_frames; if we made it here, there's
// something to do.
if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field()) {
std::stringstream ss;
ss << "Asked to accept a buffer of " << num_frames
<< " samples, but the buffer is too short (" << LAYER_ARRAY_BUFFER_SIZE
<< ") to get out of the recptive field (" << this->_get_receptive_field()
<< "); copy errors could occur!\n";
throw std::runtime_error(ss.str().c_str());
}
for (int i = 0; i < this->_layers.size(); i++)
this->_layers[i].set_num_frames_(num_frames);
}
void wavenet::_LayerArray::set_params_(std::vector<float>::iterator &params) {
this->_rechannel.set_params_(params);
for (int i = 0; i < this->_layers.size(); i++)
this->_layers[i].set_params_(params);
this->_head_rechannel.set_params_(params);
}
long wavenet::_LayerArray::_get_channels() const {
return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0;
}
long wavenet::_LayerArray::_get_receptive_field() const {
// TODO remove this and use get_receptive_field() instead!
long res = 1;
for (int i = 0; i < this->_layers.size(); i++)
res += (this->_layers[i].get_kernel_size() - 1) *
this->_layers[i].get_dilation();
return res;
}
void wavenet::_LayerArray::_rewind_buffers_()
// Consider wrapping instead...
// Can make this smaller--largest dilation, not receptive field!
{
const long start = this->_get_receptive_field() - 1;
for (int i = 0; i < this->_layer_buffers.size(); i++) {
const long d = (this->_layers[i].get_kernel_size() - 1) *
this->_layers[i].get_dilation();
this->_layer_buffers[i].middleCols(start - d, d) =
this->_layer_buffers[i].middleCols(this->_buffer_start - d, d);
}
this->_buffer_start = start;
}
// Head =======================================================================
wavenet::_Head::_Head(const int input_size, const int num_layers,
const int channels, const std::string activation)
: _channels(channels), _activation(activation),
_head(num_layers > 0 ? channels : input_size, 1, true) {
assert(num_layers > 0);
int dx = input_size;
for (int i = 0; i < num_layers; i++) {
this->_layers.push_back(
Conv1x1(dx, i == num_layers - 1 ? 1 : channels, true));
dx = channels;
if (i < num_layers - 1)
this->_buffers.push_back(Eigen::MatrixXf());
}
}
void wavenet::_Head::set_params_(std::vector<float>::iterator &params) {
for (int i = 0; i < this->_layers.size(); i++)
this->_layers[i].set_params_(params);
}
void wavenet::_Head::process_(Eigen::MatrixXf &inputs,
Eigen::MatrixXf &outputs) {
const size_t num_layers = this->_layers.size();
this->_apply_activation_(inputs);
if (num_layers == 1)
outputs = this->_layers[0].process(inputs);
else {
this->_buffers[0] = this->_layers[0].process(inputs);
for (int i = 1; i < num_layers; i++) { // Asserted > 0 layers
this->_apply_activation_(this->_buffers[i - 1]);
if (i < num_layers - 1)
this->_buffers[i] = this->_layers[i].process(this->_buffers[i - 1]);
else
outputs = this->_layers[i].process(this->_buffers[i - 1]);
}
}
}
void wavenet::_Head::set_num_frames_(const long num_frames) {
for (int i = 0; i < this->_buffers.size(); i++)
this->_buffers[i].resize(this->_channels, num_frames);
}
void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) {
if (this->_activation == "Tanh")
tanh_(x);
else if (this->_activation == "ReLU")
relu_(x);
else
throw std::runtime_error("Unrecognized activation.");
}
// WaveNet ====================================================================
wavenet::WaveNet::WaveNet(
const std::vector<wavenet::LayerArrayParams> &layer_array_params,
const float head_scale, const bool with_head, nlohmann::json parametric,
std::vector<float> params)
: //_head(channels, head_layers, head_channels, head_activation),
_num_frames(0), _head_scale(head_scale) {
if (with_head)
throw std::runtime_error("Head not implemented!");
this->_init_parametric_(parametric);
for (int i = 0; i < layer_array_params.size(); i++) {
this->_layer_arrays.push_back(wavenet::_LayerArray(
layer_array_params[i].input_size, layer_array_params[i].condition_size,
layer_array_params[i].head_size, layer_array_params[i].channels,
layer_array_params[i].kernel_size, layer_array_params[i].dilations,
layer_array_params[i].activation, layer_array_params[i].gated,
layer_array_params[i].head_bias));
this->_layer_array_outputs.push_back(
Eigen::MatrixXf(layer_array_params[i].channels, 0));
if (i == 0)
this->_head_arrays.push_back(
Eigen::MatrixXf(layer_array_params[i].channels, 0));
if (i > 0)
if (layer_array_params[i].channels !=
layer_array_params[i - 1].head_size) {
std::stringstream ss;
ss << "channels of layer " << i << " ("
<< layer_array_params[i].channels
<< ") doesn't match head_size of preceding layer ("
           << layer_array_params[i - 1].head_size << ")!\n";
throw std::runtime_error(ss.str().c_str());
}
this->_head_arrays.push_back(
Eigen::MatrixXf(layer_array_params[i].head_size, 0));
}
this->_head_output.resize(1, 0); // Mono output!
this->set_params_(params);
this->_reset_anti_pop_();
}
void wavenet::WaveNet::finalize_(const int num_frames) {
this->DSP::finalize_(num_frames);
this->_advance_buffers_(num_frames);
}
void wavenet::WaveNet::set_params_(std::vector<float> &params) {
  std::vector<float>::iterator it = params.begin();
  for (int i = 0; i < this->_layer_arrays.size(); i++)
    this->_layer_arrays[i].set_params_(it);
  // this->_head.set_params_(it);
  this->_head_scale = *(it++);
  if (it != params.end()) {
    // Leftover weights: the model consumed fewer parameters than were
    // provided.
    std::stringstream ss;
    ss << "Parameter mismatch: model consumed "
       << std::distance(params.begin(), it) << " parameters, but "
       << params.size() << " were provided.";
    throw std::runtime_error(ss.str().c_str());
  }
}
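// Usage sketch (hypothetical caller; `config` and its "weights" key are
// assumptions about the surrounding model-loading code, not defined here).
// The flat weight vector is consumed in declaration order -- every layer
// array in sequence, then the head scale -- so its length must match exactly:
//
//   std::vector<float> weights = config["weights"].get<std::vector<float>>();
//   model.set_params_(weights); // throws on a count mismatch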
void wavenet::WaveNet::_advance_buffers_(const int num_frames) {
for (int i = 0; i < this->_layer_arrays.size(); i++)
this->_layer_arrays[i].advance_buffers_(num_frames);
}
void wavenet::WaveNet::_init_parametric_(nlohmann::json &parametric) {
for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end();
++it)
this->_param_names.push_back(it.key());
// TODO assert continuous 0 to 1
std::sort(this->_param_names.begin(), this->_param_names.end());
}
void wavenet::WaveNet::_prepare_for_frames_(const long num_frames) {
for (auto i = 0; i < this->_layer_arrays.size(); i++)
this->_layer_arrays[i].prepare_for_frames_(num_frames);
}
void wavenet::WaveNet::_process_core_() {
const long num_frames = this->_input_post_gain.size();
this->_set_num_frames_(num_frames);
this->_prepare_for_frames_(num_frames);
// NOTE: During warm-up, weird things can happen that NaN out the layers.
// We could solve this by anti-popping the *input*. But, it's easier to check
// the outputs for NaNs and zero them out.
// They'll flush out eventually because the model doesn't use any feedback.
// Fill into condition array:
// Clumsy...
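  // Layout of _condition: row 0 holds the audio sample; rows 1..N hold the
  // (sorted) control parameter values, one row per parameter.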
for (int j = 0; j < num_frames; j++) {
this->_condition(0, j) = this->_input_post_gain[j];
if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
// compiler optimize this.
for (int i = 0; i < this->_param_names.size(); i++)
this->_condition(i + 1, j) =
(float)this->_params[this->_param_names[i]];
}
// Main layer arrays:
// Layer-to-layer
// Sum on head output
this->_head_arrays[0].setZero();
for (int i = 0; i < this->_layer_arrays.size(); i++)
this->_layer_arrays[i].process_(
i == 0 ? this->_condition : this->_layer_array_outputs[i - 1],
this->_condition, this->_head_arrays[i], this->_layer_array_outputs[i],
this->_head_arrays[i + 1]);
// this->_head.process_(
// this->_head_input,
// this->_head_output
//);
// Copy to required output array
// Hack: apply head scale here; revisit when/if I activate the head.
// assert(this->_head_output.rows() == 1);
const long final_head_array = this->_head_arrays.size() - 1;
assert(this->_head_arrays[final_head_array].rows() == 1);
for (int s = 0; s < num_frames; s++) {
float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
    // This is the NaN check mentioned above; anti-popping the input would
    // make it unnecessary.
    if (std::isnan(out))
      out = 0.0f;
this->_core_dsp_output[s] = out;
}
// Apply anti-pop
this->_anti_pop_();
}
void wavenet::WaveNet::_set_num_frames_(const long num_frames) {
if (num_frames == this->_num_frames)
return;
this->_condition.resize(1 + this->_param_names.size(), num_frames);
for (int i = 0; i < this->_head_arrays.size(); i++)
this->_head_arrays[i].resize(this->_head_arrays[i].rows(), num_frames);
for (int i = 0; i < this->_layer_array_outputs.size(); i++)
this->_layer_array_outputs[i].resize(this->_layer_array_outputs[i].rows(),
num_frames);
this->_head_output.resize(this->_head_output.rows(), num_frames);
for (int i = 0; i < this->_layer_arrays.size(); i++)
this->_layer_arrays[i].set_num_frames_(num_frames);
// this->_head.set_num_frames_(num_frames);
this->_num_frames = num_frames;
}
void wavenet::WaveNet::_anti_pop_() {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
return;
const float slope = 1.0f / float(this->_anti_pop_ramp);
for (int i = 0; i < this->_core_dsp_output.size(); i++) {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
break;
const float gain = std::max(slope * float(this->_anti_pop_countdown), 0.0f);
this->_core_dsp_output[i] *= gain;
this->_anti_pop_countdown++;
}
}
void wavenet::WaveNet::_reset_anti_pop_() {
  // Use the "real" receptive field here, not the buffer size.
long receptive_field = 1;
for (int i = 0; i < this->_layer_arrays.size(); i++)
receptive_field += this->_layer_arrays[i].get_receptive_field();
this->_anti_pop_countdown = -receptive_field;
}
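// Example of the resulting ramp (a sketch; the receptive field of 2047 is an
// assumed value for a typical NAM model): _anti_pop_countdown starts at
// -2047, so the first 2047 output samples are scaled by 0 while the
// uninitialized state flushes out of the buffers; the following 4000 samples
// (_anti_pop_ramp) then fade in linearly from 0 to 1.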
+212
@@ -0,0 +1,212 @@
#pragma once
#include <string>
#include <vector>
#include "json.hpp"
#include <Eigen/Dense>
#include "dsp.h"
namespace wavenet {
// Rework the initialization API slightly. Merge w/ dsp.h later.
class _DilatedConv : public Conv1D {
public:
_DilatedConv(const int in_channels, const int out_channels,
const int kernel_size, const int bias, const int dilation);
};
class _Layer {
public:
_Layer(const int condition_size, const int channels, const int kernel_size,
const int dilation, const std::string activation, const bool gated)
: _activation(activation), _gated(gated),
_conv(channels, gated ? 2 * channels : channels, kernel_size, true,
dilation),
_input_mixin(condition_size, gated ? 2 * channels : channels, false),
_1x1(channels, channels, true){};
void set_params_(std::vector<float>::iterator &params);
// :param `input`: from previous layer
// :param `output`: to next layer
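  // :param `condition`: conditioning inputs (audio plus any control params)
  // :param `head_input`: the head accumulator; this layer's contribution is
  //                      summed onto it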
void process_(const Eigen::MatrixXf &input, const Eigen::MatrixXf &condition,
Eigen::MatrixXf &head_input, Eigen::MatrixXf &output,
const long i_start, const long j_start);
void set_num_frames_(const long num_frames);
long get_channels() const { return this->_conv.get_in_channels(); };
int get_dilation() const { return this->_conv.get_dilation(); };
long get_kernel_size() const { return this->_conv.get_kernel_size(); };
private:
// The dilated convolution at the front of the block
_DilatedConv _conv;
// Input mixin
Conv1x1 _input_mixin;
// The post-activation 1x1 convolution
Conv1x1 _1x1;
// The internal state
Eigen::MatrixXf _z;
const std::string _activation;
const bool _gated;
};
class LayerArrayParams {
public:
LayerArrayParams(const int input_size_, const int condition_size_,
const int head_size_, const int channels_,
const int kernel_size_, const std::vector<int> &dilations_,
const std::string activation_, const bool gated_,
const bool head_bias_)
: input_size(input_size_), condition_size(condition_size_),
head_size(head_size_), channels(channels_), kernel_size(kernel_size_),
activation(activation_), gated(gated_), head_bias(head_bias_) {
for (int i = 0; i < dilations_.size(); i++)
this->dilations.push_back(dilations_[i]);
};
const int input_size;
const int condition_size;
const int head_size;
const int channels;
const int kernel_size;
std::vector<int> dilations;
const std::string activation;
const bool gated;
const bool head_bias;
};
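// Example construction (a sketch; the concrete numbers are assumptions
// matching a typical two-array NAM WaveNet, not values defined in this
// header). Note the chaining rule checked by WaveNet's constructor: each
// array's `channels` must equal the preceding array's `head_size`; in
// addition, each array's `input_size` must match the preceding array's
// `channels`:
//
//   std::vector<int> dilations{1, 2, 4, 8, 16, 32, 64, 128, 256, 512};
//   std::vector<LayerArrayParams> params{
//       LayerArrayParams(1, 1, 8, 16, 3, dilations, "Tanh", false, false),
//       LayerArrayParams(16, 1, 1, 8, 3, dilations, "Tanh", false, true)};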
// An array of layers sharing the same channel count, kernel size, and
// activation.
class _LayerArray {
public:
_LayerArray(const int input_size, const int condition_size,
const int head_size, const int channels, const int kernel_size,
const std::vector<int> &dilations, const std::string activation,
const bool gated, const bool head_bias);
void advance_buffers_(const int num_frames);
// Preparing for frames:
// Rewind buffers if needed
// Shift index to prepare
//
void prepare_for_frames_(const long num_frames);
// All arrays are "short".
void process_(const Eigen::MatrixXf &layer_inputs, // Short
const Eigen::MatrixXf &condition, // Short
Eigen::MatrixXf &layer_outputs, // Short
Eigen::MatrixXf &head_inputs, // Sum up on this.
Eigen::MatrixXf &head_outputs // post head-rechannel
);
void set_num_frames_(const long num_frames);
void set_params_(std::vector<float>::iterator &it);
// "Zero-indexed" receptive field.
// E.g. a 1x1 convolution has a z.i.r.f. of zero.
long get_receptive_field() const;
private:
long _buffer_start;
// The rechannel before the layers
Conv1x1 _rechannel;
// Buffers in between layers.
  // Buffer [i] is the input to layer [i];
  // the last layer outputs to a short array provided by the caller.
std::vector<Eigen::MatrixXf> _layer_buffers;
// The layer objects
std::vector<_Layer> _layers;
// Rechannel for the head
Conv1x1 _head_rechannel;
long _get_buffer_size() const {
return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols() : 0;
};
long _get_channels() const;
// "One-indexed" receptive field
// TODO remove!
// E.g. a 1x1 convolution has a o.i.r.f. of one.
long _get_receptive_field() const;
void _rewind_buffers_();
};
// The head module
// [Act->Conv] x L
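// e.g. for num_layers = 3 the flow is:
// act -> Conv(input_size, channels) -> act -> Conv(channels, channels)
//     -> act -> Conv(channels, 1)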
class _Head {
public:
_Head(const int input_size, const int num_layers, const int channels,
const std::string activation);
void set_params_(std::vector<float>::iterator &params);
// NOTE: the head transforms the provided input by applying a nonlinearity
// to it in-place!
void process_(Eigen::MatrixXf &inputs, Eigen::MatrixXf &outputs);
void set_num_frames_(const long num_frames);
private:
int _channels;
std::vector<Conv1x1> _layers;
Conv1x1 _head;
std::string _activation;
// Stores the outputs of the convs *except* the last one, which goes in
  // the array `outputs` provided to .process_().
std::vector<Eigen::MatrixXf> _buffers;
// Apply the activation to the provided array, in-place
void _apply_activation_(Eigen::MatrixXf &x);
};
// The main WaveNet model
// Handles both parametric and non-parametric models; the difference is
// handled at parameter read-in.
class WaveNet : public DSP {
public:
WaveNet(const std::vector<LayerArrayParams> &layer_array_params,
const float head_scale, const bool with_head,
nlohmann::json parametric, std::vector<float> params);
// WaveNet(WaveNet&&) = default;
// WaveNet& operator=(WaveNet&&) = default;
// ~WaveNet() = default;
void finalize_(const int num_frames) override;
void set_params_(std::vector<float> &params);
private:
long _num_frames;
std::vector<_LayerArray> _layer_arrays;
// Their outputs
std::vector<Eigen::MatrixXf> _layer_array_outputs;
// Head _head;
// Element-wise arrays:
Eigen::MatrixXf _condition;
// One more than total layer arrays
std::vector<Eigen::MatrixXf> _head_arrays;
float _head_scale;
Eigen::MatrixXf _head_output;
// Names of the params, sorted.
// TODO move this up, ugh.
std::vector<std::string> _param_names;
void _advance_buffers_(const int num_frames);
// Get the info from the parametric config
void _init_parametric_(nlohmann::json &parametric);
void _prepare_for_frames_(const long num_frames);
// Reminder: From ._input_post_gain to ._core_dsp_output
void _process_core_() override;
// Ensure that all buffer arrays are the right size for this num_frames
void _set_num_frames_(const long num_frames);
// The net starts with random parameters inside; we need to wait for a full
// receptive field to pass through before we can count on the output being
// ok. This implements a gentle "ramp-up" so that there's no "pop" at the
// start.
long _anti_pop_countdown;
const long _anti_pop_ramp = 4000;
void _anti_pop_();
void _reset_anti_pop_();
};
}; // namespace wavenet