From 52942dc16bfef3cf133f2f0fc1de6778225b6140 Mon Sep 17 00:00:00 2001
From: Mike Oliphant
Date: Thu, 23 Mar 2023 12:09:33 -0700
Subject: [PATCH] Delete local NAM code

---
 CMakeLists.txt         |   2 +
 NeuralAmpModelerCore   |   2 +-
 {src => json}/json.hpp |   0
 src/CMakeLists.txt     |  19 +-
 src/dsp.h              | 397 ----------------------------------------
 src/get_dsp.cpp        | 117 ------------
 src/nam_plugin.cpp     |  22 ++-
 src/nam_plugin.h       |   4 +-
 src/util.cpp           |  11 --
 src/util.h             |   9 -
 src/wavenet.cpp        | 400 -----------------------------------------
 src/wavenet.h          | 212 ----------------------
 12 files changed, 33 insertions(+), 1162 deletions(-)
 rename {src => json}/json.hpp (100%)
 delete mode 100644 src/dsp.h
 delete mode 100644 src/get_dsp.cpp
 delete mode 100644 src/util.cpp
 delete mode 100644 src/util.h
 delete mode 100644 src/wavenet.cpp
 delete mode 100644 src/wavenet.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aca32b5..a574d36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(NAM_LV2_ID http://github.com/mikeoliphant/neural-amp-modeler-lv2)
 
 include_directories(SYSTEM eigen)
 include_directories(SYSTEM lv2/include)
+include_directories(SYSTEM NeuralAmpModelerCore/NAM)
+include_directories(SYSTEM json)
 
 add_subdirectory(src)
diff --git a/NeuralAmpModelerCore b/NeuralAmpModelerCore
index 91a2bdb..ccf5ffe 160000
--- a/NeuralAmpModelerCore
+++ b/NeuralAmpModelerCore
@@ -1 +1 @@
-Subproject commit 91a2bdb7a26e0e5b52f31e7f55639fa180e33603
+Subproject commit ccf5ffe767122a8b03fa9c862c357a841bd6a608
diff --git a/src/json.hpp b/json/json.hpp
similarity index 100%
rename from src/json.hpp
rename to json/json.hpp
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5ddbcf0..506be34 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,14 +2,17 @@ add_library(neural_amp_modeler MODULE
   nam_lv2.cpp
   nam_plugin.cpp
   nam_plugin.h
-  dsp.h
-  dsp.cpp
-  get_dsp.cpp
-  util.cpp
-  util.h
-  wavenet.cpp
-  wavenet.h
-  json.hpp
+  ../NeuralAmpModelerCore/NAM/activations.h
+  ../NeuralAmpModelerCore/NAM/version.h
+  ../NeuralAmpModelerCore/NAM/lstm.h
+  ../NeuralAmpModelerCore/NAM/lstm.cpp
+  ../NeuralAmpModelerCore/NAM/dsp.h
+  ../NeuralAmpModelerCore/NAM/dsp.cpp
+  ../NeuralAmpModelerCore/NAM/get_dsp.cpp
+  ../NeuralAmpModelerCore/NAM/util.cpp
+  ../NeuralAmpModelerCore/NAM/util.h
+  ../NeuralAmpModelerCore/NAM/wavenet.cpp
+  ../NeuralAmpModelerCore/NAM/wavenet.h
 )
 
 target_compile_features(neural_amp_modeler PUBLIC cxx_std_17)
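A note on what the build change above buys: plugin sources can keep referring to the core headers by bare name, because the two new SYSTEM include directories point into the submodule and the relocated json directory. Illustrative only; the exact include spelling used by the plugin sources is not shown in this patch:

    #include "dsp.h"    // now resolves to NeuralAmpModelerCore/NAM/dsp.h
    #include "json.hpp" // now resolves to json/json.hpp (moved out of src/)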
diff --git a/src/dsp.h b/src/dsp.h
deleted file mode 100644
index 53e7bd6..0000000
--- a/src/dsp.h
+++ /dev/null
@@ -1,397 +0,0 @@
-#pragma once
-
-#include <filesystem>
-#include <iterator>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include <Eigen/Dense>
-
-enum EArchitectures {
-  kLinear = 0,
-  kConvNet,
-  kLSTM,
-  kCatLSTM,
-  kWaveNet,
-  kCatWaveNet,
-  kNumModels
-};
-
-#define NAMSample float
-
-// Class for providing params from the plugin to the DSP module
-// For now, we'll work with doubles. Later, we'll add other types.
-class DSPParam {
-public:
-  const char *name;
-  const double val;
-};
-// And the params shall be provided as a std::vector.
-
-class DSP {
-public:
-  DSP();
-  // process() does all of the processing required to take the `input` array
-  // and fill in the required values on `output`.
-  // To do this:
-  // 1. The parameters from the plugin (I/O levels and any other parametric
-  //    inputs) are fetched.
-  // 2. The input level is applied.
-  // 3. The core DSP algorithm is run (this is what subclasses should
-  //    probably override).
-  // 4. The output level is applied and the result stored to `output`.
-  virtual void process(const NAMSample *input, NAMSample *output,
-                       const int num_frames, const double input_gain,
-                       const double output_gain,
-                       const std::unordered_map<std::string, double> &params);
-  // Anything to take care of before the next buffer comes in.
-  // For example:
-  // * Move the buffer index forward.
-  // * Does NOT say that params aren't stale; that's the job of the routine
-  //   that actually uses them, which varies depending on the particulars of
-  //   the DSP subclass implementation.
-  virtual void finalize_(const int num_frames);
-
-protected:
-  // Parameters (aka "knobs")
-  std::unordered_map<std::string, double> _params;
-  // If the params have changed since the last buffer was processed:
-  bool _stale_params;
-  // Where to store the samples after applying input gain
-  std::vector<float> _input_post_gain;
-  // Location for the output of the core DSP algorithm.
-  std::vector<float> _core_dsp_output;
-
-  // Methods
-
-  // Copy the parameters to the DSP module.
-  // If anything has changed, then set this->_stale_params to true.
-  // (TODO use "listener" approach)
-  void
-  _get_params_(const std::unordered_map<std::string, double> &input_params);
-
-  // Apply the input gain.
-  // The result populates this->_input_post_gain.
-  void _apply_input_level_(const NAMSample *input, const int num_frames,
-                           const double gain);
-
-  // i.e. ensure the size is correct.
-  void _ensure_core_dsp_output_ready_();
-
-  // The core of your DSP algorithm.
-  // Access the inputs in this->_input_post_gain
-  // Place the outputs in this->_core_dsp_output
-  virtual void _process_core_();
-
-  // Copy this->_core_dsp_output to output and apply the output volume
-  void _apply_output_level_(NAMSample *output, const int num_frames,
-                            const double gain);
-};
-
-// Class where an input buffer is kept so that long-time effects can be
-// captured. (e.g. conv nets or impulse responses, where we need history
-// that's longer than the sample buffer that's coming in.)
-class Buffer : public DSP {
-public:
-  Buffer(const int receptive_field);
-  void finalize_(const int num_frames);
-
-protected:
-  // Input buffer
-  const int _input_buffer_channels = 1; // Mono
-  int _receptive_field;
-  // First location where we add new samples from the input
-  long _input_buffer_offset;
-  std::vector<float> _input_buffer;
-  std::vector<float> _output_buffer;
-
-  void _set_receptive_field(const int new_receptive_field,
-                            const int input_buffer_size);
-  void _set_receptive_field(const int new_receptive_field);
-  void _reset_input_buffer();
-  // Use this->_input_post_gain
-  virtual void _update_buffers_();
-  virtual void _rewind_buffers_();
-};
-
-// Basic linear model (an IR!)
-class Linear : public Buffer {
-public:
-  Linear(const int receptive_field, const bool _bias,
-         const std::vector<float> &params);
-  void _process_core_() override;
-
-protected:
-  Eigen::VectorXf _weight;
-  float _bias;
-};
-
-// NN modules =================================================================
-
-// Activations
-
-// In-place ReLU on (N,M) array
-void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end,
-           const long j_start, const long j_end);
-// Subset of the columns
-void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end);
-void relu_(Eigen::MatrixXf &x);
-
-// In-place sigmoid
-void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end,
-              const long j_start, const long j_end);
-void sigmoid_(Eigen::MatrixXf &x);
-
-// In-place Tanh on (N,M) array
-void tanh_(Eigen::MatrixXf &x);
-
-void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end,
-           const long j_start, const long j_end);
-// Subset of the columns
-void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end);
-
-class Conv1D {
-public:
-  Conv1D() { this->_dilation = 1; };
-  void set_params_(std::vector<float>::iterator &params);
-  void set_size_(const int in_channels, const int out_channels,
-                 const int kernel_size, const bool do_bias,
-                 const int _dilation);
-  void set_size_and_params_(const int in_channels, const int out_channels,
-                            const int kernel_size, const int _dilation,
-                            const bool do_bias,
-                            std::vector<float>::iterator &params);
-  // Process from input to output.
-  // Rightmost indices of input go from i_start to i_end;
-  // indices on output go from j_start (to j_start + i_end - i_start).
-  void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
-                const long i_start, const long i_end,
-                const long j_start) const;
-  long get_in_channels() const {
-    return this->_weight.size() > 0 ? this->_weight[0].cols() : 0;
-  };
-  long get_kernel_size() const { return this->_weight.size(); };
-  long get_num_params() const;
-  long get_out_channels() const {
-    return this->_weight.size() > 0 ? this->_weight[0].rows() : 0;
-  };
-  int get_dilation() const { return this->_dilation; };
-
-private:
-  // Gonna wing this...
-  // conv[kernel](cout, cin)
-  std::vector<Eigen::MatrixXf> _weight;
-  Eigen::VectorXf _bias;
-  int _dilation;
-};
-
-// Really just a linear layer
-class Conv1x1 {
-public:
-  Conv1x1(const int in_channels, const int out_channels, const bool _bias);
-  void set_params_(std::vector<float>::iterator &params);
-  // :param input: (N,Cin) or (Cin,)
-  // :return: (N,Cout) or (Cout,), respectively
-  Eigen::MatrixXf process(const Eigen::MatrixXf &input) const;
-
-  long get_out_channels() const { return this->_weight.rows(); };
-
-private:
-  Eigen::MatrixXf _weight;
-  Eigen::VectorXf _bias;
-  bool _do_bias;
-};
-
-// ConvNet ====================================================================
-
-namespace convnet {
-// Custom Conv that avoids re-computing on pieces of the input and trusts
-// that the corresponding outputs are where they need to be.
-// Beware: this is clever!
-
-// Batch normalization
-// In prod mode, so really just an elementwise affine layer.
-class BatchNorm {
-public:
-  BatchNorm(){};
-  BatchNorm(const int dim, std::vector<float>::iterator &params);
-  void process_(Eigen::MatrixXf &input, const long i_start,
-                const long i_end) const;
-
-private:
-  // TODO simplify to just ax+b
-  // y = (x-m)/sqrt(v+eps) * w + bias
-  // y = ax+b
-  // a = w / sqrt(v+eps)
-  // b = bias - a * m
-  Eigen::VectorXf scale;
-  Eigen::VectorXf loc;
-};
-
-class ConvNetBlock {
-public:
-  ConvNetBlock() { this->_batchnorm = false; };
-  void set_params_(const int in_channels, const int out_channels,
-                   const int _dilation, const bool batchnorm,
-                   const std::string activation,
-                   std::vector<float>::iterator &params);
-  void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
-                const long i_start, const long i_end) const;
-  long get_out_channels() const;
-  Conv1D conv;
-
-private:
-  BatchNorm batchnorm;
-  bool _batchnorm;
-  std::string activation;
-};
-
-class _Head {
-public:
-  _Head() { this->_bias = (float)0.0; };
-  _Head(const int channels, std::vector<float>::iterator &params);
-  void process_(const Eigen::MatrixXf &input, Eigen::VectorXf &output,
-                const long i_start, const long i_end) const;
-
-private:
-  Eigen::VectorXf _weight;
-  float _bias;
-};
-
-class ConvNet : public Buffer {
-public:
-  ConvNet(const int channels, const std::vector<int> &dilations,
-          const bool batchnorm, const std::string activation,
-          std::vector<float> &params);
-
-protected:
-  std::vector<ConvNetBlock> _blocks;
-  std::vector<Eigen::MatrixXf> _block_vals;
-  Eigen::VectorXf _head_output;
-  _Head _head;
-  void _verify_params(const int channels, const std::vector<int> &dilations,
-                      const bool batchnorm, const size_t actual_params);
-  void _update_buffers_() override;
-  void _rewind_buffers_() override;
-
-  void _process_core_() override;
-
-  // The net starts with random parameters inside; we need to wait for a full
-  // receptive field to pass through before we can count on the output being
-  // ok. This implements a gentle "ramp-up" so that there's no "pop" at the
-  // start.
-  long _anti_pop_countdown;
-  const long _anti_pop_ramp = 100;
-  void _anti_pop_();
-  void _reset_anti_pop_();
-};
-}; // namespace convnet
-
-// Utilities ==================================================================
-// Implemented in get_dsp.cpp
-
-// Verify that the config that we are building our model from is supported by
-// this plugin version.
-void verify_config_version(const std::string version);
-
-// Takes the model file and uses it to instantiate an instance of DSP.
-std::unique_ptr<DSP> get_dsp(const std::filesystem::path model_file);
-// Legacy loader for directory-type DSPs
-std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path dirname);
-
-// Hard-coded model:
-std::unique_ptr<DSP> get_hard_dsp();
-
-// Version 2 DSP abstraction ==================================================
-
-namespace dsp {
-class Params {};
-
-class DSP {
-public:
-  DSP();
-  ~DSP();
-  // The main interface for processing audio.
-  // The incoming audio is given as a raw pointer-to-pointers.
-  // The indexing is [channel][frame].
-  // The output shall be a pointer-to-pointers of matching size.
-  // This object instance will own the data referenced by the pointers and be
-  // responsible for its allocation and deallocation.
-  virtual float **Process(float **inputs, const size_t numChannels,
-                          const size_t numFrames) = 0;
-  // Update the parameters of the DSP object according to the provided params.
-  // Not declaring a pure virtual bc there's no concrete definition that can
-  // use Params.
-  // But, use this name :)
-  // virtual void SetParams(Params* params) = 0;
-
-protected:
-  // Methods
-
-  // Allocate mOutputPointers.
-  // Assumes it's already null (use _DeallocateOutputPointers()).
-  void _AllocateOutputPointers(const size_t numChannels);
-  // Ensure mOutputPointers is freed.
-  void _DeallocateOutputPointers();
-
-  size_t _GetNumChannels() const { return this->mOutputs.size(); };
-  size_t _GetNumFrames() const {
-    return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0;
-  }
-  // Return a pointer-to-pointers for the DSP's output buffers (all channels).
-  // Assumes that ._PrepareBuffers() was called recently enough.
-  float **_GetPointers();
-  // Resize mOutputs to (numChannels, numFrames) and ensure that the raw
-  // pointers are also keeping up.
-  virtual void _PrepareBuffers(const size_t numChannels,
-                               const size_t numFrames);
-  // Resize the pointer-to-pointers for the vector-of-vectors.
-  void _ResizePointers(const size_t numChannels);
-
-  // Attributes
-
-  // The output array into which the DSP module's calculations will be
-  // written. Pointers to this member's data will be returned by .Process(),
-  // and std::vector will ensure proper allocation.
-  std::vector<std::vector<float>> mOutputs;
-  // A pointer to pointers of which copies will be given out as the output of
-  // .Process(). This object will ensure proper allocation and deallocation of
-  // the first level; the second level points to .data() from mOutputs.
-  float **mOutputPointers;
-  size_t mOutputPointersSize;
-};
-
-// A class where a longer buffer of history is needed to correctly calculate
-// the DSP algorithm (e.g. algorithms involving convolution).
-//
-// Hacky stuff:
-// * Mono
-// * Single-precision floats.
-class History : public DSP {
-public:
-  History();
-
-protected:
-  // Called at the end of the DSP; advance the history index to the next open
-  // spot. Does not ensure that it's at a valid address.
-  void _AdvanceHistoryIndex(const size_t bufferSize);
-  // Drop the new samples into the history array.
-  // Manages history array size.
-  void _UpdateHistory(float **inputs, const size_t numChannels,
-                      const size_t numFrames);
-
-  // The history array that's used for DSP calculations.
-  std::vector<float> mHistory;
-  // How many samples previous are required.
-  // Zero means that no history is required--only the current sample.
-  size_t mHistoryRequired;
-  // Location of the first sample in the current buffer.
-  // Shall always be in the range [mHistoryRequired, mHistory.size()).
-  size_t mHistoryIndex;
-
-private:
-  // Make sure that the history array is long enough.
-  void _EnsureHistorySize(const size_t bufferSize);
-  // Copy the end of the history back to the front and reset mHistoryIndex.
-  void _RewindHistory();
-};
-}; // namespace dsp
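The deleted base class above fixes a four-step pipeline (fetch params, apply input gain, run the core algorithm, apply output gain), so a subclass only has to override _process_core_(). A minimal sketch against the class as declared above; the GainOnly name and the trivial doubling are invented for illustration, and the buffer-sizing behavior of the protected helpers is assumed from their comments:

    // Hypothetical subclass: DSP::process() handles params and I/O gain;
    // only the core algorithm between the two buffers is ours to write.
    class GainOnly : public DSP {
    protected:
      void _process_core_() override {
        // _apply_input_level_() has filled _input_post_gain by now.
        this->_ensure_core_dsp_output_ready_(); // assumed to size the output
        for (size_t i = 0; i < this->_input_post_gain.size(); i++)
          this->_core_dsp_output[i] = 2.0f * this->_input_post_gain[i]; // toy "DSP"
        // process() then copies _core_dsp_output out with the output gain.
      }
    };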
diff --git a/src/get_dsp.cpp b/src/get_dsp.cpp
deleted file mode 100644
index cf71ab0..0000000
--- a/src/get_dsp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#include <fstream>
-#include <unordered_set>
-
-#include "json.hpp"
-#include "dsp.h"
-//#include "HardCodedModel.h"
-//#include "lstm.h"
-#include "wavenet.h"
-
-void verify_config_version(const std::string version) {
-  const std::unordered_set<std::string> supported_versions({"0.5.0"});
-  if (supported_versions.find(version) == supported_versions.end()) {
-    std::stringstream ss;
-    ss << "Model config is an unsupported version " << version
-       << ". Try either converting the model to a more recent version, or "
-          "updating your version of the NAM plugin.";
-    throw std::runtime_error(ss.str());
-  }
-}
-
-std::vector<float> _get_weights(nlohmann::json const &j,
-                                const std::filesystem::path config_path) {
-  if (j.find("weights") != j.end()) {
-    auto weight_list = j["weights"];
-    std::vector<float> weights;
-    for (auto it = weight_list.begin(); it != weight_list.end(); ++it)
-      weights.push_back(*it);
-    return weights;
-  } else
-    throw std::runtime_error("Corrupted model file is missing weights.");
-}
-
-std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path model_dir) {
-  auto config_filename = model_dir / std::filesystem::path("config.json");
-  return get_dsp(config_filename);
-}
-
-std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
-  if (!std::filesystem::exists(config_filename))
-    throw std::runtime_error("Config JSON doesn't exist!\n");
-  std::ifstream i(config_filename);
-  nlohmann::json j;
-  i >> j;
-  verify_config_version(j["version"]);
-
-  auto architecture = j["architecture"];
-  nlohmann::json config = j["config"];
-  std::vector<float> params = _get_weights(j, config_filename);
-
-  //if (architecture == "Linear") {
-  //  const int receptive_field = config["receptive_field"];
-  //  const bool _bias = config["bias"];
-  //  return std::make_unique<Linear>(receptive_field, _bias, params);
-  //} else if (architecture == "ConvNet") {
-  //  const int channels = config["channels"];
-  //  const bool batchnorm = config["batchnorm"];
-  //  std::vector<int> dilations;
-  //  for (int i = 0; i < config["dilations"].size(); i++)
-  //    dilations.push_back(config["dilations"][i]);
-  //  const std::string activation = config["activation"];
-  //  return std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm,
-  //                                            activation, params);
-  //} else if (architecture == "LSTM") {
-  //  const int num_layers = config["num_layers"];
-  //  const int input_size = config["input_size"];
-  //  const int hidden_size = config["hidden_size"];
-  //  auto json = nlohmann::json{};
-  //  return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
-  //                                      params, json);
-  //} else if (architecture == "CatLSTM") {
-  //  const int num_layers = config["num_layers"];
-  //  const int input_size = config["input_size"];
-  //  const int hidden_size = config["hidden_size"];
-  //  return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
-  //                                      params, config["parametric"]);
-  //} else
-
-  if (architecture == "WaveNet" || architecture == "CatWaveNet") {
-    std::vector<wavenet::LayerArrayParams> layer_array_params;
-    for (int i = 0; i < config["layers"].size(); i++) {
-      nlohmann::json layer_config = config["layers"][i];
-      std::vector<int> dilations;
-      for (int j = 0; j < layer_config["dilations"].size(); j++)
-        dilations.push_back(layer_config["dilations"][j]);
-      layer_array_params.push_back(wavenet::LayerArrayParams(
-          layer_config["input_size"], layer_config["condition_size"],
-          layer_config["head_size"], layer_config["channels"],
-          layer_config["kernel_size"], dilations, layer_config["activation"],
-          layer_config["gated"], layer_config["head_bias"]));
-    }
-    const bool with_head = config["head"] == NULL;
-    const float head_scale = config["head_scale"];
-    // Solves a compilation issue on macOS ("Error: No matching constructor
-    // for initialization of 'wavenet::WaveNet'"). Solution from
-    // https://stackoverflow.com/a/73956681/3768284
-    auto parametric_json =
-        architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
-    return std::make_unique<wavenet::WaveNet>(
-        layer_array_params, head_scale, with_head, parametric_json, params);
-  } else {
-    throw std::runtime_error("Unrecognized architecture");
-  }
-}
-
-//std::unique_ptr<DSP> get_hard_dsp() {
-//  // Values are defined in HardCodedModel.h
-//  verify_config_version(std::string(PYTHON_MODEL_VERSION));
-//
-//  // Uncomment the line that corresponds to the model type that you're using.
-//
-//  // return std::make_unique<convnet::ConvNet>(CHANNELS, DILATIONS, BATCHNORM,
-//  //                                           ACTIVATION, PARAMS);
-//  // return std::make_unique<wavenet::WaveNet>(LAYER_ARRAY_PARAMS, HEAD_SCALE,
-//  //                                           WITH_HEAD, PARAMETRIC, PARAMS);
-//  return std::make_unique<lstm::LSTM>(NUM_LAYERS, INPUT_SIZE, HIDDEN_SIZE,
-//                                      PARAMS, PARAMETRIC);
-//}
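For reference, the file format this deleted loader consumed is plain JSON: a version string (only "0.5.0" is accepted above), an architecture name, an architecture-specific config object, and one flat weight list that the constructors consume in declaration order. A standalone sketch of that contract, assuming only nlohmann::json:

    #include <fstream>
    #include <stdexcept>
    #include <vector>
    #include "json.hpp"

    std::vector<float> read_weights(const char *path) {
      std::ifstream i(path);
      nlohmann::json j;
      i >> j;                       // same parse as get_dsp() above
      if (j["version"] != "0.5.0")  // the one version verify_config_version allows
        throw std::runtime_error("unsupported model version");
      std::vector<float> weights;
      for (auto &w : j["weights"])  // flat list; order defines the layout
        weights.push_back(w);
      return weights;
    }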
config["parametric"] : nlohmann::json{}; - return std::make_unique( - layer_array_params, head_scale, with_head, parametric_json, params); - } else { - throw std::runtime_error("Unrecognized architecture"); - } -} - -//std::unique_ptr get_hard_dsp() { -// // Values are defined in HardCodedModel.h -// verify_config_version(std::string(PYTHON_MODEL_VERSION)); -// -// // Uncomment the line that corresponds to the model type that you're using. -// -// // return std::make_unique(CHANNELS, DILATIONS, BATCHNORM, -// // ACTIVATION, PARAMS); return -// // std::make_unique(LAYER_ARRAY_PARAMS, HEAD_SCALE, -// // WITH_HEAD, PARAMETRIC, PARAMS); -// return std::make_unique(NUM_LAYERS, INPUT_SIZE, HIDDEN_SIZE, -// PARAMS, PARAMETRIC); -//} diff --git a/src/nam_plugin.cpp b/src/nam_plugin.cpp index 1ba47a7..dd8b445 100644 --- a/src/nam_plugin.cpp +++ b/src/nam_plugin.cpp @@ -64,7 +64,11 @@ namespace NAM { auto nam = static_cast(instance); - //nam->currentModel = get_dsp("C://Users//oliph//AppData//Roaming//GuitarSim//NAM//JCM2000Crunch.nam"); + // If we had a previous model, delete it + if (nam->deleteModel) + { + nam->deleteModel.reset(); + } nam->stagedModel = get_dsp(msg->path); @@ -93,8 +97,9 @@ namespace NAM { { auto nam = static_cast(instance); - nam->currentModel = std::move(nam->stagedModel); - nam->stagedModel = nullptr; + std::swap(nam->currentModel, nam->stagedModel); + + nam->deleteModel = std::move(nam->stagedModel); return LV2_WORKER_SUCCESS; } @@ -147,12 +152,15 @@ namespace NAM { } } + if (dblData.size() != n_samples) + dblData.resize(n_samples); + float inputLevel = pow(10, *(ports.input_level) * 0.05); float outputLevel = pow(10, *(ports.output_level) * 0.05); for (unsigned int i = 0; i < n_samples; i++) { - ports.audio_out[i] = ports.audio_in[i] * inputLevel; + dblData[i] = ports.audio_in[i] * inputLevel; } if (currentModel == nullptr) @@ -160,13 +168,15 @@ namespace NAM { } else { - currentModel->process(ports.audio_out, ports.audio_out, n_samples, 1.0, 1.0, mNAMParams); + double* data = dblData.data(); + + currentModel->process(&data, &data, 1, n_samples, 1.0, 1.0, mNAMParams); currentModel->finalize_(n_samples); } for (unsigned int i = 0; i < n_samples; i++) { - ports.audio_out[i] *= outputLevel; + ports.audio_out[i] = dblData[i] * outputLevel; } } } diff --git a/src/nam_plugin.h b/src/nam_plugin.h index 450e72a..5da8f33 100644 --- a/src/nam_plugin.h +++ b/src/nam_plugin.h @@ -52,10 +52,10 @@ namespace NAM { std::unique_ptr<::DSP> currentModel; std::unique_ptr<::DSP> stagedModel; + std::unique_ptr<::DSP> deleteModel; std::unordered_map mNAMParams = {}; - Plugin(); ~Plugin() = default; @@ -83,6 +83,8 @@ namespace NAM { URIs uris = {}; LV2_Atom_Forge atom_forge = {}; + std::vector dblData; + float m_rate; }; } diff --git a/src/util.cpp b/src/util.cpp deleted file mode 100644 index b5451cb..0000000 --- a/src/util.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include - -#include "util.h" - -std::string util::lowercase(const std::string &s) { - std::string out(s); - std::transform(s.begin(), s.end(), out.begin(), - [](unsigned char c) { return std::tolower(c); }); - return out; -} \ No newline at end of file diff --git a/src/util.h b/src/util.h deleted file mode 100644 index 722ea5e..0000000 --- a/src/util.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -// Utilities - -#include - -namespace util { -std::string lowercase(const std::string &s); -}; // namespace util diff --git a/src/wavenet.cpp b/src/wavenet.cpp deleted file mode 100644 index 1b716d0..0000000 --- a/src/wavenet.cpp +++ 
diff --git a/src/nam_plugin.h b/src/nam_plugin.h
index 450e72a..5da8f33 100644
--- a/src/nam_plugin.h
+++ b/src/nam_plugin.h
@@ -52,10 +52,10 @@ namespace NAM {
 
 		std::unique_ptr<::DSP> currentModel;
 		std::unique_ptr<::DSP> stagedModel;
+		std::unique_ptr<::DSP> deleteModel;
 
 		std::unordered_map<std::string, double> mNAMParams = {};
 
-
 		Plugin();
 		~Plugin() = default;
 
@@ -83,6 +83,8 @@
 		URIs uris = {};
 		LV2_Atom_Forge atom_forge = {};
 
+		std::vector<double> dblData;
+
 		float m_rate;
 	};
 }
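dblData exists because LV2 audio ports are 32-bit float while the core's process() now takes per-channel arrays of double pointers. A condensed sketch of the bridging done in the run() hunks above, with the model call left as a comment so the fragment stands alone:

    #include <cmath>
    #include <vector>

    void bridge(const float *in, float *out, unsigned n, float inDb, float outDb,
                std::vector<double> &dblData) {
      const float inputLevel = std::pow(10.0f, inDb * 0.05f);   // dB -> linear
      const float outputLevel = std::pow(10.0f, outDb * 0.05f);
      if (dblData.size() != n)
        dblData.resize(n);                 // reallocates only when n changes
      for (unsigned i = 0; i < n; i++)
        dblData[i] = in[i] * inputLevel;   // widen to double, apply input gain
      double *data = dblData.data();       // one mono channel
      // model.process(&data, &data, 1, n, 1.0, 1.0, params); // in-place is fine
      (void)data;
      for (unsigned i = 0; i < n; i++)
        out[i] = float(dblData[i] * outputLevel); // narrow back, output gain
    }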
diff --git a/src/util.cpp b/src/util.cpp
deleted file mode 100644
index b5451cb..0000000
--- a/src/util.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <algorithm>
-#include <cctype>
-
-#include "util.h"
-
-std::string util::lowercase(const std::string &s) {
-  std::string out(s);
-  std::transform(s.begin(), s.end(), out.begin(),
-                 [](unsigned char c) { return std::tolower(c); });
-  return out;
-}
\ No newline at end of file
diff --git a/src/util.h b/src/util.h
deleted file mode 100644
index 722ea5e..0000000
--- a/src/util.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-
-// Utilities
-
-#include <string>
-
-namespace util {
-std::string lowercase(const std::string &s);
-}; // namespace util
diff --git a/src/wavenet.cpp b/src/wavenet.cpp
deleted file mode 100644
index 1b716d0..0000000
--- a/src/wavenet.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-#include <algorithm>
-#include <math.h>
-#include <sstream>
-
-#include <Eigen/Dense>
-
-#include "wavenet.h"
-
-wavenet::_DilatedConv::_DilatedConv(const int in_channels,
-                                    const int out_channels,
-                                    const int kernel_size, const int bias,
-                                    const int dilation) {
-  this->set_size_(in_channels, out_channels, kernel_size, bias, dilation);
-}
-
-void wavenet::_Layer::set_params_(std::vector<float>::iterator &params) {
-  this->_conv.set_params_(params);
-  this->_input_mixin.set_params_(params);
-  this->_1x1.set_params_(params);
-}
-
-void wavenet::_Layer::process_(const Eigen::MatrixXf &input,
-                               const Eigen::MatrixXf &condition,
-                               Eigen::MatrixXf &head_input,
-                               Eigen::MatrixXf &output, const long i_start,
-                               const long j_start) {
-  const long ncols = condition.cols();
-  const long channels = this->get_channels();
-  // Input dilated conv
-  this->_conv.process_(input, this->_z, i_start, ncols, 0);
-  // Mix-in condition
-  this->_z.noalias() += this->_input_mixin.process(condition);
-
-  if (this->_activation == "Tanh")
-    tanh_(this->_z);
-  else if (this->_activation == "ReLU")
-    relu_(this->_z, 0, channels, 0, this->_z.cols());
-  else
-    throw std::runtime_error("Unrecognized activation.");
-
-  if (this->_gated) {
-    sigmoid_(this->_z, channels, 2 * channels, 0, this->_z.cols());
-
-    this->_z.topRows(channels).array() *=
-        this->_z.bottomRows(channels).array();
-    // this->_z.topRows(channels) = this->_z.topRows(channels).cwiseProduct(
-    //   this->_z.bottomRows(channels)
-    // );
-  }
-
-  head_input.noalias() += this->_z.topRows(channels);
-  output.middleCols(j_start, ncols).noalias() =
-      input.middleCols(i_start, ncols) +
-      this->_1x1.process(this->_z.topRows(channels));
-}
-
-void wavenet::_Layer::set_num_frames_(const long num_frames) {
-  this->_z.resize(this->_conv.get_out_channels(), num_frames);
-}
-
-// LayerArray =================================================================
-
-#define LAYER_ARRAY_BUFFER_SIZE 65536
-
-wavenet::_LayerArray::_LayerArray(const int input_size,
-                                  const int condition_size,
-                                  const int head_size, const int channels,
-                                  const int kernel_size,
-                                  const std::vector<int> &dilations,
-                                  const std::string activation,
-                                  const bool gated, const bool head_bias)
-    : _rechannel(input_size, channels, false),
-      _head_rechannel(channels, head_size, head_bias) {
-  for (int i = 0; i < dilations.size(); i++)
-    this->_layers.push_back(_Layer(condition_size, channels, kernel_size,
-                                   dilations[i], activation, gated));
-  const long receptive_field = this->_get_receptive_field();
-  for (int i = 0; i < dilations.size(); i++) {
-    this->_layer_buffers.push_back(Eigen::MatrixXf(
-        channels, LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1));
-    this->_layer_buffers[i].setZero();
-  }
-  this->_buffer_start = this->_get_receptive_field() - 1;
-}
-
-void wavenet::_LayerArray::advance_buffers_(const int num_frames) {
-  this->_buffer_start += num_frames;
-}
-
-long wavenet::_LayerArray::get_receptive_field() const {
-  long result = 0;
-  for (int i = 0; i < this->_layers.size(); i++)
-    result += this->_layers[i].get_dilation() *
-              (this->_layers[i].get_kernel_size() - 1);
-  return result;
-}
-
-void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) {
-  // Example:
-  // _buffer_start = 0
-  // num_frames = 64
-  // buffer_size = 64
-  // -> this will write on indices 0 through 63, inclusive.
-  // -> No illegal writes.
-  // -> No rewind needed.
-  if (this->_buffer_start + num_frames > this->_get_buffer_size())
-    this->_rewind_buffers_();
-}
-
-void wavenet::_LayerArray::process_(const Eigen::MatrixXf &layer_inputs,
-                                    const Eigen::MatrixXf &condition,
-                                    Eigen::MatrixXf &head_inputs,
-                                    Eigen::MatrixXf &layer_outputs,
-                                    Eigen::MatrixXf &head_outputs) {
-  this->_layer_buffers[0].middleCols(this->_buffer_start,
-                                     layer_inputs.cols()) =
-      this->_rechannel.process(layer_inputs);
-  const long last_layer = this->_layers.size() - 1;
-  for (auto i = 0; i < this->_layers.size(); i++) {
-    this->_layers[i].process_(
-        this->_layer_buffers[i], condition, head_inputs,
-        i == last_layer ? layer_outputs : this->_layer_buffers[i + 1],
-        this->_buffer_start, i == last_layer ? 0 : this->_buffer_start);
-  }
-  head_outputs = this->_head_rechannel.process(head_inputs);
-}
-
-void wavenet::_LayerArray::set_num_frames_(const long num_frames) {
-  // WaveNet checks for unchanged num_frames; if we made it here, there's
-  // something to do.
-  if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field()) {
-    std::stringstream ss;
-    ss << "Asked to accept a buffer of " << num_frames
-       << " samples, but the buffer is too short (" << LAYER_ARRAY_BUFFER_SIZE
-       << ") to get out of the receptive field ("
-       << this->_get_receptive_field() << "); copy errors could occur!\n";
-    throw std::runtime_error(ss.str().c_str());
-  }
-  for (int i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_num_frames_(num_frames);
-}
-
-void wavenet::_LayerArray::set_params_(std::vector<float>::iterator &params) {
-  this->_rechannel.set_params_(params);
-  for (int i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_params_(params);
-  this->_head_rechannel.set_params_(params);
-}
-
-long wavenet::_LayerArray::_get_channels() const {
-  return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0;
-}
-
-long wavenet::_LayerArray::_get_receptive_field() const {
-  // TODO remove this and use get_receptive_field() instead!
-  long res = 1;
-  for (int i = 0; i < this->_layers.size(); i++)
-    res += (this->_layers[i].get_kernel_size() - 1) *
-           this->_layers[i].get_dilation();
-  return res;
-}
-
-void wavenet::_LayerArray::_rewind_buffers_()
-// Consider wrapping instead...
-// Can make this smaller--largest dilation, not receptive field!
-{
-  const long start = this->_get_receptive_field() - 1;
-  for (int i = 0; i < this->_layer_buffers.size(); i++) {
-    const long d = (this->_layers[i].get_kernel_size() - 1) *
-                   this->_layers[i].get_dilation();
-    this->_layer_buffers[i].middleCols(start - d, d) =
-        this->_layer_buffers[i].middleCols(this->_buffer_start - d, d);
-  }
-  this->_buffer_start = start;
-}
-
-// Head =======================================================================
-
-wavenet::_Head::_Head(const int input_size, const int num_layers,
-                      const int channels, const std::string activation)
-    : _channels(channels), _activation(activation),
-      _head(num_layers > 0 ? channels : input_size, 1, true) {
-  assert(num_layers > 0);
-  int dx = input_size;
-  for (int i = 0; i < num_layers; i++) {
-    this->_layers.push_back(
-        Conv1x1(dx, i == num_layers - 1 ? 1 : channels, true));
-    dx = channels;
-    if (i < num_layers - 1)
-      this->_buffers.push_back(Eigen::MatrixXf());
-  }
-}
-
-void wavenet::_Head::set_params_(std::vector<float>::iterator &params) {
-  for (int i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_params_(params);
-}
-
-void wavenet::_Head::process_(Eigen::MatrixXf &inputs,
-                              Eigen::MatrixXf &outputs) {
-  const size_t num_layers = this->_layers.size();
-  this->_apply_activation_(inputs);
-  if (num_layers == 1)
-    outputs = this->_layers[0].process(inputs);
-  else {
-    this->_buffers[0] = this->_layers[0].process(inputs);
-    for (int i = 1; i < num_layers; i++) { // Asserted > 0 layers
-      this->_apply_activation_(this->_buffers[i - 1]);
-      if (i < num_layers - 1)
-        this->_buffers[i] = this->_layers[i].process(this->_buffers[i - 1]);
-      else
-        outputs = this->_layers[i].process(this->_buffers[i - 1]);
-    }
-  }
-}
-
-void wavenet::_Head::set_num_frames_(const long num_frames) {
-  for (int i = 0; i < this->_buffers.size(); i++)
-    this->_buffers[i].resize(this->_channels, num_frames);
-}
-
-void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) {
-  if (this->_activation == "Tanh")
-    tanh_(x);
-  else if (this->_activation == "ReLU")
-    relu_(x);
-  else
-    throw std::runtime_error("Unrecognized activation.");
-}
-
-// WaveNet ====================================================================
-
-wavenet::WaveNet::WaveNet(
-    const std::vector<wavenet::LayerArrayParams> &layer_array_params,
-    const float head_scale, const bool with_head, nlohmann::json parametric,
-    std::vector<float> params)
-    : //_head(channels, head_layers, head_channels, head_activation),
-      _num_frames(0), _head_scale(head_scale) {
-  if (with_head)
-    throw std::runtime_error("Head not implemented!");
-  this->_init_parametric_(parametric);
-  for (int i = 0; i < layer_array_params.size(); i++) {
-    this->_layer_arrays.push_back(wavenet::_LayerArray(
-        layer_array_params[i].input_size,
-        layer_array_params[i].condition_size,
-        layer_array_params[i].head_size, layer_array_params[i].channels,
-        layer_array_params[i].kernel_size, layer_array_params[i].dilations,
-        layer_array_params[i].activation, layer_array_params[i].gated,
-        layer_array_params[i].head_bias));
-    this->_layer_array_outputs.push_back(
-        Eigen::MatrixXf(layer_array_params[i].channels, 0));
-    if (i == 0)
-      this->_head_arrays.push_back(
-          Eigen::MatrixXf(layer_array_params[i].channels, 0));
-    if (i > 0)
-      if (layer_array_params[i].channels !=
-          layer_array_params[i - 1].head_size) {
-        std::stringstream ss;
-        ss << "channels of layer " << i << " ("
-           << layer_array_params[i].channels
-           << ") doesn't match head_size of preceding layer ("
-           << layer_array_params[i - 1].head_size << ")!\n";
-        throw std::runtime_error(ss.str().c_str());
-      }
-    this->_head_arrays.push_back(
-        Eigen::MatrixXf(layer_array_params[i].head_size, 0));
-  }
-  this->_head_output.resize(1, 0); // Mono output!
-  this->set_params_(params);
-  this->_reset_anti_pop_();
-}
-
-void wavenet::WaveNet::finalize_(const int num_frames) {
-  this->DSP::finalize_(num_frames);
-  this->_advance_buffers_(num_frames);
-}
-
-void wavenet::WaveNet::set_params_(std::vector<float> &params) {
-  std::vector<float>::iterator it = params.begin();
-  for (int i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].set_params_(it);
-  // this->_head.set_params_(it);
-  this->_head_scale = *(it++);
-  if (it != params.end()) {
-    std::stringstream ss;
-    for (int i = 0; i < params.size(); i++)
-      if (params[i] == *it) {
-        ss << "Parameter mismatch: assigned " << i + 1 << " parameters, but "
-           << params.size() << " were provided.";
-        throw std::runtime_error(ss.str().c_str());
-      }
-    ss << "Parameter mismatch: provided " << params.size()
-       << " weights, but the model expects more.";
-    throw std::runtime_error(ss.str().c_str());
-  }
-}
-
-void wavenet::WaveNet::_advance_buffers_(const int num_frames) {
-  for (int i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].advance_buffers_(num_frames);
-}
-
-void wavenet::WaveNet::_init_parametric_(nlohmann::json &parametric) {
-  for (nlohmann::json::iterator it = parametric.begin();
-       it != parametric.end(); ++it)
-    this->_param_names.push_back(it.key());
-  // TODO assert continuous 0 to 1
-  std::sort(this->_param_names.begin(), this->_param_names.end());
-}
-
-void wavenet::WaveNet::_prepare_for_frames_(const long num_frames) {
-  for (auto i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].prepare_for_frames_(num_frames);
-}
-
-void wavenet::WaveNet::_process_core_() {
-  const long num_frames = this->_input_post_gain.size();
-  this->_set_num_frames_(num_frames);
-  this->_prepare_for_frames_(num_frames);
-
-  // NOTE: During warm-up, weird things can happen that NaN out the layers.
-  // We could solve this by anti-popping the *input*. But, it's easier to
-  // check the outputs for NaNs and zero them out. They'll flush out
-  // eventually because the model doesn't use any feedback.
-
-  // Fill into condition array:
-  // Clumsy...
-  for (int j = 0; j < num_frames; j++) {
-    this->_condition(0, j) = this->_input_post_gain[j];
-    if (this->_stale_params) // Column-major assignment; good for Eigen.
-                             // Let the compiler optimize this.
-      for (int i = 0; i < this->_param_names.size(); i++)
-        this->_condition(i + 1, j) =
-            (float)this->_params[this->_param_names[i]];
-  }
-
-  // Main layer arrays:
-  // Layer-to-layer
-  // Sum on head output
-  this->_head_arrays[0].setZero();
-  for (int i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].process_(
-        i == 0 ? this->_condition : this->_layer_array_outputs[i - 1],
-        this->_condition, this->_head_arrays[i],
-        this->_layer_array_outputs[i], this->_head_arrays[i + 1]);
-  // this->_head.process_(
-  //   this->_head_input,
-  //   this->_head_output
-  // );
-  // Copy to required output array.
-  // Hack: apply head scale here; revisit when/if I activate the head.
-  // assert(this->_head_output.rows() == 1);
-
-  const long final_head_array = this->_head_arrays.size() - 1;
-  assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < num_frames; s++) {
-    float out = this->_head_scale *
-                this->_head_arrays[final_head_array](0, s);
-    // This is the NaN check that we could fix with anti-popping the input
-    if (isnan(out))
-      out = 0.0;
-    this->_core_dsp_output[s] = out;
-  }
-  // Apply anti-pop
-  this->_anti_pop_();
-}
-
-void wavenet::WaveNet::_set_num_frames_(const long num_frames) {
-  if (num_frames == this->_num_frames)
-    return;
-
-  this->_condition.resize(1 + this->_param_names.size(), num_frames);
-  for (int i = 0; i < this->_head_arrays.size(); i++)
-    this->_head_arrays[i].resize(this->_head_arrays[i].rows(), num_frames);
-  for (int i = 0; i < this->_layer_array_outputs.size(); i++)
-    this->_layer_array_outputs[i].resize(
-        this->_layer_array_outputs[i].rows(), num_frames);
-  this->_head_output.resize(this->_head_output.rows(), num_frames);
-
-  for (int i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].set_num_frames_(num_frames);
-  // this->_head.set_num_frames_(num_frames);
-  this->_num_frames = num_frames;
-}
-
-void wavenet::WaveNet::_anti_pop_() {
-  if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
-    return;
-  const float slope = 1.0f / float(this->_anti_pop_ramp);
-  for (int i = 0; i < this->_core_dsp_output.size(); i++) {
-    if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
-      break;
-    const float gain =
-        std::max(slope * float(this->_anti_pop_countdown), 0.0f);
-    this->_core_dsp_output[i] *= gain;
-    this->_anti_pop_countdown++;
-  }
-}
-
-void wavenet::WaveNet::_reset_anti_pop_() {
-  // You need the "real" receptive field, not the buffers.
-  long receptive_field = 1;
-  for (int i = 0; i < this->_layer_arrays.size(); i++)
-    receptive_field += this->_layer_arrays[i].get_receptive_field();
-  this->_anti_pop_countdown = -receptive_field;
-}
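The anti-pop machinery deleted above is worth spelling out: _reset_anti_pop_() sets the countdown to minus the receptive field, so output stays fully muted while the randomly-initialized state flushes through, and then gain ramps linearly from 0 to 1 over the ramp length. A distilled, self-contained version of that logic:

    #include <algorithm>
    #include <vector>

    // countdown starts at -receptive_field (see _reset_anti_pop_ above):
    // gain is clamped to 0 until the warm-up has passed, then ramps to 1.
    void anti_pop(std::vector<float> &out, long &countdown, const long ramp) {
      if (countdown >= ramp)
        return;                            // ramp done; leave audio untouched
      const float slope = 1.0f / float(ramp);
      for (size_t i = 0; i < out.size() && countdown < ramp; i++, countdown++)
        out[i] *= std::max(slope * float(countdown), 0.0f);
    }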
diff --git a/src/wavenet.h b/src/wavenet.h
deleted file mode 100644
index 7ae61e0..0000000
--- a/src/wavenet.h
+++ /dev/null
@@ -1,212 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "json.hpp"
-#include <Eigen/Dense>
-
-#include "dsp.h"
-
-namespace wavenet {
-// Rework the initialization API slightly. Merge w/ dsp.h later.
-class _DilatedConv : public Conv1D {
-public:
-  _DilatedConv(const int in_channels, const int out_channels,
-               const int kernel_size, const int bias, const int dilation);
-};
-
-class _Layer {
-public:
-  _Layer(const int condition_size, const int channels, const int kernel_size,
-         const int dilation, const std::string activation, const bool gated)
-      : _activation(activation), _gated(gated),
-        _conv(channels, gated ? 2 * channels : channels, kernel_size, true,
-              dilation),
-        _input_mixin(condition_size, gated ? 2 * channels : channels, false),
-        _1x1(channels, channels, true){};
-  void set_params_(std::vector<float>::iterator &params);
-  // :param `input`: from previous layer
-  // :param `output`: to next layer
-  void process_(const Eigen::MatrixXf &input,
-                const Eigen::MatrixXf &condition,
-                Eigen::MatrixXf &head_input, Eigen::MatrixXf &output,
-                const long i_start, const long j_start);
-  void set_num_frames_(const long num_frames);
-  long get_channels() const { return this->_conv.get_in_channels(); };
-  int get_dilation() const { return this->_conv.get_dilation(); };
-  long get_kernel_size() const { return this->_conv.get_kernel_size(); };
-
-private:
-  // The dilated convolution at the front of the block
-  _DilatedConv _conv;
-  // Input mixin
-  Conv1x1 _input_mixin;
-  // The post-activation 1x1 convolution
-  Conv1x1 _1x1;
-  // The internal state
-  Eigen::MatrixXf _z;
-
-  const std::string _activation;
-  const bool _gated;
-};
-
-class LayerArrayParams {
-public:
-  LayerArrayParams(const int input_size_, const int condition_size_,
-                   const int head_size_, const int channels_,
-                   const int kernel_size_,
-                   const std::vector<int> &dilations_,
-                   const std::string activation_, const bool gated_,
-                   const bool head_bias_)
-      : input_size(input_size_), condition_size(condition_size_),
-        head_size(head_size_), channels(channels_),
-        kernel_size(kernel_size_), activation(activation_), gated(gated_),
-        head_bias(head_bias_) {
-    for (int i = 0; i < dilations_.size(); i++)
-      this->dilations.push_back(dilations_[i]);
-  };
-
-  const int input_size;
-  const int condition_size;
-  const int head_size;
-  const int channels;
-  const int kernel_size;
-  std::vector<int> dilations;
-  const std::string activation;
-  const bool gated;
-  const bool head_bias;
-};
-
-// An array of layers with the same channels, kernel sizes, activations.
-class _LayerArray {
-public:
-  _LayerArray(const int input_size, const int condition_size,
-              const int head_size, const int channels, const int kernel_size,
-              const std::vector<int> &dilations,
-              const std::string activation, const bool gated,
-              const bool head_bias);
-
-  void advance_buffers_(const int num_frames);
-
-  // Preparing for frames:
-  // Rewind buffers if needed
-  // Shift index to prepare
-  //
-  void prepare_for_frames_(const long num_frames);
-
-  // All arrays are "short".
-  void process_(const Eigen::MatrixXf &layer_inputs, // Short
-                const Eigen::MatrixXf &condition,    // Short
-                Eigen::MatrixXf &head_inputs,        // Sum up on this.
-                Eigen::MatrixXf &layer_outputs,      // Short
-                Eigen::MatrixXf &head_outputs        // Post head-rechannel.
-  );
-  void set_num_frames_(const long num_frames);
-  void set_params_(std::vector<float>::iterator &it);
-
-  // "Zero-indexed" receptive field.
-  // E.g. a 1x1 convolution has a z.i.r.f. of zero.
-  long get_receptive_field() const;
-
-private:
-  long _buffer_start;
-  // The rechannel before the layers
-  Conv1x1 _rechannel;
-
-  // Buffers in between layers.
-  // buffer[i] is the input to layer [i].
-  // The last layer outputs to a short array provided by outside.
-  std::vector<Eigen::MatrixXf> _layer_buffers;
-  // The layer objects
-  std::vector<_Layer> _layers;
-
-  // Rechannel for the head
-  Conv1x1 _head_rechannel;
-
-  long _get_buffer_size() const {
-    return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols()
-                                           : 0;
-  };
-  long _get_channels() const;
-  // "One-indexed" receptive field.
-  // TODO remove!
-  // E.g. a 1x1 convolution has a o.i.r.f. of one.
-  long _get_receptive_field() const;
-  void _rewind_buffers_();
-};
-
-// The head module
-// [Act->Conv] x L
-class _Head {
-public:
-  _Head(const int input_size, const int num_layers, const int channels,
-        const std::string activation);
-  void set_params_(std::vector<float>::iterator &params);
-  // NOTE: the head transforms the provided input by applying a nonlinearity
-  // to it in-place!
-  void process_(Eigen::MatrixXf &inputs, Eigen::MatrixXf &outputs);
-  void set_num_frames_(const long num_frames);
-
-private:
-  int _channels;
-  std::vector<Conv1x1> _layers;
-  Conv1x1 _head;
-  std::string _activation;
-
-  // Stores the outputs of the convs *except* the last one, which goes in
-  // the array `outputs` provided to .process_().
-  std::vector<Eigen::MatrixXf> _buffers;
-
-  // Apply the activation to the provided array, in-place.
-  void _apply_activation_(Eigen::MatrixXf &x);
-};
-
-// The main WaveNet model
-// Both parametric and not; the difference is handled at param read-in.
-class WaveNet : public DSP {
-public:
-  WaveNet(const std::vector<LayerArrayParams> &layer_array_params,
-          const float head_scale, const bool with_head,
-          nlohmann::json parametric, std::vector<float> params);
-
-  // WaveNet(WaveNet&&) = default;
-  // WaveNet& operator=(WaveNet&&) = default;
-  // ~WaveNet() = default;
-
-  void finalize_(const int num_frames) override;
-  void set_params_(std::vector<float> &params);
-
-private:
-  long _num_frames;
-  std::vector<_LayerArray> _layer_arrays;
-  // Their outputs
-  std::vector<Eigen::MatrixXf> _layer_array_outputs;
-  // Head _head;
-
-  // Element-wise arrays:
-  Eigen::MatrixXf _condition;
-  // One more than total layer arrays
-  std::vector<Eigen::MatrixXf> _head_arrays;
-  float _head_scale;
-  Eigen::MatrixXf _head_output;
-
-  // Names of the params, sorted.
-  // TODO move this up, ugh.
-  std::vector<std::string> _param_names;
-
-  void _advance_buffers_(const int num_frames);
-  // Get the info from the parametric config
-  void _init_parametric_(nlohmann::json &parametric);
-  void _prepare_for_frames_(const long num_frames);
-  // Reminder: from ._input_post_gain to ._core_dsp_output
-  void _process_core_() override;
-
-  // Ensure that all buffer arrays are the right size for this num_frames
-  void _set_num_frames_(const long num_frames);
-
-  // The net starts with random parameters inside; we need to wait for a full
-  // receptive field to pass through before we can count on the output being
-  // ok. This implements a gentle "ramp-up" so that there's no "pop" at the
-  // start.
-  long _anti_pop_countdown;
-  const long _anti_pop_ramp = 4000;
-  void _anti_pop_();
-  void _reset_anti_pop_();
-};
-}; // namespace wavenet
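Closing note on the two receptive-field helpers declared above: the "zero-indexed" figure is the sum of (kernel_size - 1) * dilation over the layers, and the "one-indexed" one used for buffer sizing adds 1 for the current sample. A worked example with illustrative numbers, not taken from any particular model file:

    #include <vector>

    long receptive_field(const int kernel_size, const std::vector<int> &dilations) {
      long result = 0; // "zero-indexed": a 1x1 conv (kernel 1) contributes 0
      for (int d : dilations)
        result += long(kernel_size - 1) * d;
      return result;
    }
    // e.g. kernel_size = 3, dilations = {1, 2, 4, ..., 512}:
    // 2 * (1 + 2 + ... + 512) = 2 * 1023 = 2046 samples of history;
    // the "one-indexed" _get_receptive_field() would report 2047.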