#pragma once #include #include #include #include #include #include #include enum EArchitectures { kLinear = 0, kConvNet, kLSTM, kCatLSTM, kWaveNet, kCatWaveNet, kNumModels }; #define NAMSample float // Class for providing params from the plugin to the DSP module // For now, we'll work with doubles. Later, we'll add other types. class DSPParam { public: const char *name; const double val; }; // And the params shall be provided as a std::vector. class DSP { public: DSP(); // process() does all of the processing requried to take `inputs` array and // fill in the required values on `outputs`. // To do this: // 1. The parameters from the plugin (I/O levels and any other parametric // inputs) are gotten. // 2. The input level is applied // 3. The core DSP algorithm is run (This is what should probably be // overridden in subclasses). // 4. The output level is applied and the result stored to `output`. virtual void process(const NAMSample *input, NAMSample *output, const int num_frames, const double input_gain, const double output_gain, const std::unordered_map ¶ms); // Anything to take care of before next buffer comes in. // For example: // * Move the buffer index forward // * Does NOT say that params aren't stale; that's the job of the routine // that actually uses them, which varies depends on the particulars of the // DSP subclass implementation. virtual void finalize_(const int num_frames); protected: // Parameters (aka "knobs") std::unordered_map _params; // If the params have changed since the last buffer was processed: bool _stale_params; // Where to store the samples after applying input gain std::vector _input_post_gain; // Location for the output of the core DSP algorithm. std::vector _core_dsp_output; // Methods // Copy the parameters to the DSP module. // If anything has changed, then set this->_stale_params to true. // (TODO use "listener" approach) void _get_params_(const std::unordered_map &input_params); // Apply the input gain // Result populates this->_input_post_gain void _apply_input_level_(const NAMSample *input, const int num_frames, const double gain); // i.e. ensure the size is correct. void _ensure_core_dsp_output_ready_(); // The core of your DSP algorithm. // Access the inputs in this->_input_post_gain // Place the outputs in this->_core_dsp_output virtual void _process_core_(); // Copy this->_core_dsp_output to output and apply the output volume void _apply_output_level_(NAMSample *output, const int num_frames, const double gain); }; // Class where an input buffer is kept so that long-time effects can be // captured. (e.g. conv nets or impulse responses, where we need history that's // longer than the sample buffer that's coming in.) class Buffer : public DSP { public: Buffer(const int receptive_field); void finalize_(const int num_frames); protected: // Input buffer const int _input_buffer_channels = 1; // Mono int _receptive_field; // First location where we add new samples from the input long _input_buffer_offset; std::vector _input_buffer; std::vector _output_buffer; void _set_receptive_field(const int new_receptive_field, const int input_buffer_size); void _set_receptive_field(const int new_receptive_field); void _reset_input_buffer(); // Use this->_input_post_gain virtual void _update_buffers_(); virtual void _rewind_buffers_(); }; // Basic linear model (an IR!) class Linear : public Buffer { public: Linear(const int receptive_field, const bool _bias, const std::vector ¶ms); void _process_core_() override; protected: Eigen::VectorXf _weight; float _bias; }; // NN modules ================================================================= // Activations // In-place ReLU on (N,M) array void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end, const long j_start, const long j_end); // Subset of the columns void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end); void relu_(Eigen::MatrixXf &x); // In-place sigmoid void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end, const long j_start, const long j_end); void sigmoid_(Eigen::MatrixXf &x); // In-place Tanh on (N,M) array void tanh_(Eigen::MatrixXf& x); void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end, const long j_start, const long j_end); // Subset of the columns void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end); class Conv1D { public: Conv1D() { this->_dilation = 1; }; void set_params_(std::vector::iterator ¶ms); void set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, const int _dilation); void set_size_and_params_(const int in_channels, const int out_channels, const int kernel_size, const int _dilation, const bool do_bias, std::vector::iterator ¶ms); // Process from input to output // Rightmost indices of input go from i_start to i_end, // Indices on output for from j_start (to j_start + i_end - i_start) void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output, const long i_start, const long i_end, const long j_start) const; long get_in_channels() const { return this->_weight.size() > 0 ? this->_weight[0].cols() : 0; }; long get_kernel_size() const { return this->_weight.size(); }; long get_num_params() const; long get_out_channels() const { return this->_weight.size() > 0 ? this->_weight[0].rows() : 0; }; int get_dilation() const { return this->_dilation; }; private: // Gonna wing this... // conv[kernel](cout, cin) std::vector _weight; Eigen::VectorXf _bias; int _dilation; }; // Really just a linear layer class Conv1x1 { public: Conv1x1(const int in_channels, const int out_channels, const bool _bias); void set_params_(std::vector::iterator ¶ms); // :param input: (N,Cin) or (Cin,) // :return: (N,Cout) or (Cout,), respectively Eigen::MatrixXf process(const Eigen::MatrixXf &input) const; long get_out_channels() const { return this->_weight.rows(); }; private: Eigen::MatrixXf _weight; Eigen::VectorXf _bias; bool _do_bias; }; // ConvNet ==================================================================== namespace convnet { // Custom Conv that avoids re-computing on pieces of the input and trusts // that the corresponding outputs are where they need to be. // Beware: this is clever! // Batch normalization // In prod mode, so really just an elementwise affine layer. class BatchNorm { public: BatchNorm(){}; BatchNorm(const int dim, std::vector::iterator ¶ms); void process_(Eigen::MatrixXf &input, const long i_start, const long i_end) const; private: // TODO simplify to just ax+b // y = (x-m)/sqrt(v+eps) * w + bias // y = ax+b // a = w / sqrt(v+eps) // b = a * m + bias Eigen::VectorXf scale; Eigen::VectorXf loc; }; class ConvNetBlock { public: ConvNetBlock() { this->_batchnorm = false; }; void set_params_(const int in_channels, const int out_channels, const int _dilation, const bool batchnorm, const std::string activation, std::vector::iterator ¶ms); void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output, const long i_start, const long i_end) const; long get_out_channels() const; Conv1D conv; private: BatchNorm batchnorm; bool _batchnorm; std::string activation; }; class _Head { public: _Head() { this->_bias = (float)0.0; }; _Head(const int channels, std::vector::iterator ¶ms); void process_(const Eigen::MatrixXf &input, Eigen::VectorXf &output, const long i_start, const long i_end) const; private: Eigen::VectorXf _weight; float _bias; }; class ConvNet : public Buffer { public: ConvNet(const int channels, const std::vector &dilations, const bool batchnorm, const std::string activation, std::vector ¶ms); protected: std::vector _blocks; std::vector _block_vals; Eigen::VectorXf _head_output; _Head _head; void _verify_params(const int channels, const std::vector &dilations, const bool batchnorm, const size_t actual_params); void _update_buffers_() override; void _rewind_buffers_() override; void _process_core_() override; // The net starts with random parameters inside; we need to wait for a full // receptive field to pass through before we can count on the output being // ok. This implements a gentle "ramp-up" so that there's no "pop" at the // start. long _anti_pop_countdown; const long _anti_pop_ramp = 100; void _anti_pop_(); void _reset_anti_pop_(); }; }; // namespace convnet // Utilities ================================================================== // Implemented in get_dsp.cpp // Verify that the config that we are building our model from is supported by // this plugin version. void verify_config_version(const std::string version); // Takes the model file and uses it to instantiate an instance of DSP. std::unique_ptr get_dsp(const std::filesystem::path model_file); // Legacy loader for directory-type DSPs std::unique_ptr get_dsp_legacy(const std::filesystem::path dirname); // Hard-coded model: std::unique_ptr get_hard_dsp(); // Version 2 DSP abstraction ================================================== namespace dsp { class Params {}; class DSP { public: DSP(); ~DSP(); // The main interface for processing audio. // The incoming audio is given as a raw pointer-to-pointers. // The indexing is [channel][frame]. // The output shall be a pointer-to-pointers of matching size. // This object instance will own the data referenced by the pointers and be // responsible for its allocation and deallocation. virtual float **Process(float **inputs, const size_t numChannels, const size_t numFrames) = 0; // Update the parameters of the DSP object according to the provided params. // Not declaring a pure virtual bc there's no concrete definition that can // use Params. // But, use this name :) // virtual void SetParams(Params* params) = 0; protected: // Methods // Allocate mOutputPointers. // Assumes it's already null (Use _DeallocateOutputPointers()). void _AllocateOutputPointers(const size_t numChannels); // Ensure mOutputPointers is freed. void _DeallocateOutputPointers(); size_t _GetNumChannels() const { return this->mOutputs.size(); }; size_t _GetNumFrames() const { return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0; } // Return a pointer-to-pointers for the DSP's output buffers (all channels) // Assumes that ._PrepareBuffers() was called recently enough. float **_GetPointers(); // Resize mOutputs to (numChannels, numFrames) and ensure that the raw // pointers are also keeping up. virtual void _PrepareBuffers(const size_t numChannels, const size_t numFrames); // Resize the pointer-to-pointers for the vector-of-vectors. void _ResizePointers(const size_t numChannels); // Attributes // The output array into which the DSP module's calculations will be written. // Pointers to this member's data will be returned by .Process(), and std // Will ensure proper allocation. std::vector> mOutputs; // A pointer to pointers of which copies will be given out as the output of // .Process(). This object will ensure proper allocation and deallocation of // the first level; The second level points to .data() from mOutputs. float **mOutputPointers; size_t mOutputPointersSize; }; // A class where a longer buffer of history is needed to correctly calculate // the DSP algorithm (e.g. algorithms involving convolution). // // Hacky stuff: // * Mono // * Single-precision floats. class History : public DSP { public: History(); protected: // Called at the end of the DSP, advance the hsitory index to the next open // spot. Does not ensure that it's at a valid address. void _AdvanceHistoryIndex(const size_t bufferSize); // Drop the new samples into the history array. // Manages history array size void _UpdateHistory(float **inputs, const size_t numChannels, const size_t numFrames); // The history array that's used for DSP calculations. std::vector mHistory; // How many samples previous are required. // Zero means that no history is required--only the current sample. size_t mHistoryRequired; // Location of the first sample in the current buffer. // Shall always be in the range [mHistoryRequired, mHistory.size()). size_t mHistoryIndex; private: // Make sure that the history array is long enough. void _EnsureHistorySize(const size_t bufferSize); // Copy the end of the history back to the fron and reset mHistoryIndex void _RewindHistory(); }; }; // namespace dsp