mirror of
https://github.com/mikeoliphant/neural-amp-modeler-lv2.git
synced 2026-05-06 19:50:11 +02:00
Delete local NAM code
This commit is contained in:
@@ -21,6 +21,8 @@ set(NAM_LV2_ID http://github.com/mikeoliphant/neural-amp-modeler-lv2)
|
|||||||
|
|
||||||
include_directories(SYSTEM eigen)
|
include_directories(SYSTEM eigen)
|
||||||
include_directories(SYSTEM lv2/include)
|
include_directories(SYSTEM lv2/include)
|
||||||
|
include_directories(SYSTEM NeuralAmpModelerCore/NAM)
|
||||||
|
include_directories(SYSTEM json)
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
|
||||||
|
|||||||
+1
-1
Submodule NeuralAmpModelerCore updated: 91a2bdb7a2...ccf5ffe767
+11
-8
@@ -2,14 +2,17 @@ add_library(neural_amp_modeler MODULE
|
|||||||
nam_lv2.cpp
|
nam_lv2.cpp
|
||||||
nam_plugin.cpp
|
nam_plugin.cpp
|
||||||
nam_plugin.h
|
nam_plugin.h
|
||||||
dsp.h
|
../NeuralAmpModelerCore/NAM/activations.h
|
||||||
dsp.cpp
|
../NeuralAmpModelerCore/NAM/version.h
|
||||||
get_dsp.cpp
|
../NeuralAmpModelerCore/NAM/lstm.h
|
||||||
util.cpp
|
../NeuralAmpModelerCore/NAM/lstm.cpp
|
||||||
util.h
|
../NeuralAmpModelerCore/NAM/dsp.h
|
||||||
wavenet.cpp
|
../NeuralAmpModelerCore/NAM/dsp.cpp
|
||||||
wavenet.h
|
../NeuralAmpModelerCore/NAM/get_dsp.cpp
|
||||||
json.hpp
|
../NeuralAmpModelerCore/NAM/util.cpp
|
||||||
|
../NeuralAmpModelerCore/NAM/util.h
|
||||||
|
../NeuralAmpModelerCore/NAM/wavenet.cpp
|
||||||
|
../NeuralAmpModelerCore/NAM/wavenet.h
|
||||||
)
|
)
|
||||||
|
|
||||||
target_compile_features(neural_amp_modeler PUBLIC cxx_std_17)
|
target_compile_features(neural_amp_modeler PUBLIC cxx_std_17)
|
||||||
|
|||||||
@@ -1,397 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
#include <iterator>
|
|
||||||
#include <memory>
|
|
||||||
#include <string>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <Eigen/Dense>
|
|
||||||
|
|
||||||
// Identifiers for the model architectures. Enumerators are numbered
// consecutively from 0, so kNumModels equals the number of architectures
// listed before it.
enum EArchitectures {
  kLinear = 0,
  kConvNet,
  kLSTM,
  kCatLSTM,
  kWaveNet,
  kCatWaveNet,
  kNumModels // count sentinel; not an architecture itself
};
|
|
||||||
|
|
||||||
#define NAMSample float
|
|
||||||
|
|
||||||
// Class for providing params from the plugin to the DSP module.
// For now, we'll work with doubles. Later, we'll add other types.
// The params shall be provided as a std::vector<DSPParam>.
class DSPParam {
public:
  // Parameter name; the pointed-to string is not owned by this object.
  const char *name;
  // Parameter value.
  const double val;
};
|
|
||||||
|
|
||||||
class DSP {
|
|
||||||
public:
|
|
||||||
DSP();
|
|
||||||
// process() does all of the processing requried to take `inputs` array and
|
|
||||||
// fill in the required values on `outputs`.
|
|
||||||
// To do this:
|
|
||||||
// 1. The parameters from the plugin (I/O levels and any other parametric
|
|
||||||
// inputs) are gotten.
|
|
||||||
// 2. The input level is applied
|
|
||||||
// 3. The core DSP algorithm is run (This is what should probably be
|
|
||||||
// overridden in subclasses).
|
|
||||||
// 4. The output level is applied and the result stored to `output`.
|
|
||||||
virtual void process(const NAMSample *input, NAMSample *output, const int num_frames,
|
|
||||||
const double input_gain, const double output_gain,
|
|
||||||
const std::unordered_map<std::string, double> ¶ms);
|
|
||||||
// Anything to take care of before next buffer comes in.
|
|
||||||
// For example:
|
|
||||||
// * Move the buffer index forward
|
|
||||||
// * Does NOT say that params aren't stale; that's the job of the routine
|
|
||||||
// that actually uses them, which varies depends on the particulars of the
|
|
||||||
// DSP subclass implementation.
|
|
||||||
virtual void finalize_(const int num_frames);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
// Parameters (aka "knobs")
|
|
||||||
std::unordered_map<std::string, double> _params;
|
|
||||||
// If the params have changed since the last buffer was processed:
|
|
||||||
bool _stale_params;
|
|
||||||
// Where to store the samples after applying input gain
|
|
||||||
std::vector<float> _input_post_gain;
|
|
||||||
// Location for the output of the core DSP algorithm.
|
|
||||||
std::vector<float> _core_dsp_output;
|
|
||||||
|
|
||||||
// Methods
|
|
||||||
|
|
||||||
// Copy the parameters to the DSP module.
|
|
||||||
// If anything has changed, then set this->_stale_params to true.
|
|
||||||
// (TODO use "listener" approach)
|
|
||||||
void
|
|
||||||
_get_params_(const std::unordered_map<std::string, double> &input_params);
|
|
||||||
|
|
||||||
// Apply the input gain
|
|
||||||
// Result populates this->_input_post_gain
|
|
||||||
void _apply_input_level_(const NAMSample *input, const int num_frames, const double gain);
|
|
||||||
|
|
||||||
// i.e. ensure the size is correct.
|
|
||||||
void _ensure_core_dsp_output_ready_();
|
|
||||||
|
|
||||||
// The core of your DSP algorithm.
|
|
||||||
// Access the inputs in this->_input_post_gain
|
|
||||||
// Place the outputs in this->_core_dsp_output
|
|
||||||
virtual void _process_core_();
|
|
||||||
|
|
||||||
// Copy this->_core_dsp_output to output and apply the output volume
|
|
||||||
void _apply_output_level_(NAMSample *output, const int num_frames, const double gain);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Class where an input buffer is kept so that long-time effects can be
|
|
||||||
// captured. (e.g. conv nets or impulse responses, where we need history that's
|
|
||||||
// longer than the sample buffer that's coming in.)
|
|
||||||
class Buffer : public DSP {
|
|
||||||
public:
|
|
||||||
Buffer(const int receptive_field);
|
|
||||||
void finalize_(const int num_frames);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
// Input buffer
|
|
||||||
const int _input_buffer_channels = 1; // Mono
|
|
||||||
int _receptive_field;
|
|
||||||
// First location where we add new samples from the input
|
|
||||||
long _input_buffer_offset;
|
|
||||||
std::vector<float> _input_buffer;
|
|
||||||
std::vector<float> _output_buffer;
|
|
||||||
|
|
||||||
void _set_receptive_field(const int new_receptive_field,
|
|
||||||
const int input_buffer_size);
|
|
||||||
void _set_receptive_field(const int new_receptive_field);
|
|
||||||
void _reset_input_buffer();
|
|
||||||
// Use this->_input_post_gain
|
|
||||||
virtual void _update_buffers_();
|
|
||||||
virtual void _rewind_buffers_();
|
|
||||||
};
|
|
||||||
|
|
||||||
// Basic linear model (an IR!)
|
|
||||||
class Linear : public Buffer {
|
|
||||||
public:
|
|
||||||
Linear(const int receptive_field, const bool _bias,
|
|
||||||
const std::vector<float> ¶ms);
|
|
||||||
void _process_core_() override;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
Eigen::VectorXf _weight;
|
|
||||||
float _bias;
|
|
||||||
};
|
|
||||||
|
|
||||||
// NN modules =================================================================
|
|
||||||
|
|
||||||
// Activations
|
|
||||||
|
|
||||||
// In-place ReLU on (N,M) array
|
|
||||||
void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end,
|
|
||||||
const long j_start, const long j_end);
|
|
||||||
// Subset of the columns
|
|
||||||
void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end);
|
|
||||||
void relu_(Eigen::MatrixXf &x);
|
|
||||||
|
|
||||||
// In-place sigmoid
|
|
||||||
void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end,
|
|
||||||
const long j_start, const long j_end);
|
|
||||||
void sigmoid_(Eigen::MatrixXf &x);
|
|
||||||
|
|
||||||
// In-place Tanh on (N,M) array
|
|
||||||
void tanh_(Eigen::MatrixXf& x);
|
|
||||||
|
|
||||||
void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end,
|
|
||||||
const long j_start, const long j_end);
|
|
||||||
// Subset of the columns
|
|
||||||
void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end);
|
|
||||||
|
|
||||||
class Conv1D {
|
|
||||||
public:
|
|
||||||
Conv1D() { this->_dilation = 1; };
|
|
||||||
void set_params_(std::vector<float>::iterator ¶ms);
|
|
||||||
void set_size_(const int in_channels, const int out_channels,
|
|
||||||
const int kernel_size, const bool do_bias,
|
|
||||||
const int _dilation);
|
|
||||||
void set_size_and_params_(const int in_channels, const int out_channels,
|
|
||||||
const int kernel_size, const int _dilation,
|
|
||||||
const bool do_bias,
|
|
||||||
std::vector<float>::iterator ¶ms);
|
|
||||||
// Process from input to output
|
|
||||||
// Rightmost indices of input go from i_start to i_end,
|
|
||||||
// Indices on output for from j_start (to j_start + i_end - i_start)
|
|
||||||
void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
|
|
||||||
const long i_start, const long i_end, const long j_start) const;
|
|
||||||
long get_in_channels() const {
|
|
||||||
return this->_weight.size() > 0 ? this->_weight[0].cols() : 0;
|
|
||||||
};
|
|
||||||
long get_kernel_size() const { return this->_weight.size(); };
|
|
||||||
long get_num_params() const;
|
|
||||||
long get_out_channels() const {
|
|
||||||
return this->_weight.size() > 0 ? this->_weight[0].rows() : 0;
|
|
||||||
};
|
|
||||||
int get_dilation() const { return this->_dilation; };
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Gonna wing this...
|
|
||||||
// conv[kernel](cout, cin)
|
|
||||||
std::vector<Eigen::MatrixXf> _weight;
|
|
||||||
Eigen::VectorXf _bias;
|
|
||||||
int _dilation;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Really just a linear layer
|
|
||||||
class Conv1x1 {
|
|
||||||
public:
|
|
||||||
Conv1x1(const int in_channels, const int out_channels, const bool _bias);
|
|
||||||
void set_params_(std::vector<float>::iterator ¶ms);
|
|
||||||
// :param input: (N,Cin) or (Cin,)
|
|
||||||
// :return: (N,Cout) or (Cout,), respectively
|
|
||||||
Eigen::MatrixXf process(const Eigen::MatrixXf &input) const;
|
|
||||||
|
|
||||||
long get_out_channels() const { return this->_weight.rows(); };
|
|
||||||
|
|
||||||
private:
|
|
||||||
Eigen::MatrixXf _weight;
|
|
||||||
Eigen::VectorXf _bias;
|
|
||||||
bool _do_bias;
|
|
||||||
};
|
|
||||||
|
|
||||||
// ConvNet ====================================================================
|
|
||||||
|
|
||||||
namespace convnet {
|
|
||||||
// Custom Conv that avoids re-computing on pieces of the input and trusts
|
|
||||||
// that the corresponding outputs are where they need to be.
|
|
||||||
// Beware: this is clever!
|
|
||||||
|
|
||||||
// Batch normalization
|
|
||||||
// In prod mode, so really just an elementwise affine layer.
|
|
||||||
class BatchNorm {
|
|
||||||
public:
|
|
||||||
BatchNorm(){};
|
|
||||||
BatchNorm(const int dim, std::vector<float>::iterator ¶ms);
|
|
||||||
void process_(Eigen::MatrixXf &input, const long i_start,
|
|
||||||
const long i_end) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// TODO simplify to just ax+b
|
|
||||||
// y = (x-m)/sqrt(v+eps) * w + bias
|
|
||||||
// y = ax+b
|
|
||||||
// a = w / sqrt(v+eps)
|
|
||||||
// b = a * m + bias
|
|
||||||
Eigen::VectorXf scale;
|
|
||||||
Eigen::VectorXf loc;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ConvNetBlock {
|
|
||||||
public:
|
|
||||||
ConvNetBlock() { this->_batchnorm = false; };
|
|
||||||
void set_params_(const int in_channels, const int out_channels,
|
|
||||||
const int _dilation, const bool batchnorm,
|
|
||||||
const std::string activation,
|
|
||||||
std::vector<float>::iterator ¶ms);
|
|
||||||
void process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
|
|
||||||
const long i_start, const long i_end) const;
|
|
||||||
long get_out_channels() const;
|
|
||||||
Conv1D conv;
|
|
||||||
|
|
||||||
private:
|
|
||||||
BatchNorm batchnorm;
|
|
||||||
bool _batchnorm;
|
|
||||||
std::string activation;
|
|
||||||
};
|
|
||||||
|
|
||||||
class _Head {
|
|
||||||
public:
|
|
||||||
_Head() { this->_bias = (float)0.0; };
|
|
||||||
_Head(const int channels, std::vector<float>::iterator ¶ms);
|
|
||||||
void process_(const Eigen::MatrixXf &input, Eigen::VectorXf &output,
|
|
||||||
const long i_start, const long i_end) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Eigen::VectorXf _weight;
|
|
||||||
float _bias;
|
|
||||||
};
|
|
||||||
|
|
||||||
class ConvNet : public Buffer {
|
|
||||||
public:
|
|
||||||
ConvNet(const int channels, const std::vector<int> &dilations,
|
|
||||||
const bool batchnorm, const std::string activation,
|
|
||||||
std::vector<float> ¶ms);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
std::vector<ConvNetBlock> _blocks;
|
|
||||||
std::vector<Eigen::MatrixXf> _block_vals;
|
|
||||||
Eigen::VectorXf _head_output;
|
|
||||||
_Head _head;
|
|
||||||
void _verify_params(const int channels, const std::vector<int> &dilations,
|
|
||||||
const bool batchnorm, const size_t actual_params);
|
|
||||||
void _update_buffers_() override;
|
|
||||||
void _rewind_buffers_() override;
|
|
||||||
|
|
||||||
void _process_core_() override;
|
|
||||||
|
|
||||||
// The net starts with random parameters inside; we need to wait for a full
|
|
||||||
// receptive field to pass through before we can count on the output being
|
|
||||||
// ok. This implements a gentle "ramp-up" so that there's no "pop" at the
|
|
||||||
// start.
|
|
||||||
long _anti_pop_countdown;
|
|
||||||
const long _anti_pop_ramp = 100;
|
|
||||||
void _anti_pop_();
|
|
||||||
void _reset_anti_pop_();
|
|
||||||
};
|
|
||||||
}; // namespace convnet
|
|
||||||
|
|
||||||
// Utilities ==================================================================
|
|
||||||
// Implemented in get_dsp.cpp
|
|
||||||
|
|
||||||
// Verify that the config that we are building our model from is supported by
|
|
||||||
// this plugin version.
|
|
||||||
void verify_config_version(const std::string version);
|
|
||||||
|
|
||||||
// Takes the model file and uses it to instantiate an instance of DSP.
|
|
||||||
std::unique_ptr<DSP> get_dsp(const std::filesystem::path model_file);
|
|
||||||
// Legacy loader for directory-type DSPs
|
|
||||||
std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path dirname);
|
|
||||||
|
|
||||||
// Hard-coded model:
|
|
||||||
std::unique_ptr<DSP> get_hard_dsp();
|
|
||||||
|
|
||||||
// Version 2 DSP abstraction ==================================================
|
|
||||||
|
|
||||||
namespace dsp {
|
|
||||||
// Placeholder base for parameter bundles passed to DSP objects.
class Params {};

// Abstract base of the "version 2" DSP interface.
class DSP {
public:
  DSP();
  // Virtual: this is an abstract base, so concrete objects will be destroyed
  // through DSP pointers/references; a non-virtual destructor would make that
  // undefined behaviour.
  virtual ~DSP();
  // The main interface for processing audio.
  // The incoming audio is given as a raw pointer-to-pointers.
  // The indexing is [channel][frame].
  // The output shall be a pointer-to-pointers of matching size.
  // This object instance will own the data referenced by the pointers and be
  // responsible for its allocation and deallocation.
  virtual float **Process(float **inputs, const size_t numChannels,
                          const size_t numFrames) = 0;
  // Update the parameters of the DSP object according to the provided params.
  // Not declaring a pure virtual bc there's no concrete definition that can
  // use Params.
  // But, use this name :)
  // virtual void SetParams(Params* params) = 0;

protected:
  // Methods

  // Allocate mOutputPointers.
  // Assumes it's already null (Use _DeallocateOutputPointers()).
  void _AllocateOutputPointers(const size_t numChannels);
  // Ensure mOutputPointers is freed.
  void _DeallocateOutputPointers();

  size_t _GetNumChannels() const { return this->mOutputs.size(); };
  // Length of channel 0, or 0 when there are no channels.
  size_t _GetNumFrames() const {
    return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0;
  }
  // Return a pointer-to-pointers for the DSP's output buffers (all channels)
  // Assumes that ._PrepareBuffers() was called recently enough.
  float **_GetPointers();
  // Resize mOutputs to (numChannels, numFrames) and ensure that the raw
  // pointers are also keeping up.
  virtual void _PrepareBuffers(const size_t numChannels,
                               const size_t numFrames);
  // Resize the pointer-to-pointers for the vector-of-vectors.
  void _ResizePointers(const size_t numChannels);

  // Attributes

  // The output array into which the DSP module's calculations will be written.
  // Pointers to this member's data will be returned by .Process(), and std
  // will ensure proper allocation.
  std::vector<std::vector<float>> mOutputs;
  // A pointer to pointers of which copies will be given out as the output of
  // .Process(). This object will ensure proper allocation and deallocation of
  // the first level; the second level points to .data() from mOutputs.
  float **mOutputPointers;
  size_t mOutputPointersSize;
};
|
|
||||||
|
|
||||||
// A class where a longer buffer of history is needed to correctly calculate
|
|
||||||
// the DSP algorithm (e.g. algorithms involving convolution).
|
|
||||||
//
|
|
||||||
// Hacky stuff:
|
|
||||||
// * Mono
|
|
||||||
// * Single-precision floats.
|
|
||||||
class History : public DSP {
|
|
||||||
public:
|
|
||||||
History();
|
|
||||||
|
|
||||||
protected:
|
|
||||||
// Called at the end of the DSP, advance the hsitory index to the next open
|
|
||||||
// spot. Does not ensure that it's at a valid address.
|
|
||||||
void _AdvanceHistoryIndex(const size_t bufferSize);
|
|
||||||
// Drop the new samples into the history array.
|
|
||||||
// Manages history array size
|
|
||||||
void _UpdateHistory(float **inputs, const size_t numChannels,
|
|
||||||
const size_t numFrames);
|
|
||||||
|
|
||||||
// The history array that's used for DSP calculations.
|
|
||||||
std::vector<float> mHistory;
|
|
||||||
// How many samples previous are required.
|
|
||||||
// Zero means that no history is required--only the current sample.
|
|
||||||
size_t mHistoryRequired;
|
|
||||||
// Location of the first sample in the current buffer.
|
|
||||||
// Shall always be in the range [mHistoryRequired, mHistory.size()).
|
|
||||||
size_t mHistoryIndex;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Make sure that the history array is long enough.
|
|
||||||
void _EnsureHistorySize(const size_t bufferSize);
|
|
||||||
// Copy the end of the history back to the fron and reset mHistoryIndex
|
|
||||||
void _RewindHistory();
|
|
||||||
};
|
|
||||||
}; // namespace dsp
|
|
||||||
-117
@@ -1,117 +0,0 @@
|
|||||||
#include <fstream>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "json.hpp"
|
|
||||||
#include "dsp.h"
|
|
||||||
//#include "HardCodedModel.h"
|
|
||||||
//#include "lstm.h"
|
|
||||||
#include "wavenet.h"
|
|
||||||
|
|
||||||
// Checks that `version` (the "version" field of a model file) is one this
// build can load.
// Throws std::runtime_error, naming the offending version, when it is not.
void verify_config_version(const std::string version) {
  // Built once on first use instead of on every call.
  static const std::unordered_set<std::string> kSupportedVersions{"0.5.0"};
  if (kSupportedVersions.count(version) != 0)
    return;
  throw std::runtime_error(
      "Model config is an unsupported version " + version +
      ". Try either converting the model to a more recent version, or "
      "update your version of the NAM plugin.");
}
|
|
||||||
|
|
||||||
// Extracts the flat list of model weights from the parsed model JSON `j`.
// `config_path` is accepted for interface compatibility but is not used.
// Throws std::runtime_error when `j` has no "weights" entry.
std::vector<float> _get_weights(nlohmann::json const &j,
                                const std::filesystem::path config_path) {
  (void)config_path;
  // Single lookup; the iterator is reused so the (potentially large) weights
  // array is never copied as JSON.
  const auto weights_it = j.find("weights");
  if (weights_it == j.end())
    throw std::runtime_error("Corrupted model file is missing weights.");

  std::vector<float> weights;
  weights.reserve(weights_it->size());
  for (const auto &weight : *weights_it)
    weights.push_back(weight.get<float>());
  return weights;
}
|
|
||||||
|
|
||||||
std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path model_dir) {
|
|
||||||
auto config_filename = model_dir / std::filesystem::path("config.json");
|
|
||||||
return get_dsp(config_filename);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) {
|
|
||||||
if (!std::filesystem::exists(config_filename))
|
|
||||||
throw std::runtime_error("Config JSON doesn't exist!\n");
|
|
||||||
std::ifstream i(config_filename);
|
|
||||||
nlohmann::json j;
|
|
||||||
i >> j;
|
|
||||||
verify_config_version(j["version"]);
|
|
||||||
|
|
||||||
auto architecture = j["architecture"];
|
|
||||||
nlohmann::json config = j["config"];
|
|
||||||
std::vector<float> params = _get_weights(j, config_filename);
|
|
||||||
|
|
||||||
//if (architecture == "Linear") {
|
|
||||||
// const int receptive_field = config["receptive_field"];
|
|
||||||
// const bool _bias = config["bias"];
|
|
||||||
// return std::make_unique<Linear>(receptive_field, _bias, params);
|
|
||||||
//} else if (architecture == "ConvNet") {
|
|
||||||
// const int channels = config["channels"];
|
|
||||||
// const bool batchnorm = config["batchnorm"];
|
|
||||||
// std::vector<int> dilations;
|
|
||||||
// for (int i = 0; i < config["dilations"].size(); i++)
|
|
||||||
// dilations.push_back(config["dilations"][i]);
|
|
||||||
// const std::string activation = config["activation"];
|
|
||||||
// return std::make_unique<convnet::ConvNet>(channels, dilations, batchnorm,
|
|
||||||
// activation, params);
|
|
||||||
//} else if (architecture == "LSTM") {
|
|
||||||
// const int num_layers = config["num_layers"];
|
|
||||||
// const int input_size = config["input_size"];
|
|
||||||
// const int hidden_size = config["hidden_size"];
|
|
||||||
// auto json = nlohmann::json{};
|
|
||||||
// return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
|
|
||||||
// params, json);
|
|
||||||
//} else if (architecture == "CatLSTM") {
|
|
||||||
// const int num_layers = config["num_layers"];
|
|
||||||
// const int input_size = config["input_size"];
|
|
||||||
// const int hidden_size = config["hidden_size"];
|
|
||||||
// return std::make_unique<lstm::LSTM>(num_layers, input_size, hidden_size,
|
|
||||||
// params, config["parametric"]);
|
|
||||||
//} else
|
|
||||||
|
|
||||||
if (architecture == "WaveNet" || architecture == "CatWaveNet") {
|
|
||||||
std::vector<wavenet::LayerArrayParams> layer_array_params;
|
|
||||||
for (int i = 0; i < config["layers"].size(); i++) {
|
|
||||||
nlohmann::json layer_config = config["layers"][i];
|
|
||||||
std::vector<int> dilations;
|
|
||||||
for (int j = 0; j < layer_config["dilations"].size(); j++)
|
|
||||||
dilations.push_back(layer_config["dilations"][j]);
|
|
||||||
layer_array_params.push_back(wavenet::LayerArrayParams(
|
|
||||||
layer_config["input_size"], layer_config["condition_size"],
|
|
||||||
layer_config["head_size"], layer_config["channels"],
|
|
||||||
layer_config["kernel_size"], dilations, layer_config["activation"],
|
|
||||||
layer_config["gated"], layer_config["head_bias"]));
|
|
||||||
}
|
|
||||||
const bool with_head = config["head"] == NULL;
|
|
||||||
const float head_scale = config["head_scale"];
|
|
||||||
// Solves compilation issue on macOS Error: No matching constructor for
|
|
||||||
// initialization of 'wavenet::WaveNet' Solution from
|
|
||||||
// https://stackoverflow.com/a/73956681/3768284
|
|
||||||
auto parametric_json =
|
|
||||||
architecture == "CatWaveNet" ? config["parametric"] : nlohmann::json{};
|
|
||||||
return std::make_unique<wavenet::WaveNet>(
|
|
||||||
layer_array_params, head_scale, with_head, parametric_json, params);
|
|
||||||
} else {
|
|
||||||
throw std::runtime_error("Unrecognized architecture");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//std::unique_ptr<DSP> get_hard_dsp() {
|
|
||||||
// // Values are defined in HardCodedModel.h
|
|
||||||
// verify_config_version(std::string(PYTHON_MODEL_VERSION));
|
|
||||||
//
|
|
||||||
// // Uncomment the line that corresponds to the model type that you're using.
|
|
||||||
//
|
|
||||||
// // return std::make_unique<convnet::ConvNet>(CHANNELS, DILATIONS, BATCHNORM,
|
|
||||||
// // ACTIVATION, PARAMS); return
|
|
||||||
// // std::make_unique<wavenet::WaveNet>(LAYER_ARRAY_PARAMS, HEAD_SCALE,
|
|
||||||
// // WITH_HEAD, PARAMETRIC, PARAMS);
|
|
||||||
// return std::make_unique<lstm::LSTM>(NUM_LAYERS, INPUT_SIZE, HIDDEN_SIZE,
|
|
||||||
// PARAMS, PARAMETRIC);
|
|
||||||
//}
|
|
||||||
+16
-6
@@ -64,7 +64,11 @@ namespace NAM {
|
|||||||
|
|
||||||
auto nam = static_cast<NAM::Plugin*>(instance);
|
auto nam = static_cast<NAM::Plugin*>(instance);
|
||||||
|
|
||||||
//nam->currentModel = get_dsp("C://Users//oliph//AppData//Roaming//GuitarSim//NAM//JCM2000Crunch.nam");
|
// If we had a previous model, delete it
|
||||||
|
if (nam->deleteModel)
|
||||||
|
{
|
||||||
|
nam->deleteModel.reset();
|
||||||
|
}
|
||||||
|
|
||||||
nam->stagedModel = get_dsp(msg->path);
|
nam->stagedModel = get_dsp(msg->path);
|
||||||
|
|
||||||
@@ -93,8 +97,9 @@ namespace NAM {
|
|||||||
{
|
{
|
||||||
auto nam = static_cast<NAM::Plugin*>(instance);
|
auto nam = static_cast<NAM::Plugin*>(instance);
|
||||||
|
|
||||||
nam->currentModel = std::move(nam->stagedModel);
|
std::swap(nam->currentModel, nam->stagedModel);
|
||||||
nam->stagedModel = nullptr;
|
|
||||||
|
nam->deleteModel = std::move(nam->stagedModel);
|
||||||
|
|
||||||
return LV2_WORKER_SUCCESS;
|
return LV2_WORKER_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -147,12 +152,15 @@ namespace NAM {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dblData.size() != n_samples)
|
||||||
|
dblData.resize(n_samples);
|
||||||
|
|
||||||
float inputLevel = pow(10, *(ports.input_level) * 0.05);
|
float inputLevel = pow(10, *(ports.input_level) * 0.05);
|
||||||
float outputLevel = pow(10, *(ports.output_level) * 0.05);
|
float outputLevel = pow(10, *(ports.output_level) * 0.05);
|
||||||
|
|
||||||
for (unsigned int i = 0; i < n_samples; i++)
|
for (unsigned int i = 0; i < n_samples; i++)
|
||||||
{
|
{
|
||||||
ports.audio_out[i] = ports.audio_in[i] * inputLevel;
|
dblData[i] = ports.audio_in[i] * inputLevel;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (currentModel == nullptr)
|
if (currentModel == nullptr)
|
||||||
@@ -160,13 +168,15 @@ namespace NAM {
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
currentModel->process(ports.audio_out, ports.audio_out, n_samples, 1.0, 1.0, mNAMParams);
|
double* data = dblData.data();
|
||||||
|
|
||||||
|
currentModel->process(&data, &data, 1, n_samples, 1.0, 1.0, mNAMParams);
|
||||||
currentModel->finalize_(n_samples);
|
currentModel->finalize_(n_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i < n_samples; i++)
|
for (unsigned int i = 0; i < n_samples; i++)
|
||||||
{
|
{
|
||||||
ports.audio_out[i] *= outputLevel;
|
ports.audio_out[i] = dblData[i] * outputLevel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+3
-1
@@ -52,10 +52,10 @@ namespace NAM {
|
|||||||
|
|
||||||
std::unique_ptr<::DSP> currentModel;
|
std::unique_ptr<::DSP> currentModel;
|
||||||
std::unique_ptr<::DSP> stagedModel;
|
std::unique_ptr<::DSP> stagedModel;
|
||||||
|
std::unique_ptr<::DSP> deleteModel;
|
||||||
|
|
||||||
std::unordered_map<std::string, double> mNAMParams = {};
|
std::unordered_map<std::string, double> mNAMParams = {};
|
||||||
|
|
||||||
|
|
||||||
Plugin();
|
Plugin();
|
||||||
~Plugin() = default;
|
~Plugin() = default;
|
||||||
|
|
||||||
@@ -83,6 +83,8 @@ namespace NAM {
|
|||||||
URIs uris = {};
|
URIs uris = {};
|
||||||
LV2_Atom_Forge atom_forge = {};
|
LV2_Atom_Forge atom_forge = {};
|
||||||
|
|
||||||
|
std::vector<double> dblData;
|
||||||
|
|
||||||
float m_rate;
|
float m_rate;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
#include <algorithm>
|
|
||||||
#include <cctype>
|
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
|
|
||||||
// Returns a copy of `s` in which every character has been passed through
// std::tolower.
std::string util::lowercase(const std::string &s) {
  std::string out(s);
  for (char &ch : out) {
    // std::tolower requires a value representable as unsigned char.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return out;
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
// Utilities
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
namespace util {
// Returns a lower-cased copy of `s`; the argument itself is not modified.
std::string lowercase(const std::string &s);
}; // namespace util
|
|
||||||
-400
@@ -1,400 +0,0 @@
|
|||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
#include <Eigen/Dense>
|
|
||||||
|
|
||||||
#include "wavenet.h"
|
|
||||||
|
|
||||||
// Sizes the convolution; parameter values are loaded separately.
// `bias` is an int here and is forwarded to set_size_'s bool `do_bias`
// argument, so any non-zero value enables the bias.
wavenet::_DilatedConv::_DilatedConv(const int in_channels,
                                    const int out_channels,
                                    const int kernel_size, const int bias,
                                    const int dilation) {
  this->set_size_(in_channels, out_channels, kernel_size, bias, dilation);
}
|
|
||||||
|
|
||||||
// Loads this layer's parameters through `params`. The same iterator is handed
// to the dilated conv, the input mix-in and the 1x1 conv, in that order.
void wavenet::_Layer::set_params_(std::vector<float>::iterator &params) {
  this->_conv.set_params_(params);
  this->_input_mixin.set_params_(params);
  this->_1x1.set_params_(params);
}
|
|
||||||
|
|
||||||
// Runs one WaveNet layer over `condition.cols()` frames.
//
// input:      layer input buffer; frames are read starting at column i_start.
// condition:  conditioning signal, one column per frame being processed.
// head_input: accumulator; this layer's activated output is added to it.
// output:     receives input + 1x1(activated output) in columns
//             [j_start, j_start + ncols).
// Throws std::runtime_error for an activation other than "Tanh" or "ReLU".
void wavenet::_Layer::process_(const Eigen::MatrixXf &input,
                               const Eigen::MatrixXf &condition,
                               Eigen::MatrixXf &head_input,
                               Eigen::MatrixXf &output, const long i_start,
                               const long j_start) {
  const long ncols = condition.cols();
  const long channels = this->get_channels();
  // Input dilated conv
  this->_conv.process_(input, this->_z, i_start, ncols, 0);
  // Mix-in condition
  this->_z.noalias() += this->_input_mixin.process(condition);

  if (this->_activation == "Tanh") {
    if (this->_gated) {
      // Rows [channels, 2*channels) are the gate and get a sigmoid below, so
      // the main activation must stay off them -- same row range as the ReLU
      // branch.
      tanh_(this->_z, 0, channels, 0, this->_z.cols());
    } else {
      tanh_(this->_z);
    }
  } else if (this->_activation == "ReLU") {
    relu_(this->_z, 0, channels, 0, this->_z.cols());
  } else {
    throw std::runtime_error("Unrecognized activation.");
  }

  if (this->_gated) {
    sigmoid_(this->_z, channels, 2 * channels, 0, this->_z.cols());
    // Gate: element-wise product of the activated rows with the sigmoid rows.
    this->_z.topRows(channels).array() *= this->_z.bottomRows(channels).array();
  }

  head_input.noalias() += this->_z.topRows(channels);
  // Residual connection.
  output.middleCols(j_start, ncols).noalias() =
      input.middleCols(i_start, ncols) +
      this->_1x1.process(this->_z.topRows(channels));
}
|
|
||||||
|
|
||||||
// Resizes the scratch matrix _z to (conv output channels, num_frames).
void wavenet::_Layer::set_num_frames_(const long num_frames) {
  this->_z.resize(this->_conv.get_out_channels(), num_frames);
}
|
|
||||||
|
|
||||||
// LayerArray =================================================================
|
|
||||||
|
|
||||||
#define LAYER_ARRAY_BUFFER_SIZE 65536
|
|
||||||
|
|
||||||
// Builds one layer per entry of `dilations` and a zero-filled buffer per
// layer. Each buffer has `channels` rows and
// LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1 columns; _buffer_start begins
// at receptive_field - 1.
wavenet::_LayerArray::_LayerArray(const int input_size,
                                  const int condition_size, const int head_size,
                                  const int channels, const int kernel_size,
                                  const std::vector<int> &dilations,
                                  const std::string activation,
                                  const bool gated, const bool head_bias)
    : _rechannel(input_size, channels, false),
      _head_rechannel(channels, head_size, head_bias) {
  for (const int dilation : dilations)
    this->_layers.push_back(_Layer(condition_size, channels, kernel_size,
                                   dilation, activation, gated));

  // Must be queried after the layers exist; computed once and reused below.
  const long receptive_field = this->_get_receptive_field();
  const long buffer_cols = LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1;
  for (size_t i = 0; i < dilations.size(); i++) {
    this->_layer_buffers.push_back(Eigen::MatrixXf(channels, buffer_cols));
    this->_layer_buffers[i].setZero();
  }
  this->_buffer_start = receptive_field - 1;
}
|
|
||||||
|
|
||||||
// Move the buffer write position forward by `num_frames` columns.
void wavenet::_LayerArray::advance_buffers_(const int num_frames) {
  this->_buffer_start = this->_buffer_start + num_frames;
}
|
|
||||||
|
|
||||||
// "Zero-indexed" receptive field: the sum over all layers of
// dilation * (kernel_size - 1). Returns 0 when there are no layers.
long wavenet::_LayerArray::get_receptive_field() const {
  long total = 0;
  for (const auto &layer : this->_layers)
    total += layer.get_dilation() * (layer.get_kernel_size() - 1);
  return total;
}
|
|
||||||
|
|
||||||
void wavenet::_LayerArray::prepare_for_frames_(const long num_frames) {
|
|
||||||
// Example:
|
|
||||||
// _buffer_start = 0
|
|
||||||
// num_frames = 64
|
|
||||||
// buffer_size = 64
|
|
||||||
// -> this will write on indices 0 through 63, inclusive.
|
|
||||||
// -> No illegal writes.
|
|
||||||
// -> no rewind needed.
|
|
||||||
if (this->_buffer_start + num_frames > this->_get_buffer_size())
|
|
||||||
this->_rewind_buffers_();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run one block of frames through the layer array.
//
// :param `layer_inputs`: rechanneled and written into the first layer buffer
//   at `_buffer_start`.
// :param `condition`: passed unchanged to every layer.
// :param `head_inputs`: passed to every layer, then fed to the head rechannel.
// :param `layer_outputs`: receives the last layer's output, starting at
//   column 0.
// :param `head_outputs`: receives the head rechannel's output.
void wavenet::_LayerArray::process_(const Eigen::MatrixXf &layer_inputs,
                                    const Eigen::MatrixXf &condition,
                                    Eigen::MatrixXf &head_inputs,
                                    Eigen::MatrixXf &layer_outputs,
                                    Eigen::MatrixXf &head_outputs) {
  this->_layer_buffers[0].middleCols(this->_buffer_start, layer_inputs.cols()) =
      this->_rechannel.process(layer_inputs);

  const size_t num_layers = this->_layers.size();
  for (size_t k = 0; k < num_layers; ++k) {
    const bool is_last = (k + 1 == num_layers);
    // Intermediate layers write into the next layer's buffer at the same
    // offset; the last layer writes into the caller's array from column 0.
    Eigen::MatrixXf &destination =
        is_last ? layer_outputs : this->_layer_buffers[k + 1];
    const long write_start = is_last ? 0 : this->_buffer_start;
    this->_layers[k].process_(this->_layer_buffers[k], condition, head_inputs,
                              destination, this->_buffer_start, write_start);
  }

  head_outputs = this->_head_rechannel.process(head_inputs);
}
|
|
||||||
|
|
||||||
void wavenet::_LayerArray::set_num_frames_(const long num_frames) {
|
|
||||||
// Wavenet checks for unchanged num_frames; if we made it here, there's
|
|
||||||
// something to do.
|
|
||||||
if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field()) {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "Asked to accept a buffer of " << num_frames
|
|
||||||
<< " samples, but the buffer is too short (" << LAYER_ARRAY_BUFFER_SIZE
|
|
||||||
<< ") to get out of the recptive field (" << this->_get_receptive_field()
|
|
||||||
<< "); copy errors could occur!\n";
|
|
||||||
throw std::runtime_error(ss.str().c_str());
|
|
||||||
}
|
|
||||||
for (int i = 0; i < this->_layers.size(); i++)
|
|
||||||
this->_layers[i].set_num_frames_(num_frames);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load weights from the shared parameter iterator, in this order: the input
// rechannel, each layer in turn, then the head rechannel. `params` is taken
// by reference and handed to each component in sequence.
void wavenet::_LayerArray::set_params_(std::vector<float>::iterator &params) {
  this->_rechannel.set_params_(params);
  for (auto &layer : this->_layers)
    layer.set_params_(params);
  this->_head_rechannel.set_params_(params);
}
|
|
||||||
|
|
||||||
long wavenet::_LayerArray::_get_channels() const {
|
|
||||||
return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// "One-indexed" receptive field: one more than the zero-indexed value
// returned by get_receptive_field().
long wavenet::_LayerArray::_get_receptive_field() const {
  // TODO remove this and use get_receptive_field() instead!
  // Delegating keeps the per-layer formula in a single place.
  return 1 + this->get_receptive_field();
}
|
|
||||||
|
|
||||||
void wavenet::_LayerArray::_rewind_buffers_()
|
|
||||||
// Consider wrapping instead...
|
|
||||||
// Can make this smaller--largest dilation, not receptive field!
|
|
||||||
{
|
|
||||||
const long start = this->_get_receptive_field() - 1;
|
|
||||||
for (int i = 0; i < this->_layer_buffers.size(); i++) {
|
|
||||||
const long d = (this->_layers[i].get_kernel_size() - 1) *
|
|
||||||
this->_layers[i].get_dilation();
|
|
||||||
this->_layer_buffers[i].middleCols(start - d, d) =
|
|
||||||
this->_layer_buffers[i].middleCols(this->_buffer_start - d, d);
|
|
||||||
}
|
|
||||||
this->_buffer_start = start;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Head =======================================================================
|
|
||||||
|
|
||||||
// Build `num_layers` 1x1 convolutions (all with bias). The first takes
// `input_size` channels, later ones take `channels`; the last outputs a single
// channel. One intermediate buffer is created per non-final layer.
wavenet::_Head::_Head(const int input_size, const int num_layers,
                      const int channels, const std::string activation)
    : _channels(channels),
      _head(num_layers > 0 ? channels : input_size, 1, true),
      _activation(activation) {
  assert(num_layers > 0);
  const int last = num_layers - 1;
  for (int layer = 0; layer < num_layers; ++layer) {
    const int in_channels = (layer == 0) ? input_size : channels;
    const bool is_last = (layer == last);
    this->_layers.push_back(
        Conv1x1(in_channels, is_last ? 1 : channels, true));
    if (!is_last)
      this->_buffers.push_back(Eigen::MatrixXf());
  }
}
|
|
||||||
|
|
||||||
// Load weights for every head convolution, in order, from the shared
// parameter iterator. `params` is taken by reference and handed to each
// layer in sequence.
void wavenet::_Head::set_params_(std::vector<float>::iterator &params) {
  for (auto &layer : this->_layers)
    layer.set_params_(params);
}
|
|
||||||
|
|
||||||
// Apply [activation -> conv] for every head layer.
// NOTE: the activation is applied to `inputs` in place. Intermediate results
// are stored in `_buffers`; the last layer's result is written to `outputs`.
void wavenet::_Head::process_(Eigen::MatrixXf &inputs,
                              Eigen::MatrixXf &outputs) {
  const size_t layer_count = this->_layers.size();
  this->_apply_activation_(inputs);
  if (layer_count == 1) {
    outputs = this->_layers[0].process(inputs);
    return;
  }

  this->_buffers[0] = this->_layers[0].process(inputs);
  for (size_t k = 1; k < layer_count; ++k) { // Asserted > 0 layers
    Eigen::MatrixXf &previous = this->_buffers[k - 1];
    this->_apply_activation_(previous);
    // Only the final layer writes to the caller-provided output.
    Eigen::MatrixXf &target =
        (k + 1 < layer_count) ? this->_buffers[k] : outputs;
    target = this->_layers[k].process(previous);
  }
}
|
|
||||||
|
|
||||||
// Resize every intermediate buffer to `_channels` rows by `num_frames` columns.
void wavenet::_Head::set_num_frames_(const long num_frames) {
  for (auto &buffer : this->_buffers)
    buffer.resize(this->_channels, num_frames);
}
|
|
||||||
|
|
||||||
// Apply the configured activation ("Tanh" or "ReLU") to `x` in place.
// Throws std::runtime_error for any other activation name.
void wavenet::_Head::_apply_activation_(Eigen::MatrixXf &x) {
  if (this->_activation == "Tanh") {
    tanh_(x);
    return;
  }
  if (this->_activation == "ReLU") {
    relu_(x);
    return;
  }
  throw std::runtime_error("Unrecognized activation.");
}
|
|
||||||
|
|
||||||
// WaveNet ====================================================================
|
|
||||||
|
|
||||||
// Build the layer arrays and their output/head matrices, then load weights.
// Throws std::runtime_error when `with_head` is set (not implemented) or when
// a layer array's channel count differs from the preceding array's head_size.
wavenet::WaveNet::WaveNet(
    const std::vector<wavenet::LayerArrayParams> &layer_array_params,
    const float head_scale, const bool with_head, nlohmann::json parametric,
    std::vector<float> params)
    : //_head(channels, head_layers, head_channels, head_activation),
      _num_frames(0), _head_scale(head_scale) {
  if (with_head)
    throw std::runtime_error("Head not implemented!");
  this->_init_parametric_(parametric);

  const size_t num_arrays = layer_array_params.size();
  for (size_t k = 0; k < num_arrays; ++k) {
    const wavenet::LayerArrayParams &cfg = layer_array_params[k];
    this->_layer_arrays.push_back(wavenet::_LayerArray(
        cfg.input_size, cfg.condition_size, cfg.head_size, cfg.channels,
        cfg.kernel_size, cfg.dilations, cfg.activation, cfg.gated,
        cfg.head_bias));
    this->_layer_array_outputs.push_back(Eigen::MatrixXf(cfg.channels, 0));

    if (k == 0) {
      // Head input of the first layer array.
      this->_head_arrays.push_back(Eigen::MatrixXf(cfg.channels, 0));
    } else if (cfg.channels != layer_array_params[k - 1].head_size) {
      std::stringstream ss;
      ss << "channels of layer " << k << " ("
         << cfg.channels
         << ") doesn't match head_size of preceding layer ("
         << layer_array_params[k - 1].head_size << "!\n";
      throw std::runtime_error(ss.str());
    }
    // Head output of this layer array (and head input of the next one).
    this->_head_arrays.push_back(Eigen::MatrixXf(cfg.head_size, 0));
  }

  this->_head_output.resize(1, 0); // Mono output!
  this->set_params_(params);
  this->_reset_anti_pop_();
}
|
|
||||||
|
|
||||||
void wavenet::WaveNet::finalize_(const int num_frames) {
|
|
||||||
this->DSP::finalize_(num_frames);
|
|
||||||
this->_advance_buffers_(num_frames);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load all weights from `params`: each layer array in order, then the head
// scale as the final value.
// Throws std::runtime_error when the weights run out exactly at the head
// scale, or when values remain after the head scale has been read.
void wavenet::WaveNet::set_params_(std::vector<float> &params) {
  std::vector<float>::iterator it = params.begin();
  for (auto &layer_array : this->_layer_arrays)
    layer_array.set_params_(it);
  // this->_head.set_params_(it);

  // Catches the case where the layers consumed every weight, leaving none for
  // the head scale. An overrun inside the layers cannot be detected here.
  if (it == params.end()) {
    std::stringstream ss;
    ss << "Parameter mismatch: provided " << params.size()
       << " weights, but the model expects more.";
    throw std::runtime_error(ss.str().c_str());
  }
  this->_head_scale = *(it++);

  if (it != params.end()) {
    // Count consumed weights by iterator position; matching on the pointed-to
    // value misreports the count whenever two weights are equal.
    std::stringstream ss;
    ss << "Parameter mismatch: assigned " << (it - params.begin())
       << " parameters, but " << params.size() << " were provided.";
    throw std::runtime_error(ss.str().c_str());
  }
}
|
|
||||||
|
|
||||||
// Advance the buffer position of every layer array by `num_frames`.
void wavenet::WaveNet::_advance_buffers_(const int num_frames) {
  for (auto &layer_array : this->_layer_arrays)
    layer_array.advance_buffers_(num_frames);
}
|
|
||||||
|
|
||||||
void wavenet::WaveNet::_init_parametric_(nlohmann::json ¶metric) {
|
|
||||||
for (nlohmann::json::iterator it = parametric.begin(); it != parametric.end();
|
|
||||||
++it)
|
|
||||||
this->_param_names.push_back(it.key());
|
|
||||||
// TODO assert continuous 0 to 1
|
|
||||||
std::sort(this->_param_names.begin(), this->_param_names.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Let every layer array prepare (rewinding if needed) for `num_frames` frames.
void wavenet::WaveNet::_prepare_for_frames_(const long num_frames) {
  for (auto &layer_array : this->_layer_arrays)
    layer_array.prepare_for_frames_(num_frames);
}
|
|
||||||
|
|
||||||
void wavenet::WaveNet::_process_core_() {
|
|
||||||
const long num_frames = this->_input_post_gain.size();
|
|
||||||
this->_set_num_frames_(num_frames);
|
|
||||||
this->_prepare_for_frames_(num_frames);
|
|
||||||
|
|
||||||
// NOTE: During warm-up, weird things can happen that NaN out the layers.
|
|
||||||
// We could solve this by anti-popping the *input*. But, it's easier to check
|
|
||||||
// the outputs for NaNs and zero them out.
|
|
||||||
// They'll flush out eventually because the model doesn't use any feedback.
|
|
||||||
|
|
||||||
// Fill into condition array:
|
|
||||||
// Clumsy...
|
|
||||||
for (int j = 0; j < num_frames; j++) {
|
|
||||||
this->_condition(0, j) = this->_input_post_gain[j];
|
|
||||||
if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
|
|
||||||
// compiler optimize this.
|
|
||||||
for (int i = 0; i < this->_param_names.size(); i++)
|
|
||||||
this->_condition(i + 1, j) =
|
|
||||||
(float)this->_params[this->_param_names[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Main layer arrays:
|
|
||||||
// Layer-to-layer
|
|
||||||
// Sum on head output
|
|
||||||
this->_head_arrays[0].setZero();
|
|
||||||
for (int i = 0; i < this->_layer_arrays.size(); i++)
|
|
||||||
this->_layer_arrays[i].process_(
|
|
||||||
i == 0 ? this->_condition : this->_layer_array_outputs[i - 1],
|
|
||||||
this->_condition, this->_head_arrays[i], this->_layer_array_outputs[i],
|
|
||||||
this->_head_arrays[i + 1]);
|
|
||||||
// this->_head.process_(
|
|
||||||
// this->_head_input,
|
|
||||||
// this->_head_output
|
|
||||||
//);
|
|
||||||
// Copy to required output array
|
|
||||||
// Hack: apply head scale here; revisit when/if I activate the head.
|
|
||||||
// assert(this->_head_output.rows() == 1);
|
|
||||||
|
|
||||||
const long final_head_array = this->_head_arrays.size() - 1;
|
|
||||||
assert(this->_head_arrays[final_head_array].rows() == 1);
|
|
||||||
for (int s = 0; s < num_frames; s++) {
|
|
||||||
float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
|
|
||||||
// This is the NaN check that we could fix with anti-popping the input
|
|
||||||
if (isnan(out))
|
|
||||||
out = 0.0;
|
|
||||||
this->_core_dsp_output[s] = out;
|
|
||||||
}
|
|
||||||
// Apply anti-pop
|
|
||||||
this->_anti_pop_();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resize every per-block matrix to `num_frames` columns and forward the new
// size to the layer arrays. Does nothing when the size is unchanged.
void wavenet::WaveNet::_set_num_frames_(const long num_frames) {
  if (this->_num_frames == num_frames)
    return;

  // One row for the audio sample plus one per named parameter.
  this->_condition.resize(1 + this->_param_names.size(), num_frames);
  for (auto &head_array : this->_head_arrays)
    head_array.resize(head_array.rows(), num_frames);
  for (auto &array_output : this->_layer_array_outputs)
    array_output.resize(array_output.rows(), num_frames);
  this->_head_output.resize(this->_head_output.rows(), num_frames);

  for (auto &layer_array : this->_layer_arrays)
    layer_array.set_num_frames_(num_frames);
  // this->_head.set_num_frames_(num_frames);
  this->_num_frames = num_frames;
}
|
|
||||||
|
|
||||||
// Scale output samples by a linear ramp while `_anti_pop_countdown` is below
// `_anti_pop_ramp`. The gain is clamped at 0, so samples are muted while the
// countdown is negative. The countdown advances by one per processed sample.
void wavenet::WaveNet::_anti_pop_() {
  if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
    return;
  const float slope = 1.0f / float(this->_anti_pop_ramp);
  const size_t num_samples = this->_core_dsp_output.size();
  for (size_t s = 0;
       s < num_samples && this->_anti_pop_countdown < this->_anti_pop_ramp;
       ++s) {
    const float ramp_value = slope * float(this->_anti_pop_countdown);
    this->_core_dsp_output[s] *= std::max(ramp_value, 0.0f);
    this->_anti_pop_countdown++;
  }
}
|
|
||||||
|
|
||||||
// Set the anti-pop countdown to minus the total receptive field
// (1 + the sum of every layer array's zero-indexed receptive field).
void wavenet::WaveNet::_reset_anti_pop_() {
  // You need the "real" receptive field, not the buffers.
  long total_receptive_field = 1;
  for (const auto &layer_array : this->_layer_arrays)
    total_receptive_field += layer_array.get_receptive_field();
  this->_anti_pop_countdown = -total_receptive_field;
}
|
|
||||||
-212
@@ -1,212 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "json.hpp"
|
|
||||||
#include <Eigen/Dense>
|
|
||||||
|
|
||||||
#include "dsp.h"
|
|
||||||
|
|
||||||
namespace wavenet {
|
|
||||||
// Rework the initialization API slightly. Merge w/ dsp.h later.
|
|
||||||
// A Conv1D subclass that only adds a constructor taking the dilation
// alongside the channel counts, kernel size and bias flag.
class _DilatedConv : public Conv1D {
public:
  _DilatedConv(const int in_channels, const int out_channels,
               const int kernel_size, const int bias, const int dilation);
};
|
|
||||||
|
|
||||||
class _Layer {
|
|
||||||
public:
|
|
||||||
_Layer(const int condition_size, const int channels, const int kernel_size,
|
|
||||||
const int dilation, const std::string activation, const bool gated)
|
|
||||||
: _activation(activation), _gated(gated),
|
|
||||||
_conv(channels, gated ? 2 * channels : channels, kernel_size, true,
|
|
||||||
dilation),
|
|
||||||
_input_mixin(condition_size, gated ? 2 * channels : channels, false),
|
|
||||||
_1x1(channels, channels, true){};
|
|
||||||
void set_params_(std::vector<float>::iterator ¶ms);
|
|
||||||
// :param `input`: from previous layer
|
|
||||||
// :param `output`: to next layer
|
|
||||||
void process_(const Eigen::MatrixXf &input, const Eigen::MatrixXf &condition,
|
|
||||||
Eigen::MatrixXf &head_input, Eigen::MatrixXf &output,
|
|
||||||
const long i_start, const long j_start);
|
|
||||||
void set_num_frames_(const long num_frames);
|
|
||||||
long get_channels() const { return this->_conv.get_in_channels(); };
|
|
||||||
int get_dilation() const { return this->_conv.get_dilation(); };
|
|
||||||
long get_kernel_size() const { return this->_conv.get_kernel_size(); };
|
|
||||||
|
|
||||||
private:
|
|
||||||
// The dilated convolution at the front of the block
|
|
||||||
_DilatedConv _conv;
|
|
||||||
// Input mixin
|
|
||||||
Conv1x1 _input_mixin;
|
|
||||||
// The post-activation 1x1 convolution
|
|
||||||
Conv1x1 _1x1;
|
|
||||||
// The internal state
|
|
||||||
Eigen::MatrixXf _z;
|
|
||||||
|
|
||||||
const std::string _activation;
|
|
||||||
const bool _gated;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Plain bundle of the constructor arguments for one layer array.
// `dilations` is stored as a copy of the vector passed in.
class LayerArrayParams {
public:
  LayerArrayParams(const int input_size_, const int condition_size_,
                   const int head_size_, const int channels_,
                   const int kernel_size_, const std::vector<int> &dilations_,
                   const std::string activation_, const bool gated_,
                   const bool head_bias_)
      : input_size(input_size_), condition_size(condition_size_),
        head_size(head_size_), channels(channels_), kernel_size(kernel_size_),
        dilations(dilations_.begin(), dilations_.end()),
        activation(activation_), gated(gated_), head_bias(head_bias_) {}

  const int input_size;
  const int condition_size;
  const int head_size;
  const int channels;
  const int kernel_size;
  std::vector<int> dilations;
  const std::string activation;
  const bool gated;
  const bool head_bias;
};
|
|
||||||
|
|
||||||
// An array of layers with the same channels, kernel sizes, activations.
|
|
||||||
class _LayerArray {
|
|
||||||
public:
|
|
||||||
_LayerArray(const int input_size, const int condition_size,
|
|
||||||
const int head_size, const int channels, const int kernel_size,
|
|
||||||
const std::vector<int> &dilations, const std::string activation,
|
|
||||||
const bool gated, const bool head_bias);
|
|
||||||
|
|
||||||
void advance_buffers_(const int num_frames);
|
|
||||||
|
|
||||||
// Preparing for frames:
|
|
||||||
// Rewind buffers if needed
|
|
||||||
// Shift index to prepare
|
|
||||||
//
|
|
||||||
void prepare_for_frames_(const long num_frames);
|
|
||||||
|
|
||||||
// All arrays are "short".
|
|
||||||
void process_(const Eigen::MatrixXf &layer_inputs, // Short
|
|
||||||
const Eigen::MatrixXf &condition, // Short
|
|
||||||
Eigen::MatrixXf &layer_outputs, // Short
|
|
||||||
Eigen::MatrixXf &head_inputs, // Sum up on this.
|
|
||||||
Eigen::MatrixXf &head_outputs // post head-rechannel
|
|
||||||
);
|
|
||||||
void set_num_frames_(const long num_frames);
|
|
||||||
void set_params_(std::vector<float>::iterator &it);
|
|
||||||
|
|
||||||
// "Zero-indexed" receptive field.
|
|
||||||
// E.g. a 1x1 convolution has a z.i.r.f. of zero.
|
|
||||||
long get_receptive_field() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
long _buffer_start;
|
|
||||||
// The rechannel before the layers
|
|
||||||
Conv1x1 _rechannel;
|
|
||||||
|
|
||||||
// Buffers in between layers.
|
|
||||||
// buffer [i] is the input to layer [i].
|
|
||||||
// the last layer outputs to a short array provided by outside.
|
|
||||||
std::vector<Eigen::MatrixXf> _layer_buffers;
|
|
||||||
// The layer objects
|
|
||||||
std::vector<_Layer> _layers;
|
|
||||||
|
|
||||||
// Rechannel for the head
|
|
||||||
Conv1x1 _head_rechannel;
|
|
||||||
|
|
||||||
long _get_buffer_size() const {
|
|
||||||
return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols() : 0;
|
|
||||||
};
|
|
||||||
long _get_channels() const;
|
|
||||||
// "One-indexed" receptive field
|
|
||||||
// TODO remove!
|
|
||||||
// E.g. a 1x1 convolution has a o.i.r.f. of one.
|
|
||||||
long _get_receptive_field() const;
|
|
||||||
void _rewind_buffers_();
|
|
||||||
};
|
|
||||||
|
|
||||||
// The head module
|
|
||||||
// [Act->Conv] x L
|
|
||||||
class _Head {
|
|
||||||
public:
|
|
||||||
_Head(const int input_size, const int num_layers, const int channels,
|
|
||||||
const std::string activation);
|
|
||||||
void set_params_(std::vector<float>::iterator ¶ms);
|
|
||||||
// NOTE: the head transforms the provided input by applying a nonlinearity
|
|
||||||
// to it in-place!
|
|
||||||
void process_(Eigen::MatrixXf &inputs, Eigen::MatrixXf &outputs);
|
|
||||||
void set_num_frames_(const long num_frames);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int _channels;
|
|
||||||
std::vector<Conv1x1> _layers;
|
|
||||||
Conv1x1 _head;
|
|
||||||
std::string _activation;
|
|
||||||
|
|
||||||
// Stores the outputs of the convs *except* the last one, which goes in
|
|
||||||
// The array `outputs` provided to .process_()
|
|
||||||
std::vector<Eigen::MatrixXf> _buffers;
|
|
||||||
|
|
||||||
// Apply the activation to the provided array, in-place
|
|
||||||
void _apply_activation_(Eigen::MatrixXf &x);
|
|
||||||
};
|
|
||||||
|
|
||||||
// The main WaveNet model
|
|
||||||
// Both parametric and not; difference is handled at param read-in.
|
|
||||||
class WaveNet : public DSP {
|
|
||||||
public:
|
|
||||||
WaveNet(const std::vector<LayerArrayParams> &layer_array_params,
|
|
||||||
const float head_scale, const bool with_head,
|
|
||||||
nlohmann::json parametric, std::vector<float> params);
|
|
||||||
|
|
||||||
// WaveNet(WaveNet&&) = default;
|
|
||||||
// WaveNet& operator=(WaveNet&&) = default;
|
|
||||||
// ~WaveNet() = default;
|
|
||||||
|
|
||||||
void finalize_(const int num_frames) override;
|
|
||||||
void set_params_(std::vector<float> ¶ms);
|
|
||||||
|
|
||||||
private:
|
|
||||||
long _num_frames;
|
|
||||||
std::vector<_LayerArray> _layer_arrays;
|
|
||||||
// Their outputs
|
|
||||||
std::vector<Eigen::MatrixXf> _layer_array_outputs;
|
|
||||||
// Head _head;
|
|
||||||
|
|
||||||
// Element-wise arrays:
|
|
||||||
Eigen::MatrixXf _condition;
|
|
||||||
// One more than total layer arrays
|
|
||||||
std::vector<Eigen::MatrixXf> _head_arrays;
|
|
||||||
float _head_scale;
|
|
||||||
Eigen::MatrixXf _head_output;
|
|
||||||
|
|
||||||
// Names of the params, sorted.
|
|
||||||
// TODO move this up, ugh.
|
|
||||||
std::vector<std::string> _param_names;
|
|
||||||
|
|
||||||
void _advance_buffers_(const int num_frames);
|
|
||||||
// Get the info from the parametric config
|
|
||||||
void _init_parametric_(nlohmann::json ¶metric);
|
|
||||||
void _prepare_for_frames_(const long num_frames);
|
|
||||||
// Reminder: From ._input_post_gain to ._core_dsp_output
|
|
||||||
void _process_core_() override;
|
|
||||||
|
|
||||||
// Ensure that all buffer arrays are the right size for this num_frames
|
|
||||||
void _set_num_frames_(const long num_frames);
|
|
||||||
|
|
||||||
// The net starts with random parameters inside; we need to wait for a full
|
|
||||||
// receptive field to pass through before we can count on the output being
|
|
||||||
// ok. This implements a gentle "ramp-up" so that there's no "pop" at the
|
|
||||||
// start.
|
|
||||||
long _anti_pop_countdown;
|
|
||||||
const long _anti_pop_ramp = 4000;
|
|
||||||
void _anti_pop_();
|
|
||||||
void _reset_anti_pop_();
|
|
||||||
};
|
|
||||||
}; // namespace wavenet
|
|
||||||
Reference in New Issue
Block a user