Initial commit

This commit is contained in:
Mike Oliphant
2023-03-08 17:19:08 -08:00
parent 63d499cff8
commit 6f2f7921cc
17 changed files with 25126 additions and 0 deletions
+596
View File
@@ -0,0 +1,596 @@
#include <algorithm> // std::max_element
#include <algorithm>
#include <cmath> // pow, tanh, expf
#include <filesystem>
#include <fstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "dsp.h"
#include "json.hpp"
#include "util.h"
//#define tanh_impl_ std::tanh
#define tanh_impl_ fast_tanh_
constexpr auto _INPUT_BUFFER_SAFETY_FACTOR = 32;
DSP::DSP() { this->_stale_params = true; }
void DSP::process(const NAMSample *input, NAMSample *output,
const int num_channels, const int num_frames,
const double input_gain, const double output_gain,
const std::unordered_map<std::string, double> &params) {
this->_get_params_(params);
this->_apply_input_level_(input, num_channels, num_frames, input_gain);
this->_ensure_core_dsp_output_ready_();
this->_process_core_();
this->_apply_output_level_(output, num_channels, num_frames, output_gain);
}
void DSP::finalize_(const int num_frames) {}
void DSP::_get_params_(
const std::unordered_map<std::string, double> &input_params) {
this->_stale_params = false;
for (auto it = input_params.begin(); it != input_params.end(); ++it) {
const std::string key = util::lowercase(it->first);
const double value = it->second;
if (this->_params.find(key) == this->_params.end()) // Not contained
this->_stale_params = true;
else if (this->_params[key] != value) // Contained but new value
this->_stale_params = true;
this->_params[key] = value;
}
}
void DSP::_apply_input_level_(const NAMSample *input, const int num_channels,
const int num_frames, const double gain) {
// Must match exactly; we're going to use the size of _input_post_gain later
// for num_frames.
if (this->_input_post_gain.size() != num_frames)
this->_input_post_gain.resize(num_frames);
// MONO ONLY
const int channel = 0;
for (int i = 0; i < num_frames; i++)
this->_input_post_gain[i] = float(gain * input[i]);
}
void DSP::_ensure_core_dsp_output_ready_() {
if (this->_core_dsp_output.size() < this->_input_post_gain.size())
this->_core_dsp_output.resize(this->_input_post_gain.size());
}
void DSP::_process_core_() {
// Default implementation is the null operation
for (int i = 0; i < this->_input_post_gain.size(); i++)
this->_core_dsp_output[i] = this->_input_post_gain[i];
}
void DSP::_apply_output_level_(NAMSample *output, const int num_channels,
const int num_frames, const double gain) {
for (int c = 0; c < num_channels; c++)
for (int s = 0; s < num_frames; s++)
output[s] = double(gain * this->_core_dsp_output[s]);
}
// Buffer =====================================================================
Buffer::Buffer(const int receptive_field) : DSP() {
this->_set_receptive_field(receptive_field);
}
void Buffer::_set_receptive_field(const int new_receptive_field) {
this->_set_receptive_field(new_receptive_field,
_INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field);
};
void Buffer::_set_receptive_field(const int new_receptive_field,
const int input_buffer_size) {
this->_receptive_field = new_receptive_field;
this->_input_buffer.resize(input_buffer_size);
this->_reset_input_buffer();
}
void Buffer::_update_buffers_() {
const long int num_frames = this->_input_post_gain.size();
// Make sure that the buffer is big enough for the receptive field and the
// frames needed!
{
const long minimum_input_buffer_size =
(long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
if (this->_input_buffer.size() < minimum_input_buffer_size) {
long new_buffer_size = 2;
while (new_buffer_size < minimum_input_buffer_size)
new_buffer_size *= 2;
this->_input_buffer.resize(new_buffer_size);
}
}
// If we'd run off the end of the input buffer, then we need to move the data
// back to the start of the buffer and start again.
if (this->_input_buffer_offset + num_frames > this->_input_buffer.size())
this->_rewind_buffers_();
// Put the new samples into the input buffer
for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
this->_input_buffer[i] = this->_input_post_gain[j];
// And resize the output buffer:
this->_output_buffer.resize(num_frames);
}
void Buffer::_rewind_buffers_() {
// Copy the input buffer back
// RF-1 samples because we've got at least one new one inbound.
for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field;
i < this->_receptive_field; i++, j++)
this->_input_buffer[i] = this->_input_buffer[j];
// And reset the offset.
// Even though we could be stingy about that one sample that we won't be using
// (because a new set is incoming) it's probably not worth the
// hyper-optimization and liable for bugs. And the code looks way tidier this
// way.
this->_input_buffer_offset = this->_receptive_field;
}
void Buffer::_reset_input_buffer() {
this->_input_buffer_offset = this->_receptive_field;
}
void Buffer::finalize_(const int num_frames) {
this->DSP::finalize_(num_frames);
this->_input_buffer_offset += num_frames;
}
// Linear =====================================================================
Linear::Linear(const int receptive_field, const bool _bias,
const std::vector<float> &params)
: Buffer(receptive_field) {
if (params.size() != (receptive_field + (_bias ? 1 : 0)))
throw std::runtime_error("Params vector does not match expected size based "
"on architecture parameters");
this->_weight.resize(this->_receptive_field);
// Pass in in reverse order so that dot products work out of the box.
for (int i = 0; i < this->_receptive_field; i++)
this->_weight(i) = params[receptive_field - 1 - i];
this->_bias = _bias ? params[receptive_field] : (float)0.0;
}
void Linear::_process_core_() {
this->Buffer::_update_buffers_();
// Main computation!
for (long i = 0; i < this->_input_post_gain.size(); i++) {
const long offset =
this->_input_buffer_offset - this->_weight.size() + i + 1;
auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset],
this->_receptive_field);
this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input);
}
}
// NN modules =================================================================
void relu_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = 0; i < x.rows(); i++)
x(i, j) = x(i, j) < (float)0.0 ? (float)0.0 : x(i, j);
}
void relu_(Eigen::MatrixXf &x, const long j_start, const long j_end) {
relu_(x, 0, x.rows(), j_start, j_end);
}
void relu_(Eigen::MatrixXf &x) { relu_(x, 0, x.rows(), 0, x.cols()); }
void sigmoid_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = i_start; i < i_end; i++)
x(i, j) = 1.0 / (1.0 + expf(-x(i, j)));
}
inline float fast_tanh_(const float x)
{
const float ax = fabs(x);
const float x2 = x * x;
return(x * (2.45550750702956f + 2.45550750702956f * ax +
(0.893229853513558f + 0.821226666969744f * ax) * x2) /
(2.44506634652299f + (2.44506634652299f + x2) *
fabs(x + 0.814642734961073f * x * ax)));
}
void tanh_(Eigen::MatrixXf& x)
{
float *ptr = x.data();
long size = x.rows() * x.cols();
for (long pos = 0; pos < size; pos++)
{
ptr[pos] = tanh_impl_(ptr[pos]);
}
}
void tanh_(Eigen::MatrixXf &x, const long i_start, const long i_end,
const long j_start, const long j_end) {
for (long j = j_start; j < j_end; j++)
for (long i = i_start; i < i_end; i++)
x(i, j) = tanh_impl_(x(i, j));
}
void tanh_cols_(Eigen::MatrixXf &x, const long j_start, const long j_end) {
tanh_(x, 0, x.rows(), j_start, j_end);
}
void Conv1D::set_params_(std::vector<float>::iterator &params) {
if (this->_weight.size() > 0) {
const long out_channels = this->_weight[0].rows();
const long in_channels = this->_weight[0].cols();
// Crazy ordering because that's how it gets flattened.
for (auto i = 0; i < out_channels; i++)
for (auto j = 0; j < in_channels; j++)
for (auto k = 0; k < this->_weight.size(); k++)
this->_weight[k](i, j) = *(params++);
}
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(params++);
}
void Conv1D::set_size_(const int in_channels, const int out_channels,
const int kernel_size, const bool do_bias,
const int _dilation) {
this->_weight.resize(kernel_size);
for (int i = 0; i < this->_weight.size(); i++)
this->_weight[i].resize(out_channels,
in_channels); // y = Ax, input array (C,L)
if (do_bias)
this->_bias.resize(out_channels);
else
this->_bias.resize(0);
this->_dilation = _dilation;
}
void Conv1D::set_size_and_params_(const int in_channels, const int out_channels,
const int kernel_size, const int _dilation,
const bool do_bias,
std::vector<float>::iterator &params) {
this->set_size_(in_channels, out_channels, kernel_size, do_bias, _dilation);
this->set_params_(params);
}
void Conv1D::process_(const Eigen::MatrixXf &input, Eigen::MatrixXf &output,
const long i_start, const long ncols,
const long j_start) const {
// This is the clever part ;)
for (long k = 0; k < this->_weight.size(); k++) {
const long offset = this->_dilation * (k + 1 - this->_weight.size());
if (k == 0)
output.middleCols(j_start, ncols).noalias() =
this->_weight[k] * input.middleCols(i_start + offset, ncols);
else
output.middleCols(j_start, ncols).noalias() +=
this->_weight[k] * input.middleCols(i_start + offset, ncols);
}
if (this->_bias.size() > 0)
output.middleCols(j_start, ncols).colwise() += this->_bias;
}
long Conv1D::get_num_params() const {
long num_params = this->_bias.size();
for (long i = 0; i < this->_weight.size(); i++)
num_params += this->_weight[i].size();
return num_params;
}
Conv1x1::Conv1x1(const int in_channels, const int out_channels,
const bool _bias) {
this->_weight.resize(out_channels, in_channels);
this->_do_bias = _bias;
if (_bias)
this->_bias.resize(out_channels);
}
void Conv1x1::set_params_(std::vector<float>::iterator &params) {
for (int i = 0; i < this->_weight.rows(); i++)
for (int j = 0; j < this->_weight.cols(); j++)
this->_weight(i, j) = *(params++);
if (this->_do_bias)
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(params++);
}
Eigen::MatrixXf Conv1x1::process(const Eigen::MatrixXf &input) const {
if (this->_do_bias)
return (this->_weight * input).colwise() + this->_bias;
else
return this->_weight * input;
}
// ConvNet ====================================================================
convnet::BatchNorm::BatchNorm(const int dim,
std::vector<float>::iterator &params) {
// Extract from param buffer
Eigen::VectorXf running_mean(dim);
Eigen::VectorXf running_var(dim);
Eigen::VectorXf _weight(dim);
Eigen::VectorXf _bias(dim);
for (int i = 0; i < dim; i++)
running_mean(i) = *(params++);
for (int i = 0; i < dim; i++)
running_var(i) = *(params++);
for (int i = 0; i < dim; i++)
_weight(i) = *(params++);
for (int i = 0; i < dim; i++)
_bias(i) = *(params++);
float eps = *(params++);
// Convert to scale & loc
this->scale.resize(dim);
this->loc.resize(dim);
for (int i = 0; i < dim; i++)
this->scale(i) = _weight(i) / sqrt(eps + running_var(i));
this->loc = _bias - this->scale.cwiseProduct(running_mean);
}
void convnet::BatchNorm::process_(Eigen::MatrixXf &x, const long i_start,
const long i_end) const {
// todo using colwise?
// #speed but conv probably dominates
for (auto i = i_start; i < i_end; i++) {
x.col(i) = x.col(i).cwiseProduct(this->scale);
x.col(i) += this->loc;
}
}
void convnet::ConvNetBlock::set_params_(const int in_channels,
const int out_channels,
const int _dilation,
const bool batchnorm,
const std::string activation,
std::vector<float>::iterator &params) {
this->_batchnorm = batchnorm;
// HACK 2 kernel
this->conv.set_size_and_params_(in_channels, out_channels, 2, _dilation,
!batchnorm, params);
if (this->_batchnorm)
this->batchnorm = BatchNorm(out_channels, params);
this->activation = activation;
}
void convnet::ConvNetBlock::process_(const Eigen::MatrixXf &input,
Eigen::MatrixXf &output,
const long i_start,
const long i_end) const {
const long ncols = i_end - i_start;
this->conv.process_(input, output, i_start, ncols, i_start);
if (this->_batchnorm)
this->batchnorm.process_(output, i_start, i_end);
if (this->activation == "Tanh")
tanh_cols_(output, i_start, i_end);
else if (this->activation == "ReLU")
relu_(output, i_start, i_end);
else
throw std::runtime_error("Unrecognized activation");
}
long convnet::ConvNetBlock::get_out_channels() const {
return this->conv.get_out_channels();
}
convnet::_Head::_Head(const int channels,
std::vector<float>::iterator &params) {
this->_weight.resize(channels);
for (int i = 0; i < channels; i++)
this->_weight[i] = *(params++);
this->_bias = *(params++);
}
void convnet::_Head::process_(const Eigen::MatrixXf &input,
Eigen::VectorXf &output, const long i_start,
const long i_end) const {
const long length = i_end - i_start;
output.resize(length);
for (long i = 0, j = i_start; i < length; i++, j++)
output(i) = this->_bias + input.col(j).dot(this->_weight);
}
convnet::ConvNet::ConvNet(const int channels, const std::vector<int> &dilations,
const bool batchnorm, const std::string activation,
std::vector<float> &params)
: Buffer(*std::max_element(dilations.begin(), dilations.end())) {
this->_verify_params(channels, dilations, batchnorm, params.size());
this->_blocks.resize(dilations.size());
std::vector<float>::iterator it = params.begin();
for (int i = 0; i < dilations.size(); i++)
this->_blocks[i].set_params_(i == 0 ? 1 : channels, channels, dilations[i],
batchnorm, activation, it);
this->_block_vals.resize(this->_blocks.size() + 1);
this->_head = _Head(channels, it);
if (it != params.end())
throw std::runtime_error(
"Didn't touch all the params when initializing wavenet");
this->_reset_anti_pop_();
}
void convnet::ConvNet::_process_core_() {
this->_update_buffers_();
// Main computation!
const long i_start = this->_input_buffer_offset;
const long num_frames = this->_input_post_gain.size();
const long i_end = i_start + num_frames;
// TODO one unnecessary copy :/ #speed
for (auto i = i_start; i < i_end; i++)
this->_block_vals[0](0, i) = this->_input_buffer[i];
for (auto i = 0; i < this->_blocks.size(); i++)
this->_blocks[i].process_(this->_block_vals[i], this->_block_vals[i + 1],
i_start, i_end);
// TODO clean up this allocation
this->_head.process_(this->_block_vals[this->_blocks.size()],
this->_head_output, i_start, i_end);
// Copy to required output array (TODO tighten this up)
for (int s = 0; s < num_frames; s++)
this->_core_dsp_output[s] = this->_head_output(s);
// Apply anti-pop
this->_anti_pop_();
}
void convnet::ConvNet::_verify_params(const int channels,
const std::vector<int> &dilations,
const bool batchnorm,
const size_t actual_params) {
// TODO
}
void convnet::ConvNet::_update_buffers_() {
this->Buffer::_update_buffers_();
const long buffer_size = this->_input_buffer.size();
this->_block_vals[0].resize(1, buffer_size);
for (long i = 1; i < this->_block_vals.size(); i++)
this->_block_vals[i].resize(this->_blocks[i - 1].get_out_channels(),
buffer_size);
}
void convnet::ConvNet::_rewind_buffers_() {
// Need to rewind the block vals first because Buffer::rewind_buffers()
// resets the offset index
// The last _block_vals is the output of the last block and doesn't need to be
// rewound.
for (long k = 0; k < this->_block_vals.size() - 1; k++) {
// We actually don't need to pull back a lot...just as far as the first
// input sample would grab from dilation
const long _dilation = this->_blocks[k].conv.get_dilation();
for (long i = this->_receptive_field - _dilation,
j = this->_input_buffer_offset - _dilation;
j < this->_input_buffer_offset; i++, j++)
for (long r = 0; r < this->_block_vals[k].rows(); r++)
this->_block_vals[k](r, i) = this->_block_vals[k](r, j);
}
// Now we can do the rest of the rewind
this->Buffer::_rewind_buffers_();
}
void convnet::ConvNet::_anti_pop_() {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
return;
const float slope = 1.0f / float(this->_anti_pop_ramp);
for (int i = 0; i < this->_core_dsp_output.size(); i++) {
if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
break;
const float gain =
std::max(slope * float(this->_anti_pop_countdown), float(0.0));
this->_core_dsp_output[i] *= gain;
this->_anti_pop_countdown++;
}
}
void convnet::ConvNet::_reset_anti_pop_() {
// You need the "real" receptive field, not the buffers.
long receptive_field = 1;
for (int i = 0; i < this->_blocks.size(); i++)
receptive_field += this->_blocks[i].conv.get_dilation();
this->_anti_pop_countdown = -receptive_field;
}
// ============================================================================
// Implementation of Version 2 interface
dsp::DSP::DSP() : mOutputPointers(nullptr), mOutputPointersSize(0) {}
dsp::DSP::~DSP() { this->_DeallocateOutputPointers(); };
void dsp::DSP::_AllocateOutputPointers(const size_t numChannels) {
if (this->mOutputPointers != nullptr)
throw std::runtime_error(
"Tried to re-allocate over non-null mOutputPointers");
this->mOutputPointers = new float *[numChannels];
if (this->mOutputPointers == nullptr)
throw std::runtime_error("Failed to allocate pointer to output buffer!\n");
this->mOutputPointersSize = numChannels;
}
void dsp::DSP::_DeallocateOutputPointers() {
if (this->mOutputPointers != nullptr) {
delete[] this->mOutputPointers;
this->mOutputPointers = nullptr;
}
if (this->mOutputPointers != nullptr)
throw std::runtime_error("Failed to deallocate output pointer!");
this->mOutputPointersSize = 0;
}
float **dsp::DSP::_GetPointers() {
for (auto c = 0; c < this->_GetNumChannels(); c++)
this->mOutputPointers[c] = this->mOutputs[c].data();
return this->mOutputPointers;
}
void dsp::DSP::_PrepareBuffers(const size_t numChannels,
const size_t numFrames) {
const size_t oldFrames = this->_GetNumFrames();
const size_t oldChannels = this->_GetNumChannels();
const bool resizeChannels = oldChannels != numChannels;
const bool resizeFrames = resizeChannels || (oldFrames != numFrames);
if (resizeChannels) {
this->mOutputs.resize(numChannels);
this->_ResizePointers(numChannels);
}
if (resizeFrames)
for (auto c = 0; c < numChannels; c++)
this->mOutputs[c].resize(numFrames);
}
void dsp::DSP::_ResizePointers(const size_t numChannels) {
if (this->mOutputPointersSize == numChannels)
return;
this->_DeallocateOutputPointers();
this->_AllocateOutputPointers(numChannels);
}
dsp::History::History() : DSP(), mHistoryRequired(0), mHistoryIndex(0) {}
void dsp::History::_AdvanceHistoryIndex(const size_t bufferSize) {
this->mHistoryIndex += bufferSize;
}
void dsp::History::_EnsureHistorySize(const size_t bufferSize) {
const size_t repeatSize = std::max(bufferSize, this->mHistoryRequired);
const size_t requiredHistoryArraySize =
10 * repeatSize; // Just so we don't spend too much time copying back.
if (this->mHistory.size() < requiredHistoryArraySize) {
this->mHistory.resize(requiredHistoryArraySize);
std::fill(this->mHistory.begin(), this->mHistory.end(), 0.0f);
this->mHistoryIndex = this->mHistoryRequired; // Guaranteed to be less than
// requiredHistoryArraySize
}
}
void dsp::History::_RewindHistory() {
// TODO memcpy? Should be fine w/ history array being >2x the history length.
for (size_t i = 0, j = this->mHistoryIndex - this->mHistoryRequired;
i < this->mHistoryRequired; i++, j++)
this->mHistory[i] = this->mHistory[j];
this->mHistoryIndex = this->mHistoryRequired;
}
void dsp::History::_UpdateHistory(float **inputs,
const size_t numChannels,
const size_t numFrames) {
this->_EnsureHistorySize(numFrames);
if (numChannels < 1)
throw std::runtime_error("Zero channels?");
if (this->mHistoryIndex + numFrames >= this->mHistory.size())
this->_RewindHistory();
// Grabs channel 1, drops hannel 2.
for (size_t i = 0, j = this->mHistoryIndex; i < numFrames; i++, j++)
// Convert down to float here.
this->mHistory[j] = (float)inputs[0][i];
}