From dec19dc12a879a75c68420df857595026568e9b6 Mon Sep 17 00:00:00 2001
From: Robbert van der Helm <mail@robbertvanderhelm.nl>
Date: Fri, 11 Jun 2021 13:56:42 +0200
Subject: [PATCH] :boom: Reimplement VST3 audio processing

This does for VST3 plugins what 50c25c1cf0d1846ae3717b987ebdfa555ac6d693
did for VST2 plugins. Input and output audio data is now stored in a shared
memory buffer instead of being sent over the sockets. This reduces the
bridging overhead to a minimum since copying data was the most expensive
operation we were doing and we now only need to copy the entire buffer
once per processing cycle.
---
 CHANGELOG.md                                  |   8 +-
 src/common/logging/vst3.cpp                   |  41 ++-
 .../serialization/vst3/process-data.cpp       | 272 +++++++-----------
 src/common/serialization/vst3/process-data.h  | 244 +++++-----------
 .../bridges/vst3-impls/plugin-proxy.cpp       |  31 +-
 src/plugin/bridges/vst3-impls/plugin-proxy.h  |  29 +-
 src/wine-host/bridges/vst3.cpp                | 145 +++++++++-
 src/wine-host/bridges/vst3.h                  |  37 +++
 8 files changed, 444 insertions(+), 363 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 889cb869..ee80ca98 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,10 @@ Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Changed
 
-- The audio processing implementation for VST2 (and soon, also for VST3) plugins
-  has been rewritten using both shared memory and message passing to reduce
-  memory copies to a minimum. With this change the DSP load overhead during
-  audio processing should now be as low as it's going to get.
+- The audio processing implementation for both VST2 and VST3 plugins has been
+  completely rewritten to use both shared memory and message passing to reduce
+  expensive memory copies to a minimum. With this change the DSP load overhead
+  during audio processing should now be about as low as it's going to get.
 - Prevented some more potential unnecessary memory operations during yabridge's
   communication. The underlying serialization library was recreating some
   objects even when that wasn't needed, which could in theory result in memory
diff --git a/src/common/logging/vst3.cpp b/src/common/logging/vst3.cpp
index 563b842b..e0192fcc 100644
--- a/src/common/logging/vst3.cpp
+++ b/src/common/logging/vst3.cpp
@@ -1002,12 +1002,24 @@ bool Vst3Logger::log_request(
             // this
             const YaAudioProcessor::Process& request = request_wrapper.get();
 
+            // TODO: The channel counts are now capped at what the plugin
+            //       supports (based on the audio buffers we set up during
+            //       `IAudioProcessor::setupProcessing()`). Some hosts may send
+            //       more buffers, but we don't reflect that in the output right
+            //       now.
             std::ostringstream num_input_channels;
             num_input_channels << "[";
             for (bool is_first = true;
                  const auto& buffers : request.data.inputs) {
                 num_input_channels << (is_first ? "" : ", ")
-                                   << buffers.num_channels();
+                                   << buffers.numChannels;
+                // No newline here, this is an entry in a single-line list
+                if (buffers.silenceFlags > 0 &&
+                    buffers.silenceFlags <
+                        (static_cast<uint64>(1) << buffers.numChannels)) {
+                    num_input_channels << " (silence)";
+                }
+
                 is_first = false;
             }
             num_input_channels << "]";
@@ -1015,8 +1027,16 @@ bool Vst3Logger::log_request(
             std::ostringstream num_output_channels;
             num_output_channels << "[";
             for (bool is_first = true;
-                 const auto& num_channels : request.data.outputs_num_channels) {
-                num_output_channels << (is_first ? "" : ", ") << num_channels;
+                 const auto& buffers : request.data.outputs) {
+                num_output_channels << (is_first ? "" : ", ")
+                                    << buffers.numChannels;
+                // No newline here, this is an entry in a single-line list
+                if (buffers.silenceFlags > 0 &&
+                    buffers.silenceFlags <
+                        (static_cast<uint64>(1) << buffers.numChannels)) {
+                    num_output_channels << " (silence)";
+                }
+
                 is_first = false;
             }
             num_output_channels << "]";
@@ -1030,7 +1050,7 @@ bool Vst3Logger::log_request(
                     << ", input_parameter_changes = <IParameterChanges* for "
                     << request.data.input_parameter_changes.num_parameters()
                     << " parameters>, output_parameter_changes = "
-                    << (request.data.output_parameter_changes_supported
+                    << (request.data.output_parameter_changes
                             ? "<IParameterChanges*>"
                             : "nullptr")
                     << ", input_events = ";
@@ -1042,8 +1062,8 @@ bool Vst3Logger::log_request(
                 message << "<nullptr>";
             }
             message << ", output_events = "
-                    << (request.data.output_events_supported ? "<IEventList*>"
-                                                             : "<nullptr>")
+                    << (request.data.output_events ? "<IEventList*>"
+                                                   : "<nullptr>")
                     << ", process_context = "
                     << (request.data.process_context ? "<ProcessContext*>"
                                                      : "<nullptr>")
@@ -1793,7 +1813,14 @@ void Vst3Logger::log_response(
         for (bool is_first = true;
              const auto& buffers : *response.output_data.outputs) {
             num_output_channels << (is_first ? "" : ", ")
-                                << buffers.num_channels();
+                                << buffers.numChannels;
+            // No newline here, this is an entry in a single-line list
+            if (buffers.silenceFlags > 0 &&
+                buffers.silenceFlags <
+                    (static_cast<uint64>(1) << buffers.numChannels)) {
+                num_output_channels << " (silence)";
+            }
+
             is_first = false;
         }
         num_output_channels << "]";
diff --git a/src/common/serialization/vst3/process-data.cpp b/src/common/serialization/vst3/process-data.cpp
index f14daf7e..2425ac4e 100644
--- a/src/common/serialization/vst3/process-data.cpp
+++ b/src/common/serialization/vst3/process-data.cpp
@@ -18,113 +18,6 @@
 
 #include "src/common/utils.h"
 
-YaAudioBusBuffers::YaAudioBusBuffers() noexcept {}
-
-void YaAudioBusBuffers::clear(int32 sample_size,
-                              size_t num_samples,
-                              size_t num_channels) {
-    auto do_clear = [&]<typename T>(T) {
-        if (!std::holds_alternative<std::vector<std::vector<T>>>(buffers)) {
-            buffers.emplace<std::vector<std::vector<T>>>();
-        }
-
-        std::vector<std::vector<T>>& vector_buffers =
-            std::get<std::vector<std::vector<T>>>(buffers);
-        vector_buffers.resize(num_channels);
-        for (size_t i = 0; i < vector_buffers.size(); i++) {
-            vector_buffers[i].resize(num_samples);
-        }
-    };
-
-    if (sample_size == Steinberg::Vst::SymbolicSampleSizes::kSample64) {
-        // XXX: Clangd doesn't let you specify template parameters for templated
-        //      lambdas. This argument should get optimized out
-        do_clear(double());
-    } else {
-        do_clear(float());
-    }
-}
-
-void YaAudioBusBuffers::repopulate(
-    int32 sample_size,
-    int32 num_samples,
-    const Steinberg::Vst::AudioBusBuffers& data) {
-    silence_flags = data.silenceFlags;
-
-    auto do_repopuldate = [&]<typename T>(T** original_buffer) {
-        if (!std::holds_alternative<std::vector<std::vector<T>>>(buffers)) {
-            buffers.emplace<std::vector<std::vector<T>>>();
-        }
-
-        std::vector<std::vector<T>>& vector_buffers =
-            std::get<std::vector<std::vector<T>>>(buffers);
-        vector_buffers.resize(data.numChannels);
-        for (int channel = 0; channel < data.numChannels; channel++) {
-            vector_buffers[channel].assign(
-                &original_buffer[channel][0],
-                &original_buffer[channel][num_samples]);
-        }
-    };
-
-    if (sample_size == Steinberg::Vst::kSample64) {
-        do_repopuldate(data.channelBuffers64);
-    } else {
-        // I don't think they'll add any other sample sizes any time soon
-        do_repopuldate(data.channelBuffers32);
-    }
-}
-
-void YaAudioBusBuffers::reconstruct(
-    Steinberg::Vst::AudioBusBuffers& reconstructed_buffers) {
-    // We'll update the `AudioBusBuffers` object in place to point to our new
-    // data
-    reconstructed_buffers.silenceFlags = silence_flags;
-
-    std::visit(
-        [&]<typename T>(std::vector<std::vector<T>>& buffers) {
-            buffer_pointers.resize(buffers.size());
-            for (size_t i = 0; i < buffers.size(); i++) {
-                buffer_pointers[i] = buffers[i].data();
-            }
-
-            reconstructed_buffers.numChannels =
-                static_cast<int32>(buffers.size());
-            if constexpr (std::is_same_v<T, double>) {
-                reconstructed_buffers.channelBuffers64 =
-                    reinterpret_cast<T**>(buffer_pointers.data());
-            } else {
-                reconstructed_buffers.channelBuffers32 =
-                    reinterpret_cast<T**>(buffer_pointers.data());
-            }
-        },
-        buffers);
-}
-
-size_t YaAudioBusBuffers::num_channels() const {
-    return std::visit([&](const auto& buffers) { return buffers.size(); },
-                      buffers);
-}
-
-void YaAudioBusBuffers::write_back_outputs(
-    Steinberg::Vst::AudioBusBuffers& output_buffers) const {
-    output_buffers.silenceFlags = silence_flags;
-
-    std::visit(
-        [&]<typename T>(const std::vector<std::vector<T>>& buffers) {
-            for (int channel = 0; channel < output_buffers.numChannels;
-                 channel++) {
-                if constexpr (std::is_same_v<T, double>) {
-                    std::copy(buffers[channel].begin(), buffers[channel].end(),
-                              output_buffers.channelBuffers64[channel]);
-                } else {
-                    std::copy(buffers[channel].begin(), buffers[channel].end(),
-                              output_buffers.channelBuffers32[channel]);
-                }
-            }
-        },
-        buffers);
-}
-
 YaProcessData::YaProcessData() noexcept
     // This response object acts as an optimization. It stores pointers to the
     // original fields in our objects, so we can both only serialize those
@@ -142,8 +35,8 @@ YaProcessData::YaProcessData() noexcept
       // `create_response()` on the plugin side
       reconstructed_process_data() {}
 
-void YaProcessData::repopulate(
-    const Steinberg::Vst::ProcessData& process_data) {
+void YaProcessData::repopulate(const Steinberg::Vst::ProcessData& process_data,
+                               AudioShmBuffer& shared_audio_buffers) {
     // In this function and in every function we call, we should be careful to
     // not use `push_back`/`emplace_back` anywhere. Resizing vectors and
     // modifying them in place performs much better because that avoids
@@ -152,19 +45,45 @@ void YaProcessData::repopulate(
     symbolic_sample_size = process_data.symbolicSampleSize;
     num_samples = process_data.numSamples;
 
-    // We'll make sure to not do any allocations here after the first processing
-    // cycle
+    // The actual audio is stored in an accompanying `AudioShmBuffer` object, so
+    // these inputs and outputs objects are only used to serialize metadata
+    // about the input and output audio bus buffers
     inputs.resize(process_data.numInputs);
-    for (int i = 0; i < process_data.numInputs; i++) {
-        inputs[i].repopulate(symbolic_sample_size, num_samples,
-                             process_data.inputs[i]);
+    for (int bus = 0; bus < process_data.numInputs; bus++) {
+        // NOTE: The host might provide more input channels than what the plugin
+        //       asked for. Carla does this for some reason. We should just
+        //       ignore these.
+        inputs[bus].numChannels = std::min(
+            static_cast<int32>(shared_audio_buffers.num_input_channels(bus)),
+            process_data.inputs[bus].numChannels);
+        inputs[bus].silenceFlags = process_data.inputs[bus].silenceFlags;
+
+        // We copy the actual input audio for every bus to the shared memory
+        // object
+        for (int channel = 0; channel < inputs[bus].numChannels; channel++) {
+            if (process_data.symbolicSampleSize == Steinberg::Vst::kSample64) {
+                std::copy_n(process_data.inputs[bus].channelBuffers64[channel],
+                            process_data.numSamples,
+                            shared_audio_buffers.input_channel_ptr<double>(
+                                bus, channel));
+            } else {
+                std::copy_n(process_data.inputs[bus].channelBuffers32[channel],
+                            process_data.numSamples,
+                            shared_audio_buffers.input_channel_ptr<float>(
+                                bus, channel));
+            }
+        }
     }
 
-    // We only store how many channels ouch output has so we can recreate the
-    // objects on the Wine side
-    outputs_num_channels.resize(process_data.numOutputs);
-    for (int i = 0; i < process_data.numOutputs; i++) {
-        outputs_num_channels[i] = process_data.outputs[i].numChannels;
+    outputs.resize(process_data.numOutputs);
+    for (int bus = 0; bus < process_data.numOutputs; bus++) {
+        // NOTE: The host might provide more output channels than what the
+        //       plugin asked for. Carla does this for some reason. We should
+        //       just ignore these.
+        outputs[bus].numChannels = std::min(
+            static_cast<int32>(shared_audio_buffers.num_output_channels(bus)),
+            process_data.outputs[bus].numChannels);
+        outputs[bus].silenceFlags = process_data.outputs[bus].silenceFlags;
     }
 
     // Even though `ProcessData::inputParamterChanges` is mandatory, the VST3
@@ -175,7 +94,15 @@ void YaProcessData::repopulate(
         input_parameter_changes.clear();
     }
 
-    output_parameter_changes_supported = process_data.outputParameterChanges;
+    // The existence of the output parameter changes object indicates whether or
+    // not the host provides this for the plugin
+    if (process_data.outputParameterChanges) {
+        if (!output_parameter_changes) {
+            output_parameter_changes.emplace();
+        }
+    } else {
+        output_parameter_changes.reset();
+    }
 
     if (process_data.inputEvents) {
         if (!input_events) {
@@ -186,7 +113,14 @@ void YaProcessData::repopulate(
         input_events.reset();
     }
 
-    output_events_supported = process_data.outputEvents;
+    // Same for the output events
+    if (process_data.outputEvents) {
+        if (!output_events) {
+            output_events.emplace();
+        }
+    } else {
+        output_events.reset();
+    }
 
     if (process_data.processContext) {
         process_context.emplace(*process_data.processContext);
@@ -195,43 +129,40 @@ void YaProcessData::repopulate(
     }
 }
 
-Steinberg::Vst::ProcessData& YaProcessData::reconstruct() {
+Steinberg::Vst::ProcessData& YaProcessData::reconstruct(
+    std::vector<std::vector<void*>>& input_pointers,
+    std::vector<std::vector<void*>>& output_pointers) {
     reconstructed_process_data.processMode = process_mode;
     reconstructed_process_data.symbolicSampleSize = symbolic_sample_size;
     reconstructed_process_data.numSamples = num_samples;
     reconstructed_process_data.numInputs = static_cast<int32>(inputs.size());
-    reconstructed_process_data.numOutputs =
-        static_cast<int32>(outputs_num_channels.size());
+    reconstructed_process_data.numOutputs = static_cast<int32>(outputs.size());
 
-    // We'll have to transform our `YaAudioBusBuffers` objects into an array of
-    // `AudioBusBuffers` object so the plugin can deal with them. These objects
-    // contain pointers to those original objects and thus don't store any
-    // buffer data themselves.
-    inputs_audio_bus_buffers.resize(inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++) {
-        inputs[i].reconstruct(inputs_audio_bus_buffers[i]);
+    // The actual audio data is contained within a shared memory object, and the
+    // input and output pointers point to regions in that object. These pointers
+    // are calculated while handling `IAudioProcessor::setupProcessing()`.
+    // NOTE: The 32-bit and 64-bit audio pointers are a union, and since this is
+    //       a raw memory buffer we can set either `channelBuffers32` or
+    //       `channelBuffers64` to point at that buffer as long as we do the
+    //       same thing on both the native plugin side and on the Wine plugin
+    //       host
+    assert(inputs.size() <= input_pointers.size() &&
+           outputs.size() <= output_pointers.size());
+    for (size_t bus = 0; bus < inputs.size(); bus++) {
+        inputs[bus].channelBuffers32 =
+            reinterpret_cast<float**>(input_pointers[bus].data());
+    }
+    for (size_t bus = 0; bus < outputs.size(); bus++) {
+        outputs[bus].channelBuffers32 =
+            reinterpret_cast<float**>(output_pointers[bus].data());
     }
 
-    reconstructed_process_data.inputs = inputs_audio_bus_buffers.data();
+    reconstructed_process_data.inputs = inputs.data();
+    reconstructed_process_data.outputs = outputs.data();
 
-    // We'll do the same with with the outputs, but we'll first have to
-    // initialize zeroed out buffers for the plugin to work with since we didn't
-    // serialize those directly
-    outputs.resize(outputs_num_channels.size());
-    outputs_audio_bus_buffers.resize(outputs_num_channels.size());
-    for (size_t i = 0; i < outputs_num_channels.size(); i++) {
-        outputs[i].clear(symbolic_sample_size, num_samples,
-                         outputs_num_channels[i]);
-        outputs[i].reconstruct(outputs_audio_bus_buffers[i]);
-    }
-
-    reconstructed_process_data.outputs = outputs_audio_bus_buffers.data();
     reconstructed_process_data.inputParameterChanges = &input_parameter_changes;
 
-    if (output_parameter_changes_supported) {
-        if (!output_parameter_changes) {
-            output_parameter_changes.emplace();
-        }
+    if (output_parameter_changes) {
         output_parameter_changes->clear();
         reconstructed_process_data.outputParameterChanges =
             &*output_parameter_changes;
@@ -245,10 +176,7 @@ Steinberg::Vst::ProcessData& YaProcessData::reconstruct() {
         reconstructed_process_data.inputEvents = nullptr;
     }
 
-    if (output_events_supported) {
-        if (!output_events) {
-            output_events.emplace();
-        }
+    if (output_events) {
         output_events->clear();
         reconstructed_process_data.outputEvents = &*output_events;
     } else {
@@ -265,27 +193,39 @@ Steinberg::Vst::ProcessData& YaProcessData::reconstruct() {
 }
 
 YaProcessData::Response& YaProcessData::create_response() noexcept {
-    // NOTE: We _have_ to manually copy over the silence flags from the
-    //       `ProcessData` object generated in `get()` here sicne these of
-    //       course are not references or pointers like all other fields, so
-    //       they're not implicitly copied like all of our other fields
-    //
-    //       On the plugin side this is not necessary, but it also doesn't hurt
-    for (int i = 0; i < reconstructed_process_data.numOutputs; i++) {
-        outputs[i].silence_flags =
-            reconstructed_process_data.outputs[i].silenceFlags;
-    }
-
     // NOTE: We return an object that only contains references to these original
     //       fields to avoid any copies or moves
     return response_object;
 }
 
 void YaProcessData::write_back_outputs(
-    Steinberg::Vst::ProcessData& process_data) {
+    Steinberg::Vst::ProcessData& process_data,
+    const AudioShmBuffer& shared_audio_buffers) {
     assert(static_cast<int32>(outputs.size()) == process_data.numOutputs);
-    for (int i = 0; i < process_data.numOutputs; i++) {
-        outputs[i].write_back_outputs(process_data.outputs[i]);
+    for (int bus = 0; bus < process_data.numOutputs; bus++) {
+        process_data.outputs[bus].silenceFlags = outputs[bus].silenceFlags;
+
+        // NOTE: Some hosts, like Carla, provide more output channels than what
+        //       the plugin wants. We'll have already capped
+        //       `outputs[bus].numChannels` to the number of channels requested
+        //       by the plugin during `YaProcessData::repopulate()`.
+        for (int channel = 0; channel < outputs[bus].numChannels; channel++) {
+            // We copy the output audio for every bus from the shared memory
+            // object back to the buffer provided by the host
+            if (process_data.symbolicSampleSize == Steinberg::Vst::kSample64) {
+                std::copy_n(
+                    shared_audio_buffers.output_channel_ptr<double>(bus,
+                                                                    channel),
+                    process_data.numSamples,
+                    process_data.outputs[bus].channelBuffers64[channel]);
+            } else {
+                std::copy_n(
+                    shared_audio_buffers.output_channel_ptr<float>(bus,
+                                                                   channel),
+                    process_data.numSamples,
+                    process_data.outputs[bus].channelBuffers32[channel]);
+            }
+        }
     }
 
     if (output_parameter_changes && process_data.outputParameterChanges) {
diff --git a/src/common/serialization/vst3/process-data.h b/src/common/serialization/vst3/process-data.h
index b0ddc54c..524fbb12 100644
--- a/src/common/serialization/vst3/process-data.h
+++ b/src/common/serialization/vst3/process-data.h
@@ -20,6 +20,7 @@
 
 #include <pluginterfaces/vst/ivstaudioprocessor.h>
 
+#include "../../audio-shm.h"
 #include "../../bitsery/ext/in-place-optional.h"
 #include "../../bitsery/ext/in-place-variant.h"
 #include "base.h"
@@ -28,114 +29,18 @@
 
 // This header provides serialization wrappers around `ProcessData`
 
-/**
- * A serializable wrapper around `AudioBusBuffers` back by `std::vector<T>`s.
- * Data can be read from a `AudioBusBuffers` object provided by the host, and
- * one the Wine plugin host side we can reconstruct the `AudioBusBuffers` object
- * back from this object again.
- *
- * @see YaProcessData
- */
-class alignas(16) YaAudioBusBuffers {
-   public:
-    /**
-     * We only provide a default constructor here, because we need to fill the
-     * existing object with new audio data every processing cycle to avoid
-     * reallocating a new object every time.
-     */
-    YaAudioBusBuffers() noexcept;
-
-    /**
-     * Create a new, zero initialize audio bus buffers object. Used to
-     * reconstruct the output buffers during `YaProcessData::reconstruct()`.
-     */
-    void clear(int32 sample_size, size_t num_samples, size_t num_channels);
-
-    /**
-     * Copy data from a host provided `AudioBusBuffers` object during a process
-     * call. Used in `YaProcessData::repopulate()`. Since `AudioBusBuffers`
-     * contains an untagged union for storing single and double precision
-     * floating point values, the original `ProcessData`'s `symbolicSampleSize`
-     * field determines which variant of that union to use. Similarly the
-     * `ProcessData`' `numSamples` field determines the extent of these arrays.
-     */
-    void repopulate(int32 sample_size,
-                    int32 num_samples,
-                    const Steinberg::Vst::AudioBusBuffers& data);
-
-    /**
-     * Reconstruct the original `AudioBusBuffers` object passed to the
-     * constructor and return it. This is used as part of
-     * `YaProcessData::reconstruct()`. The object contains pointers to
-     * `buffers`, so it may not outlive this object.
-     *
-     * NOTE: The `silenceFlags` field is of course not a reference, so writing
-     *       to that will not modify `silence_flags`.
-     */
-    void reconstruct(Steinberg::Vst::AudioBusBuffers& reconstructed_buffers);
-
-    /**
-     * Return the number of channels in `buffers`. Only used for debug logs.
-     */
-    size_t num_channels() const;
-
-    /**
-     * Write these buffers and the silence flag back to an `AudioBusBuffers
-     * object provided by the host.
-     */
-    void write_back_outputs(
-        Steinberg::Vst::AudioBusBuffers& output_buffers) const;
-
-    template <typename S>
-    void serialize(S& s) {
-        s.value8b(silence_flags);
-        s.ext(buffers, bitsery::ext::InPlaceVariant{
-                           [](S& s, std::vector<std::vector<float>>& buffers) {
-                               s.container(buffers, max_num_speakers,
-                                           [](S& s, auto& channel) {
-                                               s.container4b(channel, 1 << 16);
-                                           });
-                           },
-                           [](S& s, std::vector<std::vector<double>>& buffers) {
-                               s.container(buffers, max_num_speakers,
-                                           [](S& s, auto& channel) {
-                                               s.container8b(channel, 1 << 16);
-                                           });
-                           },
-                       });
-    }
-
-    /**
-     * A bitfield for silent channels copied directly from the input struct.
-     *
-     * We could have done some optimizations to avoid unnecessary copying when
-     * these silence flags are set, but since it's an optional feature we
-     * shouldn't risk it.
-     */
-    uint64 silence_flags = 0;
-
-   private:
-    /**
-     * We need these during the reconstruction process to provide a pointer to
-     * an array of pointers to the actual buffers.
-     */
-    std::vector<void*> buffer_pointers;
-
-    /**
-     * The original implementation uses heap arrays and it stores a
-     * {float,double} array pointer per channel, with a separate field for the
-     * number of channels. We'll store this using a vector of vectors.
-     */
-    std::variant<std::vector<std::vector<float>>,
-                 std::vector<std::vector<double>>>
-        buffers;
-};
-
 /**
  * A serializable wrapper around `ProcessData`. We'll read all information from
  * the host so we can serialize it and provide an equivalent `ProcessData`
- * struct to the plugin. Then we can create a `YaProcessData::Response` object
- * that contains all output values so we can write those back to the host.
+ * struct to the Windows VST3 plugin. Then we can create a
+ * `YaProcessData::Response` object that contains all output values so we can
+ * write those back to the host.
+ *
+ * As an optimization, this no longer stores any actual audio. Instead, both
+ * `Vst3PluginProxyImpl` and `Vst3Bridge::InstanceInterfaces` contain a shared
+ * memory object that stores the audio buffers used for the plugin instance.
+ * The `YaProcessData` object is then sent alongside that shared memory object
+ * with only auxiliary information. This prevents a lot of unnecessary copies.
  *
  * Be sure to double check how `YaProcessData::Response` is used. We do some
  * pointer tricks there to avoid copies and moves when serializing the results
@@ -157,21 +62,45 @@ class YaProcessData {
      * original `ProcessData` object. This will avoid allocating unless it's
      * absolutely necessary (e.g. when we receive more parameter changes than
      * we've received in previous calls).
+     *
+     * During this process the input audio will be written to
+     * `shared_audio_buffers`. There's no direct link between this
+     * `YaProcessData` object and those buffers, but they should be used as a
+     * pair. This is a bit ugly, but optimizations sadly never made code
+     * prettier.
      */
-    void repopulate(const Steinberg::Vst::ProcessData& process_data);
+    void repopulate(const Steinberg::Vst::ProcessData& process_data,
+                    AudioShmBuffer& shared_audio_buffers);
 
     /**
      * Reconstruct the original `ProcessData` object passed to the constructor
      * and return it. This is used in the Wine plugin host when processing an
      * `IAudioProcessor::process()` call.
+     *
+     * Because the actual audio is stored in an `AudioShmBuffer` outside of this
+     * object, we need to make sure that the `AudioBusBuffers` objects we're
+     * using point to the correct buffer even after a resize. To make it more
+     * difficult for us to mess this up, we'll store those bus-channel pointers
+     * in `Vst3Bridge::InstanceInterfaces` and we'll point the pointers in our
+     * `inputs` and `outputs` fields directly to those pointers. They will have
+     * been set up during `IAudioProcessor::setupProcessing()`.
+     *
+     * These can be either float or double pointers. Since a pointer is a
+     * pointer and they're stored using a union the actual type doesn't matter,
+     * but we'll accept these as void pointers since the stride will be
+     * different depending on whether the host is going to be sending double or
+     * single precision audio.
      */
-    Steinberg::Vst::ProcessData& reconstruct();
+    Steinberg::Vst::ProcessData& reconstruct(
+        std::vector<std::vector<void*>>& input_pointers,
+        std::vector<std::vector<void*>>& output_pointers);
 
     /**
      * A serializable wrapper around the output fields of `ProcessData`, so we
      * only have to copy the information back that's actually important. These
      * fields are pointers to the corresponding fields in `YaProcessData`. On
      * the plugin side this information can then be written back to the host.
+     * The actual output audio is stored in the shared memory object.
      *
      * HACK: All of this is an optimization to avoid unnecessarily copying or
      *       moving and reallocating. Directly serializing and deserializing
@@ -183,7 +112,8 @@ class YaProcessData {
     struct Response {
         // We store raw pointers instead of references so we can default
         // initialize this object during deserialization
-        std::vector<YaAudioBusBuffers>* outputs = nullptr;
+        boost::container::small_vector_base<Steinberg::Vst::AudioBusBuffers>*
+            outputs = nullptr;
         std::optional<YaParameterChanges>* output_parameter_changes = nullptr;
         std::optional<YaEventList>* output_events = nullptr;
 
@@ -218,20 +148,31 @@ class YaProcessData {
 
     /**
      * Write all of this output data back to the host's `ProcessData` object.
+     * During this process we'll also write the output audio from the
+     * corresponding shared memory audio buffers back.
      */
-    void write_back_outputs(Steinberg::Vst::ProcessData& process_data);
+    void write_back_outputs(Steinberg::Vst::ProcessData& process_data,
+                            const AudioShmBuffer& shared_audio_buffers);
 
     template <typename S>
     void serialize(S& s) {
         s.value4b(process_mode);
         s.value4b(symbolic_sample_size);
         s.value4b(num_samples);
+
+        // Both of these fields only store metadata. The actual audio is sent
+        // using an accompanying `AudioShmBuffer` object.
         s.container(inputs, max_num_speakers);
-        s.container4b(outputs_num_channels, max_num_speakers);
+        s.container(outputs, max_num_speakers);
+
+        // The output parameter changes and events will remain empty on the
+        // plugin side, so by serializing them we merely indicate to the Wine
+        // plugin host whether the host supports them or not
         s.object(input_parameter_changes);
-        s.value1b(output_parameter_changes_supported);
+        s.ext(output_parameter_changes, bitsery::ext::InPlaceOptional{});
         s.ext(input_events, bitsery::ext::InPlaceOptional{});
-        s.value1b(output_events_supported);
+        s.ext(output_events, bitsery::ext::InPlaceOptional{});
+
         s.ext(process_context, bitsery::ext::InPlaceOptional{});
 
         // We of course won't serialize the `reconstructed_process_data` and all
@@ -260,18 +201,20 @@ class YaProcessData {
     int32 num_samples;
 
     /**
-     * In `ProcessData` they use C-style heap arrays, so they have to store the
-     * number of input/output busses, and then also store pointers to the first
-     * audio buffer object. We can combine these two into vectors.
+     * This contains metadata about the input buffers for every bus. During
+     * `reconstruct()` the channel pointers contained within these objects will
+     * be set to point to our shared memory surface that holds the actual audio
+     * data.
      */
-    std::vector<YaAudioBusBuffers> inputs;
+    boost::container::small_vector<Steinberg::Vst::AudioBusBuffers, 8> inputs;
 
     /**
-     * For the outputs we only have to keep track of how many output channels
-     * each bus has. From this and from `num_samples` we can reconstruct the
-     * output buffers on the Wine side of the process call.
+     * This contains metadata about the output buffers for every bus. During
+     * `reconstruct()` the channel pointers contained within these objects will
+     * be set to point to our shared memory surface that holds the actual audio
+     * data.
      */
-    std::vector<int32> outputs_num_channels;
+    boost::container::small_vector<Steinberg::Vst::AudioBusBuffers, 8> outputs;
 
     /**
      * Incoming parameter changes.
@@ -279,10 +222,10 @@ class YaProcessData {
     YaParameterChanges input_parameter_changes;
 
     /**
-     * Whether the host supports output parameter changes (depending on whether
-     * `outputParameterChanges` was a null pointer or not).
+     * If the host supports it, this will allow the plugin to output parameter
+     * changes. Otherwise we'll also pass a null pointer to the plugin.
      */
-    bool output_parameter_changes_supported;
+    std::optional<YaParameterChanges> output_parameter_changes;
 
     /**
      * Incoming events.
@@ -290,10 +233,11 @@ class YaProcessData {
     std::optional<YaEventList> input_events;
 
     /**
-     * Whether the host supports output events (depending on whether
-     * `outputEvents` was a null pointer or not).
+     * If the host supports it, this will allow the plugin to output events,
+     * such as note events. Otherwise we'll also pass a null pointer to the
+     * plugin.
      */
-    bool output_events_supported;
+    std::optional<YaEventList> output_events;
 
     /**
      * Some more information about the project and transport.
@@ -301,29 +245,6 @@ class YaProcessData {
     std::optional<Steinberg::Vst::ProcessContext> process_context;
 
    private:
-    // These are the same fields as in `YaProcessData::Response`. We'll generate
-    // these as part of creating `reconstructed_process_data`, and they will be
-    // referred to in the response object created in `create_response()`
-
-    /**
-     * The outputs. Will be created based on `outputs_num_channels` (which
-     * determines how many output busses there are and how many channels each
-     * bus has) and `num_samples`.
-     */
-    std::vector<YaAudioBusBuffers> outputs;
-
-    /**
-     * The output parameter changes. Will be initialized depending on
-     * `output_parameter_changes_supported`.
-     */
-    std::optional<YaParameterChanges> output_parameter_changes;
-
-    /**
-     * The output events. Will be initialized depending on
-     * `output_events_supported`.
-     */
-    std::optional<YaEventList> output_events;
-
     // These last few members are used on the Wine plugin host side to
     // reconstruct the original `ProcessData` object. Here we also initialize
     // these `output*` fields so the Windows VST3 plugin can write to them
@@ -341,23 +262,6 @@ class YaProcessData {
      */
     Response response_object;
 
-    /**
-     * Obtained by calling `.get()` on every `YaAudioBusBuffers` object in
-     * `intputs`. These objects contain pointers to the data in `inputs` and may
-     * thus not outlive them.
-     */
-    std::vector<Steinberg::Vst::AudioBusBuffers> inputs_audio_bus_buffers;
-
-    /**
-     * Obtained by calling `.get()` on every `YaAudioBusBuffers` object in
-     * `outputs`. These objects contain pointers to the data in `outputs` and
-     * may thus not outlive them. These are created in a two step process, since
-     * we first have to create `outputs` from `outputs_num_channels` before we
-     * can transform it into a structure the Windows VST3 plugin can work with.
-     * Hooray for heap arrays.
-     */
-    std::vector<Steinberg::Vst::AudioBusBuffers> outputs_audio_bus_buffers;
-
     /**
      * The process data we reconstruct from the other fields during `get()`.
      */
@@ -366,6 +270,14 @@ class YaProcessData {
 
 namespace Steinberg {
 namespace Vst {
+template <typename S>
+void serialize(S& s, Steinberg::Vst::AudioBusBuffers& buffers) {
+    // Only the bus metadata is serialized. We don't touch the audio pointers,
+    // those should point into the corresponding `AudioShmBuffer` object.
+    s.value4b(buffers.numChannels);
+    s.value8b(buffers.silenceFlags);
+}
+
 template <typename S>
 void serialize(S& s, Steinberg::Vst::ProcessContext& process_context) {
     // The docs don't mention that things ever got added to this context (and
diff --git a/src/plugin/bridges/vst3-impls/plugin-proxy.cpp b/src/plugin/bridges/vst3-impls/plugin-proxy.cpp
index 6fd8f000..29cb176a 100644
--- a/src/plugin/bridges/vst3-impls/plugin-proxy.cpp
+++ b/src/plugin/bridges/vst3-impls/plugin-proxy.cpp
@@ -182,11 +182,19 @@ uint32 PLUGIN_API Vst3PluginProxyImpl::getLatencySamples() {
 
 tresult PLUGIN_API
 Vst3PluginProxyImpl::setupProcessing(Steinberg::Vst::ProcessSetup& setup) {
-    // TOOD: Set up the shared audio buffers next
-    return bridge
-        .send_audio_processor_message(YaAudioProcessor::SetupProcessing{
-            .instance_id = instance_id(), .setup = setup})
-        .result;
+    const YaAudioProcessor::SetupProcessingResponse response =
+        bridge.send_audio_processor_message(YaAudioProcessor::SetupProcessing{
+            .instance_id = instance_id(), .setup = setup});
+
+    // We have now set up the shared audio buffers on the Wine side, and we'll
+    // be able to connect to them by using the same audio configuration
+    if (!process_buffers) {
+        process_buffers.emplace(response.audio_buffers_config);
+    } else {
+        process_buffers->resize(response.audio_buffers_config);
+    }
+
+    return response.result;
 }
 
 tresult PLUGIN_API Vst3PluginProxyImpl::setProcessing(TBool state) {
@@ -220,9 +228,12 @@ Vst3PluginProxyImpl::process(Steinberg::Vst::ProcessData& data) {
         last_audio_thread_priority_synchronization = now;
     }
 
-    // We reuse this existing object to avoid allocations
+    // We reuse this existing object to avoid allocations.
+    // `YaProcessData::repopulate()` will write the input audio to the shared
+    // audio buffers, so they're not stored within the request object itself.
+    assert(process_buffers);
     process_request.instance_id = instance_id();
-    process_request.data.repopulate(data);
+    process_request.data.repopulate(data, *process_buffers);
     process_request.new_realtime_priority = new_realtime_priority;
 
     // HACK: This is a bit ugly. This `YaProcessData::Response` object actually
@@ -245,7 +256,11 @@ Vst3PluginProxyImpl::process(Steinberg::Vst::ProcessData& data) {
         MessageReference<YaAudioProcessor::Process>(process_request),
         process_response);
 
-    process_request.data.write_back_outputs(data);
+    // At this point the shared audio buffers should contain the output audio,
+    // so we'll write that back to the host along with any metadata (which in
+    // practice are only the silence flags), as well as any output parameter
+    // changes and events
+    process_request.data.write_back_outputs(data, *process_buffers);
 
     return process_response.result;
 }
diff --git a/src/plugin/bridges/vst3-impls/plugin-proxy.h b/src/plugin/bridges/vst3-impls/plugin-proxy.h
index dd56c612..7b7118e3 100644
--- a/src/plugin/bridges/vst3-impls/plugin-proxy.h
+++ b/src/plugin/bridges/vst3-impls/plugin-proxy.h
@@ -438,14 +438,16 @@ class Vst3PluginProxyImpl : public Vst3PluginProxy {
     std::atomic_size_t current_context_menu_id;
 
     /**
-     * NOTE: We'll reuse the request objects for the audio processor so we can
-     *       keep the process data object (which contains vectors and other heap
-     *       allocated data structure) alive. We'll then just fill this object
-     *       with new data every processing cycle to prevent allocations. Then,
-     *       we pass a `MessageReference<YaAudioProcessor::Process>` to our
-     *       sockets. This together with `bitisery::ext::MessageReference` will
-     *       let us serialize from and to existing objects without having to
-     *       copy or reallocate them.
+     * We'll reuse the request objects for the audio processor so we can keep
+     * the process data object (which contains vectors and other heap allocated
+     * data structures) alive. We'll then just fill this object with new data
+     * every processing cycle to prevent allocations. Then, we pass a
+     * `MessageReference<YaAudioProcessor::Process>` to our sockets. This
+     * together with `bitsery::ext::MessageReference` will let us serialize
+     * from and to existing objects without having to copy or reallocate them.
+     *
+     * To reduce the amount of copying during audio processing we'll write the
+     * audio data to a shared memory object stored in `process_buffers` first.
      */
     YaAudioProcessor::Process process_request;
 
@@ -456,6 +458,17 @@ class Vst3PluginProxyImpl : public Vst3PluginProxy {
      */
     YaAudioProcessor::ProcessResponse process_response;
 
+    /**
+     * A shared memory object to share audio buffers between the native plugin
+     * and the Wine plugin host. Copying audio is the most significant source of
+     * bridging overhead during audio processing, and this way we can reduce the
+     * number of copies required to only one for the input audio, and one more
+     * copy when copying the results back to the host.
+     *
+     * This will be set up during `IAudioProcessor::setupProcessing()`.
+     */
+    std::optional<AudioShmBuffer> process_buffers;
+
     // Caches
 
     /**
diff --git a/src/wine-host/bridges/vst3.cpp b/src/wine-host/bridges/vst3.cpp
index e7fbb062..a4e48ea5 100644
--- a/src/wine-host/bridges/vst3.cpp
+++ b/src/wine-host/bridges/vst3.cpp
@@ -1149,6 +1149,127 @@ size_t Vst3Bridge::generate_instance_id() noexcept {
     return current_instance_id.fetch_add(1);
 }
 
+AudioShmBuffer::Config Vst3Bridge::setup_shared_audio_buffers(
+    size_t instance_id,
+    const Steinberg::Vst::ProcessSetup& setup) {
+    const Steinberg::IPtr<Steinberg::Vst::IComponent> component =
+        object_instances[instance_id].component;
+    const Steinberg::IPtr<Steinberg::Vst::IAudioProcessor> audio_processor =
+        object_instances[instance_id].audio_processor;
+    assert(component && audio_processor);
+
+    // We'll query the plugin for its audio bus layouts, and then calculate
+    // the offsets in a large memory buffer for the different audio channels.
+    // The offsets for each audio channel are in samples (since they'll be
+    // used with pointer arithmetic in `AudioShmBuffer`).
+    uint32_t current_offset = 0;
+
+    auto create_bus_offsets = [&](Steinberg::Vst::BusDirection direction) {
+        const auto num_busses =
+            component->getBusCount(Steinberg::Vst::kAudio, direction);
+
+        std::vector<std::vector<uint32_t>> bus_offsets(num_busses);
+        for (int bus = 0; bus < num_busses; bus++) {
+            Steinberg::Vst::SpeakerArrangement speaker_arrangement{};
+            audio_processor->getBusArrangement(direction, bus,
+                                               speaker_arrangement);
+
+            // Every set bit in the arrangement is one channel. The bitset's
+            // size is in bits, so it needs to span the entire integer type.
+            const size_t num_channels =
+                std::bitset<sizeof(Steinberg::Vst::SpeakerArrangement) * 8>(
+                    speaker_arrangement)
+                    .count();
+            bus_offsets[bus].resize(num_channels);
+
+            for (size_t channel = 0; channel < num_channels; channel++) {
+                bus_offsets[bus][channel] = current_offset;
+                current_offset += setup.maxSamplesPerBlock;
+            }
+        }
+
+        return bus_offsets;
+    };
+
+    // Creating the offsets for every channel in every bus advances
+    // `current_offset` so it always points to the start of the next channel
+    std::vector<std::vector<uint32_t>> input_bus_offsets =
+        create_bus_offsets(Steinberg::Vst::kInput);
+    std::vector<std::vector<uint32_t>> output_bus_offsets =
+        create_bus_offsets(Steinberg::Vst::kOutput);
+
+    // The size of the buffer is in bytes, and it will depend on whether the
+    // host is going to pass 32-bit or 64-bit audio to the plugin
+    const bool double_precision =
+        setup.symbolicSampleSize == Steinberg::Vst::kSample64;
+    const uint32_t buffer_size =
+        current_offset * (double_precision ? sizeof(double) : sizeof(float));
+
+    // We'll set up these shared memory buffers on the Wine side first, and then
+    // when this request returns we'll do the same thing on the native plugin
+    // side
+    AudioShmBuffer::Config buffer_config{
+        .name = sockets.base_dir.filename().string() + "-" +
+                std::to_string(instance_id),
+        .size = buffer_size,
+        .input_offsets = std::move(input_bus_offsets),
+        .output_offsets = std::move(output_bus_offsets)};
+
+    std::optional<AudioShmBuffer>& process_buffers =
+        object_instances[instance_id].process_buffers;
+    if (!process_buffers) {
+        process_buffers.emplace(buffer_config);
+    } else {
+        process_buffers->resize(buffer_config);
+    }
+
+    // After setting up the shared memory buffer, we need to create a vector of
+    // channel audio pointers for every bus. `YaProcessData::reconstruct()`
+    // then assigns these to the `AudioBusBuffers` objects in the `ProcessData`
+    // struct before it is passed to `IAudioProcessor::process()`.
+    auto set_bus_pointers =
+        [&]<std::invocable<uint32_t, uint32_t> F>(
+            std::vector<std::vector<void*>>& bus_pointers,
+            const std::vector<std::vector<uint32_t>>& bus_offsets,
+            F&& get_channel_pointer) {
+            bus_pointers.resize(bus_offsets.size());
+
+            for (size_t bus = 0; bus < bus_offsets.size(); bus++) {
+                bus_pointers[bus].resize(bus_offsets[bus].size());
+
+                for (size_t channel = 0; channel < bus_offsets[bus].size();
+                     channel++) {
+                    bus_pointers[bus][channel] =
+                        get_channel_pointer(bus, channel);
+                }
+            }
+        };
+
+    set_bus_pointers(
+        object_instances[instance_id].process_buffers_input_pointers,
+        process_buffers->config.input_offsets,
+        [&](uint32_t bus, uint32_t channel) -> void* {
+            if (double_precision) {
+                return process_buffers->input_channel_ptr<double>(bus, channel);
+            } else {
+                return process_buffers->input_channel_ptr<float>(bus, channel);
+            }
+        });
+    set_bus_pointers(
+        object_instances[instance_id].process_buffers_output_pointers,
+        process_buffers->config.output_offsets,
+        [&](uint32_t bus, uint32_t channel) -> void* {
+            if (double_precision) {
+                return process_buffers->output_channel_ptr<double>(bus,
+                                                                   channel);
+            } else {
+                return process_buffers->output_channel_ptr<float>(bus, channel);
+            }
+        });
+
+    return buffer_config;
+}
+
 size_t Vst3Bridge::register_object_instance(
     Steinberg::IPtr<Steinberg::FUnknown> object) {
     std::lock_guard lock(object_instances_mutex);
@@ -1223,11 +1344,20 @@ size_t Vst3Bridge::register_object_instance(
                             object_instances[request.instance_id]
                                 .audio_processor->setupProcessing(
                                     request.setup);
+
+                        // We'll set up the shared audio buffers on the Wine
+                        // side after the plugin has finished doing their setup.
+                        // This configuration can then be used on the native
+                        // plugin side to connect to the same shared audio
+                        // buffers.
+                        const AudioShmBuffer::Config audio_buffers_config =
+                            setup_shared_audio_buffers(request.instance_id,
+                                                       request.setup);
+
                         return YaAudioProcessor::SetupProcessingResponse{
                             .result = result,
-                            // TODO: Send the configuration for the shared audio
-                            //       buffers
-                            .audio_buffers_config{}};
+                            .audio_buffers_config =
+                                std::move(audio_buffers_config)};
                     },
                     [&](const YaAudioProcessor::SetProcessing& request)
                         -> YaAudioProcessor::SetProcessing::Response {
@@ -1260,10 +1390,17 @@ size_t Vst3Bridge::register_object_instance(
                                 true, *request.new_realtime_priority);
                         }
 
+                        // The actual audio is stored in the shared memory
+                        // buffers, so the reconstruction function will need to
+                        // know where it should point the `AudioBusBuffers` to
                         const tresult result =
                             object_instances[request.instance_id]
                                 .audio_processor->process(
-                                    request.data.reconstruct());
+                                    request.data.reconstruct(
+                                        object_instances[request.instance_id]
+                                            .process_buffers_input_pointers,
+                                        object_instances[request.instance_id]
+                                            .process_buffers_output_pointers));
 
                         return YaAudioProcessor::ProcessResponse{
                             .result = result,
diff --git a/src/wine-host/bridges/vst3.h b/src/wine-host/bridges/vst3.h
index a7aa90bc..94f920f3 100644
--- a/src/wine-host/bridges/vst3.h
+++ b/src/wine-host/bridges/vst3.h
@@ -139,6 +139,34 @@ struct InstanceInterfaces {
      */
     std::optional<InstancePlugView> plug_view_instance;
 
+    /**
+     * A shared memory object we'll write the input audio buffers to on the
+     * native plugin side. We'll then let the plugin write its outputs here on
+     * the Wine side. The buffer will be configured during
+     * `IAudioProcessor::setupProcessing()`. At that point we'll build the
+     * configuration for the object here, on the Wine side, and then we'll
+     * initialize the buffers using that configuration. This same configuration
+     * is then used on the native plugin side to connect to this same shared
+     * memory object for the matching plugin instance.
+     */
+    std::optional<AudioShmBuffer> process_buffers;
+
+    /**
+     * Pointers to the per-bus input channels in process_buffers so we can pass
+     * them to the plugin after a call to `YaProcessData::reconstruct()`. These
+     * can be either `float*` or `double*`, so we sadly have to use void
+     * pointers here.
+     */
+    std::vector<std::vector<void*>> process_buffers_input_pointers;
+
+    /**
+     * Pointers to the per-bus output channels in process_buffers so we can pass
+     * them to the plugin after a call to `YaProcessData::reconstruct()`. These
+     * can be either `float*` or `double*`, so we sadly have to use void
+     * pointers here.
+     */
+    std::vector<std::vector<void*>> process_buffers_output_pointers;
+
     /**
      * This instance's editor, if it has an open editor. Embedding here works
      * exactly the same as how it works for VST2 plugins.
@@ -348,6 +376,15 @@ class Vst3Bridge : public HostBridge {
      */
     size_t generate_instance_id() noexcept;
 
+    /**
+     * Sets up the shared memory audio buffers for a plugin instance and
+     * returns the configuration so the native plugin can connect to them as
+     * well.
+     */
+    AudioShmBuffer::Config setup_shared_audio_buffers(
+        size_t instance_id,
+        const Steinberg::Vst::ProcessSetup& setup);
+
     /**
      * Assign a unique identifier to an object and add it to `object_instances`.
      * This will also set up listeners for `IAudioProcessor` and `IComponent`