💥 Reduce allocations in VST3 audio sockets

We do this by using this new `MessageReference<T>` type to avoid copying our `YaAudioProcessor::Process` struct and the contained `YaProcessData` object. This is only part of the work, but this redesign lets us keep these objects alive on both the plugin and the host side. On the plugin side, we'll simply serialize the data from the referred-to object without copying it. On the Wine side, we'll write the data to a persistent thread-local object, and then reassign the `MessageReference<T>` to point to that object. This lets us serialize 'references', thus avoiding potentially expensive allocations. With these last few changes alone VST3 plugins are already at the same performance level as our optimized VST2 plugin groups.
2026-05-07 12:10:09 +02:00 · 2021-05-07 16:32:08 +02:00
parent d08ec70f2c
commit fcaac219a6
9 changed files with 159 additions and 38 deletions
@@ -19,6 +19,12 @@ Versioning](https://semver.org/spec/v2.0.0.html).
  this can significantly reduce the overhead of bridging VST3 plugins under
  those hosts.

+### Changed
+
+- Redesigned the VST3 audio socket handling to be able to reuse the process data
+  objects on both sides. This greatly reduces the overhead of our VST3 bridging
+  by getting rid of all memory allocations during audio processing.
+
 ## [3.2.0] - 2021-05-03

 ### Added
@@ -237,7 +237,12 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
                            auto [logger, is_host_vst] = *logging;
                            return logger.log_request(is_host_vst, object);
                        },
-                        request);
+                        // In the case of `AudioProcessorRequest`, we need to
+                        // actually fetch the variant field since our object
+                        // also contains a persistent object to store process
+                        // data into so we can prevent allocations during audio
+                        // processing
+                        get_request_variant(request));
                }

                // We do the visiting here using a templated lambda. This way we
@@ -258,7 +263,8 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
                            write_object(socket, response);
                        }
                    },
-                    request);
+                    // See above
+                    get_request_variant(request));
            };

        this->receive_multi(logging
@@ -442,6 +448,20 @@ class Vst3Sockets : public Sockets {
                          audio_processor_buffers.at(object.instance_id));
    }

+    /**
+     * Overload for use with `MessageReference<T>`, since we cannot
+     * directly get the instance ID there.
+     */
+    template <typename T>
+    typename T::Response send_audio_processor_message(
+        const MessageReference<T>& object_ref,
+        std::optional<std::pair<Vst3Logger&, bool>> logging) {
+        return audio_processor_sockets.at(object_ref.get().instance_id)
+            .send_message(
+                object_ref, logging,
+                audio_processor_buffers.at(object_ref.get().instance_id));
+    }
+
    /**
     * For sending messages from the host to the plugin. After we have a better
     * idea of what our communication model looks like we'll probably want to
@@ -992,13 +992,15 @@ bool Vst3Logger::log_request(bool is_host_vst,
    });
 }

-bool Vst3Logger::log_request(bool is_host_vst,
-                             const YaAudioProcessor::Process& request) {
+bool Vst3Logger::log_request(
+    bool is_host_vst,
+    const MessageReference<YaAudioProcessor::Process>& request_wrapper) {
    return log_request_base(
        is_host_vst, Logger::Verbosity::all_events, [&](auto& message) {
            // This is incredibly verbose, but if you're really a plugin that
            // handles processing in a weird way you're going to need all of
            // this
+            const YaAudioProcessor::Process& request = request_wrapper.get();

            std::ostringstream num_input_channels;
            num_input_channels << "[";
@@ -191,7 +191,8 @@ class Vst3Logger {
    bool log_request(bool is_host_vst,
                     const YaAudioProcessor::SetupProcessing&);
    bool log_request(bool is_host_vst, const YaAudioProcessor::SetProcessing&);
-    bool log_request(bool is_host_vst, const YaAudioProcessor::Process&);
+    bool log_request(bool is_host_vst,
+                     const MessageReference<YaAudioProcessor::Process>&);
    bool log_request(bool is_host_vst, const YaAudioProcessor::GetTailSamples&);
    bool log_request(bool is_host_vst,
                     const YaComponent::GetControllerClassId&);
@@ -20,6 +20,7 @@

 #include <bitsery/ext/std_variant.h>

+#include "../bitsery/ext/message-reference.h"
 #include "../configuration.h"
 #include "../utils.h"
 #include "common.h"
@@ -153,31 +154,78 @@ void serialize(S& s, ControlRequest& payload) {
 * A subset of all functions a host can call on a plugin. These functions are
 * called from a hot loop every processing cycle, so we want a dedicated socket
 * for these for every plugin instance.
+ *
+ * We use a separate struct for this so we can keep the
+ * `YaAudioProcessor::Process` object, which also contains the entire audio
+ * processing data struct, alive as a thread local static object on the Wine
+ * side, and as a regular field in `Vst3PluginProxyImpl` on the plugin side. In
+ * our variant we then store a `MessageReference<T>` that points to this object,
+ * and we'll do some magic to be able to serialize and deserialize this object
+ * without needing to create copies. See `MessageReference<T>` and
+ * `bitsery::ext::MessageReference<T>` for more information.
 */
-using AudioProcessorRequest =
-    std::variant<YaAudioProcessor::SetBusArrangements,
-                 YaAudioProcessor::GetBusArrangement,
-                 YaAudioProcessor::CanProcessSampleSize,
-                 YaAudioProcessor::GetLatencySamples,
-                 YaAudioProcessor::SetupProcessing,
-                 YaAudioProcessor::SetProcessing,
-                 YaAudioProcessor::Process,
-                 YaAudioProcessor::GetTailSamples,
-                 YaComponent::GetControllerClassId,
-                 YaComponent::SetIoMode,
-                 YaComponent::GetBusCount,
-                 YaComponent::GetBusInfo,
-                 YaComponent::GetRoutingInfo,
-                 YaComponent::ActivateBus,
-                 YaComponent::SetActive,
-                 YaPrefetchableSupport::GetPrefetchableSupport>;
+struct AudioProcessorRequest {
+    AudioProcessorRequest() {}

-template <typename S>
-void serialize(S& s, AudioProcessorRequest& payload) {
-    // All of the objects in `AudioProcessorRequest` should have their own
-    // serialization function.
-    s.ext(payload, bitsery::ext::StdVariant{});
-}
+    /**
+     * Initialize the variant with an object. In `Vst3Sockets::send_message()`
+     * the object gets implicitly converted to this variant.
+     */
+    template <typename T>
+    AudioProcessorRequest(T request) : payload(std::move(request)) {}
+
+    using Payload =
+        std::variant<YaAudioProcessor::SetBusArrangements,
+                     YaAudioProcessor::GetBusArrangement,
+                     YaAudioProcessor::CanProcessSampleSize,
+                     YaAudioProcessor::GetLatencySamples,
+                     YaAudioProcessor::SetupProcessing,
+                     YaAudioProcessor::SetProcessing,
+                     // The actual value for this will be stored in the
+                     // `process_request` field. That way we don't have to
+                     // destroy the object (and deallocate all vectors in it) on
+                     // the Wine side during every processing cycle.
+                     MessageReference<YaAudioProcessor::Process>,
+                     YaAudioProcessor::GetTailSamples,
+                     YaComponent::GetControllerClassId,
+                     YaComponent::SetIoMode,
+                     YaComponent::GetBusCount,
+                     YaComponent::GetBusInfo,
+                     YaComponent::GetRoutingInfo,
+                     YaComponent::ActivateBus,
+                     YaComponent::SetActive,
+                     YaPrefetchableSupport::GetPrefetchableSupport>;
+
+    Payload payload;
+
+    template <typename S>
+    void serialize(S& s) {
+        s.ext(
+            payload,
+            bitsery::ext::StdVariant{
+                [&](S& s,
+                    MessageReference<YaAudioProcessor::Process>& request_ref) {
+                    // When serializing this reference we'll read the data
+                    // directly from the referred to object. During
+                    // deserializing we'll deserialize into the persistent and
+                    // thread local `process_request` object (see
+                    // `Vst3Sockets::add_audio_processor_and_listen`) and then
+                    // reassign the reference to point to that object.
+                    s.ext(request_ref,
+                          bitsery::ext::MessageReference(process_request));
+                },
+                [](S& s, auto& request) { s.object(request); }});
+    }
+
+    /**
+     * Used for deserializing the `MessageReference<YaAudioProcessor::Process>`
+     * variant. When we encounter this variant, we'll actually deserialize the
+     * object into this object, and we'll then reassign the reference to point
+     * to this object. That way we can keep it around as a thread local object
+     * to prevent unnecessary allocations.
+     */
+    std::optional<YaAudioProcessor::Process> process_request;
+};

 /**
 * When we do a callback from the Wine VST host to the plugin, this encodes the
@@ -222,3 +270,26 @@ void serialize(S& s, CallbackRequest& payload) {
    // serialization function.
    s.ext(payload, bitsery::ext::StdVariant{});
 }
+
+/**
+ * Get the actual variant for a request. We need a function for this to be able
+ * to handle composite types, like `AudioProcessorRequest` that use
+ * `MessageReference` to be able to store persistent objects in the message
+ * variant.
+ */
+template <typename... Ts>
+std::variant<Ts...>& get_request_variant(std::variant<Ts...>& request) {
+    return request;
+}
+
+/**
+ * Fetch the `std::variant<>` from an audio processor request object. This will
+ * let us use our regular, simple function call dispatch code, but we can still
+ * store the process data in a separate field (to reduce allocations).
+ *
+ * @overload
+ */
+inline AudioProcessorRequest::Payload& get_request_variant(
+    AudioProcessorRequest& request) {
+    return request.payload;
+}
@@ -281,6 +281,9 @@ YaProcessDataResponse YaProcessData::move_outputs_to_response() {
    //       `ProcessData` object generated in `get()` here since these of
    //       course are not references or pointers like all other fields, so
    //       they're not implicitly copied like all of our other fields
+    // FIXME: Instead of moving, the `YaProcessDataResponse` should be an
+    //        (optional) field. Moving defeats the point of us trying to reuse
+    //        these objects.
    for (int i = 0; i < reconstructed_process_data.numOutputs; i++) {
        outputs[i].silence_flags =
            reconstructed_process_data.outputs[i].silenceFlags;
@@ -216,13 +216,12 @@ Vst3PluginProxyImpl::process(Steinberg::Vst::ProcessData& data) {
    }

    // We reuse this existing object to avoid allocations
-    process_data.repopulate(data);
+    process_request.instance_id = instance_id();
+    process_request.data.repopulate(data);
+    process_request.new_realtime_priority = new_realtime_priority;

-    ProcessResponse response =
-        bridge.send_audio_processor_message(YaAudioProcessor::Process{
-            .instance_id = instance_id(),
-            .data = process_data,
-            .new_realtime_priority = new_realtime_priority});
+    ProcessResponse response = bridge.send_audio_processor_message(
+        MessageReference<YaAudioProcessor::Process>(process_request));

    response.output_data.write_back_outputs(data);

@@ -478,10 +478,16 @@ class Vst3PluginProxyImpl : public Vst3PluginProxy {
    std::atomic_size_t current_context_menu_id;

    /**
-     * We'll reuse this process data object and simply fill the objects
-     * contained with new data to avoid allocations during audio processing.
+     * NOTE: We'll reuse the request objects for the audio processor so we can
+     *       keep the process data object (which contains vectors and other heap
+     *       allocated data structures) alive. We'll then just fill this object
+     *       with new data every processing cycle to prevent allocations. Then,
+     *       we pass a `MessageReference<YaAudioProcessor::Process>` to our
+     *       sockets. This together with `bitsery::ext::MessageReference` will
+     *       let us serialize from and to existing objects without having to
+     *       copy or reallocate them.
     */
-    YaProcessData process_data;
+    YaAudioProcessor::Process process_request;

    // Caches

@@ -1235,8 +1235,18 @@ size_t Vst3Bridge::register_object_instance(
                        return object_instances[request.instance_id]
                            .audio_processor->setProcessing(request.state);
                    },
-                    [&](YaAudioProcessor::Process& request)
+                    [&](MessageReference<YaAudioProcessor::Process>&
+                            request_ref)
                        -> YaAudioProcessor::Process::Response {
+                        // NOTE: To prevent allocations we keep this actual
+                        //       `YaAudioProcessor::Process` object around as
+                        //       part of a static thread local
+                        //       `AudioProcessorRequest` object, and we only
+                        //       store a reference to it in our variant (this is
+                        //       done during the deserialization in
+                        //       `bitsery::ext::MessageReference`)
+                        YaAudioProcessor::Process& request = request_ref.get();
+
                        // Most plugins will already enable FTZ, but there are a
                        // handful of plugins that don't that suffer from
                        // extreme DSP load increases when they start producing
@@ -1251,6 +1261,9 @@ size_t Vst3Bridge::register_object_instance(
                                true, *request.new_realtime_priority);
                        }

+                        // TODO: This `get()` now moves data. We should avoid
+                        //       that, since that would require reallocating the
+                        //       process data next iteration.
                        const tresult result =
                            object_instances[request.instance_id]
                                .audio_processor->process(request.data.get());