From 6f321649c4e620df9de540734cb633cb3d0b42a7 Mon Sep 17 00:00:00 2001 From: Robbert van der Helm Date: Sun, 23 May 2021 14:43:02 +0200 Subject: [PATCH] Do small vector optimization for all communication I once read years ago somewhere on Stack Overflow that `std::vector`s preinitialized to a default size would allocate the initial capacity on the stack. This of course doesn't make any sense (runtime-sized stack allocations can cause all kinds of issues), so we were still allocating with our default 64-byte sized buffers, but just not as often. --- CHANGELOG.md | 3 + src/common/communication/common.h | 97 ++++++++++++++++++++++++++----- src/common/communication/vst3.h | 10 ++-- src/plugin/bridges/vst2.cpp | 2 +- src/plugin/bridges/vst2.h | 7 ++- src/wine-host/bridges/vst2.cpp | 4 +- 6 files changed, 96 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f97dd683..e587ed1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,9 @@ Versioning](https://semver.org/spec/v2.0.0.html). event and parameter change queues. - VST2 audio processing also received the same small vector optimization to get rid of any last potential allocations during audio processing. +- The same small vector optimization has been applied across yabridge's entire + communication architecture, meaning that most function calls should no longer + produce any allocations for both VST2 and VST3 plugins. - Changed the way mutual recursion in VST3 plugins on the plugin side works to counter any potential GUI related timing issues with VST3 plugins when using multiple instances of a plugin. 
diff --git a/src/common/communication/common.h b/src/common/communication/common.h index 980d6c90..d71cb7ab 100644 --- a/src/common/communication/common.h +++ b/src/common/communication/common.h @@ -30,11 +30,18 @@ #include #include #include +#include #include +#include "../bitsery/traits/small-vector.h" #include "../logging/common.h" #include "../utils.h" +// Our input and output adapters for binary serialization always expect the data +// to be encoded in little endian format. This should not make any difference +// currently, but this would make it possible (somewhat, it would probably still +// be too slow) to have yabridge be usable with Wine run through Qemu on +// big-endian architectures. namespace bitsery { struct LittleEndianConfig { // In case we ever want to bridge from some big-endian architecture to @@ -56,6 +63,62 @@ template using InputAdapter = bitsery::InputBufferAdapter; +/** + * For binary serialization we use these small vectors that preallocate a small + * capacity on the stack as part of our binary serialization process. For most + * messages we don't need more than the default capacity (which would usually be + * 64 bytes), so we can avoid a lot of allocations in the serialization process + * this way. + */ +template +using SerializationBuffer = boost::container::small_vector; + +/** + * The class `SerializationBuffer` is derived from, so we can erase the + * buffer's initial capacity from all functions that work with them. + */ +using SerializationBufferBase = boost::container::small_vector_base; + +namespace boost { +namespace asio { + +template +inline BOOST_ASIO_MUTABLE_BUFFER buffer( + boost::container::small_vector_base& data) + BOOST_ASIO_NOEXCEPT { + return BOOST_ASIO_MUTABLE_BUFFER( + data.size() ? 
&data[0] : 0, data.size() * sizeof(PodType) +#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING) + , + detail::buffer_debug_check< + typename std::vector::iterator>(data.begin()) +#endif // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING + ); +} + +// These are copied verbatim from `boost::asio::buffer(std::vector&, std::size_t)`, since `boost::container::small_vector` is +// compatible with the STL vector. +template +inline BOOST_ASIO_MUTABLE_BUFFER buffer( + boost::container::small_vector_base& data, + std::size_t max_size_in_bytes) BOOST_ASIO_NOEXCEPT { + return BOOST_ASIO_MUTABLE_BUFFER( + data.size() ? &data[0] : 0, + data.size() * sizeof(PodType) < max_size_in_bytes + ? data.size() * sizeof(PodType) + : max_size_in_bytes +#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING) + , + detail::buffer_debug_check< + typename std::vector::iterator>(data.begin()) +#endif // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING + ); +} + +} // namespace asio +} // namespace boost + /** * Serialize an object using bitsery and write it to a socket. This will write * both the size of the serialized object and the object itself over the socket. 
@@ -74,9 +137,9 @@ using InputAdapter = template inline void write_object(Socket& socket, const T& object, - std::vector& buffer) { + SerializationBufferBase& buffer) { const size_t size = - bitsery::quickSerialization>>( + bitsery::quickSerialization>( buffer, object); // Tell the other side how large the object is so it can prepare a buffer @@ -100,7 +163,7 @@ inline void write_object(Socket& socket, */ template inline void write_object(Socket& socket, const T& object) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; write_object(socket, object, buffer); } @@ -123,7 +186,9 @@ inline void write_object(Socket& socket, const T& object) { * @relates write_object */ template -inline T& read_object(Socket& socket, T& object, std::vector& buffer) { +inline T& read_object(Socket& socket, + T& object, + SerializationBufferBase& buffer) { // See the note above on the use of `uint64_t` instead of `size_t` std::array message_length; boost::asio::read(socket, boost::asio::buffer(message_length), @@ -140,7 +205,7 @@ inline T& read_object(Socket& socket, T& object, std::vector& buffer) { boost::asio::transfer_exactly(size)); auto [_, success] = - bitsery::quickDeserialization>>( + bitsery::quickDeserialization>( {buffer.begin(), size}, object); if (BOOST_UNLIKELY(!success)) { @@ -158,7 +223,7 @@ inline T& read_object(Socket& socket, T& object, std::vector& buffer) { * @overload */ template -inline T read_object(Socket& socket, std::vector& buffer) { +inline T read_object(Socket& socket, SerializationBufferBase& buffer) { T object; read_object(socket, object, buffer); @@ -173,7 +238,7 @@ inline T read_object(Socket& socket, std::vector& buffer) { */ template inline T& read_object(Socket& socket, T& object) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; return read_object(socket, object, buffer); } @@ -186,7 +251,7 @@ inline T& read_object(Socket& socket, T& object) { template inline T read_object(Socket& socket) { T object; - std::vector 
buffer(64); + SerializationBuffer<64> buffer{}; read_object(socket, object, buffer); return object; @@ -360,7 +425,7 @@ class SocketHandler { * @see SocketHandler::receive_multi */ template - inline void send(const T& object, std::vector& buffer) { + inline void send(const T& object, SerializationBufferBase& buffer) { write_object(socket, object, buffer); } @@ -402,7 +467,7 @@ class SocketHandler { * @see SocketHandler::receive_multi */ template - inline T receive_single(std::vector& buffer) { + inline T receive_single(SerializationBufferBase& buffer) { return read_object(socket, buffer); } @@ -425,19 +490,19 @@ class SocketHandler { * we'd probably want to do some more stuff after sending a reply, calling * `send()` is the responsibility of this function. * - * @tparam F A function type in the form of `void(T, std::vector&)` - * that does something with the object, and then calls `send()`. The - * reading/writing buffer is passed along so it can be reused for sending - * large amounts of data. + * @tparam F A function type in the form of `void(T, + * SerializationBufferBase&)` that does something with the object, and + * then calls `send()`. The reading/writing buffer is passed along so it + * can be reused for sending large amounts of data. 
* * @relates SocketHandler::send * * @see read_object * @see SocketHandler::receive_single */ - template &> F> + template F> void receive_multi(F&& callback) { - std::vector buffer{}; + SerializationBuffer<64> buffer{}; while (true) { try { auto object = receive_single(buffer); diff --git a/src/common/communication/vst3.h b/src/common/communication/vst3.h index 5642823f..bd6ab66b 100644 --- a/src/common/communication/vst3.h +++ b/src/common/communication/vst3.h @@ -87,7 +87,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { typename T::Response send_message( const T& object, std::optional> logging, - std::vector& buffer) { + SerializationBufferBase& buffer) { typename T::Response response_object; receive_into(object, response_object, logging, buffer); @@ -122,7 +122,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { const T& object, typename T::Response& response_object, std::optional> logging, - std::vector& buffer) { + SerializationBufferBase& buffer) { using TResponse = typename T::Response; // Since a lot of messages just return a `tresult`, we can't filter out @@ -161,7 +161,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { const T& object, typename T::Response& response_object, std::optional> logging) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; return receive_into(object, response_object, std::move(logging), buffer); } @@ -217,7 +217,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { // every time, but on the audio processor side we store the // actual variant within an object and we then use some hackery // to always keep the large process data object in memory. 
- thread_local std::vector persistent_buffer{}; + thread_local SerializationBuffer<64> persistent_buffer{}; thread_local Request persistent_object; auto& request = @@ -506,7 +506,7 @@ class Vst3Sockets : public Sockets { typename T::Response& response_object, size_t instance_id, std::optional> logging) { - thread_local std::vector audio_processor_buffer{}; + thread_local SerializationBuffer<64> audio_processor_buffer{}; return audio_processor_sockets.at(instance_id) .receive_into(object, response_object, logging, diff --git a/src/plugin/bridges/vst2.cpp b/src/plugin/bridges/vst2.cpp index 09abc335..9d043858 100644 --- a/src/plugin/bridges/vst2.cpp +++ b/src/plugin/bridges/vst2.cpp @@ -593,7 +593,7 @@ void Vst2PluginBridge::do_process(T** inputs, T** outputs, int sample_frames) { } // The inputs and outputs arrays should be `[num_inputs][sample_frames]` and - // `[num_outputs][sample_frames]` floats large respectfully. + // `[num_outputs][sample_frames]` floats large respectively. std::vector> input_buffers(plugin.numInputs, std::vector(sample_frames)); for (int channel = 0; channel < plugin.numInputs; channel++) { diff --git a/src/plugin/bridges/vst2.h b/src/plugin/bridges/vst2.h index 441d2595..11f29b31 100644 --- a/src/plugin/bridges/vst2.h +++ b/src/plugin/bridges/vst2.h @@ -154,10 +154,11 @@ class Vst2PluginBridge : PluginBridge> { Vst2Logger logger; /** - * A scratch buffer for sending and receiving data during `process`, - * `processReplacing` and `processDoubleReplacing` calls. + * A scratch buffer for sending and receiving binary data during the + * `process()`, `processReplacing()` and `processDoubleReplacing()` calls. + * This buffer also needs to stay alive. 
*/ - std::vector process_buffer; + SerializationBuffer<0> process_buffer; /** * We'll periodically synchronize the Wine host's audio thread priority with diff --git a/src/wine-host/bridges/vst2.cpp b/src/wine-host/bridges/vst2.cpp index 78ff9fd6..709f5826 100644 --- a/src/wine-host/bridges/vst2.cpp +++ b/src/wine-host/bridges/vst2.cpp @@ -179,7 +179,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context, parameters_handler = Win32Thread([&]() { sockets.host_vst_parameters.receive_multi( - [&](Parameter request, std::vector& buffer) { + [&](Parameter request, SerializationBufferBase& buffer) { // Both `getParameter` and `setParameter` functions are passed // through on this socket since they have a lot of overlap. The // presence of the `value` field tells us which one we're @@ -216,7 +216,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context, plugin->numOutputs); sockets.host_vst_process_replacing.receive_multi( - [&](AudioBuffers request, std::vector& buffer) { + [&](AudioBuffers request, SerializationBufferBase& buffer) { // Since the value cannot change during this processing cycle, // we'll send the current transport information as part of the // request so we prefetch it to avoid unnecessary callbacks from