diff --git a/CHANGELOG.md b/CHANGELOG.md index f97dd683..e587ed1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,9 @@ Versioning](https://semver.org/spec/v2.0.0.html). event and parameter change queues. - VST2 audio processing also received the same small vector optimization to get rid of any last potential allocations during audio processing. +- The same small vector optimization has been applied across yabridge's entire + communication architecture, meaning that most function calls should no longer + produce any allocations for either VST2 or VST3 plugins. - Changed the way mutual recursion in VST3 plugins on the plugin side works to counter any potential GUI related timing issues with VST3 plugins when using multiple instances of a plugin. diff --git a/src/common/communication/common.h b/src/common/communication/common.h index 980d6c90..d71cb7ab 100644 --- a/src/common/communication/common.h +++ b/src/common/communication/common.h @@ -30,11 +30,18 @@ #include #include #include +#include #include +#include "../bitsery/traits/small-vector.h" #include "../logging/common.h" #include "../utils.h" +// Our input and output adapters for binary serialization always expect the data +// to be encoded in little endian format. This should not make any difference +// currently, but this would make it possible (somewhat, it would probably still +// be too slow) to have yabridge be usable with Wine run through QEMU on +// big-endian architectures. namespace bitsery { struct LittleEndianConfig { // In case we ever want to bridge from some big-endian architecture to @@ -56,6 +63,62 @@ template using InputAdapter = bitsery::InputBufferAdapter; +/** + * For binary serialization we use these small vectors, which preallocate a + * small capacity on the stack. 
For most + * messages we don't need more than the default capacity (which would usually be + * 64 bytes), so we can avoid a lot of allocations in the serialization process + * this way. + */ +template +using SerializationBuffer = boost::container::small_vector; + +/** + * The class `SerializationBuffer` is derived from, so we can erase the + * buffer's initial capacity from all functions that work with it. + */ +using SerializationBufferBase = boost::container::small_vector_base; + +namespace boost { +namespace asio { + +template +inline BOOST_ASIO_MUTABLE_BUFFER buffer( + boost::container::small_vector_base& data) + BOOST_ASIO_NOEXCEPT { + return BOOST_ASIO_MUTABLE_BUFFER( + data.size() ? &data[0] : 0, data.size() * sizeof(PodType) +#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING) + , + detail::buffer_debug_check< + typename std::vector::iterator>(data.begin()) +#endif // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING + ); +} + +// These are copied verbatim from `boost::asio::buffer(std::vector&, std::size_t)`, since `boost::container::small_vector` is +// compatible with the STL vector. +template +inline BOOST_ASIO_MUTABLE_BUFFER buffer( + boost::container::small_vector_base& data, + std::size_t max_size_in_bytes) BOOST_ASIO_NOEXCEPT { + return BOOST_ASIO_MUTABLE_BUFFER( + data.size() ? &data[0] : 0, + data.size() * sizeof(PodType) < max_size_in_bytes + ? data.size() * sizeof(PodType) + : max_size_in_bytes +#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING) + , + detail::buffer_debug_check< + typename std::vector::iterator>(data.begin()) +#endif // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING + ); +} + +} // namespace asio +} // namespace boost + /** * Serialize an object using bitsery and write it to a socket. This will write * both the size of the serialized object and the object itself over the socket. 
@@ -74,9 +137,9 @@ using InputAdapter = template inline void write_object(Socket& socket, const T& object, - std::vector& buffer) { + SerializationBufferBase& buffer) { const size_t size = - bitsery::quickSerialization>>( + bitsery::quickSerialization>( buffer, object); // Tell the other side how large the object is so it can prepare a buffer @@ -100,7 +163,7 @@ inline void write_object(Socket& socket, */ template inline void write_object(Socket& socket, const T& object) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; write_object(socket, object, buffer); } @@ -123,7 +186,9 @@ inline void write_object(Socket& socket, const T& object) { * @relates write_object */ template -inline T& read_object(Socket& socket, T& object, std::vector& buffer) { +inline T& read_object(Socket& socket, + T& object, + SerializationBufferBase& buffer) { // See the note above on the use of `uint64_t` instead of `size_t` std::array message_length; boost::asio::read(socket, boost::asio::buffer(message_length), @@ -140,7 +205,7 @@ inline T& read_object(Socket& socket, T& object, std::vector& buffer) { boost::asio::transfer_exactly(size)); auto [_, success] = - bitsery::quickDeserialization>>( + bitsery::quickDeserialization>( {buffer.begin(), size}, object); if (BOOST_UNLIKELY(!success)) { @@ -158,7 +223,7 @@ inline T& read_object(Socket& socket, T& object, std::vector& buffer) { * @overload */ template -inline T read_object(Socket& socket, std::vector& buffer) { +inline T read_object(Socket& socket, SerializationBufferBase& buffer) { T object; read_object(socket, object, buffer); @@ -173,7 +238,7 @@ inline T read_object(Socket& socket, std::vector& buffer) { */ template inline T& read_object(Socket& socket, T& object) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; return read_object(socket, object, buffer); } @@ -186,7 +251,7 @@ inline T& read_object(Socket& socket, T& object) { template inline T read_object(Socket& socket) { T object; - std::vector 
buffer(64); + SerializationBuffer<64> buffer{}; read_object(socket, object, buffer); return object; @@ -360,7 +425,7 @@ class SocketHandler { * @see SocketHandler::receive_multi */ template - inline void send(const T& object, std::vector& buffer) { + inline void send(const T& object, SerializationBufferBase& buffer) { write_object(socket, object, buffer); } @@ -402,7 +467,7 @@ class SocketHandler { * @see SocketHandler::receive_multi */ template - inline T receive_single(std::vector& buffer) { + inline T receive_single(SerializationBufferBase& buffer) { return read_object(socket, buffer); } @@ -425,19 +490,19 @@ class SocketHandler { * we'd probably want to do some more stuff after sending a reply, calling * `send()` is the responsibility of this function. * - * @tparam F A function type in the form of `void(T, std::vector&)` - * that does something with the object, and then calls `send()`. The - * reading/writing buffer is passed along so it can be reused for sending - * large amounts of data. + * @tparam F A function type in the form of `void(T, + * SerializationBufferBase&)` that does something with the object, and + * then calls `send()`. The reading/writing buffer is passed along so it + * can be reused for sending large amounts of data. 
* * @relates SocketHandler::send * * @see read_object * @see SocketHandler::receive_single */ - template &> F> + template F> void receive_multi(F&& callback) { - std::vector buffer{}; + SerializationBuffer<64> buffer{}; while (true) { try { auto object = receive_single(buffer); diff --git a/src/common/communication/vst3.h b/src/common/communication/vst3.h index 5642823f..bd6ab66b 100644 --- a/src/common/communication/vst3.h +++ b/src/common/communication/vst3.h @@ -87,7 +87,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { typename T::Response send_message( const T& object, std::optional> logging, - std::vector& buffer) { + SerializationBufferBase& buffer) { typename T::Response response_object; receive_into(object, response_object, logging, buffer); @@ -122,7 +122,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { const T& object, typename T::Response& response_object, std::optional> logging, - std::vector& buffer) { + SerializationBufferBase& buffer) { using TResponse = typename T::Response; // Since a lot of messages just return a `tresult`, we can't filter out @@ -161,7 +161,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { const T& object, typename T::Response& response_object, std::optional> logging) { - std::vector buffer(64); + SerializationBuffer<64> buffer{}; return receive_into(object, response_object, std::move(logging), buffer); } @@ -217,7 +217,7 @@ class Vst3MessageHandler : public AdHocSocketHandler { // every time, but on the audio processor side we store the // actual variant within an object and we then use some hackery // to always keep the large process data object in memory. 
- thread_local std::vector persistent_buffer{}; + thread_local SerializationBuffer<64> persistent_buffer{}; thread_local Request persistent_object; auto& request = @@ -506,7 +506,7 @@ class Vst3Sockets : public Sockets { typename T::Response& response_object, size_t instance_id, std::optional> logging) { - thread_local std::vector audio_processor_buffer{}; + thread_local SerializationBuffer<64> audio_processor_buffer{}; return audio_processor_sockets.at(instance_id) .receive_into(object, response_object, logging, diff --git a/src/plugin/bridges/vst2.cpp b/src/plugin/bridges/vst2.cpp index 09abc335..9d043858 100644 --- a/src/plugin/bridges/vst2.cpp +++ b/src/plugin/bridges/vst2.cpp @@ -593,7 +593,7 @@ void Vst2PluginBridge::do_process(T** inputs, T** outputs, int sample_frames) { } // The inputs and outputs arrays should be `[num_inputs][sample_frames]` and - // `[num_outputs][sample_frames]` floats large respectfully. + // `[num_outputs][sample_frames]` floats large respectively. std::vector> input_buffers(plugin.numInputs, std::vector(sample_frames)); for (int channel = 0; channel < plugin.numInputs; channel++) { diff --git a/src/plugin/bridges/vst2.h b/src/plugin/bridges/vst2.h index 441d2595..11f29b31 100644 --- a/src/plugin/bridges/vst2.h +++ b/src/plugin/bridges/vst2.h @@ -154,10 +154,11 @@ class Vst2PluginBridge : PluginBridge> { Vst2Logger logger; /** - * A scratch buffer for sending and receiving data during `process`, - * `processReplacing` and `processDoubleReplacing` calls. + * A scratch buffer for sending and receiving binary data during the + * `process()`, `processReplacing()` and `processDoubleReplacing()` calls. + * This buffer also needs to stay alive. 
*/ - std::vector process_buffer; + SerializationBuffer<0> process_buffer; /** * We'll periodically synchronize the Wine host's audio thread priority with diff --git a/src/wine-host/bridges/vst2.cpp b/src/wine-host/bridges/vst2.cpp index 78ff9fd6..709f5826 100644 --- a/src/wine-host/bridges/vst2.cpp +++ b/src/wine-host/bridges/vst2.cpp @@ -179,7 +179,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context, parameters_handler = Win32Thread([&]() { sockets.host_vst_parameters.receive_multi( - [&](Parameter request, std::vector& buffer) { + [&](Parameter request, SerializationBufferBase& buffer) { // Both `getParameter` and `setParameter` functions are passed // through on this socket since they have a lot of overlap. The // presence of the `value` field tells us which one we're @@ -216,7 +216,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context, plugin->numOutputs); sockets.host_vst_process_replacing.receive_multi( - [&](AudioBuffers request, std::vector& buffer) { + [&](AudioBuffers request, SerializationBufferBase& buffer) { // Since the value cannot change during this processing cycle, // we'll send the current transport information as part of the // request so we prefetch it to avoid unnecessary callbacks from