diff --git a/CHANGELOG.md b/CHANGELOG.md
index f97dd683..e587ed1d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,9 @@ Versioning](https://semver.org/spec/v2.0.0.html).
   event and parameter change queues.
 - VST2 audio processing also received the same small vector optimization to get
   rid of any last potential allocations during audio processing.
+- The same small vector optimization has been applied across yabridge's entire
+  communication architecture, meaning that most function calls should no longer
+  produce any allocations for either VST2 or VST3 plugins.
 - Changed the way mutual recursion in VST3 plugins on the plugin side works to
   counter any potential GUI related timing issues with VST3 plugins when using
   multiple instances of a plugin.
diff --git a/src/common/communication/common.h b/src/common/communication/common.h
index 980d6c90..d71cb7ab 100644
--- a/src/common/communication/common.h
+++ b/src/common/communication/common.h
@@ -30,11 +30,18 @@
 #include <boost/asio/local/stream_protocol.hpp>
 #include <boost/asio/read.hpp>
 #include <boost/asio/write.hpp>
+#include <boost/container/small_vector.hpp>
 #include <boost/filesystem.hpp>
 
+#include "../bitsery/traits/small-vector.h"
 #include "../logging/common.h"
 #include "../utils.h"
 
+// Our input and output adapters for binary serialization always expect the data
+// to be encoded in little endian format. This should not make any difference
+// currently, but this would make it possible (somewhat, it would probably still
+// be too slow) to have yabridge be usable with Wine run through Qemu on
+// big-endian architectures.
 namespace bitsery {
 struct LittleEndianConfig {
     // In case we ever want to bridge from some big-endian architecture to
@@ -56,6 +63,62 @@ template <typename B>
 using InputAdapter =
     bitsery::InputBufferAdapter<B, bitsery::LittleEndianConfig>;
 
+/**
+ * The buffer type used for binary serialization. These small vectors store up
+ * to `N` bytes inline, so a message that fits within that capacity can be
+ * serialized without any heap allocation. Most messages need no more than the
+ * capacity we normally use (64 bytes), so this avoids a lot of allocations in
+ * the serialization process.
+ */
+template <size_t N>
+using SerializationBuffer = boost::container::small_vector<uint8_t, N>;
+
+/**
+ * The base class every `SerializationBuffer<N>` derives from. Functions take a
+ * reference to this type so they don't depend on the buffer's inline capacity.
+ */
+using SerializationBufferBase = boost::container::small_vector_base<uint8_t>;
+
+namespace boost {
+namespace asio {
+
+template <typename PodType, typename Allocator>
+inline BOOST_ASIO_MUTABLE_BUFFER buffer(
+    boost::container::small_vector_base<PodType, Allocator>& data)
+    BOOST_ASIO_NOEXCEPT {
+    return BOOST_ASIO_MUTABLE_BUFFER(
+        data.size() ? &data[0] : 0, data.size() * sizeof(PodType)
+#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING)
+                                        ,
+        detail::buffer_debug_check<typename boost::container::small_vector_base<
+            PodType, Allocator>::iterator>(data.begin())
+#endif  // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING
+    );
+}
+
+// Both overloads mirror `boost::asio::buffer(std::vector<PodType,
+// Allocator>&)` and its `std::size_t` variant for Boost's small vectors. The
+// debug check uses the small vector's own iterator type, not `std::vector`'s.
+template <typename PodType, typename Allocator>
+inline BOOST_ASIO_MUTABLE_BUFFER buffer(
+    boost::container::small_vector_base<PodType, Allocator>& data,
+    std::size_t max_size_in_bytes) BOOST_ASIO_NOEXCEPT {
+    return BOOST_ASIO_MUTABLE_BUFFER(
+        data.size() ? &data[0] : 0,
+        data.size() * sizeof(PodType) < max_size_in_bytes
+            ? data.size() * sizeof(PodType)
+            : max_size_in_bytes
+#if defined(BOOST_ASIO_ENABLE_BUFFER_DEBUGGING)
+        ,
+        detail::buffer_debug_check<typename boost::container::small_vector_base<
+            PodType, Allocator>::iterator>(data.begin())
+#endif  // BOOST_ASIO_ENABLE_BUFFER_DEBUGGING
+    );
+}
+
+}  // namespace asio
+}  // namespace boost
+
 /**
  * Serialize an object using bitsery and write it to a socket. This will write
  * both the size of the serialized object and the object itself over the socket.
@@ -74,9 +137,9 @@ using InputAdapter =
 template <typename T, typename Socket>
 inline void write_object(Socket& socket,
                          const T& object,
-                         std::vector<uint8_t>& buffer) {
+                         SerializationBufferBase& buffer) {
     const size_t size =
-        bitsery::quickSerialization<OutputAdapter<std::vector<uint8_t>>>(
+        bitsery::quickSerialization<OutputAdapter<SerializationBufferBase>>(
             buffer, object);
 
     // Tell the other side how large the object is so it can prepare a buffer
@@ -100,7 +163,7 @@ inline void write_object(Socket& socket,
  */
 template <typename T, typename Socket>
 inline void write_object(Socket& socket, const T& object) {
-    std::vector<uint8_t> buffer(64);
+    SerializationBuffer<64> buffer{};
     write_object(socket, object, buffer);
 }
 
@@ -123,7 +186,9 @@ inline void write_object(Socket& socket, const T& object) {
  * @relates write_object
  */
 template <typename T, typename Socket>
-inline T& read_object(Socket& socket, T& object, std::vector<uint8_t>& buffer) {
+inline T& read_object(Socket& socket,
+                      T& object,
+                      SerializationBufferBase& buffer) {
     // See the note above on the use of `uint64_t` instead of `size_t`
     std::array<uint64_t, 1> message_length;
     boost::asio::read(socket, boost::asio::buffer(message_length),
@@ -140,7 +205,7 @@ inline T& read_object(Socket& socket, T& object, std::vector<uint8_t>& buffer) {
                       boost::asio::transfer_exactly(size));
 
     auto [_, success] =
-        bitsery::quickDeserialization<InputAdapter<std::vector<uint8_t>>>(
+        bitsery::quickDeserialization<InputAdapter<SerializationBufferBase>>(
             {buffer.begin(), size}, object);
 
     if (BOOST_UNLIKELY(!success)) {
@@ -158,7 +223,7 @@ inline T& read_object(Socket& socket, T& object, std::vector<uint8_t>& buffer) {
  * @overload
  */
 template <typename T, typename Socket>
-inline T read_object(Socket& socket, std::vector<uint8_t>& buffer) {
+inline T read_object(Socket& socket, SerializationBufferBase& buffer) {
     T object;
     read_object<T>(socket, object, buffer);
 
@@ -173,7 +238,7 @@ inline T read_object(Socket& socket, std::vector<uint8_t>& buffer) {
  */
 template <typename T, typename Socket>
 inline T& read_object(Socket& socket, T& object) {
-    std::vector<uint8_t> buffer(64);
+    SerializationBuffer<64> buffer{};
     return read_object<T>(socket, object, buffer);
 }
 
@@ -186,7 +251,7 @@ inline T& read_object(Socket& socket, T& object) {
 template <typename T, typename Socket>
 inline T read_object(Socket& socket) {
     T object;
-    std::vector<uint8_t> buffer(64);
+    SerializationBuffer<64> buffer{};
     read_object<T>(socket, object, buffer);
 
     return object;
@@ -360,7 +425,7 @@ class SocketHandler {
      * @see SocketHandler::receive_multi
      */
     template <typename T>
-    inline void send(const T& object, std::vector<uint8_t>& buffer) {
+    inline void send(const T& object, SerializationBufferBase& buffer) {
         write_object(socket, object, buffer);
     }
 
@@ -402,7 +467,7 @@ class SocketHandler {
      * @see SocketHandler::receive_multi
      */
     template <typename T>
-    inline T receive_single(std::vector<uint8_t>& buffer) {
+    inline T receive_single(SerializationBufferBase& buffer) {
         return read_object<T>(socket, buffer);
     }
 
@@ -425,19 +490,19 @@ class SocketHandler {
      *   we'd probably want to do some more stuff after sending a reply, calling
      *   `send()` is the responsibility of this function.
      *
-     * @tparam F A function type in the form of `void(T, std::vector<uint8_t>&)`
-     *   that does something with the object, and then calls `send()`. The
-     *   reading/writing buffer is passed along so it can be reused for sending
-     *   large amounts of data.
+     * @tparam F A function type in the form of `void(T,
+     *   SerializationBufferBase&)` that does something with the object, and
+     *   then calls `send()`. The reading/writing buffer is passed along so it
+     *   can be reused for sending large amounts of data.
      *
      * @relates SocketHandler::send
      *
      * @see read_object
      * @see SocketHandler::receive_single
      */
-    template <typename T, std::invocable<T, std::vector<uint8_t>&> F>
+    template <typename T, std::invocable<T, SerializationBufferBase&> F>
     void receive_multi(F&& callback) {
-        std::vector<uint8_t> buffer{};
+        SerializationBuffer<64> buffer{};
         while (true) {
             try {
                 auto object = receive_single<T>(buffer);
diff --git a/src/common/communication/vst3.h b/src/common/communication/vst3.h
index 5642823f..bd6ab66b 100644
--- a/src/common/communication/vst3.h
+++ b/src/common/communication/vst3.h
@@ -87,7 +87,7 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
     typename T::Response send_message(
         const T& object,
         std::optional<std::pair<Vst3Logger&, bool>> logging,
-        std::vector<uint8_t>& buffer) {
+        SerializationBufferBase& buffer) {
         typename T::Response response_object;
         receive_into(object, response_object, logging, buffer);
 
@@ -122,7 +122,7 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
         const T& object,
         typename T::Response& response_object,
         std::optional<std::pair<Vst3Logger&, bool>> logging,
-        std::vector<uint8_t>& buffer) {
+        SerializationBufferBase& buffer) {
         using TResponse = typename T::Response;
 
         // Since a lot of messages just return a `tresult`, we can't filter out
@@ -161,7 +161,7 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
         const T& object,
         typename T::Response& response_object,
         std::optional<std::pair<Vst3Logger&, bool>> logging) {
-        std::vector<uint8_t> buffer(64);
+        SerializationBuffer<64> buffer{};
         return receive_into(object, response_object, std::move(logging),
                             buffer);
     }
@@ -217,7 +217,7 @@ class Vst3MessageHandler : public AdHocSocketHandler<Thread> {
                 // every time, but on the audio processor side we store the
                 // actual variant within an object and we then use some hackery
                 // to always keep the large process data object in memory.
-                thread_local std::vector<uint8_t> persistent_buffer{};
+                thread_local SerializationBuffer<64> persistent_buffer{};
                 thread_local Request persistent_object;
 
                 auto& request =
@@ -506,7 +506,7 @@ class Vst3Sockets : public Sockets {
         typename T::Response& response_object,
         size_t instance_id,
         std::optional<std::pair<Vst3Logger&, bool>> logging) {
-        thread_local std::vector<uint8_t> audio_processor_buffer{};
+        thread_local SerializationBuffer<64> audio_processor_buffer{};
 
         return audio_processor_sockets.at(instance_id)
             .receive_into(object, response_object, logging,
diff --git a/src/plugin/bridges/vst2.cpp b/src/plugin/bridges/vst2.cpp
index 09abc335..9d043858 100644
--- a/src/plugin/bridges/vst2.cpp
+++ b/src/plugin/bridges/vst2.cpp
@@ -593,7 +593,7 @@ void Vst2PluginBridge::do_process(T** inputs, T** outputs, int sample_frames) {
     }
 
     // The inputs and outputs arrays should be `[num_inputs][sample_frames]` and
-    // `[num_outputs][sample_frames]` floats large respectfully.
+    // `[num_outputs][sample_frames]` floats large respectively.
     std::vector<std::vector<T>> input_buffers(plugin.numInputs,
                                               std::vector<T>(sample_frames));
     for (int channel = 0; channel < plugin.numInputs; channel++) {
diff --git a/src/plugin/bridges/vst2.h b/src/plugin/bridges/vst2.h
index 441d2595..11f29b31 100644
--- a/src/plugin/bridges/vst2.h
+++ b/src/plugin/bridges/vst2.h
@@ -154,10 +154,11 @@ class Vst2PluginBridge : PluginBridge<Vst2Sockets<std::jthread>> {
     Vst2Logger logger;
 
     /**
-     * A scratch buffer for sending and receiving data during `process`,
-     * `processReplacing` and `processDoubleReplacing` calls.
+     * A scratch buffer for sending and receiving binary data during the
+     * `process()`, `processReplacing()` and `processDoubleReplacing()` calls.
+     * This buffer also needs to stay alive.
      */
-    std::vector<uint8_t> process_buffer;
+    SerializationBuffer<0> process_buffer;
 
     /**
      * We'll periodically synchronize the Wine host's audio thread priority with
diff --git a/src/wine-host/bridges/vst2.cpp b/src/wine-host/bridges/vst2.cpp
index 78ff9fd6..709f5826 100644
--- a/src/wine-host/bridges/vst2.cpp
+++ b/src/wine-host/bridges/vst2.cpp
@@ -179,7 +179,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context,
 
     parameters_handler = Win32Thread([&]() {
         sockets.host_vst_parameters.receive_multi<Parameter>(
-            [&](Parameter request, std::vector<uint8_t>& buffer) {
+            [&](Parameter request, SerializationBufferBase& buffer) {
                 // Both `getParameter` and `setParameter` functions are passed
                 // through on this socket since they have a lot of overlap. The
                 // presence of the `value` field tells us which one we're
@@ -216,7 +216,7 @@ Vst2Bridge::Vst2Bridge(MainContext& main_context,
             plugin->numOutputs);
 
         sockets.host_vst_process_replacing.receive_multi<AudioBuffers>(
-            [&](AudioBuffers request, std::vector<uint8_t>& buffer) {
+            [&](AudioBuffers request, SerializationBufferBase& buffer) {
                 // Since the value cannot change during this processing cycle,
                 // we'll send the current transport information as part of the
                 // request so we prefetch it to avoid unnecessary callbacks from