Path MTU discovery fixes and improvements (#3109)

- Existing option `WIREGUARD_MTU`, if set, disables PMTUD and its value is used as the MTU
- New options `PMTUD_ICMP_ADDRESSES=1.1.1.1,8.8.8.8` and `PMTUD_TCP_ADDRESSES=1.1.1.1:443,8.8.8.8:443`
- ICMP PMTUD now targets external-by-default IP addresses
- New TCP PMTUD (binary search only) as a second MTU confirmation and fallback mechanism.
- Force set TCP MSS to MTU - IP header - TCP base header - "magic 20 bytes" 🎆
- Fix #3108
This commit is contained in:
Quentin McGaw
2026-02-15 01:40:34 +01:00
committed by GitHub
parent 8f1fda7646
commit be92aa2ac4
59 changed files with 2050 additions and 376 deletions
+73
View File
@@ -0,0 +1,73 @@
package ip
import (
"encoding/binary"
"net/netip"
"syscall"
"github.com/qdm12/gluetun/internal/pmtud/constants"
)
// HeaderV4 makes an IPv4 header without options, announcing TCP as the
// protocol of the payload.
// payloadLength is the length in bytes of the data following the header; the
// total length field is set to the header length plus payloadLength,
// truncated to 16 bits.
// The header has the "don't fragment" bit set, a TTL of 64, a zero
// identification and its checksum filled in.
// srcIP and dstIP are converted with As4, so they must be IPv4 addresses.
func HeaderV4(srcIP, dstIP netip.Addr, payloadLength uint32) []byte {
	const (
		version      byte   = 4
		headerWords  byte   = 20 / 4 // header length in 32-bit words
		dontFragment uint16 = 0x4000 // DF bit set, fragment offset zero
		timeToLive   byte   = 64
	)

	// make zeroes the buffer, so the type of service (byte 1) and the
	// identification (bytes 4 and 5) are left at zero without being written.
	header := make([]byte, constants.IPv4HeaderLength)
	header[0] = version<<4 | headerWords //nolint:mnd

	// The total length and the flags/offset fields are written through
	// putUint16 rather than a fixed byte order.
	totalLength := uint16(constants.IPv4HeaderLength + payloadLength) //nolint:gosec
	putUint16(header[2:], totalLength)
	putUint16(header[6:], dontFragment)

	header[8] = timeToLive
	header[9] = syscall.IPPROTO_TCP

	source, destination := srcIP.As4(), dstIP.As4()
	copy(header[12:16], source[:])
	copy(header[16:20], destination[:])

	// The checksum field (bytes 10 and 11) is still zero while the checksum
	// is computed over the header; it is then stored most significant byte first.
	checksum := ipChecksum(header)
	header[10] = byte(checksum >> 8) //nolint:mnd
	header[11] = byte(checksum)
	return header
}
// ipChecksum returns the one's complement of the one's complement sum of
// header, read as a sequence of big-endian 16-bit words. A trailing odd byte
// is treated as the high byte of a final word whose low byte is zero.
// The checksum field of the header must be zero when this is called.
//
//nolint:mnd
func ipChecksum(header []byte) uint16 {
	var sum uint32

	// Consume the header two bytes at a time.
	rest := header
	for ; len(rest) >= 2; rest = rest[2:] {
		sum += uint32(rest[0])<<8 | uint32(rest[1])
	}
	if len(rest) == 1 {
		sum += uint32(rest[0]) << 8
	}

	// Fold the carries back into the low 16 bits until none remain.
	for sum > 0xFFFF {
		sum = sum&0xFFFF + sum>>16
	}

	return ^uint16(sum) //nolint:gosec
}
// HeaderV6 makes an IPv6 header of constants.IPv6HeaderLength bytes with a
// zero traffic class, a zero flow label and a hop limit of 64.
// payloadLen is the length of the payload following the header and is
// written in big-endian order.
// nextHeader identifies what follows the header, for example
// byte([syscall.IPPROTO_TCP]).
// srcIP and dstIP are copied from their AsSlice representation.
func HeaderV6(srcIP, dstIP netip.Addr,
	payloadLen uint16, nextHeader byte,
) []byte {
	const (
		versionByte byte = 0x60 // version (4 bits) | traffic class (upper 4 bits)
		hopLimit    byte = 64
	)

	// make zeroes the buffer, so the rest of the traffic class and the flow
	// label (bytes 1 to 3) are left at zero without being written.
	header := make([]byte, constants.IPv6HeaderLength)
	header[0] = versionByte
	binary.BigEndian.PutUint16(header[4:], payloadLen)
	header[6] = nextHeader
	header[7] = hopLimit

	source, destination := srcIP.AsSlice(), dstIP.AsSlice()
	copy(header[8:24], source)
	copy(header[24:40], destination)
	return header
}
+9
View File
@@ -0,0 +1,9 @@
package ip
import (
"encoding/binary"
)
// putUint16 writes v into the first two bytes of b using the native byte
// order of the machine the program runs on.
// NOTE(review): no build constraint is visible for this variant; presumably it
// is the darwin counterpart of the big-endian implementation - confirm.
func putUint16(b []byte, v uint16) {
binary.NativeEndian.PutUint16(b, v)
}
@@ -0,0 +1,9 @@
//go:build !darwin
package ip
import "encoding/binary"
// putUint16 writes v into the first two bytes of b in big-endian order.
// This variant is compiled on every platform except darwin.
func putUint16(b []byte, v uint16) {
binary.BigEndian.PutUint16(b, v)
}
+9
View File
@@ -0,0 +1,9 @@
//go:build linux || darwin
package ip
import "syscall"
// SetIPv4HeaderIncluded sets the IP_HDRINCL socket option to 1 at the
// IPPROTO_IP level on the socket file descriptor fd, so that the IPv4 header
// is expected to be included in the data written to the socket.
// It returns the error from the setsockopt call, if any.
func SetIPv4HeaderIncluded(fd int) error {
return syscall.SetsockoptInt(fd, syscall.IPPROTO_IP, syscall.IP_HDRINCL, 1)
}
+7
View File
@@ -0,0 +1,7 @@
//go:build !linux && !windows && !darwin
package ip
// SetIPv4HeaderIncluded is the placeholder compiled on platforms other than
// linux, windows and darwin. It is not implemented there and always panics;
// it never returns an error.
func SetIPv4HeaderIncluded(fd int) error {
panic("not implemented")
}
+12
View File
@@ -0,0 +1,12 @@
package ip
import (
"syscall"
"golang.org/x/sys/windows"
)
// SetIPv4HeaderIncluded sets the IP_HDRINCL socket option to 1 at the
// IPPROTO_IP level on the socket handle, so that the IPv4 header is expected
// to be included in the data written to the socket.
// It returns the error from the setsockopt call, if any.
func SetIPv4HeaderIncluded(handle syscall.Handle) error {
// the option value comes from golang.org/x/sys/windows
const ipHdrIncluded = windows.IP_HDRINCL
return syscall.SetsockoptInt(handle, syscall.IPPROTO_IP, ipHdrIncluded, 1)
}
+5
View File
@@ -0,0 +1,5 @@
package ip
// SetIPv6HeaderIncluded always panics: as its message states, darwin does not
// allow an application to build IPv6 headers. It never returns an error.
// NOTE(review): presumably this file is only compiled on darwin - confirm.
func SetIPv6HeaderIncluded(fd int) error {
panic("darwin does not allow an application to build IPv6 headers")
}
+8
View File
@@ -0,0 +1,8 @@
package ip
import "syscall"
// SetIPv6HeaderIncluded sets socket option 36 (IPV6_HDRINCL) to 1 at the
// IPPROTO_IPV6 level on the socket file descriptor fd.
// It returns the error from the setsockopt call, if any.
// NOTE(review): presumably this file is only compiled on linux - confirm.
func SetIPv6HeaderIncluded(fd int) error {
// hard-coded numeric value of the option, no syscall package constant is used
const ipv6HdrIncluded = 36 // IPV6_HDRINCL
return syscall.SetsockoptInt(fd, syscall.IPPROTO_IPV6, ipv6HdrIncluded, 1)
}
+7
View File
@@ -0,0 +1,7 @@
//go:build !linux && !windows && !darwin
package ip
// SetIPv6HeaderIncluded is the placeholder compiled on platforms other than
// linux, windows and darwin. It is not implemented there and always panics;
// it never returns an error.
func SetIPv6HeaderIncluded(fd int) error {
panic("not implemented")
}
+7
View File
@@ -0,0 +1,7 @@
package ip
import "syscall"
// SetIPv6HeaderIncluded always panics: as its message states, windows does
// not allow an application to build IPv6 headers. It never returns an error.
func SetIPv6HeaderIncluded(fd syscall.Handle) error {
panic("windows does not allow an application to build IPv6 headers")
}
+123
View File
@@ -0,0 +1,123 @@
package ip
import (
"fmt"
"net/netip"
"syscall"
"github.com/jsimonetti/rtnetlink"
)
// SrcAddr picks the source address and port to use when sending a packet to
// dst. The source IP is the one found for the route to dst, and the source
// port is an ephemeral port reserved for proto on that IP, so that no other
// process can use it in the meantime.
// The returned cleanup function releases the reserved port and should be
// called when done. On error, the returned address is the zero value and
// cleanup is nil.
func SrcAddr(dst netip.AddrPort, proto int) (src netip.AddrPort, cleanup func(), err error) {
	address, err := srcIP(dst.Addr())
	if err != nil {
		return netip.AddrPort{}, nil, fmt.Errorf("finding source IP: %w", err)
	}

	port, release, err := srcPort(address, proto)
	if err != nil {
		return netip.AddrPort{}, nil, fmt.Errorf("reserving source port: %w", err)
	}

	src = netip.AddrPortFrom(address, port)
	return src, release, nil
}
// errNoRoute is wrapped and returned by srcIP when none of the route
// messages obtained for the destination carries a source address.
var errNoRoute = fmt.Errorf("no route to destination")
// srcIP returns the source IP address found for the route to dst.
// It opens an rtnetlink connection, requests the route to dst for the address
// family of dst (AF_INET6 if dst.Is6(), AF_INET otherwise) and returns the
// source address of the first route message carrying a usable one.
// An IPv4-mapped IPv6 source address is returned as a plain IPv4 address.
// It returns an error wrapping errNoRoute if no route message carries a
// usable source address.
func srcIP(dst netip.Addr) (netip.Addr, error) {
	conn, err := rtnetlink.Dial(nil)
	if err != nil {
		return netip.Addr{}, fmt.Errorf("dialing netlink: %w", err)
	}
	defer conn.Close()

	family := uint8(syscall.AF_INET)
	if dst.Is6() {
		family = syscall.AF_INET6
	}

	// Request route to destination
	requestMessage := &rtnetlink.RouteMessage{
		Family: family,
		Attributes: rtnetlink.RouteAttributes{
			Dst: dst.AsSlice(),
		},
	}
	messages, err := conn.Route.Get(requestMessage)
	if err != nil {
		return netip.Addr{}, fmt.Errorf("getting routes to %s: %w", dst, err)
	}

	for _, message := range messages {
		// AddrFromSlice only accepts 4 or 16 byte slices, so a missing or
		// malformed source address is skipped instead of panicking in a
		// slice to array conversion.
		src, ok := netip.AddrFromSlice(message.Attributes.Src)
		if !ok {
			continue
		}
		// Unmap turns a 16 byte IPv4-mapped address into its IPv4 form and
		// leaves any other address untouched.
		return src.Unmap(), nil
	}
	return netip.Addr{}, fmt.Errorf("%w: in %d route(s)", errNoRoute, len(messages))
}
// srcPort reserves an ephemeral source port on srcAddr.
// It creates a stream socket for proto (AF_INET6 if srcAddr.Is6(), AF_INET
// otherwise), binds it to srcAddr with port 0 so that a port gets picked for
// it, and reads the chosen port back from the socket name. The socket never
// listens nor connects; it only keeps the port taken.
// The returned cleanup function closes the socket, which releases the port,
// and should be called when done. On error the socket is already closed and
// cleanup is nil.
func srcPort(srcAddr netip.Addr, proto int) (srcPort uint16, cleanup func(), err error) {
	family := syscall.AF_INET
	if srcAddr.Is6() {
		family = syscall.AF_INET6
	}

	socket, err := syscall.Socket(family, syscall.SOCK_STREAM, proto)
	if err != nil {
		return 0, nil, fmt.Errorf("creating reservation socket: %w", err)
	}
	release := func() {
		_ = syscall.Close(socket)
	}

	// Port 0 asks for an ephemeral port to be assigned on bind.
	const anyPort = 0
	var bindAddr syscall.Sockaddr
	switch {
	case srcAddr.Is4():
		bindAddr = &syscall.SockaddrInet4{Port: anyPort, Addr: srcAddr.As4()}
	default:
		bindAddr = &syscall.SockaddrInet6{Port: anyPort, Addr: srcAddr.As16()}
	}

	if err = syscall.Bind(socket, bindAddr); err != nil {
		release()
		return 0, nil, fmt.Errorf("binding reservation socket: %w", err)
	}

	boundAddr, err := syscall.Getsockname(socket)
	if err != nil {
		release()
		return 0, nil, fmt.Errorf("getting bound socket name: %w", err)
	}

	var port int
	switch bound := boundAddr.(type) {
	case *syscall.SockaddrInet4:
		port = bound.Port
	case *syscall.SockaddrInet6:
		port = bound.Port
	default:
		panic(fmt.Sprintf("unexpected sockaddr type: %T", bound))
	}
	return uint16(port), release, nil //nolint:gosec
}