switch from shaped CAKE to FQ for BuyVM servers

These servers originally only had the 1Gbps base bandwidth and shaping
it with CAKE worked well to make the most of it during traffic spikes
for the web servers. It has little value for the nameservers since the
only potentially high throughput service is non-interactive SSH.

These servers now have 10Gbps burst available but are heavily limited by
their single virtual core and unable to use all of it in practice. CAKE
can only provide significant value when it's the bottleneck which isn't
the case when the workload is CPU limited. We don't want to keep around
the artificially low 1Gbps limit and it can't do much more.

Unlike on OVH, the practical bottleneck is the CPU and FQ has the lowest
CPU usage in practice due to being very performance-oriented with a FIFO
fast path and offloading TCP pacing from the TCP stack to itself. On the
DNS servers, the fast path is always used in practice. Our OVH servers
have a much lower enforced bandwidth limit and the way they implement it
ruins fairness across flows. We definitely want to stick with CAKE for
our VPS instances on OVH but it doesn't make sense on BuyVM anymore.
This commit is contained in:
Daniel Micay 2025-09-17 23:17:24 -04:00
parent b2c15916cc
commit c6156ebed7
14 changed files with 417 additions and 61 deletions

View file

@ -0,0 +1,179 @@
#!/usr/bin/nft -f
# Host firewall for a server exposing TCP 80/443 and SSH (22), with a loopback-only
# service on port 8080 used between the http and attestation users.
#
# Outgoing packets additionally get their packet priority set from their DSCP value
# (see dscp-to-priority and output-raw).
#
# Declare the table before flushing so the flush also works when it does not exist yet.
table inet filter
flush table inet filter
table inet filter {
# public TCP services covered by the "main" connection limit sets
define tcp-ports = { 80, 443 }
# every TCP port handled by input-tcp-service (public services plus SSH)
define tcp-ports-full = { 22, $tcp-ports }
# sources exempt from the SSH connection limit rules
# ({{...}} looks like a template placeholder substituted before loading; confirm against the deploy tooling)
define ip-allowlist-ssh = {
{{ssh_ipv4}},
}
define ip6-allowlist-ssh = {
{{ssh_ipv6}},
}
# values written to meta priority by the map below; names and numbers mirror the
# Linux TC_PRIO_* classes (BESTEFFORT, BULK, INTERACTIVE_BULK, INTERACTIVE)
define priority-besteffort = 0
define priority-bulk = 2
define priority-interactive-bulk = 4
define priority-interactive = 6
# based on CAKE diffserv4
#
# DSCP values not listed here have no entry, so the "meta priority set ... map" rules
# do not match for them and the packet priority is left unchanged.
map dscp-to-priority {
typeof ip dscp : meta priority
elements = {
cs1 : $priority-bulk,
lephb : $priority-bulk,
af11 : $priority-besteffort,
af12 : $priority-besteffort,
af13 : $priority-besteffort,
cs2 : $priority-interactive-bulk,
cs3 : $priority-interactive-bulk,
cs4 : $priority-interactive-bulk,
af21 : $priority-interactive-bulk,
af22 : $priority-interactive-bulk,
af23 : $priority-interactive-bulk,
af31 : $priority-interactive-bulk,
af32 : $priority-interactive-bulk,
af33 : $priority-interactive-bulk,
af41 : $priority-interactive-bulk,
af42 : $priority-interactive-bulk,
af43 : $priority-interactive-bulk,
cs5 : $priority-interactive,
cs6 : $priority-interactive,
cs7 : $priority-interactive,
ef : $priority-interactive,
va : $priority-interactive,
}
}
# Per-source connection limit sets. Elements are added from the packet path by the
# "add @set { ... ct count over N }" rules in input-tcp-service-established and
# input-tcp-service-loopback, and consulted in input-tcp-service and
# input-tcp-service-new. IPv6 sources are keyed by their upper 64 bits.
set ip-connlimit-ssh {
type ipv4_addr
flags dynamic
}
set ip6-connlimit-ssh {
type ipv6_addr
flags dynamic
}
set ip-connlimit-main {
type ipv4_addr
flags dynamic
}
set ip6-connlimit-main {
type ipv6_addr
flags dynamic
}
# Runs at raw priority, i.e. before connection tracking: sanity-checks addresses and
# decides which packets bypass conntrack (notrack). Anything not accepted is dropped.
chain prerouting-raw {
type filter hook prerouting priority raw
policy drop
# drop packets without a reverse path (strict reverse path filtering)
fib saddr . iif oif missing counter drop
iif lo notrack accept
# drop packets to address not configured on incoming interface (strong host model)
#
# ordered after accepting loopback to permit using external IPs via loopback
fib daddr . iif type != { local, broadcast, multicast } counter drop
# handle new TCP connections beyond rate limit via synproxy to avoid conntrack table exhaustion
tcp dport $tcp-ports-full tcp flags syn limit rate over {{synproxy_threshold}}/second burst {{synproxy_threshold}} packets counter notrack accept
# remaining TCP/UDP is accepted without notrack, so it is connection tracked
meta l4proto { tcp, udp } accept
# the listed ICMP types and all ICMPv6 are accepted untracked; other protocols hit the drop policy
icmp type { echo-reply, destination-unreachable, echo-request, time-exceeded, parameter-problem } notrack accept
meta l4proto ipv6-icmp notrack accept
}
# Main input filter: service TCP ports are handed to input-tcp-service, everything
# else is decided purely by conntrack state.
chain input {
type filter hook input priority filter
policy drop
# reset the DSCP field of every incoming packet to cs0
ip dscp set cs0
ip6 dscp set cs0
tcp dport $tcp-ports-full goto input-tcp-service
# no other inbound services: new tracked connections are dropped, while packets
# marked notrack in prerouting-raw (loopback, ICMP) are accepted
ct state vmap { invalid : drop, established : accept, related : accept, new : drop, untracked: accept }
}
# Dispatch for packets addressed to the service TCP ports.
chain input-tcp-service {
iif lo goto input-tcp-service-loopback
# for synproxy, SYN is untracked and first ACK is invalid which are handled via fallthrough
ct state vmap { established : goto input-tcp-service-established, related : accept, new : goto input-tcp-service-new }
# synproxy path: check the per-source connection limit sets first and answer a
# match with a TCP reset instead of proxying the handshake
tcp dport 22 ip saddr @ip-connlimit-ssh counter reject with tcp reset
tcp dport 22 ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-ssh counter reject with tcp reset
tcp dport $tcp-ports ip saddr @ip-connlimit-main counter reject with tcp reset
tcp dport $tcp-ports ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-main counter reject with tcp reset
synproxy mss 1460 wscale 7 timestamp sack-perm
}
# New tracked connections (SYNs below the synproxy rate threshold): same set checks as
# the synproxy path, then accept.
chain input-tcp-service-new {
tcp dport 22 ip saddr @ip-connlimit-ssh counter reject with tcp reset
tcp dport 22 ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-ssh counter reject with tcp reset
tcp dport $tcp-ports ip saddr @ip-connlimit-main counter reject with tcp reset
tcp dport $tcp-ports ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-main counter reject with tcp reset
accept
}
# add connections established without synproxy to connection limit sets with limits enforced
chain input-tcp-service-established {
# connections already processed below carry ct mark 0x1 and skip the set updates
ct mark 0x1 accept
# SSH: limit of 1 per source, allowlisted sources exempt; other services: limit of 32 per source
tcp dport 22 ip saddr != $ip-allowlist-ssh add @ip-connlimit-ssh { ip saddr ct count over 1 } counter reject with tcp reset
tcp dport 22 ip6 saddr != $ip6-allowlist-ssh add @ip6-connlimit-ssh { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 1 } counter reject with tcp reset
tcp dport $tcp-ports add @ip-connlimit-main { ip saddr ct count over 32 } counter reject with tcp reset
tcp dport $tcp-ports add @ip6-connlimit-main { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 32 } counter reject with tcp reset
# within the limit: mark the connection so later packets take the fast path above
ct mark set 0x1 accept
}
# add connections established with synproxy to connection limit sets with limits enforced
chain input-tcp-service-loopback {
# only packets whose TCP flags are exactly SYN go through the limit rules below
tcp flags != syn accept
tcp dport 22 ip saddr != $ip-allowlist-ssh add @ip-connlimit-ssh { ip saddr ct count over 1 } counter reject with tcp reset
tcp dport 22 ip6 saddr != $ip6-allowlist-ssh add @ip6-connlimit-ssh { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 1 } counter reject with tcp reset
tcp dport $tcp-ports add @ip-connlimit-main { ip saddr ct count over 32 } counter reject with tcp reset
tcp dport $tcp-ports add @ip6-connlimit-main { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 32 } counter reject with tcp reset
ct mark set 0x1 accept
}
# This host does not route: all forwarded traffic is dropped.
chain forward {
type filter hook forward priority filter
policy drop
}
# Egress at raw priority: restricts which local users may send traffic and assigns the
# packet priority from DSCP. No policy is declared, so unmatched packets are accepted.
chain output-raw {
type filter hook output priority raw
oif lo goto output-raw-loopback
# only the listed users may send non-loopback traffic
skuid != { root, systemd-network, unbound, alpm, chrony, http, attestation } counter goto graceful-reject
# translate DSCP to priority for fq bands
meta priority set ip dscp map @dscp-to-priority
meta priority set ip6 dscp map @dscp-to-priority
meta l4proto { icmp, ipv6-icmp } notrack accept
}
# Loopback egress: per-user allowlist of local flows, all untracked.
chain output-raw-loopback {
# unbound answering from port 53 to unprivileged client ports
skuid unbound meta l4proto { tcp, udp } th sport 53 th dport >= 1024 th dport != 8080 notrack accept
# DNS queries to port 53 from the listed users
skuid { alpm, chrony, attestation } meta l4proto { tcp, udp } th sport >= 1024 th sport != 8080 th dport 53 notrack accept
# attestation user sending from port 8080, and the http user connecting to port 8080
skuid attestation tcp sport 8080 tcp dport >= 1024 tcp dport != 8080 notrack accept
skuid http tcp sport >= 1024 tcp sport != 8080 tcp dport 8080 notrack accept
# any other loopback traffic is only permitted for root
skuid != root counter goto graceful-reject
notrack accept
}
# Reject so that local senders get an immediate error: TCP reset for TCP, the default
# reject response otherwise.
chain graceful-reject {
meta l4proto udp reject
meta l4proto tcp reject with tcp reset
reject
}
}

View file

@ -0,0 +1,182 @@
#!/usr/bin/nft -f
# Host firewall for a server exposing TCP 80/443/7275 and SSH (22) plus an untracked
# UDP service on port 123.
#
# Outgoing packets additionally get their packet priority set from their DSCP value
# (see dscp-to-priority and output-raw).
#
# Declare the table before flushing so the flush also works when it does not exist yet.
table inet filter
flush table inet filter
table inet filter {
# public TCP services covered by the "main" connection limit sets
define tcp-ports = { 80, 443, 7275 }
# every TCP port handled by input-tcp-service (public services plus SSH)
define tcp-ports-full = { 22, $tcp-ports }
# UDP service port handled without connection tracking in both directions
define udp-ports = 123
# sources exempt from the SSH connection limit rules
# ({{...}} looks like a template placeholder substituted before loading; confirm against the deploy tooling)
define ip-allowlist-ssh = {
{{ssh_ipv4}},
51.222.159.116, # 0.grapheneos.network
}
define ip6-allowlist-ssh = {
{{ssh_ipv6}},
2607:5300:205:200::2584, # 0.grapheneos.network
}
# values written to meta priority by the map below; names and numbers mirror the
# Linux TC_PRIO_* classes (BESTEFFORT, BULK, INTERACTIVE_BULK, INTERACTIVE)
define priority-besteffort = 0
define priority-bulk = 2
define priority-interactive-bulk = 4
define priority-interactive = 6
# based on CAKE diffserv4
#
# DSCP values not listed here have no entry, so the "meta priority set ... map" rules
# do not match for them and the packet priority is left unchanged.
map dscp-to-priority {
typeof ip dscp : meta priority
elements = {
cs1 : $priority-bulk,
lephb : $priority-bulk,
af11 : $priority-besteffort,
af12 : $priority-besteffort,
af13 : $priority-besteffort,
cs2 : $priority-interactive-bulk,
cs3 : $priority-interactive-bulk,
cs4 : $priority-interactive-bulk,
af21 : $priority-interactive-bulk,
af22 : $priority-interactive-bulk,
af23 : $priority-interactive-bulk,
af31 : $priority-interactive-bulk,
af32 : $priority-interactive-bulk,
af33 : $priority-interactive-bulk,
af41 : $priority-interactive-bulk,
af42 : $priority-interactive-bulk,
af43 : $priority-interactive-bulk,
cs5 : $priority-interactive,
cs6 : $priority-interactive,
cs7 : $priority-interactive,
ef : $priority-interactive,
va : $priority-interactive,
}
}
# Per-source connection limit sets. Elements are added from the packet path by the
# "add @set { ... ct count over N }" rules in input-tcp-service-established and
# input-tcp-service-loopback, and consulted in input-tcp-service and
# input-tcp-service-new. IPv6 sources are keyed by their upper 64 bits.
set ip-connlimit-ssh {
type ipv4_addr
flags dynamic
}
set ip6-connlimit-ssh {
type ipv6_addr
flags dynamic
}
set ip-connlimit-main {
type ipv4_addr
flags dynamic
}
set ip6-connlimit-main {
type ipv6_addr
flags dynamic
}
# Runs at raw priority, i.e. before connection tracking: sanity-checks addresses and
# decides which packets bypass conntrack (notrack). Anything not accepted is dropped.
chain prerouting-raw {
type filter hook prerouting priority raw
policy drop
# drop packets without a reverse path (strict reverse path filtering)
fib saddr . iif oif missing counter drop
iif lo notrack accept
# drop packets to address not configured on incoming interface (strong host model)
#
# ordered after accepting loopback to permit using external IPs via loopback
fib daddr . iif type != { local, broadcast, multicast } counter drop
# the UDP service is stateless from the firewall's point of view: never tracked
udp dport $udp-ports notrack accept
# handle new TCP connections beyond rate limit via synproxy to avoid conntrack table exhaustion
tcp dport $tcp-ports-full tcp flags syn limit rate over {{synproxy_threshold}}/second burst {{synproxy_threshold}} packets counter notrack accept
# remaining TCP/UDP is accepted without notrack, so it is connection tracked
meta l4proto { tcp, udp } accept
# the listed ICMP types and all ICMPv6 are accepted untracked; other protocols hit the drop policy
icmp type { echo-reply, destination-unreachable, echo-request, time-exceeded, parameter-problem } notrack accept
meta l4proto ipv6-icmp notrack accept
}
# Main input filter: service TCP ports are handed to input-tcp-service, everything
# else is decided purely by conntrack state.
chain input {
type filter hook input priority filter
policy drop
# reset the DSCP field of every incoming packet to cs0
ip dscp set cs0
ip6 dscp set cs0
tcp dport $tcp-ports-full goto input-tcp-service
# no other tracked inbound services: new tracked connections are dropped, while
# packets marked notrack in prerouting-raw (loopback, UDP service port, ICMP) are accepted
ct state vmap { invalid : drop, established : accept, related : accept, new : drop, untracked: accept }
}
# Dispatch for packets addressed to the service TCP ports.
chain input-tcp-service {
iif lo goto input-tcp-service-loopback
# for synproxy, SYN is untracked and first ACK is invalid which are handled via fallthrough
ct state vmap { established : goto input-tcp-service-established, related : accept, new : goto input-tcp-service-new }
# synproxy path: check the per-source connection limit sets first and answer a
# match with a TCP reset instead of proxying the handshake
tcp dport 22 ip saddr @ip-connlimit-ssh counter reject with tcp reset
tcp dport 22 ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-ssh counter reject with tcp reset
tcp dport $tcp-ports ip saddr @ip-connlimit-main counter reject with tcp reset
tcp dport $tcp-ports ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-main counter reject with tcp reset
synproxy mss 1460 wscale 7 timestamp sack-perm
}
# New tracked connections (SYNs below the synproxy rate threshold): same set checks as
# the synproxy path, then accept.
chain input-tcp-service-new {
tcp dport 22 ip saddr @ip-connlimit-ssh counter reject with tcp reset
tcp dport 22 ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-ssh counter reject with tcp reset
tcp dport $tcp-ports ip saddr @ip-connlimit-main counter reject with tcp reset
tcp dport $tcp-ports ip6 saddr and ffff:ffff:ffff:ffff:: @ip6-connlimit-main counter reject with tcp reset
accept
}
# add connections established without synproxy to connection limit sets with limits enforced
chain input-tcp-service-established {
# connections already processed below carry ct mark 0x1 and skip the set updates
ct mark 0x1 accept
# SSH: limit of 1 per source, allowlisted sources exempt; other services: limit of 32 per source
tcp dport 22 ip saddr != $ip-allowlist-ssh add @ip-connlimit-ssh { ip saddr ct count over 1 } counter reject with tcp reset
tcp dport 22 ip6 saddr != $ip6-allowlist-ssh add @ip6-connlimit-ssh { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 1 } counter reject with tcp reset
tcp dport $tcp-ports add @ip-connlimit-main { ip saddr ct count over 32 } counter reject with tcp reset
tcp dport $tcp-ports add @ip6-connlimit-main { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 32 } counter reject with tcp reset
# within the limit: mark the connection so later packets take the fast path above
ct mark set 0x1 accept
}
# add connections established with synproxy to connection limit sets with limits enforced
chain input-tcp-service-loopback {
# only packets whose TCP flags are exactly SYN go through the limit rules below
tcp flags != syn accept
tcp dport 22 ip saddr != $ip-allowlist-ssh add @ip-connlimit-ssh { ip saddr ct count over 1 } counter reject with tcp reset
tcp dport 22 ip6 saddr != $ip6-allowlist-ssh add @ip6-connlimit-ssh { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 1 } counter reject with tcp reset
tcp dport $tcp-ports add @ip-connlimit-main { ip saddr ct count over 32 } counter reject with tcp reset
tcp dport $tcp-ports add @ip6-connlimit-main { ip6 saddr and ffff:ffff:ffff:ffff:: ct count over 32 } counter reject with tcp reset
ct mark set 0x1 accept
}
# This host does not route: all forwarded traffic is dropped.
chain forward {
type filter hook forward priority filter
policy drop
}
# Egress at raw priority: restricts which local users may send traffic and assigns the
# packet priority from DSCP. No policy is declared, so unmatched packets are accepted.
chain output-raw {
type filter hook output priority raw
oif lo goto output-raw-loopback
# only the listed users may send non-loopback traffic
skuid != { root, systemd-network, unbound, alpm, chrony, http } counter goto graceful-reject
# NOTE(review): this accept ends evaluation of the chain before the DSCP translation
# below, so packets sent from the UDP service port never get a priority from the map.
# Confirm that is intended.
udp sport $udp-ports notrack accept
# translate DSCP to priority for fq bands
meta priority set ip dscp map @dscp-to-priority
meta priority set ip6 dscp map @dscp-to-priority
meta l4proto { icmp, ipv6-icmp } notrack accept
}
# Loopback egress: per-user allowlist of local flows, all untracked.
chain output-raw-loopback {
# unbound answering from port 53 to unprivileged client ports
skuid unbound meta l4proto { tcp, udp } th sport 53 th dport >= 1024 notrack accept
# DNS queries to port 53 from the listed users
skuid { alpm, chrony, http } meta l4proto { tcp, udp } th sport >= 1024 th dport 53 notrack accept
# any other loopback traffic is only permitted for root
skuid != root counter goto graceful-reject
notrack accept
}
# Reject so that local senders get an immediate error: TCP reset for TCP, the default
# reject response otherwise.
chain graceful-reject {
meta l4proto udp reject
meta l4proto tcp reject with tcp reset
reject
}
}

View file

@ -19,6 +19,40 @@ table inet filter {
2605:6400:10:102e:95bc:89ef:2e7f:49bb, # 0.ns2.grapheneos.org
}
define priority-besteffort = 0
define priority-bulk = 2
define priority-interactive-bulk = 4
define priority-interactive = 6
# based on CAKE diffserv4
map dscp-to-priority {
typeof ip dscp : meta priority
elements = {
cs1 : $priority-bulk,
lephb : $priority-bulk,
af11 : $priority-besteffort,
af12 : $priority-besteffort,
af13 : $priority-besteffort,
cs2 : $priority-interactive-bulk,
cs3 : $priority-interactive-bulk,
cs4 : $priority-interactive-bulk,
af21 : $priority-interactive-bulk,
af22 : $priority-interactive-bulk,
af23 : $priority-interactive-bulk,
af31 : $priority-interactive-bulk,
af32 : $priority-interactive-bulk,
af33 : $priority-interactive-bulk,
af41 : $priority-interactive-bulk,
af42 : $priority-interactive-bulk,
af43 : $priority-interactive-bulk,
cs5 : $priority-interactive,
cs6 : $priority-interactive,
cs7 : $priority-interactive,
ef : $priority-interactive,
va : $priority-interactive,
}
}
set ip-connlimit-ssh {
type ipv4_addr
flags dynamic
@ -128,6 +162,11 @@ table inet filter {
oif lo goto output-raw-loopback
skuid != { root, systemd-network, unbound, alpm, chrony, http, powerdns, dnsdist, geoipupdate } counter goto graceful-reject
udp sport $udp-ports notrack accept
# translate DSCP to priority for fq bands
meta priority set ip dscp map @dscp-to-priority
meta priority set ip6 dscp map @dscp-to-priority
meta l4proto { icmp, ipv6-icmp } notrack accept
}

View file

@ -26,10 +26,3 @@ PreferredSource=2605:6400:10:102e:95bc:89ef:2e7f:49bb
[Route]
Destination=2605:6400:10::1
PreferredSource=2605:6400:10:102e:95bc:89ef:2e7f:49bb
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -23,10 +23,3 @@ PreferredSource=2605:6400:20:387:72d4:dab9:a369:f351
[Route]
Destination=2605:6400:20::1
PreferredSource=2605:6400:20:387:72d4:dab9:a369:f351
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -23,10 +23,3 @@ PreferredSource=2605:6400:20:1131:8088:e08:84e6:632
[Route]
Destination=2605:6400:20::1
PreferredSource=2605:6400:20:1131:8088:e08:84e6:632
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -26,10 +26,3 @@ PreferredSource=2605:6400:20:1c8f:a0c9:372d:482e:945b
[Route]
Destination=2605:6400:20::1
PreferredSource=2605:6400:20:1c8f:a0c9:372d:482e:945b
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -26,10 +26,3 @@ PreferredSource=2605:6400:30:ec25:102c:af6d:5be:1eb8
[Route]
Destination=2605:6400:30::1
PreferredSource=2605:6400:30:ec25:102c:af6d:5be:1eb8
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -23,10 +23,3 @@ PreferredSource=2605:6400:10:c41:de92:c534:326a:711a
[Route]
Destination=2605:6400:10::1
PreferredSource=2605:6400:10:c41:de92:c534:326a:711a
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -23,10 +23,3 @@ PreferredSource=2605:6400:10:aa9:1c0f:44d3:da15:c0ec
[Route]
Destination=2605:6400:10::1
PreferredSource=2605:6400:10:aa9:1c0f:44d3:da15:c0ec
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -23,10 +23,3 @@ PreferredSource=2605:6400:10:9d6:6d84:e183:acda:16d7
[Route]
Destination=2605:6400:10::1
PreferredSource=2605:6400:10:9d6:6d84:e183:acda:16d7
[CAKE]
Handle=1
Bandwidth=1000M
PriorityQueueingPreset=diffserv4
FlowIsolationMode=dual-dst-host
SplitGSO=false

View file

@ -0,0 +1,10 @@
# Installs fq as the root qdisc of the "public" network interface.
[Unit]
# tied to the interface's device unit: requires it and is ordered after it
Requires=sys-subsystem-net-devices-public.device
After=sys-subsystem-net-devices-public.device
[Service]
Type=oneshot
# "replace" creates the root qdisc or swaps out whatever is currently installed.
# fq is configured with 3 bands; priomap lists the band for each packet priority 0-15,
# so priority 0 -> band 1, priority 2 -> band 2, priorities 4 and 6 -> band 0
# (the priority values the nftables dscp-to-priority maps assign).
ExecStart=/usr/bin/tc qdisc replace dev public root handle 1 fq bands 3 priomap 1 2 2 2 0 2 0 0 1 1 1 1 1 1 1 1
[Install]
# pulled in by the interface's device unit rather than by a boot target
WantedBy=sys-subsystem-net-devices-public.device

View file

@ -72,14 +72,16 @@ declare -Ar hosts_firewall=(
[1.ns2.grapheneos.org]=ns2
[2.ns2.grapheneos.org]=ns2
[mail.grapheneos.org]=mail
[staging.grapheneos.org]=web-fq
[1.grapheneos.org]=web-fq
[0.grapheneos.network]=network
[1.grapheneos.network]=network
[1.grapheneos.network]=network-fq
[2.grapheneos.network]=network
[3.grapheneos.network]=network
[0.releases.grapheneos.org]=releases
[1.releases.grapheneos.org]=releases
[2.releases.grapheneos.org]=releases
[staging.attestation.app]=attestation
[0.releases.grapheneos.org]=web-fq
[1.releases.grapheneos.org]=web-fq
[2.releases.grapheneos.org]=web-fq
[staging.attestation.app]=attestation-fq
[attestation.app]=attestation
[matrix.grapheneos.org]=matrix
[discuss.grapheneos.org]=discuss