mirror of
https://github.com/edgelesssys/constellation.git
synced 2025-01-09 22:49:39 -05:00
169 lines
7.7 KiB
Bash
169 lines
7.7 KiB
Bash
|
#!/bin/bash

# Strict mode for the whole node-init script:
#   -e           abort as soon as an unguarded command fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

# Dump the node's current network state to the log before anything is
# modified, so the starting point is visible when debugging.
echo "Link information:"
ip link

echo "Routing table:"
ip route

# IPv4 and IPv6 addresses are listed separately.
echo "Addressing:"
ip -4 a
ip -6 a

{{- if .Values.nodeinit.removeCbrBridge }}
# Delete the cbr0 bridge interface when it is present on the node.
# 'ip link show' is used as the existence probe inside an 'if' condition, so a
# missing interface does not trip errexit; its output is intentionally left
# visible in the log.
if ip link show cbr0; then
  echo "Detected cbr0 bridge. Deleting interface..."
  ip link del cbr0
fi
{{- end }}

{{- if .Values.nodeinit.reconfigureKubelet }}
# Reconfigure the kubelet to run in CNI mode, then restart it.
#
# Check if we're running on a GKE containerd flavor as indicated by the presence
# of the '--container-runtime-endpoint' flag in '/etc/default/kubelet'.
GKE_KUBERNETES_BIN_DIR="/home/kubernetes/bin"
KUBELET_DEFAULTS_FILE="/etc/default/kubelet"
if [[ -f "${GKE_KUBERNETES_BIN_DIR}/gke" ]] && [[ $(grep -cF -- '--container-runtime-endpoint' "${KUBELET_DEFAULTS_FILE}") == "1" ]]; then
  echo "GKE *_containerd flavor detected..."

  # (GKE *_containerd) Upon node restarts, GKE's containerd images seem to reset
  # the /etc directory and our changes to the kubelet and Cilium's CNI
  # configuration are removed. This leaves room for containerd and its CNI to
  # take over pods previously managed by Cilium, causing Cilium to lose
  # ownership over these pods. We rely on the empirical observation that
  # /home/kubernetes/bin/kubelet is not changed across node reboots, and replace
  # it with a wrapper script that performs some initialization steps when
  # required and then hands over control to the real kubelet.

  # Only create the kubelet wrapper if we haven't previously done so.
  if [[ ! -f "${GKE_KUBERNETES_BIN_DIR}/the-kubelet" ]]; then
    echo "Installing the kubelet wrapper..."

    # Write the wrapper to a staging file next to the kubelet first. If writing
    # fails, errexit aborts before the real kubelet has been touched, so a
    # later run can retry instead of finding 'the-kubelet' present and skipping
    # while 'kubelet' is empty or truncated.
    #
    # The heredoc delimiter is quoted, so the shell copies the body verbatim
    # (no variable or command expansion); only the template placeholders are
    # substituted, before this script runs. 'tee' also echoes the wrapper to
    # stdout so it shows up in the log.
    KUBELET_WRAPPER_STAGING="${GKE_KUBERNETES_BIN_DIR}/kubelet-wrapper.staged"
    cat <<'EOF' | tee "${KUBELET_WRAPPER_STAGING}"
#!/bin/bash

set -euo pipefail

CNI_CONF_DIR="/etc/cni/net.d"
CONTAINERD_CONFIG="/etc/containerd/config.toml"

# Only stop and start containerd if the Cilium CNI configuration does not exist,
# or if the 'conf_template' property is present in the containerd config file,
# in order to avoid unnecessarily restarting containerd.
if [[ -z "$(find "${CNI_CONF_DIR}" -type f -name '*cilium*')" || \
"$(grep -cE '^\s+conf_template' "${CONTAINERD_CONFIG}")" != "0" ]];
then
# Stop containerd as it starts by creating a CNI configuration from a template
# causing pods to start with IPs assigned by GKE's CNI.
# 'disable --now' is used instead of stop as this script runs concurrently
# with containerd on node startup, and hence containerd might not have been
# started yet, in which case 'disable' prevents it from starting.
echo "Disabling and stopping containerd"
systemctl disable --now containerd

# Remove any pre-existing files in the CNI configuration directory. We skip
# any possibly existing Cilium configuration file for the obvious reasons.
echo "Removing undesired CNI configuration files"
find "${CNI_CONF_DIR}" -type f -not -name '*cilium*' -exec rm {} \;

# As mentioned above, the containerd configuration needs a little tweak in
# order not to create the default CNI configuration, so we update its config.
echo "Fixing containerd configuration"
sed -Ei 's/^(\s+conf_template)/\#\1/g' "${CONTAINERD_CONFIG}"

# Start containerd. It won't create it's CNI configuration file anymore.
echo "Enabling and starting containerd"
systemctl enable --now containerd
fi

# Become the real kubelet, and pass it some additionally required flags (and
# place these last so they have precedence).
exec /home/kubernetes/bin/the-kubelet "${@}" --network-plugin=cni --cni-bin-dir={{ .Values.cni.binPath }}
EOF
    chmod a+x "${KUBELET_WRAPPER_STAGING}"

    # Rename the real kubelet.
    mv "${GKE_KUBERNETES_BIN_DIR}/kubelet" "${GKE_KUBERNETES_BIN_DIR}/the-kubelet"

    # Put the finished wrapper in the place of the real kubelet. Both paths are
    # in the same directory, so this is a plain rename of a complete,
    # already-executable file.
    mv "${KUBELET_WRAPPER_STAGING}" "${GKE_KUBERNETES_BIN_DIR}/kubelet"
  else
    echo "Kubelet wrapper already exists, skipping..."
  fi
else
  # (Generic) Alter the kubelet configuration to run in CNI mode
  echo "Changing kubelet configuration to --network-plugin=cni --cni-bin-dir={{ .Values.cni.binPath }}"
  # The path is rendered through 'quote' so that a value containing whitespace
  # stays a single argument.
  mkdir -p {{ .Values.cni.binPath | quote }}
  sed -i "s:--network-plugin=kubenet:--network-plugin=cni\ --cni-bin-dir={{ .Values.cni.binPath }}:g" "${KUBELET_DEFAULTS_FILE}"
fi
echo "Restarting the kubelet..."
systemctl restart kubelet
{{- end }}

{{- if (and .Values.gke.enabled (or .Values.enableIPv4Masquerade .Values.gke.disableDefaultSnat))}}
# If Cilium is configured to manage masquerading of traffic leaving the node,
# we need to disable the IP-MASQ chain because even if ip-masq-agent
# is not installed, the node init script installs some default rules into
# the IP-MASQ chain.
# If we remove the jump to that ip-masq chain, then we ensure the ip masquerade
# configuration is solely managed by Cilium.
# Also, if Cilium is installed, it may be expected that it would be solely responsible
# for the networking configuration on that node. So provide the same functionality
# as the --disable-snat-flag for existing GKE clusters.
#
# The deletion is best-effort: '|| true' keeps errexit from aborting the script
# when the command fails (for example because the rule is not there).
iptables -w -t nat -D POSTROUTING \
  -m comment --comment "ip-masq: ensure nat POSTROUTING directs all non-LOCAL destination traffic to our custom IP-MASQ chain" \
  -m addrtype ! --dst-type LOCAL \
  -j IP-MASQ || true
{{- end }}

{{- if not (eq .Values.nodeinit.bootstrapFile "") }}
# Write the current date into the configured bootstrap file, creating its
# parent directory first if necessary.
mkdir -p {{ .Values.nodeinit.bootstrapFile | dir | quote }}
date > {{ .Values.nodeinit.bootstrapFile | quote }}
{{- end }}

{{- if .Values.azure.enabled }}
# AKS: If azure-vnet is installed on the node, and (still) configured in bridge mode,
# configure it as 'transparent' to be consistent with Cilium's CNI chaining config.
# If the azure-vnet CNI config is not removed, kubelet will execute CNI CHECK commands
# against it every 5 seconds and write 'bridge' to its state file, causing inconsistent
# behaviour when Pods are removed.
if [[ -f /etc/cni/net.d/10-azure.conflist ]]; then
  echo "Ensuring azure-vnet is configured in 'transparent' mode..."
  sed -i 's/"mode":\s*"bridge"/"mode":"transparent"/g' /etc/cni/net.d/10-azure.conflist
fi

# The azure0 interface being present means the node was booted with azure-vnet configured
# in bridge mode. This means there might be ebtables rules and neigh entries interfering
# with pod connectivity if we deploy with Azure IPAM.
if ip link show dev azure0 >/dev/null 2>&1; then

  # In Azure IPAM mode, also remove the azure-vnet state file, otherwise ebtables rules get
  # restored by the azure-vnet CNI plugin on every CNI CHECK, which can cause connectivity
  # issues in Cilium-managed Pods. Since azure-vnet is no longer called on scheduling events,
  # this file can be removed.
  rm -f /var/run/azure-vnet.json

  # This breaks connectivity for existing workload Pods when Cilium is scheduled, but we need
  # to flush these to prevent Cilium-managed Pod IPs conflicting with Pod IPs previously allocated
  # by azure-vnet. These ebtables DNAT rules contain fixed MACs that are no longer bound on the node,
  # causing packets for these Pods to be redirected back out to the gateway, where they are dropped.
  # Both flushes are best-effort ('|| true').
  echo 'Flushing ebtables pre/postrouting rules in nat table.. (disconnecting non-Cilium Pods!)'
  ebtables -t nat -F PREROUTING || true
  ebtables -t nat -F POSTROUTING || true

  # ip-masq-agent periodically injects PERM neigh entries towards the gateway
  # for all other k8s nodes in the cluster. These are safe to flush, as ARP can
  # resolve these nodes as usual. PERM entries will be automatically restored later.
  # The first field of each listed entry is passed, one at a time, to
  # 'ip neigh del'; 'xargs -r' runs nothing when the list is empty.
  echo 'Deleting all permanent neighbour entries on azure0...'
  ip neigh show dev azure0 nud permanent \
    | cut -d' ' -f1 \
    | xargs -r -n1 ip neigh del dev azure0 to || true
fi
{{- end }}

{{- if .Values.nodeinit.revertReconfigureKubelet }}
# Remove the /tmp/node-deinit.cilium.io file if it exists.
# NOTE(review): presumably a marker left by the revert (de-init) path so that
# it can run again after this initialization — confirm against that script.
rm -f /tmp/node-deinit.cilium.io
{{- end }}
echo "Node initialization complete"