Fix containerd installation #116

Open
wants to merge 2 commits into main
60 changes: 32 additions & 28 deletions modules/controller_pool/controller-primary.tpl
@@ -5,35 +5,40 @@ export WORKLOADS=$(echo ${workloads})
mkdir $HOME/kube

function load_workloads() {
echo "{"| tee -a $HOME/workloads.json ; for w in $WORKLOADS; do \
echo "{"| tee -a $HOME/workloads.json ; for w in $WORKLOADS; do \
echo $w | sed 's| |\n|'g | awk '{sub(/:/,"\":\"")}1' | sed 's/.*/"&",/' | tee -a $HOME/workloads.json; \
done ; echo "\"applied_at\":\"$(date +%F:%H:%m:%S)\"" | tee -a $HOME/workloads.json \
; echo "}" | tee -a $HOME/workloads.json
}

function install_containerd() {
cat <<EOF > /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update
apt-get install -y ca-certificates socat ebtables apt-transport-https cloud-utils prips containerd jq python3
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update && apt-get install -y gnupg2 software-properties-common apt-transport-https ca-certificates socat ebtables cloud-utils prips jq python3
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
echo "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list
apt-get update && apt-get install -y containerd.io
# Configure containerd
mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml >/dev/null 2>&1
sed -i 's/SystemdCgroup \= false/SystemdCgroup \= true/g' /etc/containerd/config.toml
}

function enable_containerd() {
systemctl daemon-reload
systemctl enable containerd
systemctl start containerd
systemctl daemon-reload
systemctl restart containerd
systemctl enable containerd
}

function install_kube_tools {
echo "Installing Kubeadm tools..." ;
sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
Member
sed -i.bak -E -e might be a way to keep the \s patterns (which should be more versatile and forgiving to minor changes in the stock config file between OS versions).
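A minimal sketch of that suggestion (the exact expression is an assumption, not something proposed verbatim in this review):

    # assumed form of the suggestion: keep the \s patterns, write a .bak backup, use extended regex
    sed -i.bak -E -e '/\sswap\s/ s/^#?/#/' /etc/fstab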

echo "Installing Kubeadm tools..."
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
swapoff -a
apt-get update && apt-get install -y apt-transport-https
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update
@@ -63,23 +68,22 @@ EOF
}

function init_cluster {
export CNI_CIDR=$(cat $HOME/workloads.json | jq -r .cni_cidr) && \
echo "Initializing cluster..." && \
cat <<EOF > /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
export CNI_CIDR=$(cat $HOME/workloads.json | jq -r .cni_cidr) && \
echo "Initializing cluster..." && \
tee /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

sysctl --system
kubeadm init --pod-network-cidr="$CNI_CIDR" --token "${kube_token}"
}

function configure_network {
echo "Configuring network..."
workload_manifests=$(cat $HOME/workloads.json | jq .cni_workloads | sed "s/^\([\"']\)\(.*\)\1\$/\2/g" | tr , '\n') && \
for w in $workload_manifests; do
# we use the `kubectl create` command instead of `apply` because `apply` fails on kubernetes versions <1.22
# err: The CustomResourceDefinition "installations.operator.tigera.io" is invalid: metadata.annotations: Too long: must have at most 262144 bytes
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $w
done
}
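Aside on the comment above: the annotation-size error comes from the client-side last-applied-configuration annotation that kubectl apply adds, which is why kubectl create sidesteps it; on clusters that support it, server-side apply is another way around the limit. A sketch, not part of this PR:

    # alternative (assumed, not in this PR): server-side apply skips the client-side annotation
    kubectl --kubeconfig=/etc/kubernetes/admin.conf apply --server-side -f $w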
@@ -88,7 +92,7 @@ function gpu_config {
if [ "${count_gpu}" = "0" ]; then
echo "No GPU nodes to prepare for presently...moving on..."
else
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $(cat $HOME/workloads.json | jq .nvidia_gpu)
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f $(cat $HOME/workloads.json | jq .nvidia_gpu)
fi
}

@@ -114,7 +118,7 @@ EOF
cd $HOME/kube && \
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f $(cat $HOME/workloads.json | jq .metallb_namespace) && \
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f $(cat $HOME/workloads.json | jq .metallb_release) && \
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f metal_lb.yaml
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f metal_lb.yaml
# kubectl --kubeconfig=/etc/kubernetes/admin.conf create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)" && \
}

@@ -139,17 +143,17 @@ function ceph_rook_basic {
cd $HOME/kube ; \
mkdir ceph ;\
echo "Pulled Manifest for Ceph-Rook..." && \
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $(cat $HOME/workloads.json | jq .ceph_common) ; \
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f $(cat $HOME/workloads.json | jq .ceph_common) ; \
sleep 30 ; \
echo "Applying Ceph Operator..." ; \
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $(cat $HOME/workloads.json | jq .ceph_operator) ; \
kubectl --kubeconfig=/etc/kubernetes/admin.conf apply -f $(cat $HOME/workloads.json | jq .ceph_operator) ; \
sleep 30 ; \
echo "Creating Ceph Cluster..." ; \
if [ "${count}" -gt 3 ]; then
echo "Node count less than 3, creating minimal cluster" ; \
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $(cat $HOME/workloads.json | jq .ceph_cluster_minimal)
else
kubectl --kubeconfig=/etc/kubernetes/admin.conf create -f $(cat $HOME/workloads.json | jq .ceph_cluster)
fi
}

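A quick post-provision sanity check that the Docker-repo containerd is active and using the systemd cgroup driver might look like this (commands are an assumption, not part of the PR):

    # assumed verification steps on a provisioned controller
    systemctl is-active containerd
    containerd --version
    grep -n 'SystemdCgroup' /etc/containerd/config.toml   # expect: SystemdCgroup = true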
49 changes: 28 additions & 21 deletions modules/controller_pool/controller-standby.tpl
@@ -3,43 +3,50 @@
export HOME=/root

function install_containerd() {
cat <<EOF > /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update
apt-get install -y ca-certificates socat ebtables apt-transport-https cloud-utils prips containerd jq python3
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update && apt-get install -y gnupg2 software-properties-common apt-transport-https ca-certificates socat ebtables cloud-utils prips jq python3
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
echo "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list
apt-get update && apt-get install -y containerd.io
# Configure containerd
mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml >/dev/null 2>&1
sed -i 's/SystemdCgroup \= false/SystemdCgroup \= true/g' /etc/containerd/config.toml
}

function enable_containerd() {
systemctl daemon-reload
systemctl enable containerd
systemctl start containerd
systemctl daemon-reload
systemctl restart containerd
systemctl enable containerd
}

function bgp_routes {
GATEWAY_IP=$(curl https://metadata.platformequinix.com/metadata | jq -r ".network.addresses[] | select(.public == false) | .gateway")
# TODO use metadata peer ips
ip route add 169.254.255.1 via $GATEWAY_IP
ip route add 169.254.255.2 via $GATEWAY_IP
sed -i.bak -E "/^\s+post-down route del -net 10\.0\.0\.0.* gw .*$/a \ \ \ \ up ip route add 169.254.255.1 via $GATEWAY_IP || true\n up ip route add 169.254.255.2 via $GATEWAY_IP || true\n down ip route del 169.254.255.1 || true\n down ip route del 169.254.255.2 || true" /etc/network/interfaces
}

function ceph_pre_check {
apt install -y lvm2 ; \
modprobe rbd
}

function install_kube_tools() {
swapoff -a && \
apt-get update && apt-get install -y apt-transport-https
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update
apt-get install -y kubelet=${kube_version} kubeadm=${kube_version} kubectl=${kube_version}
function install_kube_tools {
echo "Installing Kubeadm tools..."
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
swapoff -a
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update
apt-get install -y kubelet=${kube_version} kubeadm=${kube_version} kubectl=${kube_version}
}

install_containerd && \
4 changes: 2 additions & 2 deletions modules/controller_pool/main.tf
@@ -30,7 +30,7 @@ data "template_file" "controller-primary" {

resource "equinix_metal_device" "k8s_primary" {
hostname = "${var.cluster_name}-controller-primary"
operating_system = "ubuntu_18_04"
operating_system = "ubuntu_22_04"
plan = var.plan_primary
facilities = var.facility != "" ? [var.facility] : null
metro = var.metro != "" ? var.metro : null
@@ -57,7 +57,7 @@ resource "equinix_metal_device" "k8s_controller_standby" {
depends_on = [equinix_metal_device.k8s_primary]

hostname = format("${var.cluster_name}-controller-standby-%02d", count.index)
operating_system = "ubuntu_18_04"
operating_system = "ubuntu_22_04"
plan = var.plan_primary
facilities = var.facility != "" ? [var.facility] : null
metro = var.metro != "" ? var.metro : null
68 changes: 40 additions & 28 deletions modules/gpu_node_pool/gpu_node.tpl
@@ -3,10 +3,10 @@
export HOME=/root

function nvidia_configure() {
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) ; \
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - ; \
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list ; \
sudo apt-get update && sudo apt-get install -y nvidia-container-runtime nvidia-cuda-toolkit
}

function nvidia_drivers() {
@@ -23,50 +23,62 @@ function nvidia_drivers() {
}

function install_containerd() {
cat <<EOF > /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update
apt-get install -y ca-certificates socat ebtables apt-transport-https cloud-utils prips containerd jq python3
modprobe overlay
modprobe br_netfilter
echo "Installing Containerd..."
apt-get update && apt-get install -y gnupg2 software-properties-common apt-transport-https ca-certificates socat ebtables cloud-utils prips jq python3
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
echo "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list
apt-get update && apt-get install -y containerd.io
# Configure containerd
mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml >/dev/null 2>&1
sed -i 's/SystemdCgroup \= false/SystemdCgroup \= true/g' /etc/containerd/config.toml
}

function enable_containerd() {
systemctl daemon-reload
systemctl enable containerd
systemctl start containerd
systemctl daemon-reload
systemctl restart containerd
systemctl enable containerd
}


function ceph_pre_check {
apt install -y lvm2 ; \
modprobe rbd
}

function bgp_routes {
GATEWAY_IP=$(curl https://metadata.platformequinix.com/metadata | jq -r ".network.addresses[] | select(.public == false) | .gateway")
# TODO use metadata peer ips
ip route add 169.254.255.1 via $GATEWAY_IP
ip route add 169.254.255.2 via $GATEWAY_IP
sed -i.bak -E "/^\s+post-down route del -net 10\.0\.0\.0.* gw .*$/a \ \ \ \ up ip route add 169.254.255.1 via $GATEWAY_IP || true\n up ip route add 169.254.255.2 via $GATEWAY_IP || true\n down ip route del 169.254.255.1 || true\n down ip route del 169.254.255.2 || true" /etc/network/interfaces
}

function install_kube_tools() {
swapoff -a && \
apt-get update && apt-get install -y apt-transport-https
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update
apt-get install -y kubelet=${kube_version} kubeadm=${kube_version} kubectl=${kube_version}
echo "Waiting 180s to attempt to join cluster..."
sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
swapoff -a
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update
apt-get install -y kubelet=${kube_version} kubeadm=${kube_version} kubectl=${kube_version}
echo "Waiting 180s to attempt to join cluster..."
}

function join_cluster() {
echo "Attempting to join cluster" && \
kubeadm join "${primary_node_ip}:6443" --token "${kube_token}" --discovery-token-unsafe-skip-ca-verification
echo "Attempting to join cluster"
tee /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

sysctl --system
kubeadm join "${primary_node_ip}:6443" --token "${kube_token}" --discovery-token-unsafe-skip-ca-verification
}

install_containerd && \
2 changes: 1 addition & 1 deletion modules/gpu_node_pool/main.tf
@@ -12,7 +12,7 @@ data "template_file" "gpu_node" {

resource "equinix_metal_device" "gpu_node" {
hostname = format("${var.cluster_name}-gpu-${var.pool_label}-%02d", count.index)
operating_system = "ubuntu_18_04"
operating_system = "ubuntu_22_04"
count = var.count_gpu
plan = var.plan_gpu
facilities = var.facility != "" ? [var.facility] : null
4 changes: 2 additions & 2 deletions modules/node_pool/main.tf
@@ -12,7 +12,7 @@ data "template_file" "node" {

resource "equinix_metal_device" "x86_node" {
hostname = format("${var.cluster_name}-x86-${var.pool_label}-%02d", count.index)
operating_system = "ubuntu_18_04"
operating_system = "ubuntu_22_04"
count = var.count_x86
plan = var.plan_x86
facilities = var.facility != "" ? [var.facility] : null
@@ -26,7 +26,7 @@ resource "equinix_metal_device" "x86_node" {

resource "equinix_metal_device" "arm_node" {
hostname = format("${var.cluster_name}-arm-${var.pool_label}-%02d", count.index)
operating_system = "ubuntu_18_04"
operating_system = "ubuntu_22_04"
count = var.count_arm
plan = var.plan_arm
facilities = var.facility != "" ? [var.facility] : null