Lxc
Install
# Install the LXC userspace tools.
# Note: avoid running the docker daemon on the same host, or lxc containers
# will not have internet access.
apt-get install lxc
Modules
Required modules for privileged containers
# Load the kernel modules needed by privileged containers right now.
# --all is required: without it modprobe loads only the first name and
# treats the remaining arguments as parameters of that module.
modprobe --all \
  bridge \
  br_netfilter \
  ip_tables \
  iptable_filter \
  iptable_nat \
  netlink_diag \
  nf_conntrack \
  overlay \
  tun \
  udp_tunnel \
  vxlan \
  xfrm_user \
  xt_statistic

# Load the same modules at every boot (modules-load.d expects one name per line).
cat > /etc/modules-load.d/lxc_privileged_containers.conf << 'EOF'
bridge
br_netfilter
ip_tables
iptable_filter
iptable_nat
netlink_diag
nf_conntrack
overlay
tun
udp_tunnel
vxlan
xfrm_user
xt_statistic
EOF
Config
Prepare a common privileged config
References:
- https://discuss.linuxcontainers.org/t/not-able-to-run-kubernetes-inside-docker-inside-lxd/4643/2
- https://ubuntu.com/kubernetes/charmed-k8s/docs/install-local
- https://github.com/schu/kubedee/blob/master/lib.bash#L1159
- https://radar231.com/posts/k3s-nodes-in-lxd-containers/
- https://discuss.kubernetes.io/t/microk8s-in-lxd/11520/2
# Write the shared configuration included by every privileged container.
# The first part replaces /usr/share/lxc/config/common.conf with the
# restrictive settings (capability drop, mixed mounts, seccomp) commented out;
# the second part adds the privileged-specific settings.
# The delimiter is quoted so the payload is written literally.
cat > /etc/lxc/privileged.conf << 'EOF'
### replace settings from /usr/share/lxc/config/common.conf

# Default configuration shared by all containers

# Setup the LXC devices in /dev/lxc/
lxc.tty.dir = lxc

# Allow for 1024 pseudo terminals
lxc.pty.max = 1024

# Setup 4 tty devices
lxc.tty.max = 4

# Drop some harmful capabilities
###lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio

# Ensure hostname is changed on clone
lxc.hook.clone = /usr/share/lxc/hooks/clonehostname

# Setup the default mounts
###lxc.mount.auto = cgroup:mixed proc:mixed sys:mixed
lxc.mount.entry = /sys/fs/fuse/connections sys/fs/fuse/connections none bind,optional 0 0

# Block some syscalls which are not safe in privileged
# containers
###lxc.seccomp.profile = /usr/share/lxc/config/common.seccomp

# Lastly, include all the configs from /usr/share/lxc/config/common.conf.d/
lxc.include = /usr/share/lxc/config/common.conf.d/

### end replace settings from /usr/share/lxc/config/common.conf

lxc.start.auto = 1
lxc.apparmor.profile=unconfined
lxc.mount.auto=proc:rw sys:rw cgroup:rw
lxc.cap.drop=
lxc.cgroup.devices.allow=a
lxc.cgroup.devices.deny=
lxc.cgroup2.devices.allow=a
lxc.cgroup2.devices.deny=
lxc.mount.entry = /dev/kmsg dev/kmsg none bind,create=file 0 0
lxc.mount.entry = /boot boot none bind,create=dir 0 0
# mount /lib/modules directly on the container destination will lead to an error due to recurring links
# so we mount it to an another folder now and at first boot we will manual link it to the destination
lxc.mount.entry = /lib/modules host/lib/modules none bind,create=dir 0 0
EOF
Service
The lxc service start/stop scripts are in charge of creating the host network that allows communication between the host and its containers, as well as between the containers of all the hosts.
In the following example there are three hosts, already connected to the same network, with arbitrary IPs 10.1.1.123, 10.1.1.124 and 10.1.1.125, all with a common network device named enp0s6.
Create an env file for the lxc service:
# Environment file for the lxc-custom-net service.
# BRIDGE_CIDR: network shared by the bridges and containers of all the hosts.
# HOSTS: one line per host, in the form "<device> <device IP> <bridge IP>";
#        three hosts are used in this example.
# Every host needs the full list: it has to know the other hosts in order to
# add a rule for each of them to the bridge forwarding database (FDB).
# The delimiter is quoted so the content is written literally.
cat > /etc/default/lxc-custom-net.env << 'EOF'
BRIDGE_CIDR=10.10.0.0/16
HOSTS='enp0s6 10.1.1.123 10.10.1.100
enp0s6 10.1.1.124 10.10.2.100
enp0s6 10.1.1.125 10.10.3.100'
EOF
Create the start script for the lxc service:
# Write the script run when the lxc-custom-net service starts.
# The script expects HOSTS and BRIDGE_CIDR in its environment
# (/etc/default/lxc-custom-net.env). The delimiter is quoted so that the
# variables are expanded when the script runs, not when it is written.
cat > /etc/default/lxc-custom-net-start.sh << 'EOF'
#!/bin/sh
# Create the brvxlan0 bridge and the vxlan0 overlay for the current host,
# register the other hosts in the bridge forwarding database and masquerade
# the traffic leaving the container network.
set -o errexit -o nounset

# fail early with a clear message when the environment is incomplete
: "${HOSTS:?HOSTS is not set}"
: "${BRIDGE_CIDR:?BRIDGE_CIDR is not set}"

# the bridge address uses the prefix length of BRIDGE_CIDR
case "$BRIDGE_CIDR" in
  */*)
    BRIDGE_PREFIX=${BRIDGE_CIDR#*/}
    ;;
  *)
    echo "BRIDGE_CIDR must be in address/prefix form. exit" >&2
    exit 1
    ;;
esac

HOST_DEVICE=
HOST_DEVICE_IP=
HOST_BRIDGE_IP=

# identify the current host: it is the entry whose device ip is assigned
# to the local device with the same name
while read -r TEST_DEVICE TEST_DEVICE_IP TEST_BRIDGE_IP
do
  # skip blank or incomplete entries
  if test -z "$TEST_DEVICE" || test -z "$TEST_DEVICE_IP" || test -z "$TEST_BRIDGE_IP"
  then
    continue
  fi
  echo "checking $TEST_DEVICE $TEST_DEVICE_IP $TEST_BRIDGE_IP"
  # a device can carry several IPv4 addresses, so look for an exact match
  # among all of them; a missing device simply yields no address
  if { ip -4 -o addr show dev "$TEST_DEVICE" 2>/dev/null || true; } \
    | sed -n -E 's|.*inet ([0-9.]+)/.*|\1|p' \
    | grep -q -x -F -e "$TEST_DEVICE_IP"
  then
    HOST_DEVICE=$TEST_DEVICE
    HOST_DEVICE_IP=$TEST_DEVICE_IP
    HOST_BRIDGE_IP=$TEST_BRIDGE_IP
    break
  fi
done <<EOVARS
$HOSTS
EOVARS

if test -z "$HOST_DEVICE" || test -z "$HOST_DEVICE_IP" || test -z "$HOST_BRIDGE_IP"
then
  echo "Cannot find device ip. exit" >&2
  exit 1
fi

# create a bridge and assign a new ip to the current host
ip link add brvxlan0 type bridge
ip addr add "${HOST_BRIDGE_IP}/${BRIDGE_PREFIX}" dev brvxlan0
ip link set brvxlan0 up

# create a vxlan (id 42 is arbitrary, nolearning is to disable mac learning to manually specify where to send traffic destined for a specific mac)
ip link add vxlan0 type vxlan id 42 dev "$HOST_DEVICE" dstport 4789 local "$HOST_DEVICE_IP" nolearning
ip link set vxlan0 up
ip link set vxlan0 master brvxlan0

# for each other host, add a bridge rule on current host
while read -r OTHER_DEVICE OTHER_DEVICE_IP OTHER_BRIDGE_IP
do
  if test -n "$OTHER_DEVICE_IP" && test "$HOST_DEVICE_IP" != "$OTHER_DEVICE_IP"
  then
    bridge fdb append 00:00:00:00:00:00 dst "$OTHER_DEVICE_IP" dev vxlan0
  fi
done <<EOVARS
$HOSTS
EOVARS

# add a masquerade postrouting rule for current network, unless it is
# already there (a plain append would pile up one more copy on every start)
if ! iptables -t nat -C POSTROUTING -s "$BRIDGE_CIDR" ! -d "$BRIDGE_CIDR" -j MASQUERADE 2>/dev/null
then
  iptables -t nat -A POSTROUTING -s "$BRIDGE_CIDR" ! -d "$BRIDGE_CIDR" -j MASQUERADE
fi
EOF
chmod 755 /etc/default/lxc-custom-net-start.sh
Create the stop script for the lxc service:
# Write the script run when the lxc-custom-net service stops.
# The delimiter is quoted so that the variables are expanded when the script
# runs, not when it is written.
cat > /etc/default/lxc-custom-net-stop.sh << 'EOF'
#!/bin/sh
# Tear down what the start script created: the masquerade rule, the vxlan
# device and the bridge.
set -o errexit -o nounset

# remove every copy of the masquerade rule for the container network
# (BRIDGE_CIDR comes from the service environment; skip when it is not set)
if test -n "${BRIDGE_CIDR:-}"
then
  while iptables -t nat -C POSTROUTING -s "$BRIDGE_CIDR" ! -d "$BRIDGE_CIDR" -j MASQUERADE 2>/dev/null
  do
    iptables -t nat -D POSTROUTING -s "$BRIDGE_CIDR" ! -d "$BRIDGE_CIDR" -j MASQUERADE
  done
fi

# delete the devices that exist; a missing one must not prevent the removal
# of the other
for LINK in vxlan0 brvxlan0
do
  if ip link show "$LINK" >/dev/null 2>&1
  then
    ip link del "$LINK"
  fi
done
EOF
chmod 755 /etc/default/lxc-custom-net-stop.sh
Create the lxc service definition for systemd:
# Write the systemd unit that runs the start/stop scripts with the variables
# from the env file. The delimiter is quoted so the unit is written literally.
cat > /etc/systemd/system/lxc-custom-net.service << 'EOF'
[Unit]
Description=LXC custom network
# The start script identifies the host by the IPv4 address of its device,
# so wait until the network is configured, not merely started.
Wants=network-online.target
After=network-online.target
# Auto-started containers attach to brvxlan0, so the bridge must exist first.
# NOTE(review): assumes lxc.service is the unit that autostarts containers - confirm for your distribution.
Before=lxc.service

[Service]
Type=oneshot
EnvironmentFile=/etc/default/lxc-custom-net.env
ExecStart=/etc/default/lxc-custom-net-start.sh
ExecStop=/etc/default/lxc-custom-net-stop.sh
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF
Enable and start lxc service:
# Make systemd read the new unit file.
systemctl daemon-reload
# Start the custom network at every boot, and start it now.
systemctl enable lxc-custom-net
systemctl start lxc-custom-net
# Check the result of the start script.
systemctl status lxc-custom-net
Create an LXC container
Create the container
# Name of the container; the following steps reuse it.
CNT=s101
# Build the container from the download template. Everything after "--" is
# passed to the template: distribution, release and architecture of the image.
lxc-create -n "${CNT}" -t download -- -d debian -r bookworm -a arm64
Reset network config and configure the container
# Append the custom settings to the configuration of the container:
# a memory limit, a single veth interface (eth0) attached to the brvxlan0
# bridge after resetting the network settings, and the shared privileged
# configuration.
tee -a "/var/lib/lxc/${CNT}/config" > /dev/null << 'EOF'
#################
# Custom config #
#################
## Memory
lxc.cgroup2.memory.max = 2560M
## Network
lxc.net =
lxc.net.0.type = veth
lxc.net.0.link = brvxlan0
lxc.net.0.name = eth0
lxc.net.0.flags = up
## Privileged
lxc.include = /etc/lxc/privileged.conf
EOF
Start the container and enter
# Start the container, logging at DEBUG level to start.log in the current directory.
lxc-start --name "${CNT}" --logpriority DEBUG --logfile start.log
# Review the startup log.
cat start.log
# List the containers with their details.
lxc-ls -f
# Open a shell inside the container.
lxc-attach --name "${CNT}"
Inside the container, configure systemd-networkd to use our defined static IP
# Static address of the container and the bridge address of its host,
# which is used as the gateway.
CONTAINER_CIDR=10.10.1.101/16
HOST_BRIDGE_IP=10.10.1.100
# Write the systemd-networkd configuration for eth0, one line per argument.
printf '%s\n' \
  '[Match]' \
  'Name=eth0' \
  '[Network]' \
  'DHCP=no' \
  "Address=${CONTAINER_CIDR}" \
  "Gateway=${HOST_BRIDGE_IP}" \
  'DNS=8.8.8.8' \
  'DNS=1.1.1.1' \
  'Domains=~.' \
  > /etc/systemd/network/eth0.network
# Apply the new configuration.
systemctl restart systemd-networkd
After that, verify that the IP is correct, then exit the container
# Show the addresses assigned to eth0.
ip addr show dev eth0
# Leave the container shell.
exit