Lxc
Install
# note: avoid running docker daemon, or lxc containers will not have internet access
apt-get install lxc
Modules
Required modules for privileged containers
# Load the modules right away.
# "-a" is required: without it modprobe loads only the first module and passes
# every following argument to it as a module parameter.
modprobe -a \
  bridge \
  br_netfilter \
  ip_tables \
  iptable_filter \
  iptable_nat \
  netlink_diag \
  nf_conntrack \
  overlay \
  tun \
  udp_tunnel \
  vxlan \
  xfrm_user \
  xt_statistic

# Load the same modules at every boot (modules-load.d expects one module name per line).
cat > /etc/modules-load.d/lxc_privileged_containers.conf << 'EOF'
bridge
br_netfilter
ip_tables
iptable_filter
iptable_nat
netlink_diag
nf_conntrack
overlay
tun
udp_tunnel
vxlan
xfrm_user
xt_statistic
EOF
Config
Prepare a common privileged config
References:
- https://discuss.linuxcontainers.org/t/not-able-to-run-kubernetes-inside-docker-inside-lxd/4643/2
- https://ubuntu.com/kubernetes/charmed-k8s/docs/install-local
- https://github.com/schu/kubedee/blob/master/lib.bash#L1159
- https://radar231.com/posts/k3s-nodes-in-lxd-containers/
- https://discuss.kubernetes.io/t/microk8s-in-lxd/11520/2
# Write the shared privileged-container config, included by each container config.
# The first part mirrors /usr/share/lxc/config/common.conf; lines prefixed with "###"
# are the upstream settings that are intentionally disabled here.
# The delimiter is quoted so the shell writes the content verbatim.
cat > /etc/lxc/privileged.conf << 'EOF'
### replace settings from /usr/share/lxc/config/common.conf
# Default configuration shared by all containers

# Setup the LXC devices in /dev/lxc/
lxc.tty.dir = lxc

# Allow for 1024 pseudo terminals
lxc.pty.max = 1024

# Setup 4 tty devices
lxc.tty.max = 4

# Drop some harmful capabilities
###lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio

# Ensure hostname is changed on clone
lxc.hook.clone = /usr/share/lxc/hooks/clonehostname

# Setup the default mounts
###lxc.mount.auto = cgroup:mixed proc:mixed sys:mixed
lxc.mount.entry = /sys/fs/fuse/connections sys/fs/fuse/connections none bind,optional 0 0

# Block some syscalls which are not safe in privileged
# containers
###lxc.seccomp.profile = /usr/share/lxc/config/common.seccomp

# Lastly, include all the configs from /usr/share/lxc/config/common.conf.d/
lxc.include = /usr/share/lxc/config/common.conf.d/
### end replace settings from /usr/share/lxc/config/common.conf

lxc.start.auto = 1
lxc.apparmor.profile=unconfined
lxc.mount.auto=proc:rw sys:rw cgroup:rw
lxc.cap.drop=
lxc.cgroup.devices.allow=a
lxc.cgroup.devices.deny=
lxc.cgroup2.devices.allow=a
lxc.cgroup2.devices.deny=
lxc.mount.entry = /dev/kmsg dev/kmsg none bind,create=file 0 0
lxc.mount.entry = /boot boot none bind,create=dir 0 0
# mount /lib/modules directly on the container destination will lead to an error due to recurring links
# so we mount it to an another folder now and at first boot we will manual link it to the destination
lxc.mount.entry = /lib/modules host/lib/modules none bind,create=dir 0 0
EOF
Service
The start/stop scripts of the lxc service are in charge of creating the host network that enables communication between the host and its containers, as well as communication between the containers of all the hosts.
In the following example there are three hosts, already connected to the same network, with arbitrary IPs 10.1.1.1, 10.1.1.2 and 10.1.1.3, all with a common network device named enp0s0.
Create an env file for the lxc service:
# three hosts will be used, with following networks, ips and devices
# note that hosts need to know each other to add a rule on bridge forwarding database (FDB)
# The three ALL_HOSTS_* lists are matched by position: the N-th device IP belongs to
# the N-th device and gets the N-th bridge IP.
cat > /etc/default/lxc-custom-net.env << 'EOF'
ALL_HOSTS_DEV_IPS="10.1.1.1 10.1.1.2 10.1.1.3"
ALL_HOSTS_DEVS="enp0s0 enp0s0 enp0s0"
ALL_HOSTS_BR_IPS="10.10.11.100 10.10.12.100 10.10.13.100"
BR_NET=10.10.0.0/16
EOF
Create the start script for the lxc service:
# Install the script run by the lxc-custom-net service on start.
# The heredoc delimiter is quoted, so nothing below is expanded while writing the file.
cat > /etc/default/lxc-custom-net-start.sh << 'EOF'
#!/bin/sh
# Create the bridge brvxlan0 and the VXLAN device vxlan0 on the current host.
# Reads from the environment:
#   ALL_HOSTS_DEV_IPS  space separated IPs of all the hosts
#   ALL_HOSTS_DEVS     device carrying each of those IPs (matched by position)
#   ALL_HOSTS_BR_IPS   bridge IP to assign on each host (matched by position)
#   BR_NET             network of the bridge, in CIDR notation
set -o errexit -o nounset

# fail early with a clear message when the environment is incomplete
: "${ALL_HOSTS_DEV_IPS:?ALL_HOSTS_DEV_IPS must be set}"
: "${ALL_HOSTS_DEVS:?ALL_HOSTS_DEVS must be set}"
: "${ALL_HOSTS_BR_IPS:?ALL_HOSTS_BR_IPS must be set}"
: "${BR_NET:?BR_NET must be set}"

# the bridge address uses the same prefix length as BR_NET
case "$BR_NET" in
  */*) BR_PREFIX=${BR_NET#*/} ;;
  *) echo "BR_NET must be in CIDR notation (network/prefix). exit" >&2; exit 1 ;;
esac

# print the N-th (1-based) word of the remaining arguments, fail if the list is too short
nth_word() {
  n=$1
  shift
  test "$#" -ge "$n" || return 1
  shift $((n - 1))
  printf '%s\n' "$1"
}

# identify current host ip by device
HOST_DEV=
HOST_DEV_IP=
HOST_BR_IP=
i=1
for TEST_DEV_IP in $ALL_HOSTS_DEV_IPS
do
  # word splitting of the lists is intentional
  # shellcheck disable=SC2086
  TEST_DEV=$(nth_word "$i" $ALL_HOSTS_DEVS) || { echo "ALL_HOSTS_DEVS has no entry number $i. exit" >&2; exit 1; }
  # shellcheck disable=SC2086
  TEST_BR_IP=$(nth_word "$i" $ALL_HOSTS_BR_IPS) || { echo "ALL_HOSTS_BR_IPS has no entry number $i. exit" >&2; exit 1; }
  printf '%s %s %s\n' "$TEST_DEV_IP" "$TEST_DEV" "$TEST_BR_IP"
  # look for the candidate among all the IPv4 addresses of the device (there may be several);
  # a missing device gives no output, so the candidate is simply skipped
  if ip -4 -o addr show dev "$TEST_DEV" 2>/dev/null | grep -q -F -- "inet ${TEST_DEV_IP}/"
  then
    HOST_DEV=$TEST_DEV
    HOST_DEV_IP=$TEST_DEV_IP
    HOST_BR_IP=$TEST_BR_IP
    break
  fi
  i=$((i + 1))
done
if test -z "$HOST_DEV_IP" || test -z "$HOST_DEV" || test -z "$HOST_BR_IP"
then
  echo "Cannot find device ip. exit" >&2
  exit 1
fi

# create a bridge and assign a new ip to the current host
ip link add brvxlan0 type bridge
ip addr add "${HOST_BR_IP}/${BR_PREFIX}" dev brvxlan0
ip link set brvxlan0 up

# create a vxlan (id 42 is arbitrary, nolearning is to disable mac learning to manually specify where to send traffic destined for a specific mac)
ip link add vxlan0 type vxlan id 42 dev "$HOST_DEV" dstport 4789 local "$HOST_DEV_IP" nolearning
ip link set vxlan0 up
ip link set vxlan0 master brvxlan0

# for each other host, add a bridge rule on current host
for OTHER_HOST_DEV_IP in $ALL_HOSTS_DEV_IPS
do
  if test "$HOST_DEV_IP" != "$OTHER_HOST_DEV_IP"
  then
    bridge fdb append 00:00:00:00:00:00 dst "$OTHER_HOST_DEV_IP" dev vxlan0
  fi
done

# add a masquerade postrouting rule for current network
# (checked first, so that restarting the service does not pile up duplicates)
iptables -t nat -C POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE 2>/dev/null ||
  iptables -t nat -A POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE
EOF
chmod 755 /etc/default/lxc-custom-net-start.sh
Create the stop script for the lxc service:
# Install the script run by the lxc-custom-net service on stop.
cat > /etc/default/lxc-custom-net-stop.sh << 'EOF'
#!/bin/sh
# Remove the VXLAN device, the bridge and the masquerade rule created on start.
set -o errexit -o nounset

# delete each link only if it exists, so a partially created setup is still cleaned up
for LINK in vxlan0 brvxlan0
do
  if ip link show "$LINK" >/dev/null 2>&1
  then
    ip link del "$LINK"
  fi
done

# remove every copy of the masquerade rule added on start
# (BR_NET comes from the service environment file; skipped when it is not set)
if test -n "${BR_NET:-}"
then
  while iptables -t nat -C POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE 2>/dev/null
  do
    iptables -t nat -D POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE
  done
fi
EOF
chmod 755 /etc/default/lxc-custom-net-stop.sh
Create the lxc service definition for systemd:
# Install the systemd unit that runs the start/stop scripts.
cat > /etc/systemd/system/lxc-custom-net.service << 'EOF'
[Unit]
Description=LXC custom network
# the start script looks up the address of the host device, so wait until the network is configured
Wants=network-online.target
After=network-online.target
# containers with lxc.start.auto = 1 are linked to brvxlan0, which must exist before they are started
Before=lxc.service

[Service]
Type=oneshot
EnvironmentFile=/etc/default/lxc-custom-net.env
ExecStart=/etc/default/lxc-custom-net-start.sh
ExecStop=/etc/default/lxc-custom-net-stop.sh
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF
Enable and start lxc service:
# make systemd pick up the new unit file
systemctl daemon-reload
# start the service at every boot
systemctl enable lxc-custom-net
# start it now and show the result
systemctl start lxc-custom-net
systemctl status lxc-custom-net
Create an LXC container
Create the container
# name of the container to create
CNT=s111
# build it from the "download" template; the options after "--" are passed to the template
lxc-create -n "${CNT}" -t download -- -d debian -r bookworm -a arm64
Reset network config and configure the container
# Append the custom settings to the container config.
# "${CNT:?}" aborts instead of writing to /var/lib/lxc//config when CNT is unset or empty.
cat >> "/var/lib/lxc/${CNT:?}/config" << 'EOF'
#################
# Custom config #
#################
## Memory
lxc.cgroup2.memory.max = 2560M
## Network
lxc.net =
lxc.net.0.type = veth
lxc.net.0.link = brvxlan0
lxc.net.0.name = eth0
lxc.net.0.flags = up
## Privileged
lxc.include = /etc/lxc/privileged.conf
EOF
Start the container and enter
# start the container, writing a debug log to ./start.log
# ("${CNT:?}" aborts with an error when CNT is unset or empty)
lxc-start -n "${CNT:?}" -l DEBUG -o start.log
cat start.log
# list the containers with their state
lxc-ls --fancy
# open a shell inside the container
lxc-attach -n "${CNT:?}"
Configure the network in a persistent way
# bridge IP of the host running this container; used as the default gateway
HOST_BR_IP=10.10.11.100
# static IP of this container
CNT_IP=10.10.11.1
# write the systemd-networkd configuration for eth0, one line per argument
printf '%s\n' \
  '[Match]' \
  'Name=eth0' \
  '[Network]' \
  'DHCP=no' \
  "Address=${CNT_IP}/16" \
  "Gateway=${HOST_BR_IP}" \
  'DNS=8.8.8.8' \
  'DNS=1.1.1.1' \
  'Domains=~.' \
  > /etc/systemd/network/eth0.network
# apply the new configuration
systemctl restart systemd-networkd
Exit
# leave the shell opened by lxc-attach
exit