Lxc

From campisano.org
Jump to navigation Jump to search

Install

# Note: do not run the Docker daemon on the same host, otherwise the LXC containers will not have internet access.
apt-get install lxc

Modules

Required modules for privileged containers

# Load now every kernel module required by privileged containers.
# -a is mandatory here: without it modprobe loads only the first name and
# passes all the remaining words to that module as parameters.
modprobe -a bridge br_netfilter ip_tables iptable_filter iptable_nat netlink_diag nf_conntrack overlay tun udp_tunnel vxlan xfrm_user xt_statistic
# Persist the same list so the modules are loaded again at every boot.
cat > /etc/modules-load.d/lxc_privileged_containers.conf << EOF
bridge
br_netfilter
ip_tables
iptable_filter
iptable_nat
netlink_diag
nf_conntrack
overlay
tun
udp_tunnel
vxlan
xfrm_user
xt_statistic
EOF

Config

Prepare a common privileged config

References:

# Write /etc/lxc/privileged.conf, a config fragment meant to be pulled in by containers through lxc.include.
# First part: a copy of /usr/share/lxc/config/common.conf where the capability drop, the default
# auto-mounts and the seccomp profile are commented out ('###' lines).
# Second part: autostart, unconfined AppArmor profile, read-write proc/sys/cgroup, no dropped
# capabilities, all devices allowed, and bind mounts of /dev/kmsg, /boot and /lib/modules.
# The here-document delimiter is unquoted, but the body contains no '$' or backquote,
# so it is written out literally.
cat > /etc/lxc/privileged.conf << EOF


### replace settings from /usr/share/lxc/config/common.conf

# Default configuration shared by all containers

# Setup the LXC devices in /dev/lxc/
lxc.tty.dir = lxc

# Allow for 1024 pseudo terminals
lxc.pty.max = 1024

# Setup 4 tty devices
lxc.tty.max = 4

# Drop some harmful capabilities
###lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio

# Ensure hostname is changed on clone
lxc.hook.clone = /usr/share/lxc/hooks/clonehostname

# Setup the default mounts
###lxc.mount.auto = cgroup:mixed proc:mixed sys:mixed
lxc.mount.entry = /sys/fs/fuse/connections sys/fs/fuse/connections none bind,optional 0 0

# Block some syscalls which are not safe in privileged
# containers
###lxc.seccomp.profile = /usr/share/lxc/config/common.seccomp

# Lastly, include all the configs from /usr/share/lxc/config/common.conf.d/
lxc.include = /usr/share/lxc/config/common.conf.d/

### end replace settings from /usr/share/lxc/config/common.conf



lxc.start.auto = 1
lxc.apparmor.profile=unconfined
lxc.mount.auto=proc:rw sys:rw cgroup:rw
lxc.cap.drop=
lxc.cgroup.devices.allow=a
lxc.cgroup.devices.deny=
lxc.cgroup2.devices.allow=a
lxc.cgroup2.devices.deny=

lxc.mount.entry = /dev/kmsg dev/kmsg none bind,create=file 0 0
lxc.mount.entry = /boot boot none bind,create=dir 0 0

# mount /lib/modules directly on the container destination will lead to an error due to recurring links
# so we mount it to an another folder now and at first boot we will manual link it to the destination
lxc.mount.entry = /lib/modules host/lib/modules none bind,create=dir 0 0
EOF

Service

The start/stop scripts of the lxc service are in charge of creating the host network that enables communication between each host and its containers, as well as between the containers of all the hosts.

In the following example there are three hosts, already connected to the same network, with arbitrary IPs 10.1.1.1, 10.1.1.2 and 10.1.1.3, all with a common network device named enp0s0.

Create an env file for the lxc service:

# Three hosts are used in this example. The three lists below are positional:
# the N-th entry of each list (device ip, device name, bridge ip) describes the same host.
# Note that every host needs to know the device ips of all the others, because the start
# script adds one bridge forwarding database (FDB) entry for each of them.
cat > /etc/default/lxc-custom-net.env << 'EOF'
ALL_HOSTS_DEV_IPS="10.1.1.1 10.1.1.2 10.1.1.3"
ALL_HOSTS_DEVS="enp0s0 enp0s0 enp0s0"
ALL_HOSTS_BR_IPS="10.10.11.100 10.10.12.100 10.10.13.100"
BR_NET=10.10.0.0/16
EOF

Create the start script for the lxc service:

cat > /etc/default/lxc-custom-net-start.sh << 'EOF'
#!/bin/sh
#
# Create the bridge and the vxlan device used by the LXC containers of this host.
#
# Required environment (provided by /etc/default/lxc-custom-net.env):
#   ALL_HOSTS_DEV_IPS  space-separated device ips, one per host
#   ALL_HOSTS_DEVS     space-separated device names, same order
#   ALL_HOSTS_BR_IPS   space-separated bridge ips, same order
#   BR_NET             containers network in CIDR notation (e.g. 10.10.0.0/16)

set -o errexit -o nounset

# take the bridge prefix length from BR_NET so the two can never disagree
case "$BR_NET" in
    */*)
        BR_PREFIX=${BR_NET#*/}
        ;;
    *)
        echo "BR_NET must be in CIDR notation. exit" >&2
        exit 1
        ;;
esac

# identify current host ip by device
i=1
for TEST_DEV_IP in $ALL_HOSTS_DEV_IPS
do
    # pick the i-th entry of the device list and of the bridge ip list
    j=1; for TEST_DEV in $ALL_HOSTS_DEVS; do test "$j" -eq "$i" && break; j=$((j + 1)); done
    j=1; for TEST_BR_IP in $ALL_HOSTS_BR_IPS; do test "$j" -eq "$i" && break; j=$((j + 1)); done

    echo "$TEST_DEV_IP $TEST_DEV $TEST_BR_IP"

    # all the ipv4 addresses of the device, one per line
    # (a missing device is not an error here: it just belongs to another host)
    DEV_IPS=$( { ip -4 -o addr show dev "$TEST_DEV" 2>/dev/null || true; } | sed -n -E 's|.*inet ([0-9.]+)/.*|\1|p' )

    # the device may carry several addresses: match against each of them
    for DEV_IP in $DEV_IPS
    do
        if test "$DEV_IP" = "$TEST_DEV_IP"
        then
            HOST_DEV_IP=$TEST_DEV_IP
            HOST_DEV=$TEST_DEV
            HOST_BR_IP=$TEST_BR_IP
            break 2
        fi
    done

    i=$((i + 1))
done

if test -z "${HOST_DEV_IP:-}" || test -z "${HOST_DEV:-}" || test -z "${HOST_BR_IP:-}"
then
   echo "Cannot find device ip. exit" >&2
   exit 1
fi

# create a bridge and assign a new ip to the current host
ip link add brvxlan0 type bridge
ip addr add "${HOST_BR_IP}/${BR_PREFIX}" dev brvxlan0
ip link set brvxlan0 up

# create a vxlan (id 42 is arbitrary, nolearning is to disable mac learning to manually specify where to send traffic destined for a specific mac)
ip link add vxlan0 type vxlan id 42 dev "$HOST_DEV" dstport 4789 local "$HOST_DEV_IP" nolearning
ip link set vxlan0 up
ip link set vxlan0 master brvxlan0

# for each other host, add a bridge rule on current host
for OTHER_HOST_DEV_IP in $ALL_HOSTS_DEV_IPS
do
    if test "$HOST_DEV_IP" != "$OTHER_HOST_DEV_IP"
    then
        bridge fdb append 00:00:00:00:00:00 dst "$OTHER_HOST_DEV_IP" dev vxlan0
    fi
done

# add a masquerade postrouting rule for current network
# (only when it is not already there, so that restarts do not pile up duplicates)
iptables -t nat -C POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE 2>/dev/null ||
    iptables -t nat -A POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE
EOF

chmod 755 /etc/default/lxc-custom-net-start.sh

Create the stop script for the lxc service:

cat > /etc/default/lxc-custom-net-stop.sh << 'EOF'
#!/bin/sh
#
# Tear down what lxc-custom-net-start.sh created: the masquerade rule,
# the vxlan device and the bridge.

set -o errexit -o nounset

# remove every copy of the masquerade rule appended at start, otherwise each
# restart of the service leaves one more duplicate in the nat table
# (BR_NET comes from the service environment file; skipped when it is not set)
if test -n "${BR_NET:-}"
then
    while iptables -t nat -C POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE 2>/dev/null
    do
        iptables -t nat -D POSTROUTING -s "$BR_NET" ! -d "$BR_NET" -j MASQUERADE
    done
fi

ip link del vxlan0
ip link del brvxlan0
EOF

chmod 755 /etc/default/lxc-custom-net-stop.sh

Create the lxc service definition for systemd:

cat > /etc/systemd/system/lxc-custom-net.service << 'EOF'
[Unit]
Description=LXC custom network
# the start script needs the host device to already have its ip address,
# which network.target alone does not guarantee
Wants=network-online.target
After=network-online.target
# containers with lxc.start.auto = 1 are attached to brvxlan0,
# so the bridge must exist before they are started
Before=lxc.service

[Service]
Type=oneshot
EnvironmentFile=/etc/default/lxc-custom-net.env
ExecStart=/etc/default/lxc-custom-net-start.sh
ExecStop=/etc/default/lxc-custom-net-stop.sh
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF

Enable and start lxc service:

# make systemd pick up the new unit file
systemctl daemon-reload
# enable the service at boot and start it right away
systemctl enable --now lxc-custom-net
# show the result of the start
systemctl status lxc-custom-net

Create an LXC container

Create the container

# container name, reused by the commands of the next steps
CNT=s111
# create it from the download template (debian bookworm, arm64)
lxc-create --name "${CNT}" --template download -- --dist debian --release bookworm --arch arm64

Reset network config and configure the container

# Append the custom section to the container config: memory limit, a single veth
# interface attached to brvxlan0 (the empty 'lxc.net =' resets the network entries
# defined before it), and the shared privileged settings.
# Nothing in the body needs expansion, hence the quoted delimiter.
cat >> "/var/lib/lxc/${CNT}/config" << 'EOF'

#################
# Custom config #
#################

## Memory
lxc.cgroup2.memory.max = 2560M

## Network
lxc.net =
lxc.net.0.type = veth
lxc.net.0.link = brvxlan0
lxc.net.0.name = eth0
lxc.net.0.flags = up

## Privileged
lxc.include = /etc/lxc/privileged.conf
EOF

Start the container and enter it

# start the container, writing a debug-level log to ./start.log
lxc-start --name "${CNT}" --logpriority DEBUG --logfile start.log
# inspect the log and the state of the containers
cat start.log
lxc-ls --fancy
# open a shell inside the container
lxc-attach --name "${CNT}"

Configure the network in a persistent way

# NOTE(review): this step follows lxc-attach, so it is presumably meant to be run inside the container.
# HOST_BR_IP is used below as the default gateway; 10.10.11.100 is the first host's bridge ip in the example env file.
HOST_BR_IP=10.10.11.100
# static address given to the container's eth0, with a /16 prefix
CNT_IP=10.10.11.1

# The delimiter is left unquoted on purpose: ${CNT_IP} and ${HOST_BR_IP} must be
# expanded while the file is written.
cat > /etc/systemd/network/eth0.network << EOF
[Match]
Name=eth0

[Network]
DHCP=no
Address=${CNT_IP}/16
Gateway=${HOST_BR_IP}
DNS=8.8.8.8
DNS=1.1.1.1
Domains=~.
EOF

# apply the new configuration
systemctl restart systemd-networkd

Exit

# leave the current shell (presumably the one opened by lxc-attach in the earlier step)
exit