Hello.
For our internal purposes we built deb packages for LXD 4.0.5 and installed them on clean bionic and focal VMs (with the latest HWE kernels). So both VM instances have identical binaries.
Initial steps:
# apt-get install lxd (4.0.5 from our repo)
# lxd init --auto
# enable security.nesting for default profile
# lxc launch ubuntu:ReleaseHere cnt1 (bionic or focal)
# lxc exec cnt1
# apt-get install lxd (4.0.5 from our repo)
# lxd init --auto
# lxc launch ubuntu:ReleaseHere cnt1 (bionic or focal)
And here is the problem - on the focal VM, launching containers always fails during unpacking with errors like these:
root@cnt1:~# lxc launch ubuntu:focal sub1
Creating sub1
Error: Failed instance creation: Create instance from image: Unpack failed, Failed to run: unsquashfs -f -d /var/lib/lxd/storage-pools/default/containers/sub1/rootfs -n -da 194 -fr 194 -p 1 /var/lib/lxd/images/d1df9c150a9fd265ba93a00fe062757bd34d9c0daa076063f59204f0e3bf2629.rootfs: create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/console, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/full, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/mapper/control, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/null, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/ptmx, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/random, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/tty, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/urandom, because Operation not permitted
create_inode: failed to create character device /var/lib/lxd/storage-pools/default/containers/sub1/rootfs/dev/zero, because Operation not permitted.
Based on a quick look at the sources, I have only one idea - for some reason runningInUserns returns false? But that shouldn’t happen, because it should see itself “under” a uid mapping:
root@cnt1:~# cat /proc/self/uid_map
0 100000 65536
And some technical details on the problematic environment:
root@ubuntu-s-1vcpu-2gb-ams3-01:~# uname -a
Linux ubuntu-s-1vcpu-2gb-ams3-01 5.8.0-43-generic #49~20.04.1-Ubuntu SMP Fri Feb 5 09:57:56 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
root@ubuntu-s-1vcpu-2gb-ams3-01:~# dpkg -l 'lxd*'
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend
|/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad)
||/ Name Version Architecture Description
+++-================-=============-============-==========================================
ii lxd 4.0.5-1focal0 amd64 Container hypervisor based on LXC - daemon
un lxd-agent-loader <none> <none> (no description available)
ii lxd-client 4.0.5-1focal0 amd64 Container hypervisor based on LXC - client
un lxd-tools <none> <none> (no description available)
root@ubuntu-s-1vcpu-2gb-ams3-01:~# ls -1 /etc/sub??? | xargs -n 1 -t cat
cat /etc/subgid
lxd:100000:65536
root:100000:65536
cat /etc/subuid
lxd:100000:65536
root:100000:65536
Details from lxc info:
config: {}
api_extensions:
- storage_zfs_remove_snapshots
- container_host_shutdown_timeout
- container_stop_priority
- container_syscall_filtering
- auth_pki
- container_last_used_at
- etag
- patch
- usb_devices
- https_allowed_credentials
- image_compression_algorithm
- directory_manipulation
- container_cpu_time
- storage_zfs_use_refquota
- storage_lvm_mount_options
- network
- profile_usedby
- container_push
- container_exec_recording
- certificate_update
- container_exec_signal_handling
- gpu_devices
- container_image_properties
- migration_progress
- id_map
- network_firewall_filtering
- network_routes
- storage
- file_delete
- file_append
- network_dhcp_expiry
- storage_lvm_vg_rename
- storage_lvm_thinpool_rename
- network_vlan
- image_create_aliases
- container_stateless_copy
- container_only_migration
- storage_zfs_clone_copy
- unix_device_rename
- storage_lvm_use_thinpool
- storage_rsync_bwlimit
- network_vxlan_interface
- storage_btrfs_mount_options
- entity_description
- image_force_refresh
- storage_lvm_lv_resizing
- id_map_base
- file_symlinks
- container_push_target
- network_vlan_physical
- storage_images_delete
- container_edit_metadata
- container_snapshot_stateful_migration
- storage_driver_ceph
- storage_ceph_user_name
- resource_limits
- storage_volatile_initial_source
- storage_ceph_force_osd_reuse
- storage_block_filesystem_btrfs
- resources
- kernel_limits
- storage_api_volume_rename
- macaroon_authentication
- network_sriov
- console
- restrict_devlxd
- migration_pre_copy
- infiniband
- maas_network
- devlxd_events
- proxy
- network_dhcp_gateway
- file_get_symlink
- network_leases
- unix_device_hotplug
- storage_api_local_volume_handling
- operation_description
- clustering
- event_lifecycle
- storage_api_remote_volume_handling
- nvidia_runtime
- container_mount_propagation
- container_backup
- devlxd_images
- container_local_cross_pool_handling
- proxy_unix
- proxy_udp
- clustering_join
- proxy_tcp_udp_multi_port_handling
- network_state
- proxy_unix_dac_properties
- container_protection_delete
- unix_priv_drop
- pprof_http
- proxy_haproxy_protocol
- network_hwaddr
- proxy_nat
- network_nat_order
- container_full
- candid_authentication
- backup_compression
- candid_config
- nvidia_runtime_config
- storage_api_volume_snapshots
- storage_unmapped
- projects
- candid_config_key
- network_vxlan_ttl
- container_incremental_copy
- usb_optional_vendorid
- snapshot_scheduling
- container_copy_project
- clustering_server_address
- clustering_image_replication
- container_protection_shift
- snapshot_expiry
- container_backup_override_pool
- snapshot_expiry_creation
- network_leases_location
- resources_cpu_socket
- resources_gpu
- resources_numa
- kernel_features
- id_map_current
- event_location
- storage_api_remote_volume_snapshots
- network_nat_address
- container_nic_routes
- rbac
- cluster_internal_copy
- seccomp_notify
- lxc_features
- container_nic_ipvlan
- network_vlan_sriov
- storage_cephfs
- container_nic_ipfilter
- resources_v2
- container_exec_user_group_cwd
- container_syscall_intercept
- container_disk_shift
- storage_shifted
- resources_infiniband
- daemon_storage
- instances
- image_types
- resources_disk_sata
- clustering_roles
- images_expiry
- resources_network_firmware
- backup_compression_algorithm
- ceph_data_pool_name
- container_syscall_intercept_mount
- compression_squashfs
- container_raw_mount
- container_nic_routed
- container_syscall_intercept_mount_fuse
- container_disk_ceph
- virtual-machines
- image_profiles
- clustering_architecture
- resources_disk_id
- storage_lvm_stripes
- vm_boot_priority
- unix_hotplug_devices
- api_filtering
- instance_nic_network
- clustering_sizing
- firewall_driver
- projects_limits
- container_syscall_intercept_hugetlbfs
- limits_hugepages
- container_nic_routed_gateway
- projects_restrictions
- custom_volume_snapshot_expiry
- volume_snapshot_scheduling
- trust_ca_certificates
- snapshot_disk_usage
- clustering_edit_roles
- container_nic_routed_host_address
- container_nic_ipvlan_gateway
- resources_usb_pci
- resources_cpu_threads_numa
- resources_cpu_core_die
- api_os
- resources_system
- usedby_consistency
- resources_gpu_mdev
- console_vga_type
- projects_limits_disk
- storage_rsync_compression
- gpu_mdev
- resources_pci_iommu
- resources_network_usb
- resources_disk_address
- network_state_vlan
- gpu_sriov
api_status: stable
api_version: "1.0"
auth: trusted
public: false
auth_methods:
- tls
environment:
addresses: []
architectures:
- x86_64
- i686
certificate: |
<DELETED>
certificate_fingerprint: f5d454ee473d66705915fd62c7777aa76dbab700d253f7b31fe0be7e888dcd66
driver: qemu | lxc
driver_version: 4.2.1 | 4.0.2
firewall: xtables
kernel: Linux
kernel_architecture: x86_64
kernel_features:
netnsid_getifaddrs: "true"
seccomp_listener: "true"
seccomp_listener_continue: "true"
shiftfs: "true"
uevent_injection: "true"
unpriv_fscaps: "true"
kernel_version: 5.8.0-43-generic
lxc_features:
cgroup2: "true"
devpts_fd: "false"
mount_injection_file: "true"
network_gateway_device_route: "true"
network_ipvlan: "true"
network_l2proxy: "true"
network_phys_macvlan_mtu: "true"
network_veth_router: "true"
pidfd: "false"
seccomp_allow_deny_syntax: "false"
seccomp_notify: "true"
seccomp_proxy_send_notify_fd: "false"
os_name: Ubuntu
os_version: "20.04"
project: default
server: lxd
server_clustered: false
server_name: ubuntu-s-1vcpu-2gb-ams3-01
server_pid: 703
server_version: 4.0.5
storage: dir
storage_version: "1"
Parent container:
root@ubuntu-s-1vcpu-2gb-ams3-01:~# lxc profile show default
config:
security.nesting: "true"
description: Default LXD profile
devices:
eth0:
name: eth0
network: lxdbr0
type: nic
root:
path: /
pool: default
type: disk
name: default
used_by:
- /1.0/instances/cnt1
root@ubuntu-s-1vcpu-2gb-ams3-01:~# lxc config show cnt1 --expanded
architecture: x86_64
config:
image.architecture: amd64
image.description: ubuntu 20.04 LTS amd64 (release) (20210201)
image.label: release
image.os: ubuntu
image.release: focal
image.serial: "20210201"
image.type: squashfs
image.version: "20.04"
security.nesting: "true"
volatile.base_image: d1df9c150a9fd265ba93a00fe062757bd34d9c0daa076063f59204f0e3bf2629
volatile.eth0.host_name: veth0a9a0510
volatile.eth0.hwaddr: 00:16:3e:50:97:02
volatile.idmap.base: "0"
volatile.idmap.current: '[{"Isuid":true,"Isgid":false,"Hostid":100000,"Nsid":0,"Maprange":65536},{"Isuid":false,"Isgid":true,"Hostid":100000,"Nsid":0,"Maprange":65536}]'
volatile.idmap.next: '[{"Isuid":true,"Isgid":false,"Hostid":100000,"Nsid":0,"Maprange":65536},{"Isuid":false,"Isgid":true,"Hostid":100000,"Nsid":0,"Maprange":65536}]'
volatile.last_state.idmap: '[]'
volatile.last_state.power: RUNNING
volatile.uuid: c2ea40d7-55a6-4b0f-bb9a-d415d1df6ba2
devices:
eth0:
name: eth0
network: lxdbr0
type: nic
root:
path: /
pool: default
type: disk
ephemeral: false
profiles:
- default
stateful: false
description: ""
Inside cnt1:
config: {}
api_extensions:
- storage_zfs_remove_snapshots
- container_host_shutdown_timeout
- container_stop_priority
- container_syscall_filtering
- auth_pki
- container_last_used_at
- etag
- patch
- usb_devices
- https_allowed_credentials
- image_compression_algorithm
- directory_manipulation
- container_cpu_time
- storage_zfs_use_refquota
- storage_lvm_mount_options
- network
- profile_usedby
- container_push
- container_exec_recording
- certificate_update
- container_exec_signal_handling
- gpu_devices
- container_image_properties
- migration_progress
- id_map
- network_firewall_filtering
- network_routes
- storage
- file_delete
- file_append
- network_dhcp_expiry
- storage_lvm_vg_rename
- storage_lvm_thinpool_rename
- network_vlan
- image_create_aliases
- container_stateless_copy
- container_only_migration
- storage_zfs_clone_copy
- unix_device_rename
- storage_lvm_use_thinpool
- storage_rsync_bwlimit
- network_vxlan_interface
- storage_btrfs_mount_options
- entity_description
- image_force_refresh
- storage_lvm_lv_resizing
- id_map_base
- file_symlinks
- container_push_target
- network_vlan_physical
- storage_images_delete
- container_edit_metadata
- container_snapshot_stateful_migration
- storage_driver_ceph
- storage_ceph_user_name
- resource_limits
- storage_volatile_initial_source
- storage_ceph_force_osd_reuse
- storage_block_filesystem_btrfs
- resources
- kernel_limits
- storage_api_volume_rename
- macaroon_authentication
- network_sriov
- console
- restrict_devlxd
- migration_pre_copy
- infiniband
- maas_network
- devlxd_events
- proxy
- network_dhcp_gateway
- file_get_symlink
- network_leases
- unix_device_hotplug
- storage_api_local_volume_handling
- operation_description
- clustering
- event_lifecycle
- storage_api_remote_volume_handling
- nvidia_runtime
- container_mount_propagation
- container_backup
- devlxd_images
- container_local_cross_pool_handling
- proxy_unix
- proxy_udp
- clustering_join
- proxy_tcp_udp_multi_port_handling
- network_state
- proxy_unix_dac_properties
- container_protection_delete
- unix_priv_drop
- pprof_http
- proxy_haproxy_protocol
- network_hwaddr
- proxy_nat
- network_nat_order
- container_full
- candid_authentication
- backup_compression
- candid_config
- nvidia_runtime_config
- storage_api_volume_snapshots
- storage_unmapped
- projects
- candid_config_key
- network_vxlan_ttl
- container_incremental_copy
- usb_optional_vendorid
- snapshot_scheduling
- container_copy_project
- clustering_server_address
- clustering_image_replication
- container_protection_shift
- snapshot_expiry
- container_backup_override_pool
- snapshot_expiry_creation
- network_leases_location
- resources_cpu_socket
- resources_gpu
- resources_numa
- kernel_features
- id_map_current
- event_location
- storage_api_remote_volume_snapshots
- network_nat_address
- container_nic_routes
- rbac
- cluster_internal_copy
- seccomp_notify
- lxc_features
- container_nic_ipvlan
- network_vlan_sriov
- storage_cephfs
- container_nic_ipfilter
- resources_v2
- container_exec_user_group_cwd
- container_syscall_intercept
- container_disk_shift
- storage_shifted
- resources_infiniband
- daemon_storage
- instances
- image_types
- resources_disk_sata
- clustering_roles
- images_expiry
- resources_network_firmware
- backup_compression_algorithm
- ceph_data_pool_name
- container_syscall_intercept_mount
- compression_squashfs
- container_raw_mount
- container_nic_routed
- container_syscall_intercept_mount_fuse
- container_disk_ceph
- virtual-machines
- image_profiles
- clustering_architecture
- resources_disk_id
- storage_lvm_stripes
- vm_boot_priority
- unix_hotplug_devices
- api_filtering
- instance_nic_network
- clustering_sizing
- firewall_driver
- projects_limits
- container_syscall_intercept_hugetlbfs
- limits_hugepages
- container_nic_routed_gateway
- projects_restrictions
- custom_volume_snapshot_expiry
- volume_snapshot_scheduling
- trust_ca_certificates
- snapshot_disk_usage
- clustering_edit_roles
- container_nic_routed_host_address
- container_nic_ipvlan_gateway
- resources_usb_pci
- resources_cpu_threads_numa
- resources_cpu_core_die
- api_os
- resources_system
- usedby_consistency
- resources_gpu_mdev
- console_vga_type
- projects_limits_disk
- storage_rsync_compression
- gpu_mdev
- resources_pci_iommu
- resources_network_usb
- resources_disk_address
- network_state_vlan
- gpu_sriov
api_status: stable
api_version: "1.0"
auth: trusted
public: false
auth_methods:
- tls
environment:
addresses: []
architectures:
- x86_64
- i686
certificate: |
<DELETED>
certificate_fingerprint: d1230bd32787d3bf16b05471c277f37d51119c2e5c5d05a29db7b8a3acb6a16f
driver: lxc
driver_version: 4.0.2
firewall: xtables
kernel: Linux
kernel_architecture: x86_64
kernel_features:
netnsid_getifaddrs: "true"
seccomp_listener: "true"
seccomp_listener_continue: "true"
shiftfs: "true"
uevent_injection: "true"
unpriv_fscaps: "false"
kernel_version: 5.8.0-43-generic
lxc_features:
cgroup2: "true"
devpts_fd: "false"
mount_injection_file: "true"
network_gateway_device_route: "true"
network_ipvlan: "true"
network_l2proxy: "true"
network_phys_macvlan_mtu: "true"
network_veth_router: "true"
pidfd: "false"
seccomp_allow_deny_syntax: "false"
seccomp_notify: "true"
seccomp_proxy_send_notify_fd: "false"
os_name: Ubuntu
os_version: "20.04"
project: default
server: lxd
server_clustered: false
server_name: cnt1
server_pid: 2964
server_version: 4.0.5
storage: dir
storage_version: "1"
And default profile for nested containers:
root@cnt1:~# lxc profile show default
config:
security.nesting: "true"
security.privileged: "true"
description: Default LXD profile
devices:
eth0:
name: eth0
network: lxdbr0
type: nic
root:
path: /
pool: default
type: disk
name: default
used_by: []
And allowed subuid/subgid:
root@cnt1:~# ls -1 /etc/sub??? | xargs -n 1 -t cat
cat /etc/subgid
ubuntu:100000:65536
lxd:165536:65536
root:165536:65536
cat /etc/subuid
ubuntu:100000:65536
lxd:165536:65536
root:165536:65536
There is definitely something that I missed. The main question is - what? Why does the basic operation of unpacking go totally wrong? 0_o