Delete fails with directory not empty error. LXD 3.0.3


(Silentphantom62) #1
$ lxc delete c1
Error: remove /var/lib/lxd/storage-pools/data/containers/c1: directory not empty

When I head over to that path, it has a backup.yaml file. Once I delete it, I am able to delete the container.

root@lxdhost:/var/lib/lxd/storage-pools/data/containers/c1# ls -la
total 12
drwx--x--x 2 root root 4096 Mar  5 14:02 .
drwx--x--x 3 root root 4096 Mar  5 14:05 ..
-r-------- 1 root root 2480 Mar  5 14:02 backup.yaml

Why does this happen?

$ lxc info --show-log c1
Name: c1
Remote: unix://
Architecture: x86_64
Created: 2019/03/05 18:53 UTC
Status: Stopped
Type: persistent
Profiles: default

Log:

lxc c1 20190305185404.738 WARN     conf - conf.c:lxc_setup_devpts:1616 - Invalid argument - Failed to unmount old devpts instance
lxc c1 20190305185954.319 WARN     network - network.c:lxc_delete_network_priv:2589 - Operation not permitted - Failed to remove interface "eth0" with index 43

This is the host setup:

$ lxd --version
3.0.3

$ uname -r
4.15.0-46-generic

$ cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=18.04
DISTRIB_CODENAME=bionic
DISTRIB_DESCRIPTION="Ubuntu 18.04.2 LTS"

LXC monitor log:

$ lxc monitor
metadata:
  context: {}
  level: dbug
  message: 'New event listener: 1ba6972a-0982-45c8-93bd-aa42a5d5f063'
timestamp: "2019-03-05T11:35:55.69848348-08:00"
type: logging


metadata:
  context:
    ip: '@'
    method: GET
    url: /1.0
  level: dbug
  message: handling
timestamp: "2019-03-05T11:35:59.835248242-08:00"
type: logging


metadata:
  context:
    ip: '@'
    method: GET
    url: /1.0/containers/c1
  level: dbug
  message: handling
timestamp: "2019-03-05T11:35:59.839415334-08:00"
type: logging


metadata:
  context: {}
  level: dbug
  message: 'New event listener: b18bcb92-e704-4e60-8c56-f9acf34bb711'
timestamp: "2019-03-05T11:35:59.84578629-08:00"
type: logging


metadata:
  context:
    ip: '@'
    method: GET
    url: /1.0/events?project=
  level: dbug
  message: handling
timestamp: "2019-03-05T11:35:59.845636004-08:00"
type: logging


metadata:
  context:
    ip: '@'
    method: DELETE
    url: /1.0/containers/c1
  level: dbug
  message: handling
timestamp: "2019-03-05T11:35:59.847069719-08:00"
type: logging


metadata:
  context: {}
  level: dbug
  message: 'New task operation: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12'
timestamp: "2019-03-05T11:35:59.852620721-08:00"
type: logging


metadata:
  context: {}
  level: dbug
  message: 'Started task operation: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12'
timestamp: "2019-03-05T11:35:59.852732104-08:00"
type: logging


metadata:
  class: task
  created_at: "2019-03-05T11:35:59.850553435-08:00"
  description: Deleting container
  err: ""
  id: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12
  may_cancel: false
  metadata: null
  resources:
    containers:
    - /1.0/containers/c1
  status: Pending
  status_code: 105
  updated_at: "2019-03-05T11:35:59.850553435-08:00"
timestamp: "2019-03-05T11:35:59.852672432-08:00"
type: operation


metadata:
  context:
    created: 2019-03-05 10:53:54 -0800 PST
    ephemeral: "false"
    name: c1
    used: 2019-03-05 10:54:04.42581804 -0800 PST
  level: info
  message: Deleting container
timestamp: "2019-03-05T11:35:59.852911648-08:00"
type: logging


metadata:
  class: task
  created_at: "2019-03-05T11:35:59.850553435-08:00"
  description: Deleting container
  err: ""
  id: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12
  may_cancel: false
  metadata: null
  resources:
    containers:
    - /1.0/containers/c1
  status: Running
  status_code: 103
  updated_at: "2019-03-05T11:35:59.850553435-08:00"
timestamp: "2019-03-05T11:35:59.852755874-08:00"
type: operation


metadata:
  context:
    ip: '@'
    method: GET
    url: /1.0/operations/bfbd9793-54f7-49f6-b0dd-eaee65c6cf12
  level: dbug
  message: handling
timestamp: "2019-03-05T11:35:59.854309488-08:00"
type: logging


metadata:
  context: {}
  level: dbug
  message: Deleting ZFS storage volume for container "c1" on storage
    pool "data"
timestamp: "2019-03-05T11:35:59.862185234-08:00"
type: logging


metadata:
  context:
    err: 'remove /var/lib/lxd/storage-pools/data/containers/c1:
      directory not empty'
    name: c1
  level: eror
  message: Failed deleting container storage
timestamp: "2019-03-05T11:35:59.867088703-08:00"
type: logging


metadata:
  class: task
  created_at: "2019-03-05T11:35:59.850553435-08:00"
  description: Deleting container
  err: 'remove /var/lib/lxd/storage-pools/data/containers/c1: directory
    not empty'
  id: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12
  may_cancel: false
  metadata: null
  resources:
    containers:
    - /1.0/containers/c1
  status: Failure
  status_code: 400
  updated_at: "2019-03-05T11:35:59.850553435-08:00"
timestamp: "2019-03-05T11:35:59.867219824-08:00"
type: operation


metadata:
  context: {}
  level: dbug
  message: 'Failure for task operation: bfbd9793-54f7-49f6-b0dd-eaee65c6cf12: remove
    /var/lib/lxd/storage-pools/data/containers/c1: directory not
    empty'
timestamp: "2019-03-05T11:35:59.867167805-08:00"
type: logging


^C

@stgraber I am unable to reproduce this at will, but it often comes up when we run our automated tests. Any input to help debug this?


(Silentphantom62) #3

@stgraber It happened again. Any direction from you will be much appreciated. I’m having to hold off on rolling LXD 3.0.3 out to our Prod setup because of this.


(Silentphantom62) #4

Opened an issue as well. https://github.com/lxc/lxd/issues/5553


#5

It seems you are deleting a container immediately after creating it (judging from the log dates).
How about letting it cool down a bit? Maybe there is still something open in the system after the container is officially stopped. Stop the container, wait 10 s, then delete the container. It may be worth a try.


(Silentphantom62) #6

@gpatel-fr that’s true. I create about 20 containers and then delete them all one after another in the cleanup once the tests finish. It probably tries to delete the most recently created container first. I run a test suite every time I deploy a change, creating and destroying about 25 containers back to back. This helps me ascertain that I am not pushing any new change / middleware change that will break in production.

I have raised an issue, and @stgraber has already added a commit to fix it.
https://github.com/lxc/lxd/issues/5553