Best method to mount a big dataset into a privileged container?

Hello,

Ubuntu 16.04
LXD 3.5
ZFS 0.7.9
privileged container

I need to mount almost 30 TB of small files into a container.
The ZFS dataset is mounted on the host.

I add the dataset as a disk device to the container:
lxc config device add <container> <dev> disk path=<> source=<>
and then start the container.

The start operation runs for days and then fails with:
Error: Put http://unix.socket/1.0/containers/<name>/state: EOF

I changed the snap refresh.time setting, but the result is the same: the container does not start.

What is the preferred method to pass such a large volume of small files to a container?

Thanks.

That should be fine, the error you’re getting is very likely unrelated.
What do you have in /var/snap/lxd/common/lxd/logs/lxd.log?

Hi,

When the start fails, I don’t see any errors in the log.
Today there was another failure, I suppose because of the version upgrade.

3.4

lvl=info msg="LXD 3.4 is starting in normal mode" path=/var/snap/lxd/common/lxd t=2018-09-12T21:04:23+0300
lvl=info msg="Kernel uid/gid map:" t=2018-09-12T21:04:23+0300
lvl=info msg=" - u 0 0 4294967295" t=2018-09-12T21:04:23+0300
lvl=info msg=" - g 0 0 4294967295" t=2018-09-12T21:04:23+0300
lvl=info msg="Configured LXD uid/gid map:" t=2018-09-12T21:04:23+0300
lvl=info msg=" - u 0 1000000 1000000000" t=2018-09-12T21:04:23+0300
lvl=info msg=" - g 0 1000000 1000000000" t=2018-09-12T21:04:23+0300
lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-12T21:04:23+0300
lvl=info msg="Initializing local database" t=2018-09-12T21:04:23+0300
lvl=info msg="Initializing database gateway" t=2018-09-12T21:04:23+0300
address= id=1 lvl=info msg="Start database node" t=2018-09-12T21:04:23+0300
lvl=info msg="Raft: Restored from snapshot 1-219718-1534595722924" t=2018-09-12T21:04:23+0300
lvl=info msg="Raft: Initial configuration (index=1): [{Suffrage:Voter ID:1 Address:0}]" t=2018-09-12T21:04:23+0300
lvl=info msg="Raft: Node at 0 [Leader] entering Leader state" t=2018-09-12T21:04:23+0300
lvl=info msg="Dqlite: starting event loop" t=2018-09-12T21:04:23+0300
lvl=info msg="LXD isn't socket activated" t=2018-09-12T21:04:23+0300
lvl=info msg="Starting /dev/lxd handler:" t=2018-09-12T21:04:23+0300
lvl=info msg=" - binding devlxd socket" socket=/var/snap/lxd/common/lxd/devlxd/sock t=2018-09-12T21:04:23+0300
lvl=info msg="REST API daemon:" t=2018-09-12T21:04:23+0300
lvl=info msg=" - binding Unix socket" socket=/var/snap/lxd/common/lxd/unix.socket t=2018-09-12T21:04:23+0300
lvl=info msg=" - binding TCP socket" socket=[::]:8443 t=2018-09-12T21:04:23+0300
lvl=info msg="Initializing global database" t=2018-09-12T21:04:23+0300
lvl=info msg="Dqlite: handling new connection (fd=21)" t=2018-09-12T21:04:23+0300
lvl=info msg="Dqlite: connected address=0 attempt=0" t=2018-09-12T21:04:23+0300
lvl=info msg="Initializing storage pools" t=2018-09-12T21:04:23+0300
lvl=info msg="Initializing networks" t=2018-09-12T21:04:23+0300
lvl=info msg="Pruning leftover image files" t=2018-09-12T21:04:23+0300
lvl=info msg="Done pruning leftover image files" t=2018-09-12T21:04:23+0300
lvl=info msg="Loading configuration" t=2018-09-12T21:04:23+0300
lvl=info msg="Connected to MAAS controller" t=2018-09-12T21:04:23+0300
lvl=info msg="Pruning expired images" t=2018-09-12T21:04:23+0300
lvl=info msg="Done pruning expired images" t=2018-09-12T21:04:23+0300
lvl=info msg="Expiring log files" t=2018-09-12T21:04:23+0300
lvl=info msg="Updating instance types" t=2018-09-12T21:04:23+0300
lvl=info msg="Updating images" t=2018-09-12T21:04:23+0300
lvl=info msg="Done updating images" t=2018-09-12T21:04:23+0300
lvl=info msg="Done expiring log files" t=2018-09-12T21:04:23+0300
lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-12T21:04:23+0300
lvl=info msg="Done updating instance types" t=2018-09-12T21:04:27+0300
lvl=info msg="Updating images" t=2018-09-13T03:04:23+0300
lvl=info msg="Done updating images" t=2018-09-13T03:04:23+0300
lvl=info msg="Updating images" t=2018-09-13T09:04:23+0300
lvl=info msg="Done updating images" t=2018-09-13T09:04:23+0300
lvl=info msg="Updating images" t=2018-09-13T15:04:23+0300
lvl=info msg="Done updating images" t=2018-09-13T15:04:23+0300
lvl=info msg="Pruning expired images" t=2018-09-13T21:04:23+0300
lvl=info msg="Done pruning expired images" t=2018-09-13T21:04:23+0300
lvl=info msg="Expiring log files" t=2018-09-13T21:04:23+0300
lvl=info msg="Done expiring log files" t=2018-09-13T21:04:23+0300
lvl=info msg="Updating images" t=2018-09-13T21:04:23+0300
lvl=info msg="Done updating images" t=2018-09-13T21:04:23+0300
lvl=info msg="Updating instance types" t=2018-09-13T21:04:27+0300
lvl=info msg="Done updating instance types" t=2018-09-13T21:04:29+0300
lvl=info msg="Updating images" t=2018-09-14T03:04:23+0300
lvl=info msg="Done updating images" t=2018-09-14T03:04:23+0300
lvl=info msg="Updating images" t=2018-09-14T09:04:23+0300
lvl=info msg="Done updating images" t=2018-09-14T09:04:23+0300
lvl=info msg="Updating images" t=2018-09-14T15:04:23+0300
lvl=info msg="Done updating images" t=2018-09-14T15:04:23+0300
lvl=info msg="Pruning expired images" t=2018-09-14T21:04:23+0300
lvl=info msg="Done pruning expired images" t=2018-09-14T21:04:23+0300
lvl=info msg="Expiring log files" t=2018-09-14T21:04:23+0300
lvl=info msg="Done expiring log files" t=2018-09-14T21:04:23+0300
lvl=info msg="Updating images" t=2018-09-14T21:04:23+0300
lvl=info msg="Done updating images" t=2018-09-14T21:04:23+0300
lvl=info msg="Updating instance types" t=2018-09-14T21:04:29+0300
lvl=info msg="Done updating instance types" t=2018-09-14T21:04:32+0300
lvl=info msg="Updating images" t=2018-09-15T03:04:23+0300
lvl=info msg="Done updating images" t=2018-09-15T03:04:23+0300
lvl=info msg="Updating images" t=2018-09-15T09:04:23+0300
lvl=info msg="Done updating images" t=2018-09-15T09:04:23+0300
lvl=info msg="Updating images" t=2018-09-15T15:04:23+0300
lvl=info msg="Done updating images" t=2018-09-15T15:04:23+0300
lvl=info msg="Pruning expired images" t=2018-09-15T21:04:23+0300
lvl=info msg="Done pruning expired images" t=2018-09-15T21:04:23+0300
lvl=info msg="Expiring log files" t=2018-09-15T21:04:23+0300
lvl=info msg="Done expiring log files" t=2018-09-15T21:04:23+0300
lvl=info msg="Updating images" t=2018-09-15T21:04:23+0300
lvl=info msg="Done updating images" t=2018-09-15T21:04:23+0300
lvl=info msg="Updating instance types" t=2018-09-15T21:04:32+0300
lvl=info msg="Done updating instance types" t=2018-09-15T21:04:34+0300
lvl=info msg="Updating images" t=2018-09-16T03:04:23+0300
lvl=info msg="Done updating images" t=2018-09-16T03:04:23+0300
lvl=info msg="Updating images" t=2018-09-16T09:04:23+0300
lvl=info msg="Done updating images" t=2018-09-16T09:04:23+0300
lvl=info msg="Updating images" t=2018-09-16T15:04:23+0300
lvl=info msg="Done updating images" t=2018-09-16T15:04:23+0300
lvl=info msg="Pruning expired images" t=2018-09-16T21:04:23+0300
lvl=info msg="Done pruning expired images" t=2018-09-16T21:04:23+0300
lvl=info msg="Expiring log files" t=2018-09-16T21:04:23+0300
lvl=info msg="Done expiring log files" t=2018-09-16T21:04:23+0300
lvl=info msg="Updating images" t=2018-09-16T21:04:23+0300
lvl=info msg="Done updating images" t=2018-09-16T21:04:23+0300
lvl=info msg="Updating instance types" t=2018-09-16T21:04:34+0300
lvl=info msg="Done updating instance types" t=2018-09-16T21:04:37+0300
lvl=info msg="Updating images" t=2018-09-17T03:04:23+0300
lvl=info msg="Done updating images" t=2018-09-17T03:04:23+0300
lvl=info msg="Updating images" t=2018-09-17T09:04:23+0300
lvl=info msg="Done updating images" t=2018-09-17T09:04:23+0300

3.5

lvl=info msg="LXD 3.5 is starting in normal mode" path=/var/snap/lxd/common/lxd t=2018-09-17T14:19:22+0300
lvl=info msg="Kernel uid/gid map:" t=2018-09-17T14:19:22+0300
lvl=info msg=" - u 0 0 4294967295" t=2018-09-17T14:19:22+0300
lvl=info msg=" - g 0 0 4294967295" t=2018-09-17T14:19:22+0300
lvl=info msg="Configured LXD uid/gid map:" t=2018-09-17T14:19:22+0300
lvl=info msg=" - u 0 1000000 1000000000" t=2018-09-17T14:19:22+0300
lvl=info msg=" - g 0 1000000 1000000000" t=2018-09-17T14:19:22+0300
lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-17T14:19:22+0300
lvl=info msg="Initializing local database" t=2018-09-17T14:19:22+0300
lvl=info msg="Initializing database gateway" t=2018-09-17T14:19:22+0300
address= id=1 lvl=info msg="Start database node" t=2018-09-17T14:19:22+0300
lvl=info msg="Raft: Restored from snapshot 1-219718-1534595722924" t=2018-09-17T14:19:22+0300
lvl=info msg="Raft: Initial configuration (index=1): [{Suffrage:Voter ID:1 Address:0}]" t=2018-09-17T14:19:22+0300
lvl=info msg="Raft: Node at 0 [Leader] entering Leader state" t=2018-09-17T14:19:22+0300
lvl=info msg="Dqlite: starting event loop" t=2018-09-17T14:19:22+0300
lvl=info msg="LXD isn't socket activated" t=2018-09-17T14:19:22+0300
lvl=info msg="Starting /dev/lxd handler:" t=2018-09-17T14:19:22+0300
lvl=info msg=" - binding devlxd socket" socket=/var/snap/lxd/common/lxd/devlxd/sock t=2018-09-17T14:19:22+0300
lvl=info msg="REST API daemon:" t=2018-09-17T14:19:22+0300
lvl=info msg=" - binding Unix socket" socket=/var/snap/lxd/common/lxd/unix.socket t=2018-09-17T14:19:22+0300
lvl=info msg=" - binding TCP socket" socket=[::]:8443 t=2018-09-17T14:19:22+0300
lvl=info msg="Initializing global database" t=2018-09-17T14:19:22+0300
lvl=info msg="Dqlite: handling new connection (fd=21)" t=2018-09-17T14:19:22+0300
lvl=info msg="Dqlite: connected address=0 attempt=0" t=2018-09-17T14:19:22+0300
lvl=info msg="Updating the LXD global schema. Backup made as \"global.bak\"" t=2018-09-17T14:19:22+0300
lvl=info msg="Initializing storage pools" t=2018-09-17T14:19:22+0300
lvl=info msg="Applying patch: move_backups" t=2018-09-17T14:19:22+0300
lvl=info msg="Initializing networks" t=2018-09-17T14:19:22+0300
lvl=info msg="Pruning leftover image files" t=2018-09-17T14:19:22+0300
lvl=info msg="Done pruning leftover image files" t=2018-09-17T14:19:22+0300
lvl=info msg="Loading configuration" t=2018-09-17T14:19:22+0300
lvl=info msg="Connected to MAAS controller" t=2018-09-17T14:19:22+0300
lvl=info msg="Pruning expired images" t=2018-09-17T14:19:22+0300
lvl=info msg="Done pruning expired images" t=2018-09-17T14:19:22+0300
lvl=info msg="Expiring log files" t=2018-09-17T14:19:22+0300
lvl=info msg="Updating instance types" t=2018-09-17T14:19:22+0300
lvl=info msg="Done expiring log files" t=2018-09-17T14:19:22+0300
lvl=info msg="Updating images" t=2018-09-17T14:19:22+0300
lvl=info msg="Done updating images" t=2018-09-17T14:19:22+0300
lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-17T14:19:22+0300
lvl=info msg="Done updating instance types" t=2018-09-17T14:19:25+0300

I have now started the container again and will wait, possibly for days, for it to finish.

Can you paste the output of journalctl -u snap.lxd.daemon -n 3000?

Ah, I hadn’t looked at the journal. I have shortened the repeated lines.

Aug 27 18:34:07 server systemd[1]: Started Service for snap application lxd.daemon.
Aug 27 18:34:08 server lxd.daemon[16964]: => Preparing the system
Aug 27 18:34:08 server lxd.daemon[16964]: ==> Loading snap configuration
Aug 27 18:34:08 server lxd.daemon[16964]: ==> Setting up mntns symlink (mnt:[4026533001])
Aug 27 18:34:08 server lxd.daemon[16964]: ==> Setting up persistent shmounts path
Aug 27 18:34:08 server lxd.daemon[16964]: ====> Making LXD shmounts use the persistent path
Aug 27 18:34:08 server lxd.daemon[16964]: ====> Making LXCFS use the persistent path
Aug 27 18:34:08 server lxd.daemon[16964]: ==> Setting up kmod wrapper
Aug 27 18:34:08 server lxd.daemon[16964]: ==> Preparing /boot
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Preparing a clean copy of /run
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Preparing a clean copy of /etc
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Setting up ceph configuration
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Setting up LVM configuration
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Rotating logs
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Setting up ZFS (0.7)
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Escaping the systemd cgroups
Aug 27 18:34:09 server lxd.daemon[16964]: ==> Escaping the systemd process resource limits
Aug 27 18:34:09 server lxd.daemon[16964]: => Starting LXCFS
Aug 27 18:34:09 server lxd.daemon[16964]: => Starting LXD
Aug 27 18:34:09 server lxd.daemon[16964]: mount namespace: 5
Aug 27 18:34:09 server lxd.daemon[16964]: hierarchies:
Aug 27 18:34:09 server lxd.daemon[16964]:   0: fd:   6: memory
Aug 27 18:34:09 server lxd.daemon[16964]:   1: fd:   7: hugetlb
Aug 27 18:34:09 server lxd.daemon[16964]:   2: fd:   8: cpuset
Aug 27 18:34:09 server lxd.daemon[16964]:   3: fd:   9: devices
Aug 27 18:34:09 server lxd.daemon[16964]:   4: fd:  10: perf_event
Aug 27 18:34:09 server lxd.daemon[16964]:   5: fd:  11: cpu,cpuacct
Aug 27 18:34:09 server lxd.daemon[16964]:   6: fd:  12: net_cls,net_prio
Aug 27 18:34:09 server lxd.daemon[16964]:   7: fd:  13: freezer
Aug 27 18:34:09 server lxd.daemon[16964]:   8: fd:  14: pids
Aug 27 18:34:09 server lxd.daemon[16964]:   9: fd:  15: blkio
Aug 27 18:34:09 server lxd.daemon[16964]:  10: fd:  16: name=systemd
Aug 27 18:34:10 server lxd.daemon[16964]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-08-27T18:34:10+0300
Aug 27 18:34:12 server lxd.daemon[16964]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-08-27T18:34:12+0300
Aug 27 18:34:13 server lxd.daemon[16964]: => LXD is ready
Sep 03 18:34:33 server lxd.daemon[16964]: lvl=warn msg="Failed to update instance types: Get https://images.linuxcontainers.org/meta/instance-types/.yaml: lookup images.linuxcontainers.org on
Sep 10 11:24:00 server lxd.daemon[16964]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-10T11:24:00+0300
Sep 10 11:26:41 server lxd.daemon[16964]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-10T11:26:41+0300
Sep 10 11:27:17 server lxd.daemon[16964]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-10T11:27:17+0300
Sep 11 09:49:09 server systemd[1]: Stopping Service for snap application lxd.daemon...
Sep 11 09:49:10 server lxd.daemon[8893]: => Stop reason is: snap refresh
Sep 11 09:49:10 server lxd.daemon[8893]: => Stopping LXD
Sep 11 09:49:10 server lxd.daemon[16964]: => LXD exited cleanly
Sep 11 09:49:11 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 11 09:49:21 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 11 09:49:21 server lxd.daemon[10636]: => Preparing the system
Sep 11 09:49:21 server lxd.daemon[10636]: ==> Loading snap configuration
Sep 11 09:49:21 server lxd.daemon[10636]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 11 09:49:21 server lxd.daemon[10636]: ==> Setting up persistent shmounts path
Sep 11 09:49:22 server lxd.daemon[10636]: ====> Making LXD shmounts use the persistent path
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Setting up kmod wrapper
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Preparing /boot
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Preparing a clean copy of /run
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Preparing a clean copy of /etc
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Setting up ceph configuration
Sep 11 09:49:22 server lxd.daemon[10636]: ==> Setting up LVM configuration
Sep 11 09:49:23 server lxd.daemon[10636]: ==> Rotating logs
Sep 11 09:49:23 server lxd.daemon[10636]: ==> Setting up ZFS (0.7)
Sep 11 09:49:23 server lxd.daemon[10636]: ==> Escaping the systemd cgroups
Sep 11 09:49:23 server lxd.daemon[10636]: ==> Escaping the systemd process resource limits
Sep 11 09:49:23 server lxd.daemon[10636]: => Re-using existing LXCFS
Sep 11 09:49:23 server lxd.daemon[10636]: => Starting LXD
Sep 11 09:49:24 server lxd.daemon[10636]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-11T09:49:24+0300
Sep 11 09:49:24 server lxd.daemon[10636]: ERROR: can't write page 5 of file db.bin which has only 3 pages
Sep 11 09:49:24 server lxd.daemon[10636]: panic: checkpoint: disk I/O error
Sep 11 09:49:24 server lxd.daemon[10636]: trace:
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26865: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 unregister txn
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26867: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 done
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26868: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 start
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26872: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 unregister txn
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26872: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 done
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26873: fsm: term=1 index=220346 cmd=frames txn=220345 pages=3 commit=1 start
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26878: fsm: term=1 index=220346 cmd=frames txn=220345 pages=3 commit=1 unregister txn
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.26878: fsm: term=1 index=220346 cmd=frames txn=220345 pages=3 commit=1 done
.................
.................
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30382: fsm: term=1 index=220431 cmd=frames txn=220430 pages=2 commit=1 start
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30386: fsm: term=1 index=220431 cmd=frames txn=220430 pages=2 commit=1 unregister txn
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30386: fsm: term=1 index=220431 cmd=frames txn=220430 pages=2 commit=1 done
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30387: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 start
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30393: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 unregister txn
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30393: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 done
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30394: fsm: term=1 index=220433 cmd=checkpoint file=db.bin start
Sep 11 09:49:24 server lxd.daemon[10636]: 2018-09-11 09:49:24.30412: fsm: term=1 index=220433 cmd=checkpoint failed: checkpoint: disk I/O error
Sep 11 09:49:24 server lxd.daemon[10636]: goroutine 70 [running]:
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/CanonicalLtd/go-dqlite/internal/trace.(*Tracer).Panic(0xc00032a280, 0x110e0d1, 0x2, 0xc00080bd68, 0x1, 0x1)
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/trace/tracer.go:59 +0x12a
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/CanonicalLtd/go-dqlite/internal/replication.(*FSM).Apply(0xc000370e00, 0xc000374c30, 0x0, 0x0)
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/replication/fsm.go:84 +0x136
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/hashicorp/raft.(*Raft).runFSM.func1(0xc0001d69f0)
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:57 +0x155
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/hashicorp/raft.(*Raft).runFSM(0xc00022eb00)
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:120 +0x2ef
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/hashicorp/raft.(*Raft).runFSM-fm()
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/api.go:506 +0x2a
Sep 11 09:49:24 server lxd.daemon[10636]: github.com/hashicorp/raft.(*raftState).goFunc.func1(0xc00022eb00, 0xc000322750)
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:146 +0x53
Sep 11 09:49:24 server lxd.daemon[10636]: created by github.com/hashicorp/raft.(*raftState).goFunc
Sep 11 09:49:24 server lxd.daemon[10636]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:144 +0x66
Sep 11 09:49:25 server lxd.daemon[10636]: => LXD failed to start
Sep 11 09:49:25 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a
Sep 11 09:49:25 server lxd.daemon[11689]: => Stop reason is: crashed
Sep 11 09:49:25 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.
Sep 11 09:49:25 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.
Sep 11 09:49:26 server systemd[1]: snap.lxd.daemon.service: Service hold-off time over, scheduling restart.
Sep 11 09:49:26 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 11 09:49:26 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 11 09:49:26 server lxd.daemon[11820]: => Preparing the system
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Loading snap configuration
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Setting up kmod wrapper
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Preparing /boot
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Preparing a clean copy of /run
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Preparing a clean copy of /etc
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Setting up ceph configuration
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Setting up LVM configuration
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Rotating logs
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Setting up ZFS (0.7)
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Escaping the systemd cgroups
Sep 11 09:49:26 server lxd.daemon[11820]: ==> Escaping the systemd process resource limits
Sep 11 09:49:26 server lxd.daemon[16964]: mount namespace: 7
Sep 11 09:49:26 server lxd.daemon[16964]: hierarchies:
Sep 11 09:49:26 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 11 09:49:26 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 11 09:49:26 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 11 09:49:26 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 11 09:49:26 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 11 09:49:26 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 11 09:49:26 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 11 09:49:26 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 11 09:49:26 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 11 09:49:26 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 11 09:49:26 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 11 09:49:26 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded
Sep 11 09:49:26 server lxd.daemon[11820]: => Re-using existing LXCFS
Sep 11 09:49:26 server lxd.daemon[11820]: => Starting LXD
Sep 11 09:49:26 server lxd.daemon[11820]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-11T09:49:26+0300
Sep 11 09:49:29 server lxd.daemon[16964]: mount namespace: 7
Sep 11 09:49:29 server lxd.daemon[16964]: hierarchies:
Sep 11 09:49:29 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 11 09:49:29 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 11 09:49:29 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 11 09:49:29 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 11 09:49:29 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 11 09:49:29 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 11 09:49:29 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 11 09:49:29 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 11 09:49:29 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 11 09:49:29 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 11 09:49:29 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 11 09:49:29 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded
Sep 11 09:49:29 server lxd.daemon[11820]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-11T09:49:29+0300
Sep 12 21:04:09 server systemd[1]: Stopping Service for snap application lxd.daemon...
Sep 12 21:04:10 server lxd.daemon[9786]: => Stop reason is: snap refresh
Sep 12 21:04:10 server lxd.daemon[9786]: => Stopping LXD
Sep 12 21:04:10 server lxd.daemon[11820]: => LXD failed to start
Sep 12 21:04:10 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a
Sep 12 21:04:11 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 12 21:04:11 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.
Sep 12 21:04:11 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.
Sep 12 21:04:19 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 12 21:04:19 server lxd.daemon[10305]: => Preparing the system
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Loading snap configuration
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Setting up kmod wrapper
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Preparing /boot
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Preparing a clean copy of /run
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Preparing a clean copy of /etc
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Setting up ceph configuration
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Setting up LVM configuration
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Rotating logs
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Setting up ZFS (0.7)
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Escaping the systemd cgroups
Sep 12 21:04:19 server lxd.daemon[10305]: ==> Escaping the systemd process resource limits
Sep 12 21:04:19 server lxd.daemon[10305]: => Re-using existing LXCFS
Sep 12 21:04:19 server lxd.daemon[10305]: => Starting LXD
Sep 12 21:04:20 server lxd.daemon[10305]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-12T21:04:20+0300
Sep 12 21:04:20 server lxd.daemon[10305]: panic: checkpoint: database is locked
Sep 12 21:04:20 server lxd.daemon[10305]: trace:
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.85780: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 unregister txn
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.85781: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 done
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.85782: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 start
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.85786: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 unregister txn
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.85786: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 done
.................
.................
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.89769: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 start
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.89774: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 unregister txn
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.89774: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 done
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.89774: fsm: term=1 index=220433 cmd=checkpoint file=db.bin start
Sep 12 21:04:20 server lxd.daemon[10305]: 2018-09-12 21:04:20.89839: fsm: term=1 index=220433 cmd=checkpoint failed: checkpoint: database is locked
Sep 12 21:04:20 server lxd.daemon[10305]: goroutine 90 [running]:
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/CanonicalLtd/go-dqlite/internal/trace.(*Tracer).Panic(0xc000358280, 0x110e0d1, 0x2, 0xc000201d68, 0x1, 0x1)
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/trace/tracer.go:59 +0x12a
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/CanonicalLtd/go-dqlite/internal/replication.(*FSM).Apply(0xc000397020, 0xc000352060, 0x0, 0x0)
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/replication/fsm.go:84 +0x136
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/hashicorp/raft.(*Raft).runFSM.func1(0xc0001e40d0)
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:57 +0x155
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/hashicorp/raft.(*Raft).runFSM(0xc000252840)
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:120 +0x2ef
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/hashicorp/raft.(*Raft).runFSM-fm()
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/api.go:506 +0x2a
Sep 12 21:04:20 server lxd.daemon[10305]: github.com/hashicorp/raft.(*raftState).goFunc.func1(0xc000252840, 0xc0003468c0)
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:146 +0x53
Sep 12 21:04:20 server lxd.daemon[10305]: created by github.com/hashicorp/raft.(*raftState).goFunc
Sep 12 21:04:20 server lxd.daemon[10305]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:144 +0x66
Sep 12 21:04:21 server lxd.daemon[10305]: => LXD failed to start
Sep 12 21:04:21 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a
Sep 12 21:04:22 server lxd.daemon[10521]: => Stop reason is: crashed
Sep 12 21:04:22 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.
Sep 12 21:04:22 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.
Sep 12 21:04:22 server systemd[1]: snap.lxd.daemon.service: Service hold-off time over, scheduling restart.
Sep 12 21:04:22 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 12 21:04:22 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 12 21:04:22 server lxd.daemon[10622]: => Preparing the system
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Loading snap configuration
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Setting up kmod wrapper
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Preparing /boot
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Preparing a clean copy of /run
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Preparing a clean copy of /etc
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Setting up ceph configuration
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Setting up LVM configuration
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Rotating logs
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Setting up ZFS (0.7)
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Escaping the systemd cgroups
Sep 12 21:04:22 server lxd.daemon[10622]: ==> Escaping the systemd process resource limits
Sep 12 21:04:22 server lxd.daemon[16964]: mount namespace: 7
Sep 12 21:04:22 server lxd.daemon[16964]: hierarchies:
Sep 12 21:04:22 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 12 21:04:22 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 12 21:04:22 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 12 21:04:22 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 12 21:04:22 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 12 21:04:22 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 12 21:04:22 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 12 21:04:22 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 12 21:04:22 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 12 21:04:22 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 12 21:04:22 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 12 21:04:22 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded
Sep 12 21:04:22 server lxd.daemon[10622]: => Re-using existing LXCFS
Sep 12 21:04:23 server lxd.daemon[10622]: => Starting LXD
Sep 12 21:04:23 server lxd.daemon[10622]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-12T21:04:23+0300
Sep 12 21:04:23 server lxd.daemon[10622]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-12T21:04:23+0300
Sep 12 21:15:01 server lxd.daemon[16964]: mount namespace: 7
Sep 12 21:15:01 server lxd.daemon[16964]: hierarchies:
Sep 12 21:15:01 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 12 21:15:01 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 12 21:15:01 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 12 21:15:01 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 12 21:15:01 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 12 21:15:01 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 12 21:15:01 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 12 21:15:01 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 12 21:15:01 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 12 21:15:01 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 12 21:15:01 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 12 21:15:01 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded
Sep 17 14:19:09 server systemd[1]: Stopping Service for snap application lxd.daemon...
Sep 17 14:19:10 server lxd.daemon[21446]: => Stop reason is: snap refresh
Sep 17 14:19:10 server lxd.daemon[21446]: => Stopping LXD
Sep 17 14:19:10 server lxd.daemon[10622]: => LXD failed to start
Sep 17 14:19:10 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a
Sep 17 14:19:11 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 17 14:19:11 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.
Sep 17 14:19:11 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.
Sep 17 14:19:18 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 17 14:19:18 server lxd.daemon[21876]: => Preparing the system
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Loading snap configuration
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Setting up kmod wrapper
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Preparing /boot
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Preparing a clean copy of /run
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Preparing a clean copy of /etc
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Setting up ceph configuration
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Setting up LVM configuration
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Rotating logs
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Setting up ZFS (0.7)
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Escaping the systemd cgroups
Sep 17 14:19:18 server lxd.daemon[21876]: ==> Escaping the systemd process resource limits
Sep 17 14:19:18 server lxd.daemon[21876]: => Re-using existing LXCFS
Sep 17 14:19:18 server lxd.daemon[21876]: => Starting LXD
Sep 17 14:19:19 server lxd.daemon[21876]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-17T14:19:19+0300
Sep 17 14:19:19 server lxd.daemon[21876]: panic: checkpoint: database is locked
Sep 17 14:19:19 server lxd.daemon[21876]: trace:
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.74100: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 unregister txn
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.74101: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 done
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.74102: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 start
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.74107: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 unregister txn
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.74107: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 done
....................
....................
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.80108: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 start
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.80113: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 unregister txn
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.80113: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 done
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.80114: fsm: term=1 index=220433 cmd=checkpoint file=db.bin start
Sep 17 14:19:19 server lxd.daemon[21876]: 2018-09-17 14:19:19.81242: fsm: term=1 index=220433 cmd=checkpoint failed: checkpoint: database is locked
Sep 17 14:19:19 server lxd.daemon[21876]: goroutine 10 [running]:
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/CanonicalLtd/go-dqlite/internal/trace.(*Tracer).Panic(0xc000356280, 0x1108cf1, 0x2, 0xc0006edd68, 0x1, 0x1)
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/trace/tracer.go:59 +0x12a
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/CanonicalLtd/go-dqlite/internal/replication.(*FSM).Apply(0xc000394fc0, 0xc000371890, 0x0, 0x0)
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/replication/fsm.go:84 +0x136
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/hashicorp/raft.(*Raft).runFSM.func1(0xc000444af0)
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:57 +0x155
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/hashicorp/raft.(*Raft).runFSM(0xc00025a840)
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:120 +0x2ef
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/hashicorp/raft.(*Raft).runFSM-fm()
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/api.go:506 +0x2a
Sep 17 14:19:19 server lxd.daemon[21876]: github.com/hashicorp/raft.(*raftState).goFunc.func1(0xc00025a840, 0xc0001d2cd0)
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:146 +0x53
Sep 17 14:19:19 server lxd.daemon[21876]: created by github.com/hashicorp/raft.(*raftState).goFunc
Sep 17 14:19:19 server lxd.daemon[21876]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:144 +0x66
Sep 17 14:19:20 server lxd.daemon[21876]: => LXD failed to start
Sep 17 14:19:20 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a
Sep 17 14:19:21 server lxd.daemon[22207]: => Stop reason is: crashed
Sep 17 14:19:21 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.
Sep 17 14:19:21 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.
Sep 17 14:19:21 server systemd[1]: snap.lxd.daemon.service: Service hold-off time over, scheduling restart.
Sep 17 14:19:21 server systemd[1]: Stopped Service for snap application lxd.daemon.
Sep 17 14:19:21 server systemd[1]: Started Service for snap application lxd.daemon.
Sep 17 14:19:21 server lxd.daemon[22247]: => Preparing the system
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Loading snap configuration
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Setting up mntns symlink (mnt:[4026532809])
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Setting up kmod wrapper
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Preparing /boot
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Preparing a clean copy of /run
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Preparing a clean copy of /etc
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Setting up ceph configuration
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Setting up LVM configuration
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Rotating logs
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Setting up ZFS (0.7)
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Escaping the systemd cgroups
Sep 17 14:19:21 server lxd.daemon[22247]: ==> Escaping the systemd process resource limits
Sep 17 14:19:21 server lxd.daemon[16964]: mount namespace: 7
Sep 17 14:19:21 server lxd.daemon[16964]: hierarchies:
Sep 17 14:19:21 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 17 14:19:21 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 17 14:19:21 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 17 14:19:21 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 17 14:19:21 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 17 14:19:21 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 17 14:19:21 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 17 14:19:21 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 17 14:19:21 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 17 14:19:21 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 17 14:19:21 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 17 14:19:21 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded
Sep 17 14:19:21 server lxd.daemon[22247]: => Re-using existing LXCFS
Sep 17 14:19:22 server lxd.daemon[22247]: => Starting LXD
Sep 17 14:19:22 server lxd.daemon[22247]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-17T14:19:22+0300
Sep 17 14:19:22 server lxd.daemon[22247]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-17T14:19:22+0300
Sep 17 14:30:01 server lxd.daemon[16964]: mount namespace: 7
Sep 17 14:30:01 server lxd.daemon[16964]: hierarchies:
Sep 17 14:30:01 server lxd.daemon[16964]:   0: fd:   8: memory
Sep 17 14:30:01 server lxd.daemon[16964]:   1: fd:   9: hugetlb
Sep 17 14:30:01 server lxd.daemon[16964]:   2: fd:  10: cpuset
Sep 17 14:30:01 server lxd.daemon[16964]:   3: fd:  11: devices
Sep 17 14:30:01 server lxd.daemon[16964]:   4: fd:  12: perf_event
Sep 17 14:30:01 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct
Sep 17 14:30:01 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio
Sep 17 14:30:01 server lxd.daemon[16964]:   7: fd:  15: freezer
Sep 17 14:30:01 server lxd.daemon[16964]:   8: fd:  16: pids
Sep 17 14:30:01 server lxd.daemon[16964]:   9: fd:  17: blkio
Sep 17 14:30:01 server lxd.daemon[16964]:  10: fd:  18: name=systemd
Sep 17 14:30:01 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded

The start failed again today:

Sep 17 14:30:01 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded                                                                                                                                  
Sep 18 14:49:08 server systemd[1]: Stopping Service for snap application lxd.daemon...                                                                                                                              
Sep 18 14:49:09 server lxd.daemon[50923]: => Stop reason is: snap refresh                                                                                                                                           
Sep 18 14:49:09 server lxd.daemon[50923]: => Stopping LXD                                                                                                                                                           
Sep 18 14:49:09 server lxd.daemon[22247]: => LXD failed to start                                                                                                                                                    
Sep 18 14:49:09 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a                                                                                                        
Sep 18 14:49:10 server systemd[1]: Stopped Service for snap application lxd.daemon.                                                                                                                                 
Sep 18 14:49:10 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.                                                                                                                              
Sep 18 14:49:10 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.                                                                                                                         
Sep 18 14:49:17 server systemd[1]: Started Service for snap application lxd.daemon.                                                                                                                                 
Sep 18 14:49:17 server lxd.daemon[51551]: => Preparing the system                                                                                                                                                   
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Loading snap configuration                                                                                                                                            
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Setting up mntns symlink (mnt:[4026532809])                                                                                                                           
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Setting up kmod wrapper                                                                                                                                               
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Preparing /boot                                                                                                                                                       
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Preparing a clean copy of /run                                                                                                                                        
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Preparing a clean copy of /etc                                                                                                                                        
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Setting up ceph configuration                                                                                                                                         
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Setting up LVM configuration                                                                                                                                          
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Rotating logs                                                                                                                                                         
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Setting up ZFS (0.7)                                                                                                                                                  
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Escaping the systemd cgroups                                                                                                                                          
Sep 18 14:49:17 server lxd.daemon[51551]: ==> Escaping the systemd process resource limits                                                                                                                          
Sep 18 14:49:17 server lxd.daemon[51551]: => Re-using existing LXCFS                                                                                                                                                
Sep 18 14:49:17 server lxd.daemon[51551]: => Starting LXD                                                                                                                                                           
Sep 18 14:49:18 server lxd.daemon[51551]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-18T14:49:18+0300                                                         
Sep 18 14:49:19 server lxd.daemon[51551]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-18T14:49:19+0300                                                                     
Sep 18 15:00:01 server lxd.daemon[16964]: mount namespace: 7                                                                                                                                                        
Sep 18 15:00:01 server lxd.daemon[16964]: hierarchies:                                                                                                                                                              
Sep 18 15:00:01 server lxd.daemon[16964]:   0: fd:   8: memory                                                                                                                                                      
Sep 18 15:00:01 server lxd.daemon[16964]:   1: fd:   9: hugetlb                                                                                                                                                     
Sep 18 15:00:01 server lxd.daemon[16964]:   2: fd:  10: cpuset                                                                                                                                                      
Sep 18 15:00:01 server lxd.daemon[16964]:   3: fd:  11: devices                                                                                                                                                     
Sep 18 15:00:01 server lxd.daemon[16964]:   4: fd:  12: perf_event                                                                                                                                                  
Sep 18 15:00:01 server lxd.daemon[16964]:   5: fd:  13: cpu,cpuacct                                                                                                                                                 
Sep 18 15:00:01 server lxd.daemon[16964]:   6: fd:  14: net_cls,net_prio                                                                                                                                            
Sep 18 15:00:01 server lxd.daemon[16964]:   7: fd:  15: freezer                                                                                                                                                     
Sep 18 15:00:01 server lxd.daemon[16964]:   8: fd:  16: pids                                                                                                                                                        
Sep 18 15:00:01 server lxd.daemon[16964]:   9: fd:  17: blkio                                                                                                                                                       
Sep 18 15:00:01 server lxd.daemon[16964]:  10: fd:  18: name=systemd                                                                                                                                                
Sep 18 15:00:01 server lxd.daemon[16964]: lxcfs.c: 105: do_reload: lxcfs: reloaded

This happened even though the refresh timer is set:
snap get lxd refresh.timer mon3,23:00-24:00

I have also added refresh.timer for snap core and system.

And LXD isn’t responding? The log suggests that it did in fact start and should have been operational from around 14:49:20.

Thanks Stéphane,
I thought LXD was running OK, because systemctl status shows ‘running’ and lxc was working…
Now, looking more closely at the LXD processes, I see ‘lxd waitready’.
Restarting the daemon didn’t work, so I rebooted the host.
Here’s the log after the reboot:

Sep 18 18:28:56 server systemd[1]: Started Service for snap application lxd.daemon.                                                                                                                                 
Sep 18 18:28:57 server lxd.daemon[7794]: => Preparing the system                                                                                                                                                    
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Loading snap configuration                                                                                                                                             
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Setting up mntns symlink (mnt:[4026533001])                                                                                                                            
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Setting up persistent shmounts path                                                                                                                                    
Sep 18 18:28:58 server lxd.daemon[7794]: ====> Making LXD shmounts use the persistent path                                                                                                                          
Sep 18 18:28:58 server lxd.daemon[7794]: ====> Making LXCFS use the persistent path                                                                                                                                 
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Setting up kmod wrapper                                                                                                                                                
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Preparing /boot                                                                                                                                                        
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Preparing a clean copy of /run                                                                                                                                         
Sep 18 18:28:58 server lxd.daemon[7794]: ==> Preparing a clean copy of /etc                                                                                                                                         
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Setting up ceph configuration                                                                                                                                          
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Setting up LVM configuration                                                                                                                                           
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Rotating logs                                                                                                                                                          
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Setting up ZFS (0.7)                                                                                                                                                   
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Escaping the systemd cgroups                                                                                                                                           
Sep 18 18:28:59 server lxd.daemon[7794]: ==> Escaping the systemd process resource limits                                                                                                                           
Sep 18 18:28:59 server lxd.daemon[7794]: => Starting LXCFS                                                                                                                                                          
Sep 18 18:28:59 server lxd.daemon[7794]: => Starting LXD                                                                                                                                                            
Sep 18 18:28:59 server lxd.daemon[7794]: mount namespace: 5                                                                                                                                                         
Sep 18 18:28:59 server lxd.daemon[7794]: hierarchies:                                                                                                                                                               
Sep 18 18:28:59 server lxd.daemon[7794]:   0: fd:   6: freezer                                                                                                                                                      
Sep 18 18:28:59 server lxd.daemon[7794]:   1: fd:   7: net_cls,net_prio                                                                                                                                             
Sep 18 18:28:59 server lxd.daemon[7794]:   2: fd:   8: memory                                                                                                                                                       
Sep 18 18:28:59 server lxd.daemon[7794]:   3: fd:   9: blkio                                                                                                                                                        
Sep 18 18:28:59 server lxd.daemon[7794]:   4: fd:  10: cpuset                                                                                                                                                       
Sep 18 18:28:59 server lxd.daemon[7794]:   5: fd:  11: hugetlb                                                                                                                                                      
Sep 18 18:28:59 server lxd.daemon[7794]:   6: fd:  12: perf_event                                                                                                                                                   
Sep 18 18:28:59 server lxd.daemon[7794]:   7: fd:  13: pids                                                                                                                                                         
Sep 18 18:28:59 server lxd.daemon[7794]:   8: fd:  14: cpu,cpuacct                                                                                                                                                  
Sep 18 18:28:59 server lxd.daemon[7794]:   9: fd:  15: devices                                                                                                                                                      
Sep 18 18:28:59 server lxd.daemon[7794]:  10: fd:  16: name=systemd                                                                                                                                                 
Sep 18 18:29:00 server lxd.daemon[7794]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-18T18:29:00+0300                                                          
Sep 18 18:29:00 server lxd.daemon[7794]: ERROR: can't write page 59 of file db.bin which has only 54 pages                                                                                                          
Sep 18 18:29:00 server lxd.daemon[7794]: panic: checkpoint: disk I/O error                                                                                                                                          
Sep 18 18:29:00 server lxd.daemon[7794]: trace:                                                                                                                                                                     
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.42704: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 unregister txn                                                                  
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.42705: fsm: term=1 index=220344 cmd=frames txn=220343 pages=2 commit=1 done                                                                            
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.42705: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 start                                                                           
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.42707: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 unregister txn                                                                  
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.42707: fsm: term=1 index=220345 cmd=frames txn=220344 pages=2 commit=1 done                                                                            
...                                                                                                                                                                                                                 
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.45576: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 start                                                                           
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.45579: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 unregister txn                                                                  
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.45579: fsm: term=1 index=220432 cmd=frames txn=220431 pages=4 commit=1 done                                                                            
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.45579: fsm: term=1 index=220433 cmd=checkpoint file=db.bin start                                                                                       
Sep 18 18:29:00 server lxd.daemon[7794]: 2018-09-18 18:29:00.45610: fsm: term=1 index=220433 cmd=checkpoint failed: checkpoint: disk I/O error                                                                      
Sep 18 18:29:00 server lxd.daemon[7794]: goroutine 74 [running]:                                                                                                                                                    
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/CanonicalLtd/go-dqlite/internal/trace.(*Tracer).Panic(0xc00034c280, 0x1109af1, 0x2, 0xc00026bd68, 0x1, 0x1)                                                     
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/trace/tracer.go:59 +0x12a                                                                   
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/CanonicalLtd/go-dqlite/internal/replication.(*FSM).Apply(0xc000388fa0, 0xc0003874d0, 0x0, 0x0)                                                                  
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/CanonicalLtd/go-dqlite/internal/replication/fsm.go:84 +0x136                                                                
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/hashicorp/raft.(*Raft).runFSM.func1(0xc00047ab60)                                                                                                               
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:57 +0x155                           
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/hashicorp/raft.(*Raft).runFSM(0xc000266840)                                                                                                                     
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/fsm.go:120 +0x2ef                                                                                            
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/hashicorp/raft.(*Raft).runFSM-fm()                                                                                                                              
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/api.go:506 +0x2a                                                                                             
Sep 18 18:29:00 server lxd.daemon[7794]: github.com/hashicorp/raft.(*raftState).goFunc.func1(0xc000266840, 0xc000344950)                                                                                            
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:146 +0x53                                                                                           
Sep 18 18:29:00 server lxd.daemon[7794]: created by github.com/hashicorp/raft.(*raftState).goFunc                                                                                                                   
Sep 18 18:29:00 server lxd.daemon[7794]:         /build/lxd/parts/lxd/go/src/github.com/hashicorp/raft/state.go:144 +0x66                                                                                           
Sep 18 18:29:01 server lxd.daemon[7794]: => LXD failed to start                                                                                                                                                     
Sep 18 18:29:01 server systemd[1]: snap.lxd.daemon.service: Main process exited, code=exited, status=137/n/a                                                                                                        
Sep 18 18:29:01 server lxd.daemon[8494]: => Stop reason is: crashed                                                                                                                                                 
Sep 18 18:29:01 server systemd[1]: snap.lxd.daemon.service: Unit entered failed state.                                                                                                                              
Sep 18 18:29:01 server systemd[1]: snap.lxd.daemon.service: Failed with result 'exit-code'.                                                                                                                         
Sep 18 18:29:02 server systemd[1]: snap.lxd.daemon.service: Service hold-off time over, scheduling restart.                                                                                                         
Sep 18 18:29:02 server systemd[1]: Stopped Service for snap application lxd.daemon.                                                                                                                                 
Sep 18 18:29:02 server systemd[1]: Started Service for snap application lxd.daemon.                                                                                                                                 
Sep 18 18:29:02 server lxd.daemon[8563]: => Preparing the system                                                                                                                                                    
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Loading snap configuration                                                                                                                                             
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Setting up mntns symlink (mnt:[4026533001])                                                                                                                            
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Setting up kmod wrapper                                                                                                                                                
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Preparing /boot                                                                                                                                                        
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Preparing a clean copy of /run                                                                                                                                         
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Preparing a clean copy of /etc                                                                                                                                         
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Setting up ceph configuration                                                                                                                                          
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Setting up LVM configuration                                                                                                                                           
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Rotating logs                                                                                                                                                          
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Setting up ZFS (0.7)                                                                                                                                                   
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Escaping the systemd cgroups                                                                                                                                           
Sep 18 18:29:02 server lxd.daemon[8563]: ==> Escaping the systemd process resource limits                                                                                                                           
Sep 18 18:29:02 server lxd.daemon[8563]: => Re-using existing LXCFS            
Sep 18 18:29:02 server lxd.daemon[8563]: => Starting LXD                                                                                                                                                            
Sep 18 18:29:02 server lxd.daemon[8563]: lvl=warn msg="CGroup memory swap accounting is disabled, swap limits will be ignored." t=2018-09-18T18:29:02+0300                                                          
Sep 18 18:29:03 server lxd.daemon[7794]: mount namespace: 7                                                                                                                                                         
Sep 18 18:29:03 server lxd.daemon[7794]: hierarchies:                                                                                                                                                               
Sep 18 18:29:03 server lxd.daemon[7794]:   0: fd:   8: freezer                                                                                                                                                      
Sep 18 18:29:03 server lxd.daemon[7794]:   1: fd:   9: net_cls,net_prio                                                                                                                                             
Sep 18 18:29:03 server lxd.daemon[7794]:   2: fd:  10: memory                                                                                                                                                       
Sep 18 18:29:03 server lxd.daemon[7794]:   3: fd:  11: blkio                                                                                                                                                        
Sep 18 18:29:03 server lxd.daemon[7794]:   4: fd:  12: cpuset                                                                                                                                                       
Sep 18 18:29:03 server lxd.daemon[7794]:   5: fd:  13: hugetlb                                                                                                                                                      
Sep 18 18:29:03 server lxd.daemon[7794]:   6: fd:  14: perf_event                                                                                                                                                   
Sep 18 18:29:03 server lxd.daemon[7794]:   7: fd:  15: pids                                                                                                                                                         
Sep 18 18:29:03 server lxd.daemon[7794]:   8: fd:  16: cpu,cpuacct                                                                                                                                                  
Sep 18 18:29:03 server lxd.daemon[7794]:   9: fd:  17: devices                                                                                                                                                      
Sep 18 18:29:03 server lxd.daemon[7794]:  10: fd:  18: name=systemd                                                                                                                                                 
Sep 18 18:29:03 server lxd.daemon[7794]: lxcfs.c: 105: do_reload: lxcfs: reloaded                                                                                                                                   
Sep 18 18:29:03 server lxd.daemon[8563]: lvl=warn msg="Unable to update backup.yaml at this time" name=prometheus-2 t=2018-09-18T18:29:03+0300 

I think I need to wipe lxd out and start fresh.

Hmm, that’s not very good indeed.
Can you try:

  • systemctl stop snap.lxd.daemon
  • pkill -9 lxd

Then check whether you have any LXD-related processes still running; if you do, kill those too.

Once your process list looks clean, run:

sudo lxd --debug --group lxd

And post that output here, that should eliminate most of the variables and focus on figuring out the LXD crash or issue. Depending on the database problem, @freeekanayaka may be able to fix it by reverting some recent changes.

If manually running that command brings LXD back online just fine, then keep that terminal running and use another terminal to interact with LXD, start containers, … try to stress it a bit to see if it’ll crash then.

That’s assuming that you have a bit of time to help us debug this. If you don’t then feel free to wipe/reinstall and move on.

INFO[09-19|06:31:17] Starting /dev/lxd handler:                                                                                                                                                                     
INFO[09-19|06:31:17]  - binding devlxd socket                 socket=/var/snap/lxd/common/lxd/devlxd/sock                                                                                                           
INFO[09-19|06:31:17] REST API daemon:                                                                                                                                                                               
INFO[09-19|06:31:17]  - binding Unix socket                   socket=/var/snap/lxd/common/lxd/unix.socket                                                                                                           
INFO[09-19|06:31:17]  - binding TCP socket                    socket=[::]:8443                                                                                                                                      
INFO[09-19|06:31:17] Initializing global database                                                                                                                                                                   
INFO[09-19|06:31:17] Dqlite: handling new connection (fd=20)                                                                                                                                                        
INFO[09-19|06:31:17] Dqlite: connected address=0 attempt=0                                                                                                                                                          
INFO[09-19|06:31:17] Initializing storage pools                                                                                                                                                                     
DBUG[09-19|06:31:17] Initializing and checking storage pool "zfs-lxd"                                                                                                                                               
DBUG[09-19|06:31:17] Checking ZFS storage pool "zfs-lxd"                                                                                                                                                            
DBUG[09-19|06:31:17] Initializing and checking storage pool "collections"                                                                                                                                           
DBUG[09-19|06:31:17] Checking ZFS storage pool "collections"                                                                                                                                                        
INFO[09-19|06:31:17] Initializing networks                                                                                                                                                                          
DBUG[09-19|06:31:17] Connecting to a remote simplestreams server                                                                                                                                                    
INFO[09-19|06:31:17] Pruning leftover image files                                                                                                                                                                   
INFO[09-19|06:31:17] Done pruning leftover image files                                                                                                                                                              
INFO[09-19|06:31:17] Loading configuration                                                                                                                                                                          
DBUG[09-19|06:31:17] Initialized inotify with file descriptor 18                                                                                                                                                    
INFO[09-19|06:31:17] Connected to MAAS controller                                                                                                                                                                   
INFO[09-19|06:31:17] Pruning expired images                                                                                                                                                                         
INFO[09-19|06:31:17] Done pruning expired images                                                                                                                                                                    
INFO[09-19|06:31:17] Expiring log files                                                                                                                                                                             
INFO[09-19|06:31:17] Updating instance types                                                                                                                                                                        
INFO[09-19|06:31:17] Done expiring log files                                                                                                                                                                        
INFO[09-19|06:31:17] Updating images                                                                                                                                                                                
INFO[09-19|06:31:17] Done updating images                                                                                                                                                                           
DBUG[09-19|06:31:17] Mounting ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                  
DBUG[09-19|06:31:17] Mounted ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                   
DBUG[09-19|06:31:17] Unmounting ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                
DBUG[09-19|06:31:17] Unmounted ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                 
DBUG[09-19|06:31:17] Mounting ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                  
DBUG[09-19|06:31:17] Mounted ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                   
INFO[09-19|06:31:17] Starting container                       action=start created=2018-08-14T13:43:01+0300 ephemeral=false name=test-01 stateful=false used=2018-09-19T06:20:44+0300                               
DBUG[09-19|06:31:17] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|06:31:17] handling                                 ip=@ method=GET url=/internal/containers/17/onstart                                                                                                   
DBUG[09-19|06:31:17] Mounting ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                  
DBUG[09-19|06:31:17] Mounted ZFS storage volume for container "test-01" on storage pool "zfs-lxd"                                                                                                                   
DBUG[09-19|06:31:17] Scheduler: container test-01 started: re-balancing                                                                                                                                             
INFO[09-19|06:31:18] Started container                        action=start created=2018-08-14T13:43:01+0300 ephemeral=false name=test-01 stateful=false used=2018-09-19T06:20:44+0300                               
DBUG[09-19|06:31:18] Scheduler: network: vethYBUKI9 has been added: updating network priorities                                                                                                                     
DBUG[09-19|06:31:18] Scheduler: network: vethY3JW0W has been added: updating network priorities                                                                                                                     
WARN[09-19|06:31:18] Unable to update backup.yaml at this time name=prometheus-2                                                                                                                                    
DBUG[09-19|06:31:18] Mounting ZFS storage volume "data01" on storage pool "collections"                                                                                                                           
DBUG[09-19|06:31:18] Mounted ZFS storage volume "data01" on storage pool "collections"                                                                                                                            
DBUG[09-19|06:31:18] Shifting storage volume                                                                                                                                                                        
DBUG[09-19|06:31:18] Mounting ZFS storage volume "data02" on storage pool "collections"                                                                                                                             
DBUG[09-19|06:31:18] Mounted ZFS storage volume "data02" on storage pool "collections"                                                                                                                              
INFO[09-19|06:31:20] Done updating instance types                                                                                                                                                                   
DBUG[09-19|06:31:58] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|06:31:58] handling                                 ip=@ method=GET url="/1.0/containers?recursion=2"                                                                                                     
DBUG[09-19|06:32:12] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|06:32:12] handling                                 ip=@ method=GET url=/1.0/events                                                                                                                       
DBUG[09-19|06:32:12] New event listener: 53721e83-3871-494e-947b-9dbadcbc1721                                                                                                                                       
DBUG[09-19|06:32:12] handling                                 ip=@ method=PUT url=/1.0/containers/test-01/state                                                                                                     
DBUG[09-19|10:38:52] handling                                 ip=@ method=GET url=/1.0   
DBUG[09-19|10:38:52] handling                                 ip=@ method=GET url="/1.0/containers?recursion=2"  
DBUG[09-19|10:46:05] handling                                 ip=@ method=GET url=/1.0
DBUG[09-19|10:46:05] handling                                 ip=@ method=GET url="/1.0/storage-pools?recursion=1"
DBUG[09-19|10:46:40] handling                                 ip=@ method=GET url=/1.0
DBUG[09-19|10:46:40] handling                                 ip=@ method=GET url="/1.0/storage-pools/collections/volumes?recursion=1"       

2 containers on host:
test-01 in ‘running’ state.
test-02 in ‘stopped’ state, which I’m trying to start.

After ‘lxd --debug --group lxd’, ‘lxc stop test-01’ has been hanging for 5 hours already.

After some lxc commands:

DBUG[09-19|11:07:17] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|11:07:17] handling                                 ip=@ method=GET url="/1.0/containers?recursion=2"                                                                                                     
DBUG[09-19|11:08:51] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|11:08:51] handling                                 ip=@ method=GET url=/1.0/containers/test-01                                                                                                           
DBUG[09-19|11:09:09] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|11:09:09] handling                                 ip=@ method=GET url=/1.0/containers/test-02                                                                                                           
DBUG[09-19|11:09:39] handling                                 ip=@ method=GET url=/1.0                                                                                                                              
DBUG[09-19|11:09:39] handling                                 ip=@ method=GET url=/1.0/containers/test-02                                                                                                           
DBUG[09-19|11:09:39] handling                                 ip=@ method=GET url=/1.0/events                                                                                                                       
DBUG[09-19|11:09:39] New event listener: 4774df93-8fb7-403f-bc07-2091ecf17751                                                                                                                                       
DBUG[09-19|11:09:39] handling                                 ip=@ method=PUT url=/1.0/containers/test-02                                                                                                           
DBUG[09-19|11:09:43] New task operation: 6ca7fcbc-5c9a-4902-998c-5cd554483810                                                                                                                                       
DBUG[09-19|11:09:43] Started task operation: 6ca7fcbc-5c9a-4902-998c-5cd554483810                                                                                                                                   
DBUG[09-19|11:09:43] handling                                 ip=@ method=GET url=/1.0/operations/6ca7fcbc-5c9a-4902-998c-5cd554483810                                                                              
INFO[09-19|11:09:48] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:09:48] Database error: &errors.errorString{s:"driver: bad connection"}                                                                                                                                
DBUG[09-19|11:09:48] Retry failed db interaction (driver: bad connection)                                                                                                                                           
INFO[09-19|11:09:48] Dqlite: handling new connection (fd=30)                                                                                                                                                        
INFO[09-19|11:09:53] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:09:53] Dqlite: server connection failed err=failed to send Leader request: failed to receive response: failed to receive header: failed to receive header: read unix @->@00556: i/o timeout address=0 
DBUG[09-19|11:09:53] Dqlite: connection failed err=no available dqlite leader server found attempt=0                                                                                                                
INFO[09-19|11:09:58] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:09:58] Dqlite: server connection failed err=failed to send Leader request: failed to receive response: failed to receive header: failed to receive header: read unix @->@00556: i/o timeout address=0 
DBUG[09-19|11:09:58] Dqlite: connection failed err=no available dqlite leader server found attempt=1                                                                                                                
DBUG[09-19|11:09:58] Database error: failed to begin transaction: failed to create dqlite connection: no available dqlite leader server found                                                                       
WARN[09-19|11:09:58] Failed to get current cluster nodes: failed to begin transaction: failed to create dqlite connection: no available dqlite leader server found                                                  
INFO[09-19|11:10:00] Dqlite: handling new connection (fd=29)                                                                                                                                                        
INFO[09-19|11:10:00] Dqlite: handling new connection (fd=29)                                                                                                                                                        
INFO[09-19|11:10:00] Dqlite: connected address=0 attempt=0                                                                                                                                                          
INFO[09-19|11:10:05] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:10:05] Database error: &errors.errorString{s:"driver: bad connection"}                                                                                                                                
DBUG[09-19|11:10:05] Retry failed db interaction (driver: bad connection)                                                                                                                                           
DBUG[09-19|11:10:05] Disconnected event listener: 4774df93-8fb7-403f-bc07-2091ecf17751                                                                                                                              
INFO[09-19|11:10:05] Dqlite: handling new connection (fd=28)                                                                                                                                                        
INFO[09-19|11:10:10] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:10:10] Dqlite: server connection failed err=failed to send Leader request: failed to receive response: failed to receive header: failed to receive header: read unix @->@00556: i/o timeout address=0 
DBUG[09-19|11:10:10] Dqlite: connection failed err=no available dqlite leader server found attempt=0                                                                                                                
INFO[09-19|11:10:13] Dqlite: handling new connection (fd=29)                                                                                                                                                        
INFO[09-19|11:10:13] Dqlite: connected address=0 attempt=1                                                                                                                                                          
WARN[09-19|11:10:16] Unable to update backup.yaml at this time name=test-02                                                                                                                                         
DBUG[09-19|11:10:16] Success for task operation: 6ca7fcbc-5c9a-4902-998c-5cd554483810                                                                                                                               
INFO[09-19|11:10:26] Dqlite: closing client                                                                                                                                                                         
DBUG[09-19|11:10:26] Database error: &errors.errorString{s:"driver: bad connection"}                                                                                                                                
DBUG[09-19|11:10:26] Retry failed db interaction (driver: bad connection)                                                                                                                                           
INFO[09-19|11:10:26] Dqlite: handling new connection (fd=28)                                                                                                                                                        
INFO[09-19|11:10:28] Dqlite: connected address=0 attempt=0                                                                                                                                                          
DBUG[09-19|11:10:28] Database error: &errors.errorString{s:"query deleted 0 rows instead of 1"}                                                                                                                     
WARN[09-19|11:10:28] Failed to delete operation 6ca7fcbc-5c9a-4902-998c-5cd554483810: query deleted 0 rows instead of 1

Launching a new container also gets stuck at starting:

lxc launch ubuntu:16.04/amd64 dima                                                                                                                                                                 
Creating dima
Starting dima  
DBUG[09-19|11:21:14] Updated metadata for task operation: c0a3baef-1bd1-4153-b2aa-321f9128f5ce 
DBUG[09-19|11:21:14] Updated metadata for task operation: c0a3baef-1bd1-4153-b2aa-321f9128f5ce 
DBUG[09-19|11:21:14] Updated metadata for task operation: c0a3baef-1bd1-4153-b2aa-321f9128f5ce 
DBUG[09-19|11:21:14] Database error: &errors.errorString{s:"sql: no rows in result set"} 
INFO[09-19|11:21:14] Image downloaded                         alias=16.04/amd64 image=bf49cef91a4122d9c8aacb9b6629078036a73798af1662b86a234081e3a355e9 operation=c0a3baef-1bd1-4153-b2aa-321f9128f5ce server=https://cloud-images.ubuntu.com/releases trigger=/1.0/operations/c0a3baef-1bd1-4153-b2aa-321f9128f5ce
DBUG[09-19|11:21:14] Database error: &errors.errorString{s:"sql: no rows in result set"} 
INFO[09-19|11:21:14] Creating container                       ephemeral=false name=dima
INFO[09-19|11:21:14] Created container                        ephemeral=false name=dima
DBUG[09-19|11:21:14] Creating ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:14] Creating ZFS storage volume for image "bf49cef91a4122d9c8aacb9b6629078036a73798af1662b86a234081e3a355e9" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:19] Created ZFS storage volume for image "bf49cef91a4122d9c8aacb9b6629078036a73798af1662b86a234081e3a355e9" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:19] Mounting ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:19] Mounted ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:19] Shifting root filesystem "/var/snap/lxd/common/lxd/containers/dima/rootfs" for "dima" 
DBUG[09-19|11:21:22] Created ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Unmounting ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Unmounted ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Mounting ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Mounted ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Unmounting ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Unmounted ZFS storage volume for container "dima" on storage pool "zfs-lxd" 
DBUG[09-19|11:21:22] Success for task operation: c0a3baef-1bd1-4153-b2aa-321f9128f5ce 
DBUG[09-19|11:21:22] handling                                 ip=@ method=GET url=/1.0/containers/dima
DBUG[09-19|11:21:22] handling                                 ip=@ method=GET url=/1.0/events
DBUG[09-19|11:21:22] New event listener: 293ab5b7-abe9-4ad8-8856-01d4aa1e199f 
DBUG[09-19|11:21:22] Disconnected event listener: 4650e6e7-a880-44d1-bdb2-d9ab0e2d39ec 
DBUG[09-19|11:21:22] handling                                 ip=@ method=PUT url=/1.0/containers/dima/state
DBUG[09-19|11:21:59] handling                                 ip=@ method=GET url=/1.0
DBUG[09-19|11:21:59] handling                                 ip=@ method=GET url="/1.0/containers?recursion=2"
DBUG[09-19|11:24:07] handling                                 ip=@ method=GET url=/1.0
DBUG[09-19|11:24:07] handling                                 ip=@ method=GET url="/1.0/containers?recursion=2"

So no crash so far then?

No crash, but until now the containers could not switch states, from ‘stopped’ to ‘running’ or vice versa.
After the most recent restart of lxd --debug --group lxd, the containers test-01 and the newly created dima magically began to start and stop again.
Now I am trying to start test-02 with the dataset attached. There are no errors in the console, and for the time being it is stuck in starting.
Stracing the process shows futex calls only:

futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0xc0000c8840, FUTEX_WAKE_PRIVATE, 1) = 1
sched_yield()                           = 0
futex(0x11eba30, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x11ebb28, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
sched_yield()                           = 0
futex(0x11eba30, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x11ebf08, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = -1 EAGAIN (Resource temporarily unavailable)
nanosleep({0, 3000}, NULL)              = 0
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0xc00037c4c0, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
sched_yield()                           = 0
futex(0x11eba30, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
futex(0x11ebf08, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = -1 EAGAIN (Resource temporarily unavailable)
nanosleep({0, 3000}, NULL)              = 0
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
epoll_pwait(4, [], 128, 0, NULL, 0)     = 0
futex(0x11ebb10, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0xc000402140, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL) = 0
sched_yield()                           = 0
futex(0x11eba30, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x11ec5e0, FUTEX_WAIT_PRIVATE, 0, NULL

Can you show lxc operation list, ps fauxww and dmesg?

It seems I have found the problem.
This big dataset was created outside of LXD, but inside the LXD-owned pool.
I am now in the process of moving the dataset out of the LXD pool. It will take a couple of days.
Thanks for your help.

After relocating the dataset to a non-LXD ZFS pool, everything works normally.
Thanks.