ansible: Add cron job to reboot armhf nodes that have gone read-only
author: Kienan Stewart <kstewart@efficios.com>
Wed, 17 Jan 2024 19:04:46 +0000 (14:04 -0500)
committer: Kienan Stewart <kstewart@efficios.com>
Wed, 17 Jan 2024 19:43:42 +0000 (14:43 -0500)
Change-Id: I4f2c38244bf4135600e1e33bf9c4fe76be1de740
Signed-off-by: Kienan Stewart <kstewart@efficios.com>
automation/ansible/group_vars/node_armhf.yml [new file with mode: 0644]
automation/ansible/roles/common-node/files/readonly_root_reboot.sh [new file with mode: 0755]
automation/ansible/roles/common-node/tasks/main.yml

diff --git a/automation/ansible/group_vars/node_armhf.yml b/automation/ansible/group_vars/node_armhf.yml
new file mode 100644 (file)
index 0000000..27c2931
--- /dev/null
@@ -0,0 +1,6 @@
+---
+# The SATA controllers on the boards running the armhf nodes are
+# somewhat flaky, and errors often leave the root filesystem mounted
+# read-only. Jobs then fail, so the easiest recovery is a reboot; the
+# common-node role installs a cron job for that when this is true.
+common_node_rootfs_readonly_reboot: true
diff --git a/automation/ansible/roles/common-node/files/readonly_root_reboot.sh b/automation/ansible/roles/common-node/files/readonly_root_reboot.sh
new file mode 100755 (executable)
index 0000000..4ce1091
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/bash
+
+IFS=',' read -r -a OPTIONS < <(findmnt --json / | jq -r '.[][0]["options"]')
+RO=
+for OPTION in "${OPTIONS[@]}" ; do
+    if [[ "${OPTION}" == "ro" ]] ; then
+        RO=0
+        break
+    fi
+done
+
+if [[ "${RO}" == "0" ]] ; then
+    echo "'/' is mounted read-only, rebooting"
+    shutdown -r "+1"
+fi
index 177603ea415eced742d3d7517bc4cc2e2e0b9c38..28f2a27d9244dda5e4bd8078e7f028e9fe38fa3c 100644 (file)
 
 - include: setup-Suse.yml
   when: ansible_os_family == 'Suse'
+
+# Optional watchdog: only hosts that set common_node_rootfs_readonly_reboot
+# get the check script and the cron entry that runs it.
+- block:
+    - name: Install readonly reboot script
+      ansible.builtin.copy:
+        src: 'readonly_root_reboot.sh'
+        dest: '/usr/sbin/readonly_root_reboot.sh'
+        mode: '0750'
+        owner: 'root'
+        group: 'root'
+    # Runs the installed script as root every ten minutes from its own
+    # cron file.
+    - name: Add cronjob
+      ansible.builtin.cron:
+        name: 'readonly_root_reboot'
+        cron_file: 'readonly_root_reboot'
+        user: 'root'
+        minute: '*/10'
+        job: '/usr/sbin/readonly_root_reboot.sh'
+  when: common_node_rootfs_readonly_reboot | default(false)
This page took 0.023743 seconds and 4 git commands to generate.