ansible: Install rasdaemon and prometheus exporter on CI hosts
author Kienan Stewart <kstewart@efficios.com>
Mon, 8 Jan 2024 20:29:20 +0000 (15:29 -0500)
committer Kienan Stewart <kstewart@efficios.com>
Mon, 8 Jan 2024 20:29:20 +0000 (15:29 -0500)
Change-Id: Iff4018e7e4174aad321ab740e534cab4d3414255
Signed-off-by: Kienan Stewart <kstewart@efficios.com>
automation/ansible/hosts.yml
automation/ansible/roles/rasdaemon/defaults/main.yml [new file with mode: 0644]
automation/ansible/roles/rasdaemon/files/rasdaemon-exporter.py [new file with mode: 0755]
automation/ansible/roles/rasdaemon/handlers/main.yml [new file with mode: 0644]
automation/ansible/roles/rasdaemon/tasks/main.yml [new file with mode: 0644]
automation/ansible/roles/rasdaemon/tasks/prometheus.yml [new file with mode: 0644]
automation/ansible/roles/rasdaemon/templates/rasdaemon-exporter.service.j2 [new file with mode: 0644]

index 9fc739f64fc89cc2f4c028f2d2958163513549db..33b9062a953359ddfb31cf39a18d48880e27d643 100644 (file)
@@ -7,3 +7,4 @@
     - common
     - libvirt
     - lxd
+    - rasdaemon
diff --git a/automation/ansible/roles/rasdaemon/defaults/main.yml b/automation/ansible/roles/rasdaemon/defaults/main.yml
new file mode 100644 (file)
index 0000000..11ad6f1
--- /dev/null
@@ -0,0 +1,6 @@
+---
+# Default variables for the rasdaemon role.
+#
+# Whether the rasdaemon-exporter service is enabled and started (true) or
+# disabled and stopped (false).
+rasdaemon_prometheus_exporter: true
+# Address handed to the exporter's -l option in the systemd unit.
+rasdaemon_prometheus_exporter_bind_address: '0.0.0.0'
+# Port handed to the exporter's -p option in the systemd unit.
+rasdaemon_prometheus_exporter_port: 9797
+# Packages installed before the exporter script is deployed.
+rasdaemon_prometheus_exporter_prerequisites:
+  - python3
diff --git a/automation/ansible/roles/rasdaemon/files/rasdaemon-exporter.py b/automation/ansible/roles/rasdaemon/files/rasdaemon-exporter.py
new file mode 100755 (executable)
index 0000000..7613957
--- /dev/null
@@ -0,0 +1,94 @@
+#!/usr/bin/python3
+#
+# SPDX-FileCopyrightText: 2024 Kienan Stewart <kstewart@efficios.com>
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Prometheus exporter for rasdaemon
+#
+# Based on https://github.com/openstreetmap/prometheus-exporters/blob/main/exporters/rasdaemon/rasdaemon_exporter
+#
+
+import argparse
+import http.server
+import sqlite3
+import urllib.parse
+
+# Metrics served by the exporter, keyed by Prometheus metric name.
+# Each entry carries the HELP text, the metric TYPE and the SQL query to run
+# against the rasdaemon database. In every result row the 'err_count' column
+# is used as the sample value and all remaining columns become labels.
+METRICS = {
+    'rasdaemon_mc_events_total': dict(
+        help='Memory controller errors',
+        type='counter',
+        query=(
+            'SELECT mc, top_layer, middle_layer, lower_layer, err_type, '
+            'SUM(err_count) as err_count '
+            'FROM mc_event '
+            'GROUP BY mc, top_layer, middle_layer, lower_layer, err_type'
+        ),
+    ),
+}
+
+class ExporterServer(http.server.ThreadingHTTPServer):
+    """Threading HTTP server that remembers which rasdaemon database to read.
+
+    The database path is kept on the server object as ``db_file`` so request
+    handlers can reach it through ``self.server.db_file``.
+    """
+
+    def __init__(
+        self,
+        server_address,
+        RequestHandlerClass,
+        db_file='/var/lib/rasdaemon/ras-mc_event.db',
+    ):
+        """Set up the server on *server_address* and record *db_file*."""
+        super().__init__(server_address, RequestHandlerClass)
+        # Stored after the base class is initialised, as handlers only read it
+        # while serving a request.
+        self.db_file = db_file
+
+
+class ExporterHandler(http.server.BaseHTTPRequestHandler):
+    """Answer ``GET /metrics`` with rasdaemon counters in Prometheus text format.
+
+    Any other path gets an empty 404 response. The database to query is taken
+    from ``self.server.db_file``.
+    """
+
+    @staticmethod
+    def _escape_label_value(value):
+        """Return *value* as text that is safe inside a quoted label value.
+
+        The Prometheus text format requires backslash, double quote and line
+        feed to be escaped in label values.
+        """
+        return (
+            str(value)
+            .replace('\\', '\\\\')
+            .replace('"', '\\"')
+            .replace('\n', '\\n')
+        )
+
+    def _collect_rows(self):
+        """Run every query in METRICS and return ``{metric name: rows}``.
+
+        Raises:
+            sqlite3.Error: if the database cannot be opened or queried.
+        """
+        con = sqlite3.connect('file:{}?mode=ro'.format(self.server.db_file), uri=True)
+        try:
+            con.row_factory = sqlite3.Row
+            return {
+                name: con.execute(spec['query']).fetchall()
+                for name, spec in METRICS.items()
+            }
+        finally:
+            # A sqlite3 connection used as a context manager only commits or
+            # rolls back; it has to be closed explicitly or one connection
+            # leaks per scrape.
+            con.close()
+
+    def _format_sample(self, name, row):
+        """Return one exposition line for *row* of metric *name*.
+
+        The 'err_count' column is the sample value; every other column of the
+        row is emitted as a label.
+        """
+        labels = ','.join(
+            '{}="{}"'.format(column, self._escape_label_value(row[column]))
+            for column in row.keys()
+            if column != 'err_count'
+        )
+        if labels:
+            return "{}{{{}}} {}\n".format(name, labels, row['err_count'])
+        return "{} {}\n".format(name, row['err_count'])
+
+    def do_GET(self):
+        """Handle a GET request.
+
+        Responds 404 for anything but ``/metrics``, 500 when the database
+        cannot be read, and otherwise 200 with the metrics document.
+        """
+        url = urllib.parse.urlparse(self.path)
+        if url.path != '/metrics':
+            self.send_response(404)
+            self.end_headers()
+            return
+
+        try:
+            rows_by_metric = self._collect_rows()
+        except sqlite3.Error as err:
+            # Report the failure to the scraper instead of dropping the
+            # connection without any HTTP response.
+            self.log_error('cannot read %s: %s', self.server.db_file, err)
+            self.send_error(500, 'Unable to read rasdaemon database')
+            return
+
+        lines = []
+        for name, spec in METRICS.items():
+            lines.append("# HELP {} {}\n".format(name, spec['help']))
+            lines.append("# TYPE {} {}\n".format(name, spec['type']))
+            rows = rows_by_metric[name]
+            if not rows:
+                # No events recorded yet: still expose the series, at zero.
+                lines.append("{} 0\n".format(name))
+            lines.extend(self._format_sample(name, row) for row in rows)
+        body = ''.join(lines).encode()
+
+        # The body is built before any header is sent so that a failure above
+        # can never produce a half-written 200 response.
+        self.send_response(200)
+        self.send_header('Content-Type', 'text/plain; version=0.0.4; charset=utf-8')
+        self.send_header('Content-Length', str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+
+def _get_argument_parser():
+    """Build the command-line parser for the exporter.
+
+    Options:
+        -p/--port: TCP port to listen on (int, default 9797).
+        -l/--listen-address: address to bind to (default '').
+        -f/--rasdaemon-db-file: path of the rasdaemon sqlite3 database
+            (default '/var/lib/rasdaemon/ras-mc_event.db').
+
+    Returns:
+        The configured ``argparse.ArgumentParser``.
+    """
+    parser = argparse.ArgumentParser(
+        prog='rasdaemon-exporter',
+        description='Exports rasdaemon metrics',
+    )
+    parser.add_argument(
+        '-p', '--port', type=int, default=9797,
+        help='The port to listen on',
+    )
+    parser.add_argument(
+        '-l', '--listen-address', type=str, default='',
+        help='The address to listen on',
+    )
+    # Kept as a plain path string: the file is opened by sqlite3 on each
+    # request rather than by argparse at start-up.
+    parser.add_argument(
+        '-f', '--rasdaemon-db-file', type=str,
+        default='/var/lib/rasdaemon/ras-mc_event.db',
+        help='The path to the rasdaemon sqlite3 database',
+    )
+    return parser
+
+
+def serve(listen_address, listen_port, db_file):
+    """Serve metrics over HTTP until the server loop ends.
+
+    Args:
+        listen_address: address the server binds to.
+        listen_port: TCP port the server binds to.
+        db_file: path of the rasdaemon sqlite3 database given to the server.
+    """
+    bind_to = (listen_address, listen_port)
+    httpd = ExporterServer(bind_to, ExporterHandler, db_file)
+    # The server is its own context manager, so leaving the block cleans it up.
+    with httpd:
+        httpd.serve_forever()
+    print('done')
+
+
+if __name__ == '__main__':
+    # Script entry point: read the command line, then start the exporter.
+    parser = _get_argument_parser()
+    args = parser.parse_args()
+    serve(
+        listen_address=args.listen_address,
+        listen_port=args.port,
+        db_file=args.rasdaemon_db_file,
+    )
diff --git a/automation/ansible/roles/rasdaemon/handlers/main.yml b/automation/ansible/roles/rasdaemon/handlers/main.yml
new file mode 100644 (file)
index 0000000..a4f487e
--- /dev/null
@@ -0,0 +1,8 @@
+---
+# Handlers for the rasdaemon role. Ansible runs notified handlers in the
+# order they are listed here, so the daemon-reload comes before the restart.
+- name: Systemd daemon-reload
+  ansible.builtin.systemd:
+    daemon_reload: true
+# Skipped when the exporter is disabled; otherwise a changed script or unit
+# file would restart a service the role has just stopped.
+- name: Restart rasdaemon-exporter
+  ansible.builtin.service:
+    name: rasdaemon-exporter
+    state: restarted
+  when: rasdaemon_prometheus_exporter | bool
diff --git a/automation/ansible/roles/rasdaemon/tasks/main.yml b/automation/ansible/roles/rasdaemon/tasks/main.yml
new file mode 100644 (file)
index 0000000..c2c8657
--- /dev/null
@@ -0,0 +1,10 @@
+---
+# Install and enable rasdaemon, then set up its Prometheus exporter.
+- name: Install rasdaemon
+  ansible.builtin.package:
+    name: rasdaemon
+- name: Ensure rasdaemon is running
+  ansible.builtin.service:
+    name: rasdaemon
+    state: started
+    enabled: true
+# include_tasks replaces the deprecated ansible.builtin.include, which has
+# been removed from current ansible-core releases.
+- name: Set up the rasdaemon prometheus exporter
+  ansible.builtin.include_tasks: prometheus.yml
diff --git a/automation/ansible/roles/rasdaemon/tasks/prometheus.yml b/automation/ansible/roles/rasdaemon/tasks/prometheus.yml
new file mode 100644 (file)
index 0000000..03b6213
--- /dev/null
@@ -0,0 +1,25 @@
+---
+# Deploy the rasdaemon Prometheus exporter script and its systemd service.
+- name: Install rasdaemon prometheus exporter requirements
+  ansible.builtin.package:
+    name: "{{rasdaemon_prometheus_exporter_prerequisites}}"
+- name: Install rasdaemon prometheus exporter
+  ansible.builtin.copy:
+    dest: '/usr/local/bin/rasdaemon-exporter'
+    src: 'rasdaemon-exporter.py'
+    owner: 'root'
+    group: 'root'
+    mode: '0755'
+  notify:
+    - Restart rasdaemon-exporter
+# Ownership and mode are set explicitly so the unit file's permissions do not
+# depend on the umask of the connecting user.
+- name: Deploy rasdaemon prometheus exporter service
+  ansible.builtin.template:
+    src: 'rasdaemon-exporter.service.j2'
+    dest: '/etc/systemd/system/rasdaemon-exporter.service'
+    owner: 'root'
+    group: 'root'
+    mode: '0644'
+  notify:
+    - Systemd daemon-reload
+    - Restart rasdaemon-exporter
+# Enabled and started when rasdaemon_prometheus_exporter is true, disabled
+# and stopped otherwise.
+- name: Ensure rasdaemon prometheus exporter service is running
+  ansible.builtin.service:
+    name: rasdaemon-exporter
+    enabled: "{{rasdaemon_prometheus_exporter}}"
+    state: "{{rasdaemon_prometheus_exporter|ternary('started', 'stopped')}}"
diff --git a/automation/ansible/roles/rasdaemon/templates/rasdaemon-exporter.service.j2 b/automation/ansible/roles/rasdaemon/templates/rasdaemon-exporter.service.j2
new file mode 100644 (file)
index 0000000..4e2b859
--- /dev/null
@@ -0,0 +1,10 @@
+# systemd unit for the rasdaemon Prometheus exporter, rendered by Ansible.
+[Unit]
+Description=Rasdaemon Prometheus Exporter
+After=network.target
+
+[Service]
+Type=simple
+# Listen address (-l) and port (-p) are filled in from the role variables
+# rasdaemon_prometheus_exporter_bind_address and rasdaemon_prometheus_exporter_port.
+ExecStart=/usr/local/bin/rasdaemon-exporter -l "{{rasdaemon_prometheus_exporter_bind_address}}" -p "{{rasdaemon_prometheus_exporter_port}}"
+
+[Install]
+WantedBy=multi-user.target
This page took 0.025905 seconds and 4 git commands to generate.