From 4f66f7f7ed16202810135ecb1490cb4fe60a1aab Mon Sep 17 00:00:00 2001 From: timotheereausanofi Date: Fri, 20 Mar 2026 11:28:47 +0100 Subject: [PATCH] feat(observability): OneLab-only Promtail, provisioned OneLab logs dashboard - Promtail: keep kubernetes-pods in namespace onelab; tag host file logs (host-logs) - Grafana: enable dashboard sidecar; ConfigMap onelab-logs.json - Dashboard: stats (total/error/warn heuristics), logs panel, component + regex filters Made-with: Cursor --- gitops/docs/OBSERVABILITY.md | 14 ++ .../observability/dashboards/onelab-logs.json | 205 ++++++++++++++++++ .../configmap-dashboard-onelab-logs.yaml | 14 ++ gitops/observability/values.yaml | 19 ++ 4 files changed, 252 insertions(+) create mode 100644 gitops/observability/dashboards/onelab-logs.json create mode 100644 gitops/observability/templates/configmap-dashboard-onelab-logs.yaml diff --git a/gitops/docs/OBSERVABILITY.md b/gitops/docs/OBSERVABILITY.md index 0f0dd26..ef6bf17 100644 --- a/gitops/docs/OBSERVABILITY.md +++ b/gitops/docs/OBSERVABILITY.md @@ -14,6 +14,20 @@ It is synced by the **same** Argo CD Application as the OneLab chart ([`gitops/a 2. **Align host paths** — if you change `persistence.hostPath.logs` for OneLab, update `promtail.extraVolumes` / `extraVolumeMounts` in the same `values.yaml` so Promtail still reads the shared log directory. 3. **Multi-node** — with `hostPath` logs, each node only sees its own files; Promtail runs on every node, so you still get coverage when pods move. +## OneLab-only ingestion + +Promtail adds **`extraRelabelConfigs`** so the **kubernetes-pods** job **keeps only** pods in namespace **`onelab`**. Other namespaces no longer reach Loki (Explore only sees OneLab). Host file logs under `/opt/onelab/logs` are tagged with **`namespace: onelab`** and **`component: host-logs`** so they appear in the same queries.
+ +Existing Loki data from before this change may still show non-`onelab` streams until **retention** drops them; for a clean index you would need to wipe the Loki PVC (destructive). + +## Dashboard: **OneLab logs** + +Grafana’s **dashboard sidecar** loads ConfigMap **`…-dashboard-onelab-logs`** (JSON: `dashboards/onelab-logs.json`). Open **Dashboards → OneLab logs** (`uid` `onelab-logs`): + +- **Component** — multi-select from `label_values({namespace="onelab"}, component)` (includes **`host-logs`** for file logs). +- **Line filter** — regex applied to log line content (`.*` = all). +- Stat panels: total lines, heuristic **error** / **warning** counts (tuned for typical text logs, not strict JSON parsing). + ## Access Grafana An **Ingress** named **`grafana-onelab`** is created by the umbrella chart (`templates/ingress-grafana-onelab.yaml`), Traefik + cert-manager, matching the OneLab web UI pattern in `gitops/values/k3s-example.yaml`: diff --git a/gitops/observability/dashboards/onelab-logs.json b/gitops/observability/dashboards/onelab-logs.json new file mode 100644 index 0000000..8693370 --- /dev/null +++ b/gitops/observability/dashboards/onelab-logs.json @@ -0,0 +1,205 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": {"type": "grafana", "uid": "-- Grafana --"}, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "loki", "uid": "loki"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{"color": "blue", "value": null}] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 8, "x": 0, "y": 0}, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode":
"auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "targets": [ + { + "datasource": {"type": "loki", "uid": "loki"}, + "editorMode": "code", + "expr": "sum(count_over_time({namespace=\"onelab\", component=~\"$component\"} |~ \"$filter\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Total lines (namespace onelab, matches line filter)", + "type": "stat" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "orange", "value": 1} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 8, "x": 8, "y": 0}, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "targets": [ + { + "datasource": {"type": "loki", "uid": "loki"}, + "editorMode": "code", + "expr": "sum(count_over_time({namespace=\"onelab\", component=~\"$component\"} |~ \"$filter\" |~ \"(?i)(\\\\[ERROR\\\\]|\\\\berror\\\\b|\\\\sERROR\\\\s)\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "~ Error-like lines (heuristic)", + "type": "stat" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 1} + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 8, "x": 16, "y": 0}, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": 
"", "values": false}, + "textMode": "auto" + }, + "targets": [ + { + "datasource": {"type": "loki", "uid": "loki"}, + "editorMode": "code", + "expr": "sum(count_over_time({namespace=\"onelab\", component=~\"$component\"} |~ \"$filter\" |~ \"(?i)(\\\\[WARN\\\\]|\\\\bwarn(ing)?\\\\b|\\\\sWARN\\\\s)\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "~ Warning-like lines (heuristic)", + "type": "stat" + }, + { + "datasource": {"type": "loki", "uid": "loki"}, + "gridPos": {"h": 16, "w": 24, "x": 0, "y": 5}, + "id": 4, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": true, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": {"type": "loki", "uid": "loki"}, + "editorMode": "code", + "expr": "{namespace=\"onelab\", component=~\"$component\"} |~ \"$filter\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "OneLab logs — use Component + Line filter (regex)", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": ["onelab", "loki"], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "loki", "uid": "loki"}, + "definition": "label_values({namespace=\"onelab\"}, component)", + "hide": 0, + "includeAll": true, + "label": "Component", + "multi": true, + "name": "component", + "options": [], + "query": "label_values({namespace=\"onelab\"}, component)", + "refresh": 2, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "current": {"selected": true, "text": ".*", "value": ".*"}, + "hide": 0, + "label": "Line filter (regex)", + "name": "filter", + "options": [ + {"selected": true, "text": ".*", "value": ".*"} + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": {"from": "now-1h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title":
"OneLab logs", + "uid": "onelab-logs", + "version": 1, + "weekStart": "" +} diff --git a/gitops/observability/templates/configmap-dashboard-onelab-logs.yaml b/gitops/observability/templates/configmap-dashboard-onelab-logs.yaml new file mode 100644 index 0000000..1e182b7 --- /dev/null +++ b/gitops/observability/templates/configmap-dashboard-onelab-logs.yaml @@ -0,0 +1,14 @@ +{{- if .Values.grafana.sidecar.dashboards.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ printf "%s-dashboard-onelab-logs" .Release.Name | trunc 63 | trimSuffix "-" }} + namespace: {{ .Release.Namespace }} + labels: + grafana_dashboard: "1" + annotations: + argocd.argoproj.io/sync-wave: "0" +data: + onelab-logs.json: |- +{{ .Files.Get "dashboards/onelab-logs.json" | nindent 4 }} +{{- end }} diff --git a/gitops/observability/values.yaml b/gitops/observability/values.yaml index 1599e60..37a57c8 100644 --- a/gitops/observability/values.yaml +++ b/gitops/observability/values.yaml @@ -69,6 +69,12 @@ promtail: clients: - url: http://{{ .Release.Name }}-loki-gateway.{{ .Release.Namespace }}.svc.cluster.local/loki/api/v1/push snippets: + # Only ingest pod logs from namespace onelab (Explore / Loki stay focused on OneLab). + extraRelabelConfigs: + - action: keep + source_labels: + - __meta_kubernetes_namespace + regex: onelab extraScrapeConfigs: | - job_name: onelab-host-log-files static_configs: @@ -76,6 +82,8 @@ promtail: - localhost labels: job: onelab-files + namespace: onelab + component: host-logs __path__: /onelab-host-logs/**/* extraVolumes: - name: onelab-host-logs @@ -101,6 +109,17 @@ grafanaOnelabIngress: grafana: adminUser: admin adminPassword: changeme + # Load dashboards from ConfigMaps labeled grafana_dashboard (see templates/configmap-dashboard-onelab-logs.yaml). 
+ sidecar: + dashboards: + enabled: true + label: grafana_dashboard + folder: /tmp/dashboards + provider: + foldersFromFilesStructure: false + allowUiUpdates: true + datasources: + enabled: false persistence: enabled: true size: 2Gi