From 3e5dfaa1cbf9ed892ae9a149dec4e6a33476665f Mon Sep 17 00:00:00 2001 From: timotheereausanofi Date: Fri, 20 Mar 2026 12:06:20 +0100 Subject: [PATCH] docs(gitops): single Argo CD README, remove redundant docs Made-with: Cursor --- gitops/README.md | 225 ++++++++++++++---- gitops/argocd/README.md | 27 --- gitops/argocd/application.yaml | 2 +- gitops/docs/BOOTSTRAP.md | 57 ----- gitops/docs/OBSERVABILITY.md | 62 ----- gitops/observability/README.md | 7 - gitops/values/instance-overrides.example.yaml | 11 +- 7 files changed, 190 insertions(+), 201 deletions(-) delete mode 100644 gitops/argocd/README.md delete mode 100644 gitops/docs/BOOTSTRAP.md delete mode 100644 gitops/docs/OBSERVABILITY.md delete mode 100644 gitops/observability/README.md diff --git a/gitops/README.md b/gitops/README.md index e657534..f184894 100644 --- a/gitops/README.md +++ b/gitops/README.md @@ -1,78 +1,217 @@ -# OneLab GitOps (k3s + Argo CD) +# OneLab GitOps (Argo CD) -This directory holds the **Helm chart** that replaces `docker stack deploy` from the legacy Swarm installer (`app/docker-compose.yml`). +This directory is the **declarative source** for OneLab on Kubernetes. Argo CD applies two **Helm-based sources** from Git (Argo invokes Helm internally; you do not run a separate Helm install workflow). + +Legacy Swarm install lives under [`app/`](../app/) (`docker-compose.yml`); this tree replaces `docker stack deploy` for k3s/Kubernetes. 
## Layout | Path | Purpose | |------|---------| -| `charts/onelab` | Helm chart (StatefulSets, Deployments, Services, ConfigMaps, Secrets) | -| `values/*.yaml` | Environment-specific overrides (non-secret defaults; use sealed/external secrets for prod) | -| `argocd/application.yaml` | `Application` (multi-source): OneLab chart + [`observability/`](observability/) (Loki/Promtail/Grafana) | -| `observability/` | Umbrella Helm chart for log aggregation (same Argo app, release `onelab-obs`) | +| [`charts/onelab`](charts/onelab) | OneLab chart (StatefulSets, Deployments, Services, ConfigMaps, Secrets) — **Argo source 1** | +| [`values/`](values/) | Environment values (e.g. [`values/k3s-example.yaml`](values/k3s-example.yaml)); reference from `helm.valueFiles` | +| [`observability/`](observability/) | Loki / Promtail / Grafana umbrella chart — **Argo source 2** (`releaseName: onelab-obs`) | +| [`argocd/application.yaml`](argocd/application.yaml) | `Application` manifest (`spec.sources`, namespace `onelab`) | +| [`argocd/jsonpatch-multisource.json`](argocd/jsonpatch-multisource.json) | One-time JSON patch if the live `Application` is stuck on `spec.source` | ## Prerequisites -1. **k3s** (or any Kubernetes) with default storage class for Postgres/Rabbit PVCs (e.g. `local-path`). -2. **Image pull access** to `hub.andrewalliance.com` — create a docker-registry secret and reference it in `imagePullSecrets`: - ```bash - kubectl create namespace onelab - kubectl create secret docker-registry hub-andrewalliance -n onelab \ - --docker-server=hub.andrewalliance.com --docker-username=... --docker-password=... - ``` -3. **RabbitMQ TLS secret** (name `onelab-rabbit-tls` by default) — see `values/k3s-example.yaml` comments, or set `rabbitmq.tls.embed: true` with PEM strings in a **private** values file. -4. 
**Host paths** (default): ensure `/opt/onelab/data` and `/opt/onelab/logs` exist on nodes that run workloads using `persistence.mode: hostPath`, or switch to RWX storage for multi-node. +1. **Kubernetes** (e.g. k3s) with a default **StorageClass** for Postgres/Rabbit PVCs (e.g. `local-path`). +2. **Image pull** to `hub.andrewalliance.com` — registry Secret + `imagePullSecrets` (see [`values/k3s-example.yaml`](values/k3s-example.yaml) and [Private registry credentials](#private-registry-credentials)). +3. **RabbitMQ TLS** Secret `onelab-rabbit-tls` (or `rabbitmq.tls.embed` in a private values file) — [RabbitMQ TLS](#rabbitmq-tls). +4. **Host paths** when using `persistence.mode: hostPath`: `/opt/onelab/data` and `/opt/onelab/logs` on nodes that run those pods, or use RWX storage for multi-node. -## Helm (without Argo CD) +## Bootstrap (registry, Argo repo, TLS) + +### Private registry credentials + +By default, `gitops/values/k3s-example.yaml` matches the Swarm installer (`app/playbooks/tasks/manage-images.yml`): user **`public`**, password **`Andrew01..Release`**, and the chart creates Secret **`hub-andrewalliance`** when `registry.createPullSecret: true`. + +To use other credentials, override `registry.username` / `registry.password` or create the secret manually: ```bash -cd gitops/charts/onelab -helm upgrade --install onelab . -n onelab --create-namespace \ - -f ../../values/k3s-example.yaml +kubectl create secret docker-registry hub-andrewalliance -n onelab \ + --docker-server=hub.andrewalliance.com \ + --docker-username='YOUR_USER' \ + --docker-password='YOUR_PASSWORD' ``` -## Argo CD +…and set `registry.createPullSecret: false` plus `imagePullSecrets: [{ name: hub-andrewalliance }]`. -1. Push this repository to a Git remote Argo CD can read. -2. Edit `argocd/application.yaml`: `repoURL`, `targetRevision`, and values file as needed. -3. `kubectl apply -f gitops/argocd/application.yaml` (from a machine with a working kubeconfig). 
+#### StatefulSet pods still get `401 Unauthorized` / `ImagePullBackOff` after enabling registry auth -The Application uses **`spec.sources`** (Argo CD 2.6+): source 1 is the OneLab chart (`releaseName: onelab`), source 2 is [`observability/`](observability/) (`releaseName: onelab-obs`). Both deploy to namespace **`onelab`**. +If `db-0` / `rabbitmq-0` were created **before** `imagePullSecrets` existed, their **Pod** spec can still use anonymous pulls until they are recreated: -Sync waves order Postgres → Redis/Rabbit/config → application pods. +```bash +kubectl delete pod -n onelab db-0 rabbitmq-0 +``` + +The chart adds a pod-template checksum so after you change registry settings in Git and **Argo syncs**, workloads normally roll; a one-time delete is enough if pods were created before pull secrets existed. + +### Argo CD private Git repository + +If the Application shows `authentication required: Unauthorized`, register the repo in Argo CD (CLI or UI): + +```bash +# Example; use a deploy token or PAT with repo read access +argocd repo add https://git.luneski.fr/luneski/onelab-k8s.git \ + --username git \ + --password YOUR_TOKEN +``` + +Then apply the Application: + +```bash +kubectl apply -f gitops/argocd/application.yaml +``` + +**Single controller:** Use **only** this Argo CD `Application` for `onelab` / `onelab-obs`. Do not manage the same namespace with a separate **Helm CLI** release. + +### RabbitMQ TLS + +Secret `onelab-rabbit-tls` must exist before RabbitMQ starts (created once from `app/rabbit/ssl/` or your own PEMs). + +### Argo CD version and observability stack + +[`argocd/application.yaml`](argocd/application.yaml) uses **`spec.sources`** (two Helm charts in one Application). Use **Argo CD 2.6 or newer**. 
+ +If the `onelab` Application was created earlier with **`spec.source` only**, Argo will **not** show the observability resources until you remove `source` and set `sources` — see [Migrating `spec.source` → `spec.sources`](#migrating-specsource--specsources) below. + +The second source installs Loki/Promtail/Grafana from [`observability/`](observability/) (`releaseName: onelab-obs`). Set a strong **`grafana.adminPassword`** in [`observability/values.yaml`](observability/values.yaml) before production — details in [Observability](#observability-loki--promtail--grafana). + +## Deploy with Argo CD + +1. Push this repo to a Git remote Argo CD can read. +2. Register the repo in Argo CD (CLI or UI) if it is private — [Argo CD private Git repository](#argo-cd-private-git-repository). +3. Edit [`argocd/application.yaml`](argocd/application.yaml): `repoURL`, `targetRevision`, and per-source `helm.valueFiles` if needed. +4. Apply the Application: + + ```bash + kubectl apply -f gitops/argocd/application.yaml + ``` + +**Requirements:** Argo CD **2.6+** (`spec.sources`). + +Each entry under `spec.sources` has its own `helm.releaseName` and `helm.valueFiles` (paths are **relative to that source’s `path`**): + +- Source `gitops/charts/onelab` → e.g. `../../values/k3s-example.yaml` +- Source `gitops/observability` → e.g. `values.yaml` + +Both targets deploy into namespace **`onelab`**. Sync waves order: Postgres → Redis/Rabbit/config → application workloads. + +### Migrating `spec.source` → `spec.sources` + +If the `onelab` `Application` was created earlier with **`spec.source` only**, a plain `kubectl apply` of the new file may **not** remove `spec.source`, and Argo will never reconcile the observability chart. 
+ +Check: + +```bash +kubectl get application onelab -n argocd -o jsonpath='{.spec.source}{"\n"}{.spec.sources}{"\n"}' +``` + +If `source` is set and `sources` is empty, patch once (adjust `repoURL` in the patch file if needed): + +```bash +kubectl patch application onelab -n argocd --type json --patch-file gitops/argocd/jsonpatch-multisource.json +``` + +Then sync in Argo (or wait for auto-sync). + +### Single controller + +Manage these workloads **only** through this Argo CD `Application`. Do not drive the same resources with a parallel **Helm CLI** release. ### Logs / Grafana -See [docs/OBSERVABILITY.md](docs/OBSERVABILITY.md). Change `grafana.adminPassword` in `observability/values.yaml` before relying on it in production. +See [Observability (Loki / Promtail / Grafana)](#observability-loki--promtail--grafana) — set a strong `grafana.adminPassword` in [`observability/values.yaml`](observability/values.yaml) before production. + +## Observability (Loki / Promtail / Grafana) + +The umbrella chart under [`observability/`](observability/) deploys: + +- **Loki** — log storage (SingleBinary, filesystem PVC, 7-day retention by default). +- **Promtail** — DaemonSet: Kubernetes pod logs (`/var/log/pods`) plus **OneLab file logs** from the same host path the app chart uses (`/opt/onelab/logs` by default). +- **Grafana** — explore logs; datasource points at this release’s Loki gateway. + +It is synced by the **same** Argo CD Application as the OneLab chart ([`argocd/application.yaml`](argocd/application.yaml)): second `sources` entry, Argo **`helm.releaseName`** **`onelab-obs`** (so services are like `onelab-obs-loki-gateway`). + +### First-time setup + +1. **Change the Grafana admin password** in [`observability/values.yaml`](observability/values.yaml) (`grafana.adminPassword`) or switch to `admin.existingSecret` per the upstream Grafana chart. +2. 
**Align host paths** — if you change `persistence.hostPath.logs` for OneLab, update `promtail.extraVolumes` / `extraVolumeMounts` in the same `values.yaml` so Promtail still reads the shared log directory. +3. **Multi-node** — with `hostPath` logs, each node only sees its own files; Promtail runs on every node, so you still get coverage when pods move. + +### OneLab-only ingestion + +Promtail adds **`extraRelabelConfigs`** so the **kubernetes-pods** job **keeps only** pods in namespace **`onelab`**. Other namespaces no longer reach Loki (Explore only sees OneLab). Host file logs under `/opt/onelab/logs` are tagged with **`namespace: onelab`** and **`component: host-logs`** so they appear in the same queries. + +Existing Loki data from before this change may still show non-`onelab` streams until **retention** drops them; for a clean index you would need to wipe the Loki PVC (destructive). + +### Dashboard: **OneLab logs** + +Grafana’s **dashboard sidecar** loads ConfigMap **`…-dashboard-onelab-logs`** (JSON: `observability/dashboards/onelab-logs.json`). Open **Dashboards → OneLab logs** (`uid` `onelab-logs`): + +- **Component** — multi-select from `label_values({namespace="onelab"}, component)` (includes **`host-logs`** for file logs). +- **Line filter** — regex applied to log line content (`.*` = all). +- Stat panels: total lines, heuristic **error** / **warning** counts (tuned for typical text logs, not strict JSON parsing). + +#### Grafana pod: `init-chown-data` CrashLoopBackOff + +The upstream chart runs an init container as **root** to `chown` `/var/lib/grafana`. Clusters with **Pod Security Admission** (often on k3s) commonly block that. This repo sets **`grafana.initChownData.enabled: false`**; the Grafana pod keeps **`fsGroup: 472`** so the PVC is usually group-writable. If Grafana still cannot write to disk, delete the Grafana PVC once after the change or relax PSA for namespace `onelab`. 
+ +### Access Grafana + +An **Ingress** named **`grafana-onelab`** is created by the umbrella chart (`observability/templates/ingress-grafana-onelab.yaml`), Traefik + cert-manager, matching the OneLab web UI pattern in `gitops/values/k3s-example.yaml`: + +- Host: **`grafana.k8s.selair.it`** — edit `grafanaOnelabIngress` and `grafana.ini.server` in `gitops/observability/values.yaml` together. +- TLS Secret: **`grafana-tls-k8s-selair`** (cert-manager with `letsencrypt-prod`). + +Point DNS at your ingress, sync the app, then open `https://<grafana-host>/` (e.g. `https://grafana.k8s.selair.it/`; user `admin` until you change values). + +For debugging without DNS: + +```bash +kubectl -n onelab port-forward svc/onelab-obs-grafana 3000:80 +``` + +### Maintainers: vendored chart dependencies + +The observability umbrella vendors upstream charts under `gitops/observability/charts/*.tgz` so **Argo CD** can render without relying on live Helm repo access at sync time. + +When bumping Loki / Promtail / Grafana versions, from `gitops/observability/` run: + +```bash +helm dependency update +``` + +Commit the updated `Chart.lock` and `charts/*.tgz` with your Git change. This is **repository packaging**, not an alternative install path — deploy still happens only via Argo CD. + +### OneLab `logs.path` + +The OneLab chart sets `onelab.logs.path: "/logs"` in the generated configuration so application file logs match the `/logs` volume mount (see Enterprise guide §7.2). ## kubectl / credentials -If `kubectl` reports *You must be logged in*, refresh your kubeconfig (e.g. copy `/etc/rancher/k3s/k3s.yaml` from the server or re-run your auth plugin) before applying manifests. - -## Private Git + registry - -See [docs/BOOTSTRAP.md](docs/BOOTSTRAP.md) for Argo CD access to `git.luneski.fr` and `docker-registry` for `hub.andrewalliance.com`. - -## Helm note (Windows) - -Helm 3.19 may return empty content for `.Files.Get` on Windows; this chart uses `fromYaml (.Files.AsConfig)` as a workaround so packaged files still render correctly. 
+If `kubectl` reports *You must be logged in*, refresh your kubeconfig (e.g. copy `/etc/rancher/k3s/k3s.yaml` from the k3s server, or re-run your auth plugin) before applying manifests. ## Application configuration (`configurations.yml`) -Do **not** need to edit `app/configurations.yml` in Git for Kubernetes. The chart builds `configurations.yml` from `charts/onelab/files/configurations.gotmpl` and stores it in Secret **`onelab-configurations`** (mounted by app pods and `ldap-worker`). +You do not need to edit [`app/configurations.yml`](../app/configurations.yml) in Git for Kubernetes. The chart renders `configurations.yml` from [`charts/onelab/files/configurations.gotmpl`](charts/onelab/files/configurations.gotmpl) into Secret **`onelab-configurations`**. -1. **Values (recommended)** — set `onelab.compliance.enabled`, `onelab.ldap.enabled`, and related fields. See `values/instance-overrides.example.yaml`. Point Helm/Argo at an extra values file for your site (Argo: add another path under `spec.source.helm.valueFiles`, relative to the chart directory). -2. **Bring your own Secret** — set `configuration.existingSecretName` to a Secret you manage (SealedSecrets, External Secrets, `kubectl create secret ... --from-file=configurations.yml=...`). The chart will **not** create `onelab-configurations` in that case; the Secret must contain key **`configurations.yml`**. +1. **Values (recommended)** — set `onelab.compliance`, `onelab.ldap`, etc. See [`values/instance-overrides.example.yaml`](values/instance-overrides.example.yaml). Add extra paths under **`spec.sources[].helm.valueFiles`** for the `gitops/charts/onelab` source (paths relative to `gitops/charts/onelab`). +2. **Bring your own Secret** — set `configuration.existingSecretName`; the Secret must contain key **`configurations.yml`**. -A **ConfigMap** alone is fine if you mount it yourself, but this chart expects a **Secret** for the config file (same as Swarm-style sensitivity). 
LDAP TLS file paths in values are container paths; mount PEMs with extra volumes on `ldap-worker` if you use them. +LDAP TLS paths in values are container paths; mount PEMs on `ldap-worker` if required. ## Ingress (web UI) -Enable `ingress.enabled` and set `ingress.host` (and optional TLS). Traffic is sent to Service **`revproxy`** (internal nginx). On k3s, `ingress.className: traefik` matches the default controller. +Set `ingress.enabled`, `ingress.host`, and optional TLS in values. Traffic goes to Service **`revproxy`**. On k3s, `ingress.className: traefik` matches the default controller. For cert-manager, set `ingress.tls`, `ingress.tlsSecretName`, and `ingress.certManager.clusterIssuer`; DNS for `ingress.host` must resolve before ACME runs. -For **cert-manager**, set `ingress.tls: true`, `ingress.tlsSecretName`, and `ingress.certManager.clusterIssuer` (e.g. `letsencrypt-prod`). Ensure a **DNS A/CNAME** for `ingress.host` points to your ingress before the ACME challenge runs. +## Developer note (local render) + +Running **`helm template` on Windows** against some paths can return empty `.Files.Get` content; the OneLab chart uses `fromYaml (.Files.AsConfig)` where needed. **Argo CD runs on Linux** and renders the same charts in-cluster — this is a local-tooling caveat, not a second deploy path. ## Not migrated in this chart -- **Edge proxy stack** (`app/proxy/docker-compose.yml`, host 80/443 Swarm mode) — replaced for K8s by this **Ingress** + `revproxy`; optional **cert-manager** for TLS at the Ingress. -- **Swarm-only secrets** (e.g. `ssl_passphrase`) — handle via Kubernetes Secrets or external operators. +- **Edge proxy stack** (`app/proxy/docker-compose.yml`, host 80/443 Swarm) — use **Ingress** + `revproxy` and optional cert-manager. +- **Swarm-only secrets** (e.g. `ssl_passphrase`) — use Kubernetes Secrets or external operators. 
diff --git a/gitops/argocd/README.md b/gitops/argocd/README.md deleted file mode 100644 index 7de2c60..0000000 --- a/gitops/argocd/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Argo CD Application - -Apply the Application: - -```bash -kubectl apply -f gitops/argocd/application.yaml -``` - -## Migrating from `spec.source` to `spec.sources` - -If the `onelab` Application was created **before** the observability stack, the live object may still have **`spec.source`** only. A plain `kubectl apply` of the new manifest often **does not remove** `spec.source`, so Argo never reconciles the second chart (Loki/Promtail/Grafana). - -**Check:** - -```bash -kubectl get application onelab -n argocd -o jsonpath='{.spec.source}{"\n"}{.spec.sources}{"\n"}' -``` - -If `source` is set and `sources` is empty, patch once: - -```bash -kubectl patch application onelab -n argocd --type json --patch-file gitops/argocd/jsonpatch-multisource.json -``` - -Then sync the app in Argo (or wait for auto-sync). - -Adjust `repoURL` in `jsonpatch-multisource.json` if your remote differs. diff --git a/gitops/argocd/application.yaml b/gitops/argocd/application.yaml index 778669d..72f8684 100644 --- a/gitops/argocd/application.yaml +++ b/gitops/argocd/application.yaml @@ -2,7 +2,7 @@ # Requires Argo CD 2.6+ (spec.sources). Ensure repoURL matches your remote. # # If you already had this Application with spec.source only, kubectl apply may not drop -# source — see README.md in this folder and jsonpatch-multisource.json. +# source — see gitops/README.md (Migrating spec.source → spec.sources) and jsonpatch-multisource.json. apiVersion: argoproj.io/v1alpha1 kind: Application metadata: diff --git a/gitops/docs/BOOTSTRAP.md b/gitops/docs/BOOTSTRAP.md deleted file mode 100644 index cb0f355..0000000 --- a/gitops/docs/BOOTSTRAP.md +++ /dev/null @@ -1,57 +0,0 @@ -# Bootstrap OneLab on this cluster - -## 1. 
Private registry (`hub.andrewalliance.com`) - -By default, `gitops/values/k3s-example.yaml` matches the Swarm installer (`app/playbooks/tasks/manage-images.yml`): user **`public`**, password **`Andrew01..Release`**, and the chart creates Secret **`hub-andrewalliance`** when `registry.createPullSecret: true`. - -To use other credentials, override `registry.username` / `registry.password` or create the secret manually: - -```bash -kubectl create secret docker-registry hub-andrewalliance -n onelab \ - --docker-server=hub.andrewalliance.com \ - --docker-username='YOUR_USER' \ - --docker-password='YOUR_PASSWORD' -``` - -…and set `registry.createPullSecret: false` plus `imagePullSecrets: [{ name: hub-andrewalliance }]`. - -### StatefulSet pods still get `401 Unauthorized` / `ImagePullBackOff` after enabling registry auth - -If `db-0` / `rabbitmq-0` were created **before** `imagePullSecrets` existed, their **Pod** spec can still use anonymous pulls until they are recreated: - -```bash -kubectl delete pod -n onelab db-0 rabbitmq-0 -``` - -The chart adds a pod-template checksum so a `helm upgrade` after changing registry credentials normally rolls these pods; a one-time delete is enough if you toggled pull secrets outside that path. - -## 2. Argo CD + private Git (`git.luneski.fr`) - -If the Application shows `authentication required: Unauthorized`, register the repo in Argo CD (CLI or UI): - -```bash -# Example; use a deploy token or PAT with repo read access -argocd repo add https://git.luneski.fr/luneski/onelab-k8s.git \ - --username git \ - --password YOUR_TOKEN -``` - -Then apply the Application: - -```bash -kubectl apply -f gitops/argocd/application.yaml -``` - -**Helm vs Argo:** If you already installed with `helm upgrade --install onelab ...`, either delete that Helm release before letting Argo manage the same resources, or keep Helm-only and do not apply the Application until you choose one controller. - -## 3. 
RabbitMQ TLS - -Secret `onelab-rabbit-tls` must exist before RabbitMQ starts (created once from `app/rabbit/ssl/` or your own PEMs). - -## 4. Argo CD version + observability stack - -`gitops/argocd/application.yaml` uses **`spec.sources`** (two Helm charts in one Application). Use **Argo CD 2.6 or newer**. - -If the `onelab` Application was created earlier with **`spec.source` only**, Argo will **not** show the observability resources until you remove `source` and set `sources` (a plain `kubectl apply` often leaves the old field). Use [`gitops/argocd/jsonpatch-multisource.json`](../argocd/jsonpatch-multisource.json) as documented in [`gitops/argocd/README.md`](../argocd/README.md). - -The second source installs Loki/Promtail/Grafana from `gitops/observability/` (`releaseName: onelab-obs`). Set a strong **`grafana.adminPassword`** in `gitops/observability/values.yaml` before production. Details: [OBSERVABILITY.md](OBSERVABILITY.md). diff --git a/gitops/docs/OBSERVABILITY.md b/gitops/docs/OBSERVABILITY.md deleted file mode 100644 index e377b6b..0000000 --- a/gitops/docs/OBSERVABILITY.md +++ /dev/null @@ -1,62 +0,0 @@ -# Observability (Loki / Promtail / Grafana) - -The umbrella chart under [`gitops/observability/`](../observability/) deploys: - -- **Loki** — log storage (SingleBinary, filesystem PVC, 7-day retention by default). -- **Promtail** — DaemonSet: Kubernetes pod logs (`/var/log/pods`) plus **OneLab file logs** from the same host path the app chart uses (`/opt/onelab/logs` by default). -- **Grafana** — explore logs; datasource points at this release’s Loki gateway. - -It is synced by the **same** Argo CD Application as the OneLab chart ([`gitops/argocd/application.yaml`](../argocd/application.yaml)): second `sources` entry, Helm release name **`onelab-obs`** (so services are like `onelab-obs-loki-gateway`). - -## First-time setup - -1. 
**Change the Grafana admin password** in [`gitops/observability/values.yaml`](../observability/values.yaml) (`grafana.adminPassword`) or switch to `admin.existingSecret` per the upstream Grafana chart. -2. **Align host paths** — if you change `persistence.hostPath.logs` for OneLab, update `promtail.extraVolumes` / `extraVolumeMounts` in the same `values.yaml` so Promtail still reads the shared log directory. -3. **Multi-node** — with `hostPath` logs, each node only sees its own files; Promtail runs on every node, so you still get coverage when pods move. - -## OneLab-only ingestion - -Promtail adds **`extraRelabelConfigs`** so the **kubernetes-pods** job **keeps only** pods in namespace **`onelab`**. Other namespaces no longer reach Loki (Explore only sees OneLab). Host file logs under `/opt/onelab/logs` are tagged with **`namespace: onelab`** and **`component: host-logs`** so they appear in the same queries. - -Existing Loki data from before this change may still show non-`onelab` streams until **retention** drops them; for a clean index you would need to wipe the Loki PVC (destructive). - -## Dashboard: **OneLab logs** - -Grafana’s **dashboard sidecar** loads ConfigMap **`…-dashboard-onelab-logs`** (JSON: `dashboards/onelab-logs.json`). Open **Dashboards → OneLab logs** (`uid` `onelab-logs`): - -- **Component** — multi-select from `label_values({namespace="onelab"}, component)` (includes **`host-logs`** for file logs). -- **Line filter** — regex applied to log line content (`.*` = all). -- Stat panels: total lines, heuristic **error** / **warning** counts (tuned for typical text logs, not strict JSON parsing). - -### Grafana pod: `init-chown-data` CrashLoopBackOff - -The upstream chart runs an init container as **root** to `chown` `/var/lib/grafana`. Clusters with **Pod Security Admission** (often on k3s) commonly block that. This repo sets **`grafana.initChownData.enabled: false`**; the Grafana pod keeps **`fsGroup: 472`** so the PVC is usually group-writable. 
If Grafana still cannot write to disk, delete the Grafana PVC once after the change or relax PSA for namespace `onelab`. - -## Access Grafana - -An **Ingress** named **`grafana-onelab`** is created by the umbrella chart (`templates/ingress-grafana-onelab.yaml`), Traefik + cert-manager, matching the OneLab web UI pattern in `gitops/values/k3s-example.yaml`: - -- Host: **`grafana.k8s.selair.it`** — edit `grafanaOnelabIngress` and `grafana.ini.server` in `gitops/observability/values.yaml` together. -- TLS Secret: **`grafana-tls-k8s-selair`** (cert-manager with `letsencrypt-prod`). - -Point DNS at your ingress, sync the app, then open `https:///` (user `admin` until you change values). - -For debugging without DNS: - -```bash -kubectl -n onelab port-forward svc/onelab-obs-grafana 3000:80 -``` - -## Upgrading chart dependencies - -From `gitops/observability/`: - -```bash -helm dependency update -``` - -Commit updated `Chart.lock` and `charts/*.tgz` if you want Argo to render without calling remote Helm repos at sync time. - -## OneLab `logs.path` - -The OneLab chart now sets `onelab.logs.path: "/logs"` in the generated configuration so application file logs match the `/logs` volume mount (see Enterprise guide §7.2). diff --git a/gitops/observability/README.md b/gitops/observability/README.md deleted file mode 100644 index 2904905..0000000 --- a/gitops/observability/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# OneLab observability (Helm umbrella) - -Loki + Promtail + Grafana dependencies are pinned in `Chart.lock`; packaged charts live in `charts/*.tgz`. - -Deployed by Argo CD as the second `sources` entry in `gitops/argocd/application.yaml` with **`releaseName: onelab-obs`**. - -See [../docs/OBSERVABILITY.md](../docs/OBSERVABILITY.md) for operations and security notes. 
diff --git a/gitops/values/instance-overrides.example.yaml b/gitops/values/instance-overrides.example.yaml index c07207a..6c4eed6 100644 --- a/gitops/values/instance-overrides.example.yaml +++ b/gitops/values/instance-overrides.example.yaml @@ -1,6 +1,9 @@ -# Copy to a private file (e.g. gitops/values/private-k3s.yaml, gitignored) or merge into your env values. -# Reference from Helm: -f ../../values/k3s-example.yaml -f ../../values/private-k3s.yaml -# Argo CD: add a second entry under helm.valueFiles (paths relative to chart path). +# Copy to a private file (e.g. gitops/values/private-k3s.yaml, gitignored) or merge into gitops/values/k3s-example.yaml. +# +# Argo CD: under spec.sources, for the source with path gitops/charts/onelab, add another path to helm.valueFiles +# (paths are relative to that chart directory), e.g.: +# - ../../values/k3s-example.yaml +# - ../../values/private-k3s.yaml onelab: compliance: @@ -26,7 +29,7 @@ onelab: # tlsCiphers: "" # tlsSslVersion: "" -# Alternative: supply the full YAML yourself (no Helm templating of compliance/LDAP blocks). +# Alternative: supply the full YAML yourself (bypasses chart templates in configurations.gotmpl for those keys). # 1. kubectl create secret generic onelab-configurations-custom -n onelab \ # --from-file=configurations.yml=./my-configurations.yml # 2. Set in values: