From c0254f1bb48448fa451ad08a1c50eb38f3fefb17 Mon Sep 17 00:00:00 2001 From: Tine Date: Sat, 27 Jul 2024 16:08:21 +0200 Subject: [PATCH] feat: monitoring fixes --- k8s.tjo.cloud/main.tf | 8 +- .../modules}/cluster-components/dashboard.tf | 0 .../modules}/cluster-components/gateway.tf | 0 .../modules}/cluster-components/main.tf | 0 .../modules}/cluster-components/variables.tf | 0 .../modules}/cluster-components/versions.tf | 0 .../modules}/cluster-core/main.tf | 0 .../manifests/crd-podmonitors.yaml | 0 .../manifests/crd-servicemonitors.yaml | 0 .../modules}/cluster-core/monitoring.tf | 76 ++++++++++++------- .../modules/cluster-core/variables.tf | 4 + .../modules}/cluster-core/versions.tf | 0 .../modules}/cluster/components.tf | 0 .../modules}/cluster/kubeconfig.tftpl | 0 .../modules}/cluster/main.tf | 0 .../cluster/manifests/gateway-api.crds.yaml | 0 .../modules}/cluster/outputs.tf | 0 .../modules}/cluster/proxmox.tf | 0 .../modules}/cluster/variables.tf | 0 .../modules}/cluster/versions.tf | 0 modules/cluster-core/variables.tf | 1 - 21 files changed, 57 insertions(+), 32 deletions(-) rename {modules => k8s.tjo.cloud/modules}/cluster-components/dashboard.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-components/gateway.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-components/main.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-components/variables.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-components/versions.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-core/main.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-core/manifests/crd-podmonitors.yaml (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-core/manifests/crd-servicemonitors.yaml (100%) rename {modules => k8s.tjo.cloud/modules}/cluster-core/monitoring.tf (85%) create mode 100644 k8s.tjo.cloud/modules/cluster-core/variables.tf rename {modules => k8s.tjo.cloud/modules}/cluster-core/versions.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/components.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/kubeconfig.tftpl (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/main.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/manifests/gateway-api.crds.yaml (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/outputs.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/proxmox.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/variables.tf (100%) rename {modules => k8s.tjo.cloud/modules}/cluster/versions.tf (100%) delete mode 100644 modules/cluster-core/variables.tf diff --git a/k8s.tjo.cloud/main.tf b/k8s.tjo.cloud/main.tf index acefacf..aac3f61 100644 --- a/k8s.tjo.cloud/main.tf +++ b/k8s.tjo.cloud/main.tf @@ -1,5 +1,5 @@ module "cluster" { - source = "../modules/cluster" + source = "./modules/cluster" providers = { helm.template = helm.template @@ -61,11 +61,13 @@ resource "local_file" "kubeconfig" { } module "cluster-core" { - source = "../modules/cluster-core" + source = "./modules/cluster-core" + + cluster_name = module.cluster.name } module "cluster-components" { - source = "../modules/cluster-components" + source = "./modules/cluster-components" oidc_issuer_url = var.oidc_issuer_url oidc_client_id = var.oidc_client_id diff --git a/modules/cluster-components/dashboard.tf b/k8s.tjo.cloud/modules/cluster-components/dashboard.tf similarity index 100% rename from modules/cluster-components/dashboard.tf rename to k8s.tjo.cloud/modules/cluster-components/dashboard.tf diff --git a/modules/cluster-components/gateway.tf b/k8s.tjo.cloud/modules/cluster-components/gateway.tf similarity index 100% rename from modules/cluster-components/gateway.tf rename to k8s.tjo.cloud/modules/cluster-components/gateway.tf diff --git a/modules/cluster-components/main.tf b/k8s.tjo.cloud/modules/cluster-components/main.tf similarity index 100% rename from modules/cluster-components/main.tf rename to k8s.tjo.cloud/modules/cluster-components/main.tf diff --git a/modules/cluster-components/variables.tf b/k8s.tjo.cloud/modules/cluster-components/variables.tf similarity index 100% rename from modules/cluster-components/variables.tf rename to k8s.tjo.cloud/modules/cluster-components/variables.tf diff --git a/modules/cluster-components/versions.tf b/k8s.tjo.cloud/modules/cluster-components/versions.tf similarity index 100% rename from modules/cluster-components/versions.tf rename to k8s.tjo.cloud/modules/cluster-components/versions.tf diff --git a/modules/cluster-core/main.tf b/k8s.tjo.cloud/modules/cluster-core/main.tf similarity index 100% rename from modules/cluster-core/main.tf rename to k8s.tjo.cloud/modules/cluster-core/main.tf diff --git a/modules/cluster-core/manifests/crd-podmonitors.yaml b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml similarity index 100% rename from modules/cluster-core/manifests/crd-podmonitors.yaml rename to k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml diff --git a/modules/cluster-core/manifests/crd-servicemonitors.yaml b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml similarity index 100% rename from modules/cluster-core/manifests/crd-servicemonitors.yaml rename to k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml diff --git a/modules/cluster-core/monitoring.tf b/k8s.tjo.cloud/modules/cluster-core/monitoring.tf similarity index 85% rename from modules/cluster-core/monitoring.tf rename to k8s.tjo.cloud/modules/cluster-core/monitoring.tf index d25a182..20cfbdb 100644 --- a/modules/cluster-core/monitoring.tf +++ b/k8s.tjo.cloud/modules/cluster-core/monitoring.tf @@ -28,7 +28,9 @@ resource "helm_release" "kube-state-metrics" { node-role.kubernetes.io/control-plane: "" tolerations: - key: "node-role.kubernetes.io/control-plane" - effect: "NoSchedule" + effect: NoSchedule + updateStrategy: Recreate + prometheusScrape: false prometheus: monitor: enabled: true @@ -62,7 +64,7 @@ resource "helm_release" "grafana-alloy" { alloy: extraEnv: - name: "CLUSTER_NAME" - value: "tjo-cloud" + value: "${var.cluster_name}" - name: "PROMETHEUS_CLIENT_ID" value: "o6Tz2215HLvhvZ4RCZCR8oMmCapTu30iwkoMkz6m" - name: "LOKI_CLIENT_ID" @@ -80,18 +82,22 @@ resource "helm_release" "grafana-alloy" { discovery.kubernetes "pods" { role = "pod" } - discovery.kubernetes "services" { - role = "services" - } discovery.relabel "all" { - targets = concat(discovery.kubernetes.pods.targets, discovery.kubernetes.services.targets) + targets = discovery.kubernetes.pods.targets + // Only process if scrape enabled + rule { + source_labels = [ + "__meta_kubernetes_pod_annotation_prometheus_io_scrape", + ] + action = "keep" + regex = "true" + } // allow override of http scheme with `promehteus.io/scheme` rule { action = "replace" regex = "(https?)" source_labels = [ - "__meta_kubernetes_service_annotation_prometheus_io_scheme", "__meta_kubernetes_pod_annotation_prometheus_io_scheme", ] target_label = "__scheme__" @@ -99,43 +105,42 @@ resource "helm_release" "grafana-alloy" { // allow override of default /metrics path with `prometheus.io/path` rule { action = "replace" - regex = "(.+)" source_labels = [ - "__meta_kubernetes_service_annotation_prometheus_io_path", "__meta_kubernetes_pod_annotation_prometheus_io_path", ] target_label = "__metrics_path__" } // allow override of default port with `prometheus.io/port` + // If the metrics port number annotation has a value, override the target address to use it, regardless whether it is + // one of the declared ports on that Pod. + rule { + source_labels = [ + "__meta_kubernetes_pod_annotation_prometheus_io_port", + "__meta_kubernetes_pod_ip", + ] + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" // IPv6 + target_label = "__address__" + } + rule { + source_labels = [ + "__meta_kubernetes_pod_annotation_prometheus_io_port", + "__meta_kubernetes_pod_ip", + ] + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists + replacement = "$2:$1" + target_label = "__address__" + } rule { action = "replace" regex = "([^:]+)(?::\\d+)?;(\\d+)" replacement = "$1:$2" source_labels = [ "__address__", - "__meta_kubernetes_service_annotation_prometheus_io_port", "__meta_kubernetes_pod_annotation_prometheus_io_port", ] target_label = "__address__" } - } - - // -- - // Metrics - // -- - prometheus.scrape "all" { - honor_labels = true - targets = discovery.relabel.all.output - forward_to = [prometheus.relabel.all.receiver] - } - prometheus.operator.podmonitors "all" { - forward_to = [prometheus.relabel.all.receiver] - } - prometheus.operator.servicemonitors "all" { - forward_to = [prometheus.relabel.all.receiver] - } - prometheus.relabel "all" { - forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver] rule { source_labels = ["__meta_kubernetes_namespace"] @@ -177,6 +182,21 @@ resource "helm_release" "grafana-alloy" { replacement = "$1" } } + + // -- + // Metrics + // -- + prometheus.scrape "all" { + honor_labels = true + targets = discovery.relabel.all.output + forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver] + } + prometheus.operator.podmonitors "all" { + forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver] + } + prometheus.operator.servicemonitors "all" { + forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver] + } prometheus.remote_write "prometheus_monitor_tjo_space" { external_labels = { cluster = env("CLUSTER_NAME"), diff --git a/k8s.tjo.cloud/modules/cluster-core/variables.tf b/k8s.tjo.cloud/modules/cluster-core/variables.tf new file mode 100644 index 0000000..ded2725 --- /dev/null +++ b/k8s.tjo.cloud/modules/cluster-core/variables.tf @@ -0,0 +1,4 @@ +variable "cluster_name" { + description = "Name of the Kubernetes cluster" + type = string +} diff --git a/modules/cluster-core/versions.tf b/k8s.tjo.cloud/modules/cluster-core/versions.tf similarity index 100% rename from modules/cluster-core/versions.tf rename to k8s.tjo.cloud/modules/cluster-core/versions.tf diff --git a/modules/cluster/components.tf b/k8s.tjo.cloud/modules/cluster/components.tf similarity index 100% rename from modules/cluster/components.tf rename to k8s.tjo.cloud/modules/cluster/components.tf diff --git a/modules/cluster/kubeconfig.tftpl b/k8s.tjo.cloud/modules/cluster/kubeconfig.tftpl similarity index 100% rename from modules/cluster/kubeconfig.tftpl rename to k8s.tjo.cloud/modules/cluster/kubeconfig.tftpl diff --git a/modules/cluster/main.tf b/k8s.tjo.cloud/modules/cluster/main.tf similarity index 100% rename from modules/cluster/main.tf rename to k8s.tjo.cloud/modules/cluster/main.tf diff --git a/modules/cluster/manifests/gateway-api.crds.yaml b/k8s.tjo.cloud/modules/cluster/manifests/gateway-api.crds.yaml similarity index 100% rename from modules/cluster/manifests/gateway-api.crds.yaml rename to k8s.tjo.cloud/modules/cluster/manifests/gateway-api.crds.yaml diff --git a/modules/cluster/outputs.tf b/k8s.tjo.cloud/modules/cluster/outputs.tf similarity index 100% rename from modules/cluster/outputs.tf rename to k8s.tjo.cloud/modules/cluster/outputs.tf diff --git a/modules/cluster/proxmox.tf b/k8s.tjo.cloud/modules/cluster/proxmox.tf similarity index 100% rename from modules/cluster/proxmox.tf rename to k8s.tjo.cloud/modules/cluster/proxmox.tf diff --git a/modules/cluster/variables.tf b/k8s.tjo.cloud/modules/cluster/variables.tf similarity index 100% rename from modules/cluster/variables.tf rename to k8s.tjo.cloud/modules/cluster/variables.tf diff --git a/modules/cluster/versions.tf b/k8s.tjo.cloud/modules/cluster/versions.tf similarity index 100% rename from modules/cluster/versions.tf rename to k8s.tjo.cloud/modules/cluster/versions.tf diff --git a/modules/cluster-core/variables.tf b/modules/cluster-core/variables.tf deleted file mode 100644 index 8b13789..0000000 --- a/modules/cluster-core/variables.tf +++ /dev/null @@ -1 +0,0 @@ -