feat: monitoring fixes
Some checks failed
/ lint (push) Failing after 12m52s

This commit is contained in:
Tine 2024-07-27 16:08:21 +02:00
parent 52f5d3e307
commit c0254f1bb4
Signed by: mentos1386
SSH key fingerprint: SHA256:MNtTsLbihYaWF8j1fkOHfkKNlnN1JQfxEU/rBU8nCGw
21 changed files with 57 additions and 32 deletions

View file

@ -1,5 +1,5 @@
module "cluster" { module "cluster" {
source = "../modules/cluster" source = "./modules/cluster"
providers = { providers = {
helm.template = helm.template helm.template = helm.template
@ -61,11 +61,13 @@ resource "local_file" "kubeconfig" {
} }
module "cluster-core" { module "cluster-core" {
source = "../modules/cluster-core" source = "./modules/cluster-core"
cluster_name = module.cluster.name
} }
module "cluster-components" { module "cluster-components" {
source = "../modules/cluster-components" source = "./modules/cluster-components"
oidc_issuer_url = var.oidc_issuer_url oidc_issuer_url = var.oidc_issuer_url
oidc_client_id = var.oidc_client_id oidc_client_id = var.oidc_client_id

View file

@ -28,7 +28,9 @@ resource "helm_release" "kube-state-metrics" {
node-role.kubernetes.io/control-plane: "" node-role.kubernetes.io/control-plane: ""
tolerations: tolerations:
- key: "node-role.kubernetes.io/control-plane" - key: "node-role.kubernetes.io/control-plane"
effect: "NoSchedule" effect: NoSchedule
updateStrategy: Recreate
prometheusScrape: false
prometheus: prometheus:
monitor: monitor:
enabled: true enabled: true
@ -62,7 +64,7 @@ resource "helm_release" "grafana-alloy" {
alloy: alloy:
extraEnv: extraEnv:
- name: "CLUSTER_NAME" - name: "CLUSTER_NAME"
value: "tjo-cloud" value: "${var.cluster_name}"
- name: "PROMETHEUS_CLIENT_ID" - name: "PROMETHEUS_CLIENT_ID"
value: "o6Tz2215HLvhvZ4RCZCR8oMmCapTu30iwkoMkz6m" value: "o6Tz2215HLvhvZ4RCZCR8oMmCapTu30iwkoMkz6m"
- name: "LOKI_CLIENT_ID" - name: "LOKI_CLIENT_ID"
@ -80,18 +82,22 @@ resource "helm_release" "grafana-alloy" {
discovery.kubernetes "pods" { discovery.kubernetes "pods" {
role = "pod" role = "pod"
} }
discovery.kubernetes "services" {
role = "services"
}
discovery.relabel "all" { discovery.relabel "all" {
targets = concat(discovery.kubernetes.pods.targets, discovery.kubernetes.services.targets) targets = discovery.kubernetes.pods.targets
// Only process if scrape enabled
rule {
source_labels = [
"__meta_kubernetes_pod_annotation_prometheus_io_scrape",
]
action = "keep"
regex = "true"
}
// allow override of http scheme with `prometheus.io/scheme` // allow override of http scheme with `prometheus.io/scheme`
rule { rule {
action = "replace" action = "replace"
regex = "(https?)" regex = "(https?)"
source_labels = [ source_labels = [
"__meta_kubernetes_service_annotation_prometheus_io_scheme",
"__meta_kubernetes_pod_annotation_prometheus_io_scheme", "__meta_kubernetes_pod_annotation_prometheus_io_scheme",
] ]
target_label = "__scheme__" target_label = "__scheme__"
@ -99,43 +105,42 @@ resource "helm_release" "grafana-alloy" {
// allow override of default /metrics path with `prometheus.io/path` // allow override of default /metrics path with `prometheus.io/path`
rule { rule {
action = "replace" action = "replace"
regex = "(.+)"
source_labels = [ source_labels = [
"__meta_kubernetes_service_annotation_prometheus_io_path",
"__meta_kubernetes_pod_annotation_prometheus_io_path", "__meta_kubernetes_pod_annotation_prometheus_io_path",
] ]
target_label = "__metrics_path__" target_label = "__metrics_path__"
} }
// allow override of default port with `prometheus.io/port` // allow override of default port with `prometheus.io/port`
// If the metrics port number annotation has a value, override the target address to use it, regardless of whether it is
// one of the declared ports on that Pod.
rule {
source_labels = [
"__meta_kubernetes_pod_annotation_prometheus_io_port",
"__meta_kubernetes_pod_ip",
]
regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"
replacement = "[$2]:$1" // IPv6
target_label = "__address__"
}
rule {
source_labels = [
"__meta_kubernetes_pod_annotation_prometheus_io_port",
"__meta_kubernetes_pod_ip",
]
regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists
replacement = "$2:$1"
target_label = "__address__"
}
rule { rule {
action = "replace" action = "replace"
regex = "([^:]+)(?::\\d+)?;(\\d+)" regex = "([^:]+)(?::\\d+)?;(\\d+)"
replacement = "$1:$2" replacement = "$1:$2"
source_labels = [ source_labels = [
"__address__", "__address__",
"__meta_kubernetes_service_annotation_prometheus_io_port",
"__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_annotation_prometheus_io_port",
] ]
target_label = "__address__" target_label = "__address__"
} }
}
// --
// Metrics
// --
prometheus.scrape "all" {
honor_labels = true
targets = discovery.relabel.all.output
forward_to = [prometheus.relabel.all.receiver]
}
prometheus.operator.podmonitors "all" {
forward_to = [prometheus.relabel.all.receiver]
}
prometheus.operator.servicemonitors "all" {
forward_to = [prometheus.relabel.all.receiver]
}
prometheus.relabel "all" {
forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver]
rule { rule {
source_labels = ["__meta_kubernetes_namespace"] source_labels = ["__meta_kubernetes_namespace"]
@ -177,6 +182,21 @@ resource "helm_release" "grafana-alloy" {
replacement = "$1" replacement = "$1"
} }
} }
// --
// Metrics
// --
prometheus.scrape "all" {
honor_labels = true
targets = discovery.relabel.all.output
forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver]
}
prometheus.operator.podmonitors "all" {
forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver]
}
prometheus.operator.servicemonitors "all" {
forward_to = [prometheus.remote_write.prometheus_monitor_tjo_space.receiver]
}
prometheus.remote_write "prometheus_monitor_tjo_space" { prometheus.remote_write "prometheus_monitor_tjo_space" {
external_labels = { external_labels = {
cluster = env("CLUSTER_NAME"), cluster = env("CLUSTER_NAME"),

View file

@ -0,0 +1,4 @@
variable "cluster_name" {
description = "Name of the Kubernetes cluster"
type = string
}

View file

@ -1 +0,0 @@