infrastructure/k8s.tjo.cloud/modules/cluster-core/monitoring.tf

112 lines
2.8 KiB
Terraform
Raw Normal View History

2024-07-26 21:40:38 +00:00
# Namespace that the Helm releases in this file are installed into.
resource "kubernetes_namespace" "monitoring-system" {
  metadata {
    name = "monitoring-system"

    # Sets the namespace's Pod Security "enforce" level to "privileged".
    # NOTE(review): presumably required by host-level collectors running
    # in this namespace; confirm before tightening.
    labels = {
      "pod-security.kubernetes.io/enforce" = "privileged"
    }
  }
}
# Applies the manifest decoded from manifests/crd-podmonitors.yaml.
# Judging by the file name this is the PodMonitor CRD; verify against the
# manifest itself.
resource "kubernetes_manifest" "prometheus-pod-monitors" {
  manifest = yamldecode(
    file("${path.module}/manifests/crd-podmonitors.yaml")
  )
}
# Applies the manifest decoded from manifests/crd-servicemonitors.yaml.
# Judging by the file name this is the ServiceMonitor CRD; verify against
# the manifest itself.
resource "kubernetes_manifest" "prometheus-service-monitors" {
  manifest = yamldecode(
    file("${path.module}/manifests/crd-servicemonitors.yaml")
  )
}
# Installs the kube-state-metrics chart into the monitoring-system namespace.
resource "helm_release" "kube-state-metrics" {
  # Both monitor manifests must be applied first; the values below turn on
  # the chart's `prometheus.monitor` section.
  depends_on = [
    kubernetes_manifest.prometheus-pod-monitors,
    kubernetes_manifest.prometheus-service-monitors,
  ]

  # Release identity and target namespace.
  name      = "kube-state-metrics"
  namespace = kubernetes_namespace.monitoring-system.metadata[0].name

  # Chart source, pinned to an exact version.
  repository = "https://prometheus-community.github.io/helm-charts"
  chart      = "kube-state-metrics"
  version    = "5.24.0"

  # Failure handling flags for the Helm operation.
  atomic          = true
  cleanup_on_fail = true

  # Chart values:
  #  - nodeSelector + toleration target nodes labelled/tainted with
  #    node-role.kubernetes.io/control-plane,
  #  - updateStrategy is Recreate,
  #  - prometheusScrape is switched off while prometheus.monitor is
  #    switched on with honorLabels enabled.
  values = [<<-EOF
    nodeSelector:
      node-role.kubernetes.io/control-plane: ""
    tolerations:
      - key: "node-role.kubernetes.io/control-plane"
        effect: NoSchedule
    updateStrategy: Recreate
    prometheusScrape: false
    prometheus:
      monitor:
        enabled: true
        http:
          honorLabels: true
    EOF
  ]
}
2024-08-07 21:13:58 +00:00
# Grafana k8s-monitoring chart release for the monitoring-system namespace.
resource "helm_release" "monitoring" {
  # count = 0: Terraform creates no instance of this release, so the
  # configuration below is kept in code but is currently disabled.
  count = 0

  depends_on = [
    kubernetes_manifest.prometheus-pod-monitors,
    kubernetes_manifest.prometheus-service-monitors,
  ]

  # Release identity and target namespace.
  name      = "monitoring"
  namespace = kubernetes_namespace.monitoring-system.metadata[0].name

  # Chart source, pinned to an exact (release-candidate) version.
  repository = "https://grafana.github.io/helm-charts"
  chart      = "k8s-monitoring"
  version    = "2.0.0-rc.10"

  # Failure handling flags for the Helm operation.
  atomic          = true
  cleanup_on_fail = true

  # Chart values:
  #  - cluster.name is taken from var.cluster_name,
  #  - every listed feature and all three alloy-* components are enabled,
  #  - a single OTLP destination receives logs and metrics (traces off),
  #    authenticated with OAuth2 client credentials; the client secret is
  #    read from the service account token file path given below.
  values = [<<-EOF
    cluster:
      name: "${var.cluster_name}"
    clusterMetrics:
      enabled: true
    clusterEvents:
      enabled: true
    podLogs:
      enabled: true
    nodeLogs:
      enabled: true
    prometheusOperatorObjects:
      enabled: true
    annotationAutodiscovery:
      enabled: true
    alloy-logs:
      enabled: true
    alloy-metrics:
      enabled: true
    alloy-singleton:
      enabled: true
    destinations:
      - name: monitor-tjo-cloud
        type: otlp
        url: "grpc.otel.monitor.tjo.cloud:443"
        auth:
          type: oauth2
          oauth2:
            tokenURL: "https://id.tjo.space/application/o/token/"
            clientId: "o6Tz2215HLvhvZ4RCZCR8oMmCapTu30iwkoMkz6m"
            clientSecretFile: "/var/run/secrets/kubernetes.io/serviceaccount/token"
            endpointParams:
              grant_type:
                - "client_credentials"
              client_assertion_type:
                - "urn:ietf:params:oauth:client-assertion-type:jwt-bearer"
        logs:
          enabled: true
        metrics:
          enabled: true
        traces:
          enabled: false
    EOF
  ]
}