From bca4a4aa6eaa34bc7edb1314a86835c3ff6973a1 Mon Sep 17 00:00:00 2001
From: Tine
Date: Sun, 15 Dec 2024 18:35:26 +0100
Subject: [PATCH] feat: working bgp

---
 k8s.tjo.cloud/main.tf                         |  52 +++-----
 .../manifests/crd-podmonitors.yaml            |  44 ++++++-
 .../manifests/crd-servicemonitors.yaml        |  32 ++++-
 k8s.tjo.cloud/modules/cluster/components.tf   |   4 +-
 k8s.tjo.cloud/modules/cluster/main.tf         | 122 ++++++++++++++----
 k8s.tjo.cloud/modules/cluster/variables.tf    |  21 ++-
 6 files changed, 196 insertions(+), 79 deletions(-)

diff --git a/k8s.tjo.cloud/main.tf b/k8s.tjo.cloud/main.tf
index 4af03c0..2dc53d0 100644
--- a/k8s.tjo.cloud/main.tf
+++ b/k8s.tjo.cloud/main.tf
@@ -20,6 +20,14 @@ module "cluster" {
       client_id  = var.oidc_client_id
       issuer_url = var.oidc_issuer_url
     }
+    pod_cidr = {
+      ipv4 = "10.0.240.0/21"
+      ipv6 = "fd74:6a6f:0:f000::/53"
+    }
+    service_cidr = {
+      ipv4 = "10.0.248.0/22"
+      ipv6 = "fd74:6a6f:0:f800::/108"
+    }
   }
 
   proxmox = {
@@ -28,8 +36,17 @@ module "cluster" {
     common_storage = "synology.storage.tjo.cloud"
   }
 
+  hosts = {
+    nevaroo = {
+      asn = 65003
+    }
+    mustafar = {
+      asn = 65004
+    }
+  }
+
   nodes = {
-    nevaroo-cp = {
+    nevaroo-1 = {
       id      = 6001
       type    = "controlplane"
       host    = "nevaroo"
@@ -37,39 +54,6 @@ module "cluster" {
       cores   = 4
       memory  = 4096
     }
-    #mustafar-cp = {
-    #  id      = 6000
-    #  type    = "controlplane"
-    #  host    = "mustafar"
-    #  storage = "local"
-    #  cores   = 2
-    #  memory  = 4096
-    #}
-    #jakku-cp = {
-    #  id      = 6000
-    #  type    = "controlplane"
-    #  host    = "jakku"
-    #  storage = "local-nvme"
-    #  cores   = 2
-    #  memory  = 4096
-    #}
-    #batuu-cp = {
-    #  id      = 6000
-    #  type    = "controlplane"
-    #  host    = "batuu"
-    #  storage = "local-nvme"
-    #  cores   = 2
-    #  memory  = 4096
-    #}
-
-    nevaro-w1 = {
-      id      = 6002
-      type    = "worker"
-      host    = "nevaroo"
-      storage = "local-nvme-lvm"
-      cores   = 8
-      memory  = 24576
-    }
     mustafar-1 = {
       id      = 6000
       type    = "worker"
diff --git a/k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml
index e51c40d..2252724 100644
--- a/k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml
+++ b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-podmonitors.yaml
@@ -1,11 +1,11 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.78.2/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.79.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.4
-    operator.prometheus.io/version: 0.78.2
+    controller-gen.kubebuilder.io/version: v0.16.5
+    operator.prometheus.io/version: 0.79.0
   name: podmonitors.monitoring.coreos.com
 spec:
   group: monitoring.coreos.com
@@ -760,10 +760,16 @@ spec:
                 type: string
               port:
                 description: |-
-                  Name of the Pod port which this endpoint refers to.
+                  The `Pod` port name which exposes the endpoint.
 
-                  It takes precedence over `targetPort`.
+                  It takes precedence over the `portNumber` and `targetPort` fields.
                 type: string
+              portNumber:
+                description: The `Pod` port number which exposes the endpoint.
+                format: int32
+                maximum: 65535
+                minimum: 1
+                type: integer
               proxyUrl:
                 description: |-
                   `proxyURL` configures the HTTP Proxy URL (e.g.
@@ -892,7 +898,7 @@ spec:
                   Name or number of the target port of the `Pod` object behind the Service, the
                   port must be specified with container port property.
 
-                  Deprecated: use 'port' instead.
+                  Deprecated: use 'port' or 'portNumber' instead.
                 x-kubernetes-int-or-string: true
               tlsConfig:
                 description: TLS configuration to use when scraping the target.
@@ -1088,6 +1094,18 @@ spec:
                 Whether to scrape a classic histogram that is also exposed as a native histogram.
                 It requires Prometheus >= v2.45.0.
               type: boolean
+            scrapeFallbackProtocol:
+              description: |-
+                The protocol to use if a scrape returns blank, unparseable, or otherwise invalid Content-Type.
+
+                It requires Prometheus >= v3.0.0.
+              enum:
+              - PrometheusProto
+              - OpenMetricsText0.0.1
+              - OpenMetricsText1.0.0
+              - PrometheusText0.0.4
+              - PrometheusText1.0.0
+              type: string
             scrapeProtocols:
               description: |-
                 `scrapeProtocols` defines the protocols to negotiate during a scrape. It tells clients the
@@ -1104,11 +1122,13 @@ spec:
                   * `OpenMetricsText1.0.0`
                   * `PrometheusProto`
                   * `PrometheusText0.0.4`
+                  * `PrometheusText1.0.0`
                 enum:
                 - PrometheusProto
                 - OpenMetricsText0.0.1
                 - OpenMetricsText1.0.0
                 - PrometheusText0.0.4
+                - PrometheusText1.0.0
                 type: string
               type: array
               x-kubernetes-list-type: set
@@ -1159,6 +1179,18 @@ spec:
                 type: object
               type: object
               x-kubernetes-map-type: atomic
+            selectorMechanism:
+              description: |-
+                Mechanism used to select the endpoints to scrape.
+                By default, the selection process relies on relabel configurations to filter the discovered targets.
+                Alternatively, you can opt in for role selectors, which may offer better efficiency in large clusters.
+                Which strategy is best for your use case needs to be carefully evaluated.
+
+                It requires Prometheus >= v2.17.0.
+              enum:
+              - RelabelConfig
+              - RoleSelector
+              type: string
             targetLimit:
               description: |-
                 `targetLimit` defines a limit on the number of scraped targets that will
diff --git a/k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml
index ebfe496..b6f0f6d 100644
--- a/k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml
+++ b/k8s.tjo.cloud/modules/cluster-core/manifests/crd-servicemonitors.yaml
@@ -1,11 +1,11 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.78.2/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.79.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.4
-    operator.prometheus.io/version: 0.78.2
+    controller-gen.kubebuilder.io/version: v0.16.5
+    operator.prometheus.io/version: 0.79.0
   name: servicemonitors.monitoring.coreos.com
 spec:
   group: monitoring.coreos.com
@@ -1108,6 +1108,18 @@ spec:
                 Whether to scrape a classic histogram that is also exposed as a native histogram.
                 It requires Prometheus >= v2.45.0.
               type: boolean
+            scrapeFallbackProtocol:
+              description: |-
+                The protocol to use if a scrape returns blank, unparseable, or otherwise invalid Content-Type.
+
+                It requires Prometheus >= v3.0.0.
+              enum:
+              - PrometheusProto
+              - OpenMetricsText0.0.1
+              - OpenMetricsText1.0.0
+              - PrometheusText0.0.4
+              - PrometheusText1.0.0
+              type: string
             scrapeProtocols:
              description: |-
                 `scrapeProtocols` defines the protocols to negotiate during a scrape. It tells clients the
@@ -1124,11 +1136,13 @@ spec:
                   * `OpenMetricsText1.0.0`
                   * `PrometheusProto`
                   * `PrometheusText0.0.4`
+                  * `PrometheusText1.0.0`
                 enum:
                 - PrometheusProto
                 - OpenMetricsText0.0.1
                 - OpenMetricsText1.0.0
                 - PrometheusText0.0.4
+                - PrometheusText1.0.0
                 type: string
               type: array
               x-kubernetes-list-type: set
@@ -1179,6 +1193,18 @@ spec:
                 type: object
               type: object
               x-kubernetes-map-type: atomic
+            selectorMechanism:
+              description: |-
+                Mechanism used to select the endpoints to scrape.
+                By default, the selection process relies on relabel configurations to filter the discovered targets.
+                Alternatively, you can opt in for role selectors, which may offer better efficiency in large clusters.
+                Which strategy is best for your use case needs to be carefully evaluated.
+
+                It requires Prometheus >= v2.17.0.
+              enum:
+              - RelabelConfig
+              - RoleSelector
+              type: string
             targetLabels:
               description: |-
                 `targetLabels` defines the labels which are transferred from the
diff --git a/k8s.tjo.cloud/modules/cluster/components.tf b/k8s.tjo.cloud/modules/cluster/components.tf
index 91cebce..5696f85 100644
--- a/k8s.tjo.cloud/modules/cluster/components.tf
+++ b/k8s.tjo.cloud/modules/cluster/components.tf
@@ -29,12 +29,12 @@ data "helm_template" "cilium" {
     enableIPv4Masquerade: true
     ipv4:
       enabled: true
-    ipv4NativeRoutingCIDR: "10.0.0.0/16"
+    ipv4NativeRoutingCIDR: "${var.cluster.pod_cidr.ipv4}"
 
     enableIPv6Masquerade: true
     ipv6:
       enabled: false
-    ipv6NativeRoutingCIDR: "fd74:6a6f:0::/48"
+    ipv6NativeRoutingCIDR: "${var.cluster.pod_cidr.ipv6}"
 
     kubeProxyReplacement: true
 
diff --git a/k8s.tjo.cloud/modules/cluster/main.tf b/k8s.tjo.cloud/modules/cluster/main.tf
index 57cc891..5c61af6 100644
--- a/k8s.tjo.cloud/modules/cluster/main.tf
+++ b/k8s.tjo.cloud/modules/cluster/main.tf
@@ -4,15 +4,6 @@ locals {
   cluster_internal_endpoint = "https://${local.internal_domain}:${var.cluster.api.internal.port}"
   cluster_public_endpoint   = "https://${local.public_domain}:${var.cluster.api.public.port}"
 
-  podSubnets = [
-    "10.0.240.0/21",
-    "fd74:6a6f:0:f000::/53",
-  ]
-  serviceSubnets = [
-    "10.0.248.0/22",
-    "fd74:6a6f:0:f800::/108",
-  ]
-
   talos_controlplane_config = {
     machine = {
       kubelet = {
@@ -35,7 +26,7 @@ locals {
       }
     }
     cluster = {
-      allowSchedulingOnControlPlanes = var.allow_scheduling_on_control_planes,
+      allowSchedulingOnControlPlanes = true,
       apiServer = {
         certSANs = [
           local.public_domain,
@@ -51,7 +42,7 @@ locals {
           "oidc-groups-prefix" = "oidc:groups:",
         }
       }
-      inlineManifests = [
+      inlineManifests = concat([
         {
           name     = "proxmox-cloud-controller-manager"
           contents = data.helm_template.proxmox-ccm.manifest
@@ -72,24 +63,94 @@ locals {
           name     = "cilium"
           contents = data.helm_template.cilium.manifest
         },
+        {
+          name     = "cilium-bgp-advertisement"
+          contents = <<-EOF
+            apiVersion: cilium.io/v2alpha1
+            kind: CiliumBGPAdvertisement
+            metadata:
+              name: pods-and-services
+              labels:
+                k8s.tjo.cloud/default: "true"
+            spec:
+              advertisements:
+                - advertisementType: "PodCIDR"
+                - advertisementType: "Service"
+                  service:
+                    addresses:
+                      - ClusterIP
+                      - ExternalIP
+                      - LoadBalancerIP
+          EOF
+        },
+        {
+          name     = "cilium-bgp-peer-config"
+          contents = <<-EOF
+            apiVersion: cilium.io/v2alpha1
+            kind: CiliumBGPPeerConfig
+            metadata:
+              name: default
+            spec:
+              families:
+                - afi: ipv4
+                  safi: unicast
+                  advertisements:
+                    matchLabels:
+                      k8s.tjo.cloud/default: "true"
+                - afi: ipv6
+                  safi: unicast
+                  advertisements:
+                    matchLabels:
+                      k8s.tjo.cloud/default: "true"
+          EOF
+        },
         {
           name     = "oidc-admins"
           contents = <<-EOF
-          apiVersion: rbac.authorization.k8s.io/v1
-          kind: ClusterRoleBinding
-          metadata:
-            name: id-tjo-space:admins
-          subjects:
-            - kind: Group
-              name: oidc:groups:k8s.tjo.cloud admin
-              apiGroup: rbac.authorization.k8s.io
-          roleRef:
-            kind: ClusterRole
-            name: cluster-admin
-            apiGroup: rbac.authorization.k8s.io
-          EOF
+            apiVersion: rbac.authorization.k8s.io/v1
+            kind: ClusterRoleBinding
+            metadata:
+              name: id-tjo-space:admins
+            subjects:
+              - kind: Group
+                name: oidc:groups:k8s.tjo.cloud admin
+                apiGroup: rbac.authorization.k8s.io
+            roleRef:
+              kind: ClusterRole
+              name: cluster-admin
+              apiGroup: rbac.authorization.k8s.io
+          EOF
         },
-      ]
+      ],
+      [for name, attributes in var.hosts : {
+        name     = "cilium-bgp-node-config-override-${name}"
+        contents = <<-EOF
+          apiVersion: cilium.io/v2alpha1
+          kind: CiliumBGPClusterConfig
+          metadata:
+            name: ${name}
+          spec:
+            gracefulRestart:
+              enabled: true
+              restartTimeSeconds: 15
+            nodeSelector:
+              matchLabels:
+                k8s.tjo.cloud/bgp: "true"
+                k8s.tjo.cloud/host: ${name}
+                k8s.tjo.cloud/proxmox: ${var.proxmox.name}
+            bgpInstances:
+              - name: "${name}"
+                localASN: ${attributes.asn}
+                peers:
+                  - name: "local-router-vip"
+                    peerASN: ${attributes.asn}
+                    peerAddress: "10.0.0.1"
+                    peerConfigRef:
+                      name: "default"
+        EOF
+      }
+      ]
+      )
     }
   }
 
@@ -99,8 +160,14 @@ locals {
       cni = {
         name = "none"
       }
-      podSubnets     = local.podSubnets
-      serviceSubnets = local.serviceSubnets
+      podSubnets = [
+        var.cluster.pod_cidr.ipv4,
+        var.cluster.pod_cidr.ipv6
+      ]
+      serviceSubnets = [
+        var.cluster.service_cidr.ipv4,
+        var.cluster.service_cidr.ipv6
+      ]
     }
     proxy = {
       disabled = true
@@ -128,6 +195,7 @@ locals {
         hostname = node.name
       }
       nodeLabels = {
+        "k8s.tjo.cloud/bgp"     = "true"
         "k8s.tjo.cloud/host"    = node.host
         "k8s.tjo.cloud/proxmox" = var.proxmox.name
       }
diff --git a/k8s.tjo.cloud/modules/cluster/variables.tf b/k8s.tjo.cloud/modules/cluster/variables.tf
index b1a0eff..bfa6427 100644
--- a/k8s.tjo.cloud/modules/cluster/variables.tf
+++ b/k8s.tjo.cloud/modules/cluster/variables.tf
@@ -12,6 +12,12 @@ variable "nodes" {
   }))
 }
 
+variable "hosts" {
+  type = map(object({
+    asn = number
+  }))
+}
+
 variable "talos" {
   type = object({
     version = optional(string, "v1.8.3")
@@ -28,13 +34,6 @@ variable "talos" {
   })
 }
 
-
-variable "allow_scheduling_on_control_planes" {
-  default     = false
-  type        = bool
-  description = "Allow scheduling on control plane nodes"
-}
-
 variable "cluster" {
   type = object({
     name = string
@@ -54,6 +53,14 @@ variable "cluster" {
       client_id  = string
       issuer_url = string
     })
+    pod_cidr = object({
+      ipv4 = string
+      ipv6 = string
+    })
+    service_cidr = object({
+      ipv4 = string
+      ipv6 = string
+    })
   })
 }
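
Note (not part of the patch): once the inline manifests are applied, the BGP
control plane can be sanity-checked from the cluster side. A minimal sketch,
assuming kubectl access and a recent cilium-cli (the `cilium bgp routes`
subcommand needs cilium-cli >= v0.16); the resource names come from the
Cilium v2alpha1 CRDs this patch renders:

    # The per-host CiliumBGPClusterConfig, the shared peer config and the
    # advertisement created by the inline manifests should all exist:
    kubectl get ciliumbgpclusterconfigs,ciliumbgppeerconfigs,ciliumbgpadvertisements

    # Each node labeled k8s.tjo.cloud/bgp=true should show an established
    # session with the 10.0.0.1 router VIP:
    cilium bgp peers

    # The advertised routes should cover the new pod and service CIDRs
    # (10.0.240.0/21 and 10.0.248.0/22):
    cilium bgp routes advertised ipv4 unicast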
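For context, the router side is outside this repo: the patch peers each node
with its host's ASN (65003/65004) over iBGP (localASN equals peerASN) at the
10.0.0.1 VIP. A hypothetical counterpart for the nevaroo host, assuming the
VIP fronts an FRR instance (the patch does not say), with 10.0.10.11 standing
in for a node address — both the daemon and the address are assumptions:

    vtysh <<'CFG'
    configure terminal
    router bgp 65003
     neighbor 10.0.10.11 remote-as 65003
     address-family ipv4 unicast
      neighbor 10.0.10.11 activate
     exit-address-family
    CFG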