generated from onedr0p/cluster-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: RonaldPhilipsen <[email protected]> Co-authored-by: Ronald Philipsen <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
0707ce5
commit af0685e
Showing
5 changed files
with
267 additions
and
3 deletions.
There are no files selected for viewing
85 changes: 85 additions & 0 deletions
85
kubernetes/apps/observability/kube-prometheus-stack/alertmanagerconfig.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/alertmanagerconfig_v1alpha1.json
# Alertmanager routing tree, inhibit rules and receivers.
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
  name: alertmanager
spec:
  route:
    groupBy: ["alertname", "job"]
    groupInterval: 10m
    groupWait: 1m
    # Default receiver for anything not matched by a child route. It must name
    # an entry under spec.receivers; the previous value `pushover` was not
    # defined there.
    # NOTE(review): `email` chosen as the default — confirm this is intended.
    receiver: email
    repeatInterval: 12h
    routes:
      # Drop the InfoInhibitor helper alert.
      - receiver: "null"
        matchers:
          - name: alertname
            value: InfoInhibitor
            # Quoted: a bare `=` resolves to the YAML 1.1 `!!value` tag in
            # some parsers.
            matchType: "="
      # Forward the always-firing Watchdog alert to the heartbeat webhook.
      - receiver: heartbeat
        groupInterval: 5m
        groupWait: 0s
        repeatInterval: 5m
        matchers:
          - name: alertname
            value: Watchdog
            matchType: "="
      - receiver: email
        matchers:
          - name: severity
            value: critical
            matchType: "="
  inhibitRules:
    # A critical alert mutes the warning with the same alertname/namespace.
    - equal: ["alertname", "namespace"]
      sourceMatch:
        - name: severity
          value: critical
          matchType: "="
      targetMatch:
        - name: severity
          value: warning
          matchType: "="
  receivers:
    - name: "null"
    - name: heartbeat
      webhookConfigs:
        - urlSecret:
            name: &secret alertmanager-secret
            key: ALERTMANAGER_HEARTBEAT_URL
    - name: email
      emailConfigs:
        # Whether to notify about resolved alerts.
        - sendResolved: true
          to: 'alerts@${SECRET_DOMAIN}'
          from: 'alertmanager@${SECRET_DOMAIN}'
          hello: k8s@${SECRET_DOMAIN}
          # The smarthost and SMTP sender used for mail notifications.
          smarthost: ${ALERTMANAGER_SMTP_HOST}
          authUsername: ${ALERTMANAGER_SMTP_USERNAME}
          # `name` is the Secret object, `key` the entry inside it (these two
          # were previously swapped).
          authPassword:
            name: *secret
            key: ALERTMANAGER_SMTP_PASSWORD
          text: >-
            [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}]
            {{ .CommonLabels.alertname }}
          html: |-
            {{- range .Alerts }}
              {{- if ne .Annotations.description "" }}
                {{ .Annotations.description }}
              {{- else if ne .Annotations.summary "" }}
                {{ .Annotations.summary }}
              {{- else if ne .Annotations.message "" }}
                {{ .Annotations.message }}
              {{- else }}
                Alert description not available
              {{- end }}
              {{- if gt (len .Labels.SortedPairs) 0 }}
                <small>
                  {{- range .Labels.SortedPairs }}
                    <b>{{ .Name }}:</b> {{ .Value }}
                  {{- end }}
                </small>
              {{- end }}
            {{- end }}
20 changes: 20 additions & 0 deletions
20
kubernetes/apps/observability/kube-prometheus-stack/externalsecret.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
# Renders the `alertmanager-secret` Secret from the `alertmanager` entry of
# the `onepassword` ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: alertmanager
spec:
  refreshInterval: 5m
  secretStoreRef:
    kind: ClusterSecretStore
    name: onepassword
  # Pull every field of the remote `alertmanager` entry; the template below
  # picks the ones that end up in the Secret.
  dataFrom:
    - extract:
        key: alertmanager
  target:
    name: alertmanager-secret
    template:
      data:
        ALERTMANAGER_HEARTBEAT_URL: "{{ .ALERTMANAGER_HEARTBEAT_URL }}"
        ALERTMANAGER_SMTP_PASSWORD: "{{ .ALERTMANAGER_SMTP_PASSWORD }}"
148 changes: 148 additions & 0 deletions
148
kubernetes/apps/observability/kube-prometheus-stack/helmrelease.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
---
# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json
# Flux HelmRelease for the kube-prometheus-stack chart. CRDs are skipped here
# and expected from the `kube-prometheus-stack-crds` release it depends on.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
spec:
  interval: 30m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: 68.2.1
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  install:
    crds: Skip
    remediation:
      retries: 3
  upgrade:
    cleanupOnFail: true
    crds: Skip
    remediation:
      strategy: rollback
      retries: 3
  dependsOn:
    - name: kube-prometheus-stack-crds
      namespace: observability
  values:
    crds:
      enabled: false
    cleanPrometheusOperatorObjectNames: true
    alertmanager:
      ingress:
        enabled: true
        ingressClassName: internal
        hosts:
          - "alertmanager.${SECRET_DOMAIN}"
        pathType: Prefix
      alertmanagerSpec:
        # Use the AlertmanagerConfig object named `alertmanager` as the
        # Alertmanager configuration.
        alertmanagerConfiguration:
          name: alertmanager
          global:
            resolveTimeout: 5m
        externalUrl: https://alertmanager.${SECRET_DOMAIN}
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: nfs-provision
              resources:
                requests:
                  storage: 1Gi
    kubeApiServer:
      serviceMonitor:
        selector:
          k8s-app: kube-apiserver
    kubeScheduler:
      service:
        selector:
          k8s-app: kube-scheduler
    kubeControllerManager:
      service:
        selector:
          k8s-app: kube-controller-manager
    # etcd runs on control plane nodes, so it reuses the controller-manager
    # selector (written out explicitly instead of a YAML merge key).
    kubeEtcd:
      service:
        selector:
          k8s-app: kube-controller-manager
    kubeProxy:
      enabled: false
    prometheus:
      ingress:
        enabled: true
        ingressClassName: internal
        hosts:
          - "prometheus.${SECRET_DOMAIN}"
        pathType: Prefix
      prometheusSpec:
        podMonitorSelectorNilUsesHelmValues: false
        probeSelectorNilUsesHelmValues: false
        ruleSelectorNilUsesHelmValues: false
        scrapeConfigSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        enableAdminAPI: true
        walCompression: true
        enableFeatures:
          - memory-snapshot-on-shutdown
        retention: 14d
        # Kept below the 50Gi volume requested under storageSpec so
        # size-based retention can trigger before the disk is full
        # (previously 50GB, i.e. the whole volume).
        retentionSize: 40GB
        resources:
          requests:
            cpu: 100m
          limits:
            memory: 2000Mi
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: nfs-provision
              resources:
                requests:
                  storage: 50Gi
    prometheus-node-exporter:
      fullnameOverride: node-exporter
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into a `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - "__meta_kubernetes_pod_node_name"
              targetLabel: kubernetes_node
    kube-state-metrics:
      fullnameOverride: kube-state-metrics
      metricLabelsAllowlist:
        - pods=[*]
        - deployments=[*]
        - persistentvolumeclaims=[*]
      prometheus:
        monitor:
          enabled: true
          relabelings:
            # Copy the pod's node name into a `kubernetes_node` label.
            - action: replace
              regex: (.*)
              replacement: $1
              sourceLabels:
                - "__meta_kubernetes_pod_node_name"
              targetLabel: kubernetes_node
    grafana:
      enabled: false
      forceDeployDashboards: true
    additionalPrometheusRulesMap:
      dockerhub-rules:
        groups:
          - name: dockerhub
            rules:
              - alert: DockerhubRateLimitRisk
                annotations:
                  summary: Kubernetes cluster Dockerhub rate limit risk
                expr: count(time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30) > 100
                labels:
                  severity: critical
      oom-rules:
        groups:
          - name: oom
            rules:
              - alert: OomKilled
                annotations:
                  summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
                expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
                labels:
                  severity: critical
8 changes: 8 additions & 0 deletions
8
kubernetes/apps/observability/kube-prometheus-stack/kustomization.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the manifests that live next to this file.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Alertmanager routing and receivers
  - ./alertmanagerconfig.yaml
  # Secret material for the Alertmanager receivers
  - ./externalsecret.yaml
  # The kube-prometheus-stack chart release itself
  - ./helmrelease.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters