-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #103 from johnku001/chore/argo-workflow-chart-basi…
…cAndMonitering Chore/argo workflow chart basic and monitering
- Loading branch information
Showing
8 changed files
with
496 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# Chart metadata for the "cronjob" Helm chart, shown here as a diff hunk. | ||
apiVersion: v1 | ||
description: Helm chart with simple cronjob template | ||
name: cronjob | ||
# NOTE(review): the two version rows are the two sides of the diff (hunk header reads -1,6 +1,6); | ||
# presumably 0.3.0 is the removed value and 0.4.0 the added one. A real Chart.yaml must carry only one version key. | ||
version: 0.3.0 | ||
version: 0.4.0 | ||
appVersion: 0.0.1 | ||
tillerVersion: ">=2.14.3" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
{{- define "cronjob.argo_cron_workflow" -}} | ||
workflowSpec: | ||
# Argo workflow spec rendered from chart values: workflow labels, service account, optional deadline, | ||
# Prometheus metrics, the main container template, optional exit-handler notifications, TTL and pod GC. | ||
workflowMetadata: | ||
labels: | ||
# The name label is read back by the metric labels below (workflow.labels.name). | ||
# Quoted so a numeric or boolean-looking name still renders as a string. | ||
name: {{ .Values.name | quote }} | ||
# NOTE(review): entries of .Values.annotations appear to be emitted next to the name label, i.e. as labels | ||
# (indentation is not visible here) - confirm this is intended. | ||
{{- range $key, $value := .Values.annotations }} | ||
{{ $key | quote }} : {{ $value | quote }} | ||
{{- end }} | ||
# Two spellings of the values key are accepted: serviceaccount (optional .name override, falling back to | ||
# "<name>-pod-service-account") and serviceAccount (always the "<name>-pod-service-account" default). | ||
{{- if .Values.serviceaccount }} | ||
serviceAccountName: {{ .Values.serviceaccount.name | default (printf "%s-pod-service-account" .Values.name) | quote }} | ||
{{- else if .Values.serviceAccount }} | ||
serviceAccountName: {{ .Values.name }}-pod-service-account | ||
{{- end }} | ||
# activeDeadlineSeconds is rendered only when .Values.job.timeout is set; otherwise no deadline is written into the spec | ||
{{- if and (.Values.job) (.Values.job.timeout) }} | ||
activeDeadlineSeconds: {{.Values.job.timeout }} | ||
{{- end }} | ||
# Argo variables below are written with escaped double curly braces so Helm emits them literally | ||
# and Argo substitutes them at run time. | ||
metrics: | ||
prometheus: | ||
# Metric name (will be prepended with "argo_workflows_") | ||
- name: cron_workflow_exec_duration_gauge | ||
# Labels are optional. Avoid cardinality explosion. | ||
labels: | ||
- key: name | ||
value: "{{ "{{" }}workflow.labels.name{{ "}}" }}" | ||
- key: namespace | ||
value: "{{ "{{" }}workflow.namespace{{ "}}" }}" | ||
# A help doc describing your metric. This is required. | ||
help: "Duration gauge by name" | ||
# The metric type. Available are "gauge", "histogram", and "counter". | ||
gauge: | ||
# The value of your metric. It could be an Argo variable (see variables doc) or a literal value | ||
value: "{{ "{{" }}workflow.duration{{ "}}" }}" | ||
- name: cron_workflow_fail_count | ||
labels: | ||
- key: name | ||
value: "{{ "{{" }}workflow.labels.name{{ "}}" }}" | ||
- key: namespace | ||
value: "{{ "{{" }}workflow.namespace{{ "}}" }}" | ||
help: "Count of execution by fail status" | ||
# Emit the metric conditionally. Works the same as normal "when" | ||
# NOTE(review): Argo documents the bare status variable for template-level metrics; confirm it resolves | ||
# in workflow-level metrics, or switch to workflow.status | ||
when: "{{ "{{" }}status{{ "}}" }} != Succeeded" | ||
counter: | ||
# This increments the counter by 1 | ||
value: "1" | ||
- name: cron_workflow_success_count | ||
labels: | ||
- key: name | ||
value: "{{ "{{" }}workflow.labels.name{{ "}}" }}" | ||
- key: namespace | ||
value: "{{ "{{" }}workflow.namespace{{ "}}" }}" | ||
help: "Count of execution by success status" | ||
# Emit the metric conditionally. Works the same as normal "when" | ||
when: "{{ "{{" }}status{{ "}}" }} == Succeeded" | ||
counter: | ||
# This increments the counter by 1 | ||
value: "1" | ||
entrypoint: entry | ||
# onExit is rendered only when .Values.exitNotifications is set; otherwise this spec declares no exit handler | ||
{{- if .Values.exitNotifications }} | ||
onExit: exit-handler | ||
{{- end }} | ||
templates: | ||
# Entrypoint: a single step that runs the container template below | ||
- name: entry | ||
steps: | ||
- - name: step1 | ||
template: template | ||
{{- if and (.Values.job) (.Values.job.retries)}} | ||
retryStrategy: | ||
# Maximum number of retries when the step fails | ||
limit: {{ .Values.job.retries }} | ||
{{- if .Values.job.retryPolicy }} | ||
# Valid Value: "Always" | "OnFailure" | "OnError" | "OnTransientError", Default: "OnFailure" | ||
retryPolicy: {{ .Values.job.retryPolicy }} | ||
{{- end }} | ||
{{- end }} | ||
- name: template | ||
# NOTE(review): Argo's template metadata documents only labels and annotations; confirm namespace is honored here | ||
metadata: | ||
namespace: {{ .Release.Namespace }} | ||
container: | ||
# Rendering aborts with the given message when image.repository or image.tag is missing | ||
image: '{{ required "image.repository must be provided" .Values.image.repository }}:{{ required "image.tag must be provided" .Values.image.tag }}' | ||
{{- if .Values.command }} | ||
# Container command, copied verbatim from .Values.command | ||
command: {{- toYaml ( .Values.command) | nindent 12 }} | ||
{{- end }} | ||
{{- if .Values.args }} | ||
# Arguments passed to the command, copied verbatim from .Values.args | ||
args: {{- toYaml ( .Values.args) | nindent 12 }} | ||
{{- end }} | ||
{{- if .Values.resources }} | ||
# Use .Values.resources when it is provided | ||
resources: {{- toYaml ( .Values.resources) | nindent 12 }} | ||
{{- else }} | ||
# Default resource requests and limits | ||
resources: | ||
limits: | ||
memory: "2Gi" | ||
cpu: "1" | ||
requests: | ||
cpu: "300m" | ||
memory: "1Gi" | ||
{{- end }} | ||
# Environment: POD_NAME carries the chart name, then plain values from .Values.env, | ||
# then secret-backed values from .Values.envSecrets (map of variable name to secret name; | ||
# the variable name is also used as the key inside the secret). | ||
# All names and values are quoted so they always render as strings. | ||
env: | ||
- name: POD_NAME | ||
value: {{ .Values.name | quote }} | ||
{{- range $key, $value := .Values.env }} | ||
- name: {{ $key | quote }} | ||
value: {{ $value | quote }} | ||
{{- end }} | ||
{{- range $key, $name := .Values.envSecrets }} | ||
- name: {{ $key | quote }} | ||
valueFrom: | ||
secretKeyRef: | ||
name: {{ $name | quote }} | ||
key: {{ $key | quote }} | ||
{{- end }} | ||
# Apply .Values.envFrom if it is set | ||
{{- if .Values.envFrom }} | ||
envFrom: | ||
{{- range .Values.envFrom.configMapRef }} | ||
- configMapRef: | ||
name: {{ . | quote }} | ||
{{- end }} | ||
{{- range .Values.envFrom.secretRef }} | ||
- secretRef: | ||
name: {{ . | quote }} | ||
{{- end }} | ||
{{- end }} | ||
# Exit-handler templates, rendered only when .Values.exitNotifications is set | ||
{{- if .Values.exitNotifications }} | ||
- name: exit-handler | ||
steps: | ||
- - name: Success | ||
template: success-handler | ||
when: "{{ "{{" }}workflow.status{{ "}}" }} == Succeeded" | ||
- name: Failure | ||
template: failure-handler | ||
when: "{{ "{{" }}workflow.status{{ "}}" }} != Succeeded" | ||
# Steps that run when the workflow succeeded | ||
# NOTE(review): if neither slackApp nor healthcheckIo is configured the step group below renders empty - confirm Argo accepts that | ||
- name: success-handler | ||
steps: | ||
- | ||
# If .Values.exitNotifications.slackApp is set, the Slack app is notified of the success | ||
{{- if .Values.exitNotifications.slackApp }} | ||
- name: Notice-SlackApp-Succeeded | ||
template: notice-slack-app-succeeded | ||
{{- end }} | ||
# If .Values.exitNotifications.healthcheckIo is set, Healthchecks.io is notified of the success | ||
{{- if .Values.exitNotifications.healthcheckIo }} | ||
- name: Notice-HealthcheckIo-Succeeded | ||
template: notice-healthcheck-io-succeeded | ||
{{- end }} | ||
# Steps that run when the workflow did not succeed | ||
- name: failure-handler | ||
steps: | ||
- | ||
# If .Values.exitNotifications.slackApp is set, the Slack app is notified of the failure | ||
{{- if .Values.exitNotifications.slackApp }} | ||
- name: Notice-SlackApp-Failed | ||
template: notice-slack-app-failed | ||
{{- end }} | ||
# If .Values.exitNotifications.newRelic is set, New Relic is notified of the failure | ||
{{- if .Values.exitNotifications.newRelic }} | ||
- name: Notice-NewRelic-Failed | ||
template: notice-newrelic-failed | ||
{{- end }} | ||
# If .Values.exitNotifications.healthcheckIo is set, Healthchecks.io is notified of the failure | ||
{{- if .Values.exitNotifications.healthcheckIo }} | ||
- name: Notice-HealthcheckIo-Failed | ||
template: notice-healthcheck-io-failed | ||
{{- end }} | ||
# If .Values.exitNotifications.slackApp is set, the Slack app notification templates are included | ||
{{- if .Values.exitNotifications.slackApp }} | ||
{{ template "cronjob._exit_handler_slack_app" . }} | ||
{{- end }} | ||
# If .Values.exitNotifications.newRelic is set, the New Relic notification template is included | ||
{{- if .Values.exitNotifications.newRelic }} | ||
{{ template "cronjob._exit_handler_newrelic" . }} | ||
{{- end }} | ||
# If .Values.exitNotifications.healthcheckIo is set, the Healthchecks.io notification templates are included | ||
{{- if .Values.exitNotifications.healthcheckIo }} | ||
{{ template "cronjob._exit_handler_healthcheck_io" . }} | ||
{{- end }} | ||
{{- end }} | ||
{{- if and (.Values.ttlStrategy) (.Values.ttlStrategy.secondsAfterCompletion) }} | ||
ttlStrategy: | ||
# Number of seconds the workflow is kept after it completes | ||
secondsAfterCompletion: {{.Values.ttlStrategy.secondsAfterCompletion}} | ||
{{- end }} | ||
# Strategy for garbage collecting completed pods; falls back to "OnPodCompletion" when .Values.podGC.strategy is unset | ||
podGC: | ||
{{- if and (.Values.podGC) (.Values.podGC.strategy) }} | ||
strategy: {{ .Values.podGC.strategy }} | ||
{{- else}} | ||
strategy: OnPodCompletion | ||
{{- end }} | ||
{{- end -}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{{- define "cronjob._exit_handler_healthcheck_io" -}} | ||
{{- $healthcheckIo := .Values.exitNotifications.healthcheckIo | default dict -}} | ||
- name: notice-healthcheck-io-succeeded # Success ping for the cron job health check; schedules differ per cron job, so each one needs its own check uuid | ||
# Rendering aborts with the given message when exitNotifications.healthcheckIo.uuid is missing. | ||
container: | ||
image: curlimages/curl | ||
command: [ "sh", "-c" ] | ||
args: | ||
- curl https://hc-ping.com/{{ required "exitNotifications.healthcheckIo.uuid must be provided" $healthcheckIo.uuid }} | ||
# Failure ping: the same URL with the /fail suffix appended | ||
- name: notice-healthcheck-io-failed | ||
container: | ||
image: curlimages/curl | ||
command: [ "sh", "-c" ] | ||
args: | ||
- curl https://hc-ping.com/{{ required "exitNotifications.healthcheckIo.uuid must be provided" $healthcheckIo.uuid }}/fail | ||
{{- end -}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{{- define "cronjob._exit_handler_newrelic" -}} | ||
{{- $newRelic := .Values.exitNotifications.newRelic | default dict -}} | ||
{{- $image := $newRelic.image | default dict -}} | ||
- name: notice-newrelic-failed | ||
# Failure notification container for New Relic. The image comes from exitNotifications.newRelic.image; | ||
# rendering aborts when image.repository, image.tag, appName or licenseKey is missing. | ||
container: | ||
image: '{{ required "exitNotifications.newRelic.image.repository must be provided" $image.repository }}:{{ required "exitNotifications.newRelic.image.tag must be provided" $image.tag }}' | ||
env: | ||
- name: NEWRELIC_APP_NAME | ||
value: "{{ required "exitNotifications.newRelic.appName must be provided" $newRelic.appName }}" | ||
- name: FUNCTION_NAME | ||
value: "{{ .Values.name }}" | ||
# NOTE(review): the license key is written into the workflow spec as a plain env value; consider a secretKeyRef instead | ||
- name: NEWRELIC_LICENSE_KEY | ||
value: "{{ required "exitNotifications.newRelic.licenseKey must be provided" $newRelic.licenseKey }}" | ||
# The remaining values are Argo workflow variables; the braces are escaped so Helm emits them literally for Argo to substitute | ||
- name: ARGO_WORKFLOW_ERROR | ||
value: "{{ "{{" }}workflow.failures{{ "}}" }}" | ||
- name: ARGO_WORKFLOW_NAME | ||
value: "{{ "{{" }}workflow.name{{ "}}" }}" | ||
- name: ARGO_WORKFLOW_STATUS | ||
value: "{{ "{{" }}workflow.status{{ "}}" }}" | ||
- name: ARGO_WORKFLOW_DURATION | ||
value: "{{ "{{" }}workflow.duration{{ "}}" }}" | ||
{{- end -}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
{{- define "cronjob._exit_handler_slack_app" -}} | ||
{{- $slackApp := .Values.exitNotifications.slackApp | default dict -}} | ||
- name: notice-slack-app-succeeded | ||
# Posts a green Slack attachment (header, cluster, namespace, duration, link to the workflow) to the webhook. | ||
# Rendering aborts when exitNotifications.slackApp.portalDomain or webhookUrl is missing. | ||
# Argo variables are written with escaped braces so Helm emits them literally for Argo to substitute. | ||
container: | ||
image: curlimages/curl | ||
command: [sh, -c] | ||
# Single shell string: the JSON payload sits in single quotes, followed by the webhook URL | ||
args: [ | ||
"curl -X POST -H 'Content-type: application/json' --data '{\"attachments\": [ | ||
{ | ||
\"color\": \"#18be52\", | ||
\"blocks\": [ | ||
{ | ||
\"type\": \"header\", | ||
\"text\": { | ||
\"type\": \"plain_text\", | ||
\"text\": \"Workflow Succeeded - {{ "{{" }}workflow.name{{ "}}" }}\", | ||
\"emoji\": true | ||
} | ||
}, | ||
{ | ||
\"type\": \"divider\" | ||
}, | ||
{ | ||
\"type\": \"section\", | ||
\"fields\": [ | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Cluster*\\n{{ .Values.clusterName | default "unknown"}}\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Namespace*\\n{{ "{{" }}workflow.namespace{{ "}}" }}\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Duration*\\n{{ "{{" }}workflow.duration{{ "}}" }} sec\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Link*\\n<{{required "exitNotifications.slackApp.portalDomain must be provided" $slackApp.portalDomain}}/workflows/{{ "{{" }}workflow.namespace{{ "}}" }}/{{ "{{" }}workflow.name{{ "}}" }}?tab=workflow|View>\" | ||
} | ||
] | ||
} | ||
] | ||
} | ||
]}' | ||
{{ required "exitNotifications.slackApp.webhookUrl must be provided" $slackApp.webhookUrl }}" | ||
] | ||
# Same message for the failure case, with a red attachment and a "Workflow Failed" header | ||
- name: notice-slack-app-failed | ||
container: | ||
image: curlimages/curl | ||
command: [sh, -c] | ||
args: [ | ||
"curl -X POST -H 'Content-type: application/json' --data '{\"attachments\": [ | ||
{ | ||
\"color\": \"#E01E5A\", | ||
\"blocks\": [ | ||
{ | ||
\"type\": \"header\", | ||
\"text\": { | ||
\"type\": \"plain_text\", | ||
\"text\": \"Workflow Failed - {{ "{{" }}workflow.name{{ "}}" }}\", | ||
\"emoji\": true | ||
} | ||
}, | ||
{ | ||
\"type\": \"divider\" | ||
}, | ||
{ | ||
\"type\": \"section\", | ||
\"fields\": [ | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Cluster*\\n{{ .Values.clusterName | default "unknown"}}\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Namespace*\\n{{ "{{" }}workflow.namespace{{ "}}" }}\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Duration*\\n{{ "{{" }}workflow.duration{{ "}}" }} sec\" | ||
}, | ||
{ | ||
\"type\": \"mrkdwn\", | ||
\"text\": \"*Link*\\n<{{required "exitNotifications.slackApp.portalDomain must be provided" $slackApp.portalDomain}}/workflows/{{ "{{" }}workflow.namespace{{ "}}" }}/{{ "{{" }}workflow.name{{ "}}" }}?tab=workflow|View>\" | ||
} | ||
] | ||
} | ||
] | ||
} | ||
]}' | ||
{{ required "exitNotifications.slackApp.webhookUrl must be provided" $slackApp.webhookUrl }}" | ||
] | ||
{{- end -}} |
Oops, something went wrong.