diff --git a/.chloggen/bump-tempo.yaml b/.chloggen/bump-tempo.yaml new file mode 100755 index 000000000..dacd0fc4d --- /dev/null +++ b/.chloggen/bump-tempo.yaml @@ -0,0 +1,20 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. operator, github action) +component: operator + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Update Tempo to 2.5.0 + +# One or more tracking issues related to the change +issues: [958] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Upstream Tempo 2.5.0 image switched user from `root` to `tempo` (10001:10001) and ownership of `/var/tempo`. + Therefore ingester's `/var/tempo/wal` created by previous deployment using Tempo 2.4.1 needs to be updated and + changed ownership. The operator upgrades the `/var/tempo` ownership by deploying a `job` with `securityContext.runAsUser(0)` + and it runs `chown -R 10001:10001 /var/tempo`. diff --git a/Dockerfile b/Dockerfile index d21a8b058..f16cb3f00 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ COPY . . 
# Build ARG OPERATOR_VERSION +ARG TEMPO_VERSION RUN make build # Use distroless as minimal base image to package the manager binary diff --git a/Makefile b/Makefile index 8df75f590..840c6a352 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Current Operator version OPERATOR_VERSION ?= 0.10.0 -TEMPO_VERSION ?= 2.4.1 -TEMPO_QUERY_VERSION ?= 2.4.1 +TEMPO_VERSION ?= 2.5.0 +TEMPO_QUERY_VERSION ?= 2.5.0 TEMPO_GATEWAY_VERSION ?= main-2024-05-29-ca8d2de TEMPO_GATEWAY_OPA_VERSION ?= main-2024-04-29-914c13f OAUTH_PROXY_VERSION=4.12 @@ -159,7 +159,7 @@ run: manifests generate ## Run a controller from your host. .PHONY: docker-build docker-build: ## Build docker image with the manager. - docker buildx build --load --platform linux/${ARCH} --build-arg OPERATOR_VERSION -t ${IMG} . + docker buildx build --load --platform linux/${ARCH} --build-arg OPERATOR_VERSION --build-arg TEMPO_VERSION -t ${IMG} . .PHONY: docker-push docker-push: ## Push docker image with the manager. diff --git a/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml b/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml index 4596e5fd6..a36cd0ae1 100644 --- a/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml +++ b/bundle/community/manifests/tempo-operator.clusterserviceversion.yaml @@ -74,7 +74,7 @@ metadata: capabilities: Deep Insights categories: Logging & Tracing,Monitoring containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.10.0 - createdAt: "2024-06-28T12:21:20Z" + createdAt: "2024-07-04T12:01:25Z" description: Create and manage deployments of Tempo, a high-scale distributed tracing backend. 
operatorframework.io/cluster-monitoring: "true" @@ -1190,6 +1190,15 @@ spec: - deployments/finalizers verbs: - update + - apiGroups: + - batch + resources: + - jobs + verbs: + - create + - get + - list + - watch - apiGroups: - config.openshift.io resources: @@ -1198,6 +1207,13 @@ spec: - get - list - watch + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list + - watch - apiGroups: - grafana.integreatly.org resources: @@ -1366,9 +1382,9 @@ spec: - --config=controller_manager_config.yaml env: - name: RELATED_IMAGE_TEMPO - value: docker.io/grafana/tempo:2.4.1 + value: docker.io/grafana/tempo:2.5.0 - name: RELATED_IMAGE_TEMPO_QUERY - value: docker.io/grafana/tempo-query:2.4.1 + value: docker.io/grafana/tempo-query:2.5.0 - name: RELATED_IMAGE_TEMPO_GATEWAY value: quay.io/observatorium/api:main-2024-05-29-ca8d2de - name: RELATED_IMAGE_TEMPO_GATEWAY_OPA @@ -1513,9 +1529,9 @@ spec: provider: name: Grafana Tempo Operator SIG relatedImages: - - image: docker.io/grafana/tempo:2.4.1 + - image: docker.io/grafana/tempo:2.5.0 name: tempo - - image: docker.io/grafana/tempo-query:2.4.1 + - image: docker.io/grafana/tempo-query:2.5.0 name: tempo-query - image: quay.io/observatorium/api:main-2024-05-29-ca8d2de name: tempo-gateway diff --git a/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml index 43bb2261a..278e01ed5 100644 --- a/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml +++ b/bundle/openshift/manifests/tempo-operator.clusterserviceversion.yaml @@ -74,7 +74,7 @@ metadata: capabilities: Deep Insights categories: Logging & Tracing,Monitoring containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.10.0 - createdAt: "2024-06-28T12:21:19Z" + createdAt: "2024-07-04T12:01:24Z" description: Create and manage deployments of Tempo, a high-scale distributed tracing backend. 
operatorframework.io/cluster-monitoring: "true" @@ -1200,6 +1200,15 @@ spec: - deployments/finalizers verbs: - update + - apiGroups: + - batch + resources: + - jobs + verbs: + - create + - get + - list + - watch - apiGroups: - config.openshift.io resources: @@ -1208,6 +1217,13 @@ spec: - get - list - watch + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list + - watch - apiGroups: - grafana.integreatly.org resources: @@ -1376,9 +1392,9 @@ spec: - --config=controller_manager_config.yaml env: - name: RELATED_IMAGE_TEMPO - value: docker.io/grafana/tempo:2.4.1 + value: docker.io/grafana/tempo:2.5.0 - name: RELATED_IMAGE_TEMPO_QUERY - value: docker.io/grafana/tempo-query:2.4.1 + value: docker.io/grafana/tempo-query:2.5.0 - name: RELATED_IMAGE_TEMPO_GATEWAY value: quay.io/observatorium/api:main-2024-05-29-ca8d2de - name: RELATED_IMAGE_TEMPO_GATEWAY_OPA @@ -1534,9 +1550,9 @@ spec: provider: name: Grafana Tempo Operator SIG relatedImages: - - image: docker.io/grafana/tempo:2.4.1 + - image: docker.io/grafana/tempo:2.5.0 name: tempo - - image: docker.io/grafana/tempo-query:2.4.1 + - image: docker.io/grafana/tempo-query:2.5.0 name: tempo-query - image: quay.io/observatorium/api:main-2024-05-29-ca8d2de name: tempo-gateway diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 9f9cc8cfe..bc9c24d14 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -40,9 +40,9 @@ spec: - --leader-elect env: - name: RELATED_IMAGE_TEMPO - value: docker.io/grafana/tempo:2.4.1 + value: docker.io/grafana/tempo:2.5.0 - name: RELATED_IMAGE_TEMPO_QUERY - value: docker.io/grafana/tempo-query:2.4.1 + value: docker.io/grafana/tempo-query:2.5.0 - name: RELATED_IMAGE_TEMPO_GATEWAY value: quay.io/observatorium/api:main-2024-05-29-ca8d2de - name: RELATED_IMAGE_TEMPO_GATEWAY_OPA diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c9b71324e..5cade67bd 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -46,6 
+46,15 @@ rules: - deployments/finalizers verbs: - update +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - get + - list + - watch - apiGroups: - config.openshift.io resources: @@ -54,6 +63,13 @@ rules: - get - list - watch +- apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list + - watch - apiGroups: - grafana.integreatly.org resources: diff --git a/controllers/tempo/tempostack_controller.go b/controllers/tempo/tempostack_controller.go index 4405c9965..3d82a0539 100644 --- a/controllers/tempo/tempostack_controller.go +++ b/controllers/tempo/tempostack_controller.go @@ -59,6 +59,10 @@ type TempoStackReconciler struct { // +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=grafana.integreatly.org,resources=grafanadatasources,verbs=get;list;watch;create;update;patch;delete +// Upgrade for 0.11.0 to Tempo 2.5 +// +kubebuilder:rbac:groups="core",resources=persistentvolumeclaims,verbs=list;watch +// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create + //+kubebuilder:rbac:groups=tempo.grafana.com,resources=tempostacks,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=tempo.grafana.com,resources=tempostacks/status,verbs=get;update;patch //+kubebuilder:rbac:groups=tempo.grafana.com,resources=tempostacks/finalizers,verbs=update diff --git a/internal/upgrade/v0_11_0.go b/internal/upgrade/v0_11_0.go new file mode 100644 index 000000000..c7040d5df --- /dev/null +++ b/internal/upgrade/v0_11_0.go @@ -0,0 +1,198 @@ +package upgrade + +import ( + "context" + "fmt" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + 
"github.com/grafana/tempo-operator/apis/tempo/v1alpha1" + "github.com/grafana/tempo-operator/internal/manifests/manifestutils" + "github.com/grafana/tempo-operator/internal/manifests/naming" +) + +const ( + pollInterval = 2 * time.Second + pollTimeout = 5 * time.Minute +) + +// Upstream Tempo 2.5.0 has a breaking change https://github.com/grafana/tempo/releases/tag/v2.5.0 +// The /var/tempo is created in the dockerfile with 10001:10001 +// The user is changed to 10001:10001 +// The previous user in 2.4.1 was root (0) +// The Red Hat Tempo image does not use root user (it uses 1001) and on OpenShift the /var/tempo PV has a different fsGroup +// so the issue does not happen on OpenShift. +func upgrade0_11_0(ctx context.Context, u Upgrade, tempo *v1alpha1.TempoStack) error { + // do nothing on OpenShift + if u.CtrlConfig.Gates.OpenShift.OpenShiftRoute { + return nil + } + + image := tempo.Spec.Images.Tempo + if image == "" { + image = u.CtrlConfig.DefaultImages.Tempo + } + + listOps := []client.ListOption{ + client.MatchingLabels(manifestutils.ComponentLabels(manifestutils.IngesterComponentName, tempo.Name)), + } + pvcs := &corev1.PersistentVolumeClaimList{} + err := u.Client.List(ctx, pvcs, listOps...) 
+ if err != nil { + return err + } + if len(pvcs.Items) == 0 { + return nil + } + + err = scale_down_ingester(ctx, u, client.ObjectKey{Namespace: tempo.GetNamespace(), Name: naming.Name(manifestutils.IngesterComponentName, tempo.GetName())}) + if err != nil { + return err + } + + return chown_pvcs(ctx, u, tempo, tempo.Spec.Template.Ingester.NodeSelector, image, pvcs) +} + +func upgrade0_11_0_monolithic(ctx context.Context, u Upgrade, tempo *v1alpha1.TempoMonolithic) error { + // do nothing on OpenShift + if u.CtrlConfig.Gates.OpenShift.OpenShiftRoute { + return nil + } + + listOps := []client.ListOption{ + client.MatchingLabels(manifestutils.ComponentLabels(manifestutils.TempoMonolithComponentName, tempo.Name)), + } + pvcs := &corev1.PersistentVolumeClaimList{} + err := u.Client.List(ctx, pvcs, listOps...) + if err != nil { + return err + } + if len(pvcs.Items) == 0 { + return nil + } + + err = scale_down_ingester(ctx, u, client.ObjectKey{Namespace: tempo.GetNamespace(), Name: naming.Name(manifestutils.TempoMonolithComponentName, tempo.GetName())}) + if err != nil { + return err + } + + return chown_pvcs(ctx, u, tempo, tempo.Spec.NodeSelector, u.CtrlConfig.DefaultImages.Tempo, pvcs) +} + +func scale_down_ingester(ctx context.Context, u Upgrade, ingesterQuery client.ObjectKey) error { + ingester := &appsv1.StatefulSet{} + err := u.Client.Get(ctx, ingesterQuery, ingester) + if err != nil { + // ingester does not exist, maybe scaled down? 
+ if client.IgnoreNotFound(err) == nil { + return nil + } + return err + } + + patch := ingester.DeepCopy() + zero := int32(0) + patch.Spec.Replicas = &zero + err = u.Client.Patch(ctx, patch, client.MergeFrom(ingester)) + if err != nil { + return err + } + + return wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, func(ctx context.Context) (done bool, err error) { + ingester := &appsv1.StatefulSet{} + err = u.Client.Get(ctx, ingesterQuery, ingester) + if err != nil { + return false, err + } + if ingester.Status.Replicas == 0 { + return true, nil + } + + return false, nil + }) +} + +func chown_pvcs(ctx context.Context, u Upgrade, tempo metav1.Object, nodeSelector map[string]string, image string, pvcs *corev1.PersistentVolumeClaimList) error { + var volumes []corev1.Volume + var volumeMounts []corev1.VolumeMount + for _, pvc := range pvcs.Items { + volumes = append(volumes, corev1.Volume{ + Name: pvc.Name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvc.Name, + }, + }, + }) + + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: pvc.Name, + MountPath: fmt.Sprintf("/var/tempo/%s", pvc.Name), + }) + } + + // keep the jobs around for 1 day + ttl := int32(60 * 60 * 24) + rootUser := int64(0) + upgradeJob := batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("chown-%s", tempo.GetName()), + Namespace: tempo.GetNamespace(), + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + // Make sure the job runs on the same node as ingester + NodeSelector: nodeSelector, + ServiceAccountName: naming.DefaultServiceAccountName(tempo.GetName()), + Volumes: volumes, + Containers: []corev1.Container{ + { + Name: "chown", + Image: image, + Command: []string{"chown", "-R", "10001:10001", "/var/tempo"}, + VolumeMounts: volumeMounts, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + SecurityContext: &corev1.PodSecurityContext{ + 
RunAsUser: &rootUser, + }, + }, + }, + TTLSecondsAfterFinished: &ttl, + }, + } + + if err := ctrl.SetControllerReference(tempo, &upgradeJob, u.Client.Scheme()); err != nil { + return err + } + err := u.Client.Create(ctx, &upgradeJob) + if err != nil { + return err + } + return wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, func(ctx context.Context) (done bool, err error) { + job := &batchv1.Job{} + objectKey := client.ObjectKey{ + Namespace: upgradeJob.Namespace, + Name: upgradeJob.Name, + } + err = u.Client.Get(ctx, objectKey, job) + if err != nil { + return false, err + } + if job.Status.Succeeded == 1 { + return true, nil + } + + return false, nil + }) + +} diff --git a/internal/upgrade/versions.go b/internal/upgrade/versions.go index 2c0ad6a0b..c4d3386e5 100644 --- a/internal/upgrade/versions.go +++ b/internal/upgrade/versions.go @@ -45,5 +45,10 @@ var ( version: *semver.MustParse("0.8.0"), upgradeTempoStack: upgrade0_8_0, }, + { + version: *semver.MustParse("0.11.0"), + upgradeTempoStack: upgrade0_11_0, + upgradeTempoMonolithic: upgrade0_11_0_monolithic, + }, } )