Skip to content

Commit

Permalink
Merge branch 'main' into reduce-fp-java-inspection
Browse files Browse the repository at this point in the history
  • Loading branch information
AvihuHenya authored Feb 11, 2025
2 parents 33946bd + 730cdc5 commit 0fc6c59
Show file tree
Hide file tree
Showing 16 changed files with 144 additions and 60 deletions.
13 changes: 10 additions & 3 deletions api/config/crd/bases/odigos.io_instrumentationconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ spec:
description: determines if odigos should inject agents to pods of
this workload.
type: boolean
agentsMetaHash:
description: |-
this hash is used to determine the deployment of the agents.
e.g. when the distro for container changes, or it's compatibility version,
or something else that requires rollout, the hash change will indicate that.
if the hash is empty, it means that no agent should be enabled in any pod container.
type: string
containers:
description: configuration for each instrumented container in the
workload
Expand Down Expand Up @@ -611,9 +618,9 @@ spec:
type: array
workloadRolloutHash:
description: |-
The hash used to determine whether the associated workload needs to be rolled out.
This hash is calculated based on the containers config array and takes into account the
container name, Instrumented flag and the OTel distro name.
This hash is recorded only after the rollout took place.
it allows us to determine if the workload needs to be rollout based on previous rollout and the current config.
if this field is different than the spec.AgentsDeploymentHash it means rollout is needed or not yet updated.
type: string
type: object
type: object
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions api/k8sconsts/resources.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package k8sconsts

const (
// OdigosAgentsMetaHashLabel is used to label pods being instrumented.
// It can be used to count the number of instrumented pods for a workload and whether they are up to date
// with the expected agents.
OdigosAgentsMetaHashLabel = "odigos.io/agents-meta-hash"

// OdigosCollectorRoleLabel is the label used to identify the role of the Odigos collector.
OdigosCollectorRoleLabel = "odigos.io/collector-role"

Expand Down
12 changes: 9 additions & 3 deletions api/odigos/v1alpha1/instrumentationconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,9 @@ type InstrumentationConfigStatus struct {
// Represents the observations of a InstrumentationConfig's current state.
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" protobuf:"bytes,1,rep,name=conditions"`

// The hash used to determine whether the associated workload needs to be rolled out.
// This hash is calculated based on the containers config array and takes into account the
// container name, Instrumented flag and the OTel distro name.
// This hash is recorded only after the rollout took place.
// it allows us to determine if the workload needs to be rollout based on previous rollout and the current config.
// if this field is different than the spec.AgentsDeploymentHash it means rollout is needed or not yet updated.
WorkloadRolloutHash string `json:"workloadRolloutHash,omitempty"`
}

Expand Down Expand Up @@ -230,6 +230,12 @@ type InstrumentationConfigSpec struct {
// configuration for each instrumented container in the workload
Containers []ContainerAgentConfig `json:"containers,omitempty"`

// this hash is used to determine the deployment of the agents.
// e.g. when the distro for container changes, or it's compatibility version,
// or something else that requires rollout, the hash change will indicate that.
// if the hash is empty, it means that no agent should be enabled in any pod container.
AgentsMetaHash string `json:"agentsMetaHash,omitempty"`

// Configuration for the OpenTelemetry SDKs that this workload should use.
// The SDKs are identified by the programming language they are written in.
// TODO: consider adding more granular control over the SDKs, such as community/enterprise, native/ebpf.
Expand Down
5 changes: 5 additions & 0 deletions cli/cmd/resources/instrumentor.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ func NewInstrumentorClusterRole(ownerPermissionEnforcement bool) *rbacv1.Cluster
Resources: []string{"namespaces"},
Verbs: []string{"list", "watch", "get"},
},
{ // reconcile rollouts and instrumentation delpoyment status by actual odigos pods
APIGroups: []string{""},
Resources: []string{"pods"},
Verbs: []string{"get", "list", "watch"},
},
{ // Read instrumentation labels from daemonsets and apply pod spec changes
APIGroups: []string{"apps"},
Resources: []string{"daemonsets"},
Expand Down
2 changes: 1 addition & 1 deletion distros/yamls/java-ebpf-instrumentations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ spec:
delimiter: ' '
runtimeAgent:
directoryNames:
- "{{ODIGOS_AGENTS_DIR}}/java-ebpf"
# - "{{ODIGOS_AGENTS_DIR}}/java-ebpf"
2 changes: 1 addition & 1 deletion distros/yamls/nodejs-enterprise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ spec:
delimiter: ' '
runtimeAgent:
directoryNames:
- "{{ODIGOS_AGENTS_DIR}}/nodejs-ebpf"
# - "{{ODIGOS_AGENTS_DIR}}/nodejs-ebpf"
20 changes: 17 additions & 3 deletions docs/api-reference/odigos.io.v1alpha1.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1297,6 +1297,20 @@ The workload is identified by the owner reference</p>
</tr>
<tr>
<td>
<code>agentsMetaHash</code> <B>[Required]</B>
</td>
<td>
<code>string</code>
</td>
<td>
<p>this hash is used to determine the deployment of the agents.
e.g. when the distro for container changes, or it's compatibility version,
or something else that requires rollout, the hash change will indicate that.
if the hash is empty, it means that no agent should be enabled in any pod container.</p>
</td>
</tr>
<tr>
<td>
<code>sdkConfigs</code> <B>[Required]</B>
</td>
<td>
Expand Down Expand Up @@ -1353,9 +1367,9 @@ TODO: consider adding more granular control over the SDKs, such as community/ent
<code>string</code>
</td>
<td>
<p>The hash used to determine whether the associated workload needs to be rolled out.
This hash is calculated based on the containers config array and takes into account the
container name, Instrumented flag and the OTel distro name.</p>
<p>This hash is recorded only after the rollout took place.
it allows us to determine if the workload needs to be rollout based on previous rollout and the current config.
if this field is different than the spec.AgentsDeploymentHash it means rollout is needed or not yet updated.</p>
</td>
</tr>
</tbody>
Expand Down
13 changes: 10 additions & 3 deletions helm/odigos/templates/crds/odigos.io_instrumentationconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ spec:
description: determines if odigos should inject agents to pods of
this workload.
type: boolean
agentsMetaHash:
description: |-
this hash is used to determine the deployment of the agents.
e.g. when the distro for container changes, or it's compatibility version,
or something else that requires rollout, the hash change will indicate that.
if the hash is empty, it means that no agent should be enabled in any pod container.
type: string
containers:
description: configuration for each instrumented container in the
workload
Expand Down Expand Up @@ -611,9 +618,9 @@ spec:
type: array
workloadRolloutHash:
description: |-
The hash used to determine whether the associated workload needs to be rolled out.
This hash is calculated based on the containers config array and takes into account the
container name, Instrumented flag and the OTel distro name.
This hash is recorded only after the rollout took place.
it allows us to determine if the workload needs to be rollout based on previous rollout and the current config.
if this field is different than the spec.AgentsDeploymentHash it means rollout is needed or not yet updated.
type: string
type: object
type: object
Expand Down
8 changes: 8 additions & 0 deletions helm/odigos/templates/instrumentor/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ rules:
- list
- watch
- get
- apiGroups:
- ''
resources:
- pods
verbs:
- list
- watch
- get
- apiGroups:
- apps
resources:
Expand Down
12 changes: 9 additions & 3 deletions instrumentor/controllers/agentenabled/pods_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ func (p *PodsWebhook) Default(ctx context.Context, obj runtime.Object) error {
return nil
}

// store the agents deployment value so we can later associate each pod with the instrumentation version.
// we can pull only our pods into cache, and follow the lifecycle of the instrumentation process.
pod.Labels[k8sconsts.OdigosAgentsMetaHashLabel] = ic.Spec.AgentsMetaHash

return nil
}

Expand Down Expand Up @@ -172,9 +176,11 @@ func (p *PodsWebhook) injectOdigosInstrumentation(ctx context.Context, pod *core
}

// amir: 07 feb 2025. hard-coded temporary list which is removed once all distros migrate away from device
if (runtimeDetails.Language == common.JavascriptProgrammingLanguage && otelSdk == common.OtelSdkEbpfEnterprise) ||
(runtimeDetails.Language == common.GoProgrammingLanguage && otelSdk == common.OtelSdkEbpfCommunity) ||
(runtimeDetails.Language == common.JavaProgrammingLanguage && otelSdk == common.OtelSdkEbpfEnterprise) ||
// amir: 11 feb 2025. reverted java and nodejs enterprise temporarily
if
// (runtimeDetails.Language == common.JavascriptProgrammingLanguage && otelSdk == common.OtelSdkEbpfEnterprise) ||
(runtimeDetails.Language == common.GoProgrammingLanguage && otelSdk == common.OtelSdkEbpfCommunity) ||
// (runtimeDetails.Language == common.JavaProgrammingLanguage && otelSdk == common.OtelSdkEbpfEnterprise) ||
(runtimeDetails.Language == common.MySQLProgrammingLanguage && otelSdk == common.OtelSdkEbpfEnterprise) {
// Skip device injection for distros that no longer use it
} else {
Expand Down
17 changes: 11 additions & 6 deletions instrumentor/controllers/agentenabled/rollout/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@ import (
"bytes"
"crypto/sha256"
"encoding/gob"
"encoding/hex"
"slices"
"strings"

odigosv1alpha1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
)

func hashForContainersConfig(containersConfig []odigosv1alpha1.ContainerAgentConfig) ([]byte, error) {
func HashForContainersConfig(containersConfig []odigosv1.ContainerAgentConfig) (string, error) {
if len(containersConfig) == 0 {
return []byte{}, nil
return "", nil
}

// sort the entries by container name
// then hash the body of each entry
// this is required to ensure that the hash is consistent
slices.SortFunc(containersConfig, func(i, j odigosv1alpha1.ContainerAgentConfig) int {
slices.SortFunc(containersConfig, func(i, j odigosv1.ContainerAgentConfig) int {
return strings.Compare(i.ContainerName, j.ContainerName)
})

Expand All @@ -42,11 +43,15 @@ func hashForContainersConfig(containersConfig []odigosv1alpha1.ContainerAgentCon
}
err = enc.Encode(configEntry)
if err != nil {
return nil, err
return "", err
}
hash.Write(buf.Bytes())
buf.Reset()
}

return hash.Sum(nil), nil
hashBytes := hash.Sum(nil)
// limit the hash to 16 characters, as it's written into k8s resources and annotations
shortHash := hashBytes[:8]
hashAsHex := hex.EncodeToString(shortHash)
return hashAsHex, nil
}
21 changes: 10 additions & 11 deletions instrumentor/controllers/agentenabled/rollout/hash_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package rollout

import (
"encoding/hex"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -11,9 +10,9 @@ import (

func TestHashForContainersConfig(t *testing.T) {
// empty containers config
hash, err := hashForContainersConfig([]odigosv1alpha1.ContainerAgentConfig{})
hashText, err := HashForContainersConfig([]odigosv1alpha1.ContainerAgentConfig{})
assert.NoError(t, err)
assert.Equal(t, len(hash), 0)
assert.Equal(t, len(hashText), 0)

containersConfig := []odigosv1alpha1.ContainerAgentConfig{
{
Expand All @@ -28,9 +27,9 @@ func TestHashForContainersConfig(t *testing.T) {
},
}

hash, err = hashForContainersConfig(containersConfig)
hashText, err = HashForContainersConfig(containersConfig)
assert.NoError(t, err)
assert.Greater(t, len(hash), 0)
assert.Greater(t, len(hashText), 0)

// flip the order of the containers and check if the hash is the same

Expand All @@ -47,22 +46,22 @@ func TestHashForContainersConfig(t *testing.T) {
},
}

hash2, err := hashForContainersConfig(containersConfig)
hash2, err := HashForContainersConfig(containersConfig)
assert.NoError(t, err)
assert.Greater(t, len(hash2), 0)
assert.Equal(t, hex.EncodeToString(hash), hex.EncodeToString(hash2))
assert.Equal(t, hashText, hash2)

// flip the false instrumented flag and check if the hash is different
containersConfig[1].AgentEnabled = true
hash3, err := hashForContainersConfig(containersConfig)
hash3, err := HashForContainersConfig(containersConfig)
assert.NoError(t, err)
assert.Greater(t, len(hash3), 0)
assert.NotEqual(t, hex.EncodeToString(hash), hex.EncodeToString(hash3))
assert.NotEqual(t, hashText, hash3)

// change the distro name and check if the hash is different
containersConfig[1].OtelDistroName = "otel-distro-3"
hash4, err := hashForContainersConfig(containersConfig)
hash4, err := HashForContainersConfig(containersConfig)
assert.NoError(t, err)
assert.Greater(t, len(hash4), 0)
assert.NotEqual(t, hex.EncodeToString(hash), hex.EncodeToString(hash4))
assert.NotEqual(t, hashText, hash4)
}
25 changes: 1 addition & 24 deletions instrumentor/controllers/agentenabled/rollout/rollout.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package rollout

import (
"context"
"encoding/hex"
"errors"
"fmt"
"time"
Expand Down Expand Up @@ -49,11 +48,7 @@ func Do(ctx context.Context, c client.Client, ic *odigosv1alpha1.Instrumentation
}

savedRolloutHash := ic.Status.WorkloadRolloutHash
newRolloutHash, err := configHash(ic)
if err != nil {
logger.Error(err, "error calculating rollout hash")
return false, ctrl.Result{}, nil
}
newRolloutHash := ic.Spec.AgentsMetaHash

if savedRolloutHash == newRolloutHash {
return false, ctrl.Result{}, nil
Expand Down Expand Up @@ -181,24 +176,6 @@ func isWorkloadRolloutDone(obj client.Object) bool {
}
}

// configHash calculates a hash for the instrumentation config, based on the containers configuration
// if agent injection is enabled, the hash is based on the containers configuration
// if agent injection is disabled, the hash is empty
// the reason for this is to avoid unnecessary rollouts when agent injection is disabled
// (e,g the transition from empty config to a disabled one should not trigger a rollout)
func configHash(ic *odigosv1alpha1.InstrumentationConfig) (string, error) {
if !ic.Spec.AgentInjectionEnabled {
return "", nil
}

newRolloutHashBytes, err := hashForContainersConfig(ic.Spec.Containers)
if err != nil {
return "", err
}

return hex.EncodeToString(newRolloutHashBytes), nil
}

func rolloutCondition(rolloutErr error) metav1.Condition {
cond := metav1.Condition{
Type: odigosv1alpha1.WorkloadRolloutStatusConditionType,
Expand Down
7 changes: 7 additions & 0 deletions instrumentor/controllers/agentenabled/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ func updateInstrumentationConfigSpec(ctx context.Context, c client.Client, pw k8
prerequisiteCompleted, reason, message := isReadyForInstrumentation(cg, ic)
if !prerequisiteCompleted {
ic.Spec.AgentInjectionEnabled = false
ic.Spec.AgentsMetaHash = ""
ic.Spec.Containers = []odigosv1.ContainerAgentConfig{}
return &agentInjectedStatusCondition{
Status: metav1.ConditionUnknown,
Expand Down Expand Up @@ -170,6 +171,11 @@ func updateInstrumentationConfigSpec(ctx context.Context, c client.Client, pw k8
// if any instrumented containers are found, the pods webhook should process pods for this workload.
// set the AgentInjectionEnabled to true to signal that.
ic.Spec.AgentInjectionEnabled = true
agentsDeploymentHash, err := rollout.HashForContainersConfig(containersConfig)
if err != nil {
return nil, err
}
ic.Spec.AgentsMetaHash = string(agentsDeploymentHash)
return &agentInjectedStatusCondition{
Status: metav1.ConditionTrue,
Reason: odigosv1.AgentEnabledReasonEnabledSuccessfully,
Expand All @@ -179,6 +185,7 @@ func updateInstrumentationConfigSpec(ctx context.Context, c client.Client, pw k8
// if none of the containers are instrumented, we can set the status to false
// to signal to the webhook that those pods should not be processed.
ic.Spec.AgentInjectionEnabled = false
ic.Spec.AgentsMetaHash = ""
return aggregatedCondition, nil
}
}
Expand Down
Loading

0 comments on commit 0fc6c59

Please sign in to comment.