Skip to content

Commit

Permalink
Use Prometheus exporter
Browse files Browse the repository at this point in the history
Rename pkg metric to metrics.
Use an observable counter instead of a counter, since an observable
instrument is better suited to this case according to the OpenTelemetry
documentation.
OTLP exporter replaced by the Prometheus exporter, exposed via an HTTP server.
OpenTelemetry has to be updated to fix bugs related to the temporality of
the metrics.
  • Loading branch information
LionelJouin committed Oct 19, 2023
1 parent 5f404d0 commit 2e277c7
Show file tree
Hide file tree
Showing 16 changed files with 281 additions and 403 deletions.
5 changes: 2 additions & 3 deletions cmd/stateless-lb/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@ type Config struct {
GRPCKeepaliveTime time.Duration `default:"30s" desc:"gRPC keepalive timeout"`
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
OTCollectorEnabled bool `default:"false" desc:"Open Telemetry Collector Service" envconfig:"OT_COLLECTOR_ENABLED"`
OTCollectorService string `default:"ot-collector.default:4317" desc:"Open Telemetry Collector Service" envconfig:"OT_COLLECTOR_SERVICE"`
OTCollectorInterval time.Duration `default:"30s" desc:"Open Telemetry Interval of data collection" envconfig:"OT_COLLECTOR_INTERVAL"`
MetricsEnabled bool `default:"false" desc:"Enable the metrics collection" split_words:"true"`
MetricsPort int `default:"2223" desc:"Specify the port used to expose the metrics" split_words:"true"`
}

// IsValid checks if the configuration is valid
Expand Down
49 changes: 30 additions & 19 deletions cmd/stateless-lb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ import (
"github.com/nordix/meridio/pkg/loadbalancer/stream"
"github.com/nordix/meridio/pkg/loadbalancer/types"
"github.com/nordix/meridio/pkg/log"
"github.com/nordix/meridio/pkg/metric"
"github.com/nordix/meridio/pkg/metrics"
"github.com/nordix/meridio/pkg/nat"
"github.com/nordix/meridio/pkg/networking"
"github.com/nordix/meridio/pkg/nsm"
Expand Down Expand Up @@ -247,33 +247,44 @@ func main() {
fns := NewFrontendNetworkService(ctx, targetRegistryClient, ep, NewServiceControlDispatcher(sns))
go fns.Start()

if config.OTCollectorEnabled {
_, err = metric.Init(
ctx,
metric.WithGRPCKeepaliveTime(config.GRPCKeepaliveTime),
metric.WithOTCollectorService(config.OTCollectorService),
metric.WithOTCollectorInterval(config.OTCollectorInterval),
)
if err != nil {
log.Fatal(logger, "Unable to init metric collector", "error", err)
}
if config.MetricsEnabled {
func() {
_, err = metrics.Init(ctx)
if err != nil {
logger.Error(err, "Unable to init metrics collector")
cancel()
return
}

hostname, err := os.Hostname()
if err != nil {
log.Fatal(logger, "Unable to get hostname", "error", err)
}
hostname, err := os.Hostname()
if err != nil {
logger.Error(err, "Unable to get hostname")
cancel()
return
}

go func() {
err = flow.CollectMetrics(
ctx,
flow.WithHostname(hostname),
flow.WithTrenchName(config.TrenchName),
flow.WithConduitName(config.ConduitName),
flow.WithInterval(config.OTCollectorInterval/2),
)
if err != nil {
log.Fatal(logger, "Unable to start flow metric collector", "error", err)
logger.Error(err, "Unable to start flow metrics collector")
cancel()
return
}

metricsServer := metrics.Server{
IP: "",
Port: config.MetricsPort,
}
go func() {
err := metricsServer.Start(ctx)
if err != nil {
logger.Error(err, "Unable to start metrics server")
cancel()
}
}()
}()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ metadata:
name: stateless-lb-frontend
labels:
app: stateless-lb-frontend
app-type: stateless-lb-frontend
spec:
progressDeadlineSeconds: 600
selector:
matchLabels:
app: stateless-lb-frontend
app-type: stateless-lb-frontend
replicas: 2
revisionHistoryLimit: 10
strategy:
Expand All @@ -22,6 +24,7 @@ spec:
metadata:
labels:
app: stateless-lb-frontend
app-type: stateless-lb-frontend
"spiffe.io/spiffe-id": "true"
spec:
affinity:
Expand Down Expand Up @@ -49,6 +52,9 @@ spec:
- name: stateless-lb
image: {{ .Values.registry }}/{{ .Values.repository }}/{{ .Values.statelessLB.image }}:{{ .Values.version }}
imagePullPolicy: # Kubernetes default according to image tag
ports:
- name: metrics
containerPort: 2223
startupProbe: # will be filled by operator if not specified
exec:
command:
Expand Down Expand Up @@ -107,6 +113,8 @@ spec:
value: "10m"
- name: NSM_LOG_LEVEL
value: # to be filled by operator
- name: NSM_METRICS_ENABLED
value: "true"
volumeMounts:
- name: spire-agent-socket
mountPath: /run/spire/sockets
Expand Down
73 changes: 0 additions & 73 deletions docs/demo/deployments/optl-prometheus-grafana/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,76 +5,3 @@ helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm
helm repo update

helm install prometheus prometheus-community/kube-prometheus-stack

kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.11.0/cert-manager.yaml

sleep 5

while kubectl get pods --no-headers | awk '$3' | grep -v "Running" > /dev/null; do sleep 1; done

sleep 15

helm install opentelemetry-operator open-telemetry/opentelemetry-operator

sleep 15

while kubectl get pods --no-headers | awk '$3' | grep -v "Running" > /dev/null; do sleep 1; done

sleep 10

kubectl apply -f - <<EOF
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: ot
spec:
mode: deployment
ports:
- name: prometheus
port: 9464
targetPort: 9464
protocol: TCP
config: |
receivers:
jaeger:
protocols:
grpc:
otlp:
protocols:
grpc:
http:
processors:
exporters:
logging:
verbosity: detailed
prometheus:
endpoint: 0.0.0.0:9464
metric_expiration: 30s
service:
pipelines:
traces:
receivers: [ jaeger ]
processors: []
exporters: [ logging ]
metrics:
receivers: [ otlp ]
exporters: [ prometheus, logging ]
EOF

kubectl apply -f - <<EOF
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: ot-collector-service-monitor
labels:
release: prometheus
spec:
endpoints:
- port: prometheus
selector:
matchLabels:
app.kubernetes.io/name: "ot-collector"
EOF
24 changes: 17 additions & 7 deletions docs/observability/dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 27,
"id": 28,
"links": [],
"liveNow": false,
"panels": [
Expand Down Expand Up @@ -46,6 +46,7 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
Expand Down Expand Up @@ -104,11 +105,15 @@
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(Flow, Stream, Conduit, Trench) (rate(meridio_conduit_stream_flow_matches_packets_total{service!=\"ot-collector-headless\"}[$__rate_interval]))",
"expr": "sum by(Flow, Stream, Conduit, Trench) (rate(meridio_conduit_stream_flow_matches_total[$__rate_interval]))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "{{Flow}}.{{Stream}}.{{Conduit}}.{{Trench}}",
"range": true,
"refId": "A"
"refId": "A",
"useBackend": false
}
],
"title": "Flow Match (packet per second)",
Expand Down Expand Up @@ -167,21 +172,25 @@
},
"showHeader": true
},
"pluginVersion": "9.5.2",
"pluginVersion": "10.1.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "meridio_conduit_stream_flow_matches_packets_total{service!=\"ot-collector-headless\"}",
"expr": "meridio_conduit_stream_flow_matches_total",
"format": "table",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
"refId": "A",
"useBackend": false
}
],
"title": "Flow List",
Expand All @@ -200,6 +209,7 @@
"instance": true,
"job": true,
"namespace": true,
"otel_scope_name": true,
"pod": true,
"service": true
},
Expand Down Expand Up @@ -243,6 +253,6 @@
"timezone": "",
"title": "Meridio",
"uid": "f0339d9f-4744-441c-972b-f8b294fb7ff8",
"version": 5,
"version": 2,
"weekStart": ""
}
19 changes: 15 additions & 4 deletions docs/observability/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Metric List

### meridio.interface.`METRIC_TYPE`
### meridio.interface.`METRIC_TYPE` (Planned)

`METRIC_TYPE`: rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped

Expand All @@ -16,7 +16,7 @@ Counts number of `METRIC_TYPE` for a network interface.
* Attractor (optional)
* Interface Name

### meridio.conduit.stream.status
### meridio.conduit.stream.status (Planned)

Stream status in the conduit instance.

Expand All @@ -39,7 +39,7 @@ Counts number of packets that have matched a flow.
* Stream
* Flow

### meridio.conduit.stream.target.packet.hits
### meridio.conduit.stream.target.packet.hits (Planned)

Counts number of packets that have hit a target.

Expand All @@ -51,7 +51,18 @@ Counts number of packets that have hit a target.
* Stream
* Target (identifier + IPs)

### meridio.attractor.gateway.status
### meridio.conduit.stream.target.latency (Planned)

Reports the latency to a target.

* Type: Gauge
* Attributes:
* Pod Name
* Trench
* Conduit
* IP

### meridio.attractor.gateway.status (Planned)

Gateway status in the attractor instance.

Expand Down
Loading

0 comments on commit 2e277c7

Please sign in to comment.