aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSebastian Poxhofer <[email protected]>2024-12-23 15:46:34 +0100
committerGitHub <[email protected]>2024-12-23 14:46:34 +0000
commit7db44d743a9f6c577c3f0c0eea23bd23ea87718b (patch)
treede57cbcd96feb9e1ebe56dc8ab49d367ea495ae7
parenteef012758fcdec5b87dc1bfe355e97e1e1bed6b6 (diff)
downloadrenovate-7db44d743a9f6c577c3f0c0eea23bd23ea87718b.tar.gz
renovate-7db44d743a9f6c577c3f0c0eea23bd23ea87718b.zip
docs(examples/opentelemetry): use modern components and simplify (#33226)
Co-authored-by: HonkingGoose <[email protected]>
-rw-r--r--docs/usage/examples/opentelemetry.md195
1 files changed, 111 insertions, 84 deletions
diff --git a/docs/usage/examples/opentelemetry.md b/docs/usage/examples/opentelemetry.md
index 2e6181fb36a..c007e6bea9f 100644
--- a/docs/usage/examples/opentelemetry.md
+++ b/docs/usage/examples/opentelemetry.md
@@ -9,29 +9,42 @@ Requirements:
Create a `docker-compose.yaml` and `otel-collector-config.yml` file as seen below in a folder.
```yaml title="docker-compose.yaml"
-version: '3'
+name: renovate-otel-demo
+
services:
- # Jaeger
+ # Jaeger for storing traces
jaeger:
- image: jaegertracing/all-in-one:1.64.0
+ image: jaegertracing/jaeger:2.1.0
+ ports:
+ - '16686:16686' # Web UI
+ - '4317' # OTLP gRPC
+ - '4318' # OTLP HTTP
+
+ # Prometheus for storing metrics
+ prometheus:
+ image: prom/prometheus:v3.0.1
ports:
- - '16686:16686'
- - '4317'
+ - '9090:9090' # Web UI
+ - '4318' # OTLP HTTP
+ command:
+ - --web.enable-otlp-receiver
+ # Mirror these flags from the Dockerfile, because `command` overwrites the default flags.
+ # https://github.com/prometheus/prometheus/blob/5b5fee08af4c73230b2dae35964816f7b3c29351/Dockerfile#L23-L24
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
otel-collector:
+ # Using the Contrib version to access the spanmetrics connector.
+ # If you don't need the spanmetrics connector, you can use the standard version
image: otel/opentelemetry-collector-contrib:0.116.1
- command: ['--config=/etc/otel-collector-config.yml']
volumes:
- - ./otel-collector-config.yml:/etc/otel-collector-config.yml
+ - ./otel-collector-config.yml:/etc/otelcol-contrib/config.yaml
ports:
- - '1888:1888' # pprof extension
- - '13133:13133' # health_check extension
- - '55679:55679' # zpages extension
- - '4318:4318' # OTLP HTTP
- - '4317:4317' # OTLP GRPC
- - '9123:9123' # Prometheus exporter
+ - '4318:4318' # OTLP HTTP ( exposed to the host )
+ - '4317:4317' # OTLP gRPC ( exposed to the host )
depends_on:
- jaeger
+ - prometheus
```
```yaml title="otel-collector-config.yml"
@@ -39,28 +52,36 @@ receivers:
otlp:
protocols:
grpc:
+ endpoint: 0.0.0.0:4317
http:
+ endpoint: 0.0.0.0:4318
exporters:
otlp/jaeger:
endpoint: jaeger:4317
tls:
insecure: true
- logging:
- prometheus:
- endpoint: '0.0.0.0:9123'
+ otlphttp/prometheus:
+ endpoint: http://prometheus:9090/api/v1/otlp
+ debug:
+ # verbosity: normal
-processors:
- batch:
+connectors:
spanmetrics:
- metrics_exporter: prometheus
- latency_histogram_buckets: [10ms, 100ms, 250ms, 1s, 30s, 1m, 5m]
+ histogram:
+ exponential:
dimensions:
- name: http.method
+ default: GET
- name: http.status_code
- name: http.host
dimensions_cache_size: 1000
aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE'
+ exemplars:
+ enabled: true
+
+processors:
+ batch:
extensions:
health_check:
@@ -72,12 +93,23 @@ service:
pipelines:
traces:
receivers: [otlp]
- exporters: [otlp/jaeger, logging]
- processors: [spanmetrics, batch]
+ exporters:
+ - otlp/jaeger
+ # Send traces to connector for metrics calculation
+ - spanmetrics
+ # Enable debug exporter to see traces in the logs
+ #- debug
+ processors: [batch]
metrics:
- receivers: [otlp]
- exporters: [prometheus]
+ receivers:
+ - otlp # Receive metrics from Renovate.
+ - spanmetrics # Receive metrics calculated by the spanmetrics connector.
+ processors: [batch]
+ exporters:
+ - otlphttp/prometheus
+ # Enable debug exporter to see metrics in the logs
+ # - debug
```
Start setup using this command inside the folder containing the files created in the earlier steps:
@@ -86,7 +118,11 @@ Start setup using this command inside the folder containing the files created in
docker-compose up
```
-This command will start an [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib) and an instance of [Jaeger](https://www.jaegertracing.io/).
+This command will start:
+
+- an [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib)
+- an instance of [Jaeger for traces](https://www.jaegertracing.io/)
+- and [Prometheus](https://prometheus.io/)
Jaeger will now be reachable at [http://localhost:16686](http://localhost:16686).
@@ -97,7 +133,8 @@ To start Renovate with OpenTelemetry enabled run following command, after pointi
```
docker run \
--rm \
- -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 \
+ --network renovate-otel-demo_default \
+ -e OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 \
-v "/path/to/your/config.js:/usr/src/app/config.js" \
renovate/renovate:latest
```
@@ -130,100 +167,90 @@ You should be able to see now the full trace view which shows each HTTP request
### Metrics
In addition to the received traces, some metrics are calculated.
-This is achieved using the [spanmetricsprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/spanmetricsprocessor).
-The previous implemented setup will produce following metrics, which are exposed under [http://localhost:9123/metrics](http://localhost:9123/metrics):
+This is achieved using the [spanmetrics connector](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector).
+The previously implemented setup will produce the following metrics, which are pushed to [Prometheus](http://localhost:9090):
```
-# HELP calls_total
-# TYPE calls_total counter
-
### Example of internal spans
-calls_total{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3
-calls_total{operation="run",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 1
-### Example of http calls from Renovate to external services
-calls_total{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 9
+traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 2
+traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="run", status_code="STATUS_CODE_UNSET"} 2
-...
-
-# HELP latency
-# TYPE latency histogram
-### Example of internal spans
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="0.1"} 0
-...
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="9.223372036854775e+12"} 3
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="+Inf"} 3
-latency_sum{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 30947.4689
-latency_count{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3
+### Example of http calls from Renovate to external services
+traces_span_metrics_calls_total{http_host="api.github.com:443", http_method="POST", http_status_code="200", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_CLIENT", span_name="POST", status_code="STATUS_CODE_UNSET"} 4
-...
-### Example of http calls from Renovate to external services
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="0.1"} 0
+### Example histogram metrics
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="8", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 0
...
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="250"} 3
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="9.223372036854775e+12"} 9
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="+Inf"} 9
-latency_sum{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 2306.1385999999998
-latency_count{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 9
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="2000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="5000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="10000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="15000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="+Inf", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+
+traces_span_metrics_duration_milliseconds_sum{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 4190.694209
+traces_span_metrics_duration_milliseconds_count{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
```
-The [spanmetricsprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/spanmetricsprocessor) creates two sets of metrics.
+The [spanmetrics connector](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector) creates two sets of metrics.
#### Calls metric
-At first there are the `calls_total` metrics which display how often specific trace spans have been observed.
+At first there are the `traces_span_metrics_calls_total` metrics.
+These metrics show how often _specific_ trace spans have been observed.
For example:
-`calls_total{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3` signals that 3 repositories have been renovated.
-`calls_total{operation="run",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 1` represents how often Renovate has been run.
+
+- `traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 2` signals that 2 repositories have been renovated.
+- `traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="run", status_code="STATUS_CODE_UNSET"} 1` represents how often Renovate has been run.
If we combine these using the Prometheus Query Language (PromQL), we can calculate the average count of repositories each Renovate run handles.
```
-calls_total{operation="renovate repository",service_name="renovate"} / calls_total{operation="run",service_name="renovate"}
+traces_span_metrics_calls_total{span_name="repository",service_name="renovate"} / traces_span_metrics_calls_total{span_name="run",service_name="renovate"}
```
-This metrics is also for spans generated by http calls:
+These metrics are generated for HTTP call spans too:
```yaml
-calls_total{http_host="registry.terraform.io:443",http_method="GET",http_status_code="200",operation="HTTPS GET",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 5
+traces_span_metrics_calls_total{http_host="prometheus-community.github.io:443", http_method="GET", http_status_code="200", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_CLIENT", span_name="GET", status_code="STATUS_CODE_UNSET"} 5
```
#### Latency buckets
-The second class of metrics exposed are the latency focused latency buckets which allow to create [heatmaps](https://grafana.com/docs/grafana/latest/basics/intro-histograms/#heatmaps).
+The second class of metrics exposed are the latency-focused buckets, that allow creating [heatmaps](https://grafana.com/docs/grafana/latest/basics/intro-histograms/#heatmaps).
A request is added to a bucket if its latency is less than or equal to the bucket value (`le`): `request_duration <= le`
As an example, if we receive a request which needs `1.533s` to complete, we get the following metrics:
```
-latency_bucket{http_host="api.github.com:443",le="0.1"} 0
-latency_bucket{http_host="api.github.com:443",le="1"} 0
-latency_bucket{http_host="api.github.com:443",le="2"} 1
-latency_bucket{http_host="api.github.com:443",le="6"} 1
-latency_bucket{http_host="api.github.com:443",le="10"} 1
-latency_bucket{http_host="api.github.com:443",le="100"} 1
-latency_bucket{http_host="api.github.com:443",le="250"} 1
-latency_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 1
-latency_bucket{http_host="api.github.com:443",le="+Inf"} 1
-latency_sum{http_host="api.github.com:443"} 1.533
-latency_count{http_host="api.github.com:443"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="0.1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="2"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="6"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="10"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="100"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="250"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="+Inf"} 1
+traces_span_metrics_duration_milliseconds_sum{http_host="api.github.com:443"} 1.533
+traces_span_metrics_duration_milliseconds_count{http_host="api.github.com:443"} 1
```
Now we have another request which this time takes 10s to complete:
```
-latency_bucket{http_host="api.github.com:443",le="0.1"} 0
-latency_bucket{http_host="api.github.com:443",le="1"} 0
-latency_bucket{http_host="api.github.com:443",le="2"} 1
-latency_bucket{http_host="api.github.com:443",le="6"} 1
-latency_bucket{http_host="api.github.com:443",le="10"} 2
-latency_bucket{http_host="api.github.com:443",le="100"} 2
-latency_bucket{http_host="api.github.com:443",le="250"} 2
-latency_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 2
-latency_bucket{http_host="api.github.com:443",le="+Inf"} 2
-latency_sum{http_host="api.github.com:443"} 11.533
-latency_count{http_host="api.github.com:443"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="0.1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="2"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="6"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="10"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="100"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="250"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="+Inf"} 2
+traces_span_metrics_duration_milliseconds_sum{http_host="api.github.com:443"} 11.533
+traces_span_metrics_duration_milliseconds_count{http_host="api.github.com:443"} 2
```
More about the functionality can be found on the Prometheus page for [metric types](https://prometheus.io/docs/concepts/metric_types/#histogram).