[Kubernetes] OTel(OpenTelemetry) Collector - Logging

🔀 OTEL 예시 흐름: #

graph TD
  A[Application] --> B[OTel SDK]
  B --> C[OTel Collector]
  C --> D[Logs]
  C --> E[Metrics]
  C --> F[Traces]
  D --> G[Loki]
  E --> H[Prometheus]
  F --> I[Tempo/Jaeger]

Opentelemetry Collector #

  • 아키텍처는 OTel Collector와 기존의 로그 수집 도구를 혼합해 구성한 Plan A와 OTel Collector만으로 구성한 Plan B로 나눌 수 있다.

OpenTelemetry Collector 배포 및 구성 #

  • vi /etc/rsyslog.conf에 아래의 Code 추가

    *.* action(type="omfwd" target="0.0.0.0" port="54527" protocol="tcp" action.resumeRetryCount="10" queue.type="linkedList" queue.size="10000")
  • syslog 및 container log

      # DaemonSet collector that ships three log sources to Loki:
      #   - syslog forwarded over TCP to port 54527
      #   - application log files under /appdata/applog
      #   - container logs under /var/log/pods
      apiVersion: opentelemetry.io/v1beta1
      kind: OpenTelemetryCollector
      metadata:
        name: otel-log
      spec:
        mode: daemonset
        # hostNetwork puts the syslog receiver port (54527) on the node itself,
        # which is presumably how the node's rsyslog omfwd action reaches it.
        hostNetwork: true
        podSecurityContext:
          runAsUser: 0
          runAsGroup: 0
        tolerations:
          - operator: Exists
        volumes:
          # Typically the collector will want access to pod logs and container logs
          - name: varlogpods
            hostPath:
              path: /var/log/pods
          - name: varlibdockercontainers
            hostPath:
              path: /var/lib/docker/containers
          - name: applogs
            hostPath:
              path: /appdata/applog
        volumeMounts:
          # Mount the volumes to the collector container
          - name: varlogpods
            mountPath: /var/log/pods
            readOnly: true
          - name: varlibdockercontainers
            mountPath: /var/lib/docker/containers
            readOnly: true
          - name: applogs
            mountPath: /appdata/applog
            readOnly: true
        config:
          # This is a new configuration file - do not merge this with your metrics configuration!
          receivers:
            syslog:
              tcp:
                listen_address: '0.0.0.0:54527'
              protocol: rfc3164
              # Server timezone; must be a single zone name (e.g. UTC or Asia/Seoul).
              location: Asia/Seoul
              operators:
                - type: move
                  from: attributes.message
                  to: body
                - type: move
                  from: attributes.hostname
                  to: resource["hostname"]
                - type: move
                  from: attributes.appname
                  to: resource["daemon"]

            filelog/applog:
              include:
                - /appdata/applog/*/*/*.log
              # Required: the regex_parser below reads attributes["log.file.path"],
              # which the receiver only sets when include_file_path is true.
              include_file_path: true
              operators:
                # Extract metadata from file path:
                # /appdata/applog/<namespace>/<pod_name>/<log_file_name>.log
                - type: regex_parser
                  id: extract_metadata_from_filepath
                  regex: '^.*\/(?P<namespace>\S+)\/(?P<pod_name>\S+)\/(?P<log_file_name>\S+)\.log$'
                  parse_from: attributes["log.file.path"]
                  cache:
                    size: 128 # default maximum amount of Pods per Node is 110
                # Rename attributes
                - type: move
                  from: attributes["log.file.path"]
                  to: resource["filename"]
                - type: move
                  from: attributes.namespace
                  to: resource["namespace"]
                - type: move
                  from: attributes.pod_name
                  to: resource["pod"]
                - type: add
                  field: resource["cluster"]
                  value: 'your-cluster-name' # Set your cluster name here

            filelog:
              include:
                - /var/log/pods/*/*/*.log
              exclude:
                # Exclude logs from all containers named otel-collector
                - /var/log/pods/*/otel-collector/*.log
                # NOTE(review): operator-managed collectors appear to name their
                # container otc-container; confirm and keep whichever applies so
                # the collector does not ingest its own logs.
                - /var/log/pods/*/otc-container/*.log
              start_at: beginning
              include_file_path: true
              include_file_name: false
              operators:
                # Find out which format is used by kubernetes
                - type: router
                  id: get-format
                  routes:
                    - output: parser-docker
                      expr: 'body matches "^\\{"'
                    - output: parser-crio
                      expr: 'body matches "^[^ Z]+ "'
                    - output: parser-containerd
                      expr: 'body matches "^[^ Z]+Z"'
                # Parse CRI-O format
                - type: regex_parser
                  id: parser-crio
                  regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
                  output: extract_metadata_from_filepath
                  timestamp:
                    parse_from: attributes.time
                    layout_type: gotime
                    layout: '2006-01-02T15:04:05.999999999Z07:00'
                # Parse CRI-Containerd format
                - type: regex_parser
                  id: parser-containerd
                  regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
                  output: extract_metadata_from_filepath
                  timestamp:
                    parse_from: attributes.time
                    layout: '%Y-%m-%dT%H:%M:%S.%LZ'
                # Parse Docker format
                - type: json_parser
                  id: parser-docker
                  output: extract_metadata_from_filepath
                  timestamp:
                    parse_from: attributes.time
                    layout: '%Y-%m-%dT%H:%M:%S.%LZ'
                # Extract metadata from file path:
                # /var/log/pods/<namespace>_<pod_name>_<uid>/<container_name>/<restart_count>.log
                - type: regex_parser
                  id: extract_metadata_from_filepath
                  # Pod UID is not always 36 characters long
                  regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]{16,36})\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
                  parse_from: attributes["log.file.path"]
                  cache:
                    size: 128 # default maximum amount of Pods per Node is 110
                # Rename attributes
                - type: move
                  from: attributes["log.file.path"]
                  to: resource["filename"]
                - type: move
                  from: attributes.container_name
                  to: resource["container"]
                - type: move
                  from: attributes.namespace
                  to: resource["namespace"]
                - type: move
                  from: attributes.pod_name
                  to: resource["pod"]
                - type: add
                  field: resource["cluster"]
                  value: 'your-cluster-name' # Set your cluster name here
                - type: move
                  from: attributes.log
                  to: body

          processors:
            resource:
              attributes:
                - action: insert
                  key: loki.format
                  value: raw
                # Label hint for the Loki exporter. Every name listed here is a
                # resource attribute set by the receivers above: hostname/daemon
                # by syslog, the others by the two filelog receivers.
                - action: insert
                  key: loki.resource.labels
                  value: pod, namespace, container, cluster, filename, hostname, daemon

          exporters:
            loki:
              # In-cluster Loki service. For a hosted Loki with basic auth use
              # https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
              endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

          service:
            pipelines:
              logs:
                receivers: [syslog, filelog/applog, filelog]
                processors: [resource]
                exporters: [loki]
  • 변경될 Container Log 수집 방법

     # DaemonSet collector that reads container logs with the filelog
     # `container` operator instead of hand-written format parsers.
     apiVersion: opentelemetry.io/v1beta1
     kind: OpenTelemetryCollector
     metadata:
       name: otel-log
     spec:
       mode: daemonset
       hostNetwork: true
       # NOTE(review): files under /var/log/pods are normally readable by root
       # only; this mirrors the root security context of the syslog/filelog
       # listing. Confirm against your node's file permissions.
       podSecurityContext:
         runAsUser: 0
         runAsGroup: 0
       volumes:
         # Typically the collector will want access to pod logs and container logs
         - name: varlogpods
           hostPath:
             path: /var/log/pods
         - name: varlibdockercontainers
           hostPath:
             path: /var/lib/docker/containers
       volumeMounts:
         # Mount the volumes to the collector container
         - name: varlogpods
           mountPath: /var/log/pods
           readOnly: true
         - name: varlibdockercontainers
           mountPath: /var/lib/docker/containers
           readOnly: true
       config:
         # This is a new configuration file - do not merge this with your metrics configuration!
         receivers:
           filelog:
             include_file_path: true
             include:
               - /var/log/pods/*/*/*.log
             operators:
               - id: container-parser
                 type: container
               # The Loki label hint below names pod/namespace/container/
               # filename/cluster. Per the referenced container-parser blog
               # post, the parser emits k8s.*-prefixed resource attributes
               # instead, so rename them (and add cluster) to make the hint
               # resolve.
               - type: move
                 from: resource["k8s.container.name"]
                 to: resource["container"]
               - type: move
                 from: resource["k8s.namespace.name"]
                 to: resource["namespace"]
               - type: move
                 from: resource["k8s.pod.name"]
                 to: resource["pod"]
               - type: move
                 from: attributes["log.file.path"]
                 to: resource["filename"]
               - type: add
                 field: resource["cluster"]
                 value: 'your-cluster-name' # Set your cluster name here

         processors:
           resource:
             attributes:
               - action: insert
                 key: loki.format
                 value: raw
               - action: insert
                 key: loki.resource.labels
                 value: pod, namespace, container, cluster, filename

         exporters:
           loki:
             # In-cluster Loki service. For a hosted Loki with basic auth use
             # https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
             endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

         service:
           pipelines:
             logs:
               receivers: [filelog]
               processors: [resource]
               exporters: [loki]

    참고: https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/

Receiver Configuration - Plan A #

  • Receiver는 Promtail 및 EventExporter로부터 Log 데이터를 받는 진입점을 위한 loki receiver를 사용한다.
  • loki receiver를 사용하면 Otel Collector에 기존의 Loki가 노출하는 endpoint를 동일하게 노출시켜 기존의 Log 수집 컴포넌트들이 동일한 방법으로 OTel Collector에 Log를 보낼 수 있도록 구성할 수 있다.

Receiver Configuration - Plan B #

  • Receiver는 Container Log 수집을 위한 filelog, System Log 수집을 위한 filelog, Kubernetes Event Log 수집을 위한 k8s_events 3개를 사용한다.

  • Container Log는 filelog receiver로 /var/log/pods/*/*/*.log 경로에서 수집하고, 수집한 파일들을 기반으로 Path 및 Body를 분석해 Container명, Pod명, Namespace명 등의 정보를 추출한다.

  • System Log는 별도의 filelog receiver로 /var/log 경로에서 수집한 dmesg, messages, secure 파일들에서 syslog_parser로 정보를 추출해 수집한다.

  • Kubernetes Event Log는 k8s_events receiver를 이용해 Kubernetes API로부터 수집한다.

Processor Configuration #

  • Processor는 Log에 Kubernetes Attribute를 부착하기 위한 k8sattributes, Loki Label을 구성하기 위한 resource, OOM 방지를 위한 memory_limiter, Log를 batch성으로 전송하기 위한 batch 4개를 사용한다.

  • k8sattributes Processor는 filelog로부터 수집한 Container log를 기반으로 이와 일치하는 Pod, Deployment, Cluster 등의 정보를 데이터에 부착한다.

  • resource Processor는 위에서 부착한 정보를 Loki의 indexing에 필요한 Label로 변환하는 작업을 수행한다.

  • batch와 memory_limiter Processor는 가공한 Log 데이터를 Export하는 방법을 제공한다.

Exporter Configuration #

  • Exporter는 Log를 Loki로 전송하기 위한 loki exporter를 사용한다.

  • loki의 endpoint Attribute에 loki 주소의 /loki/api/v1/push Path를 붙여 로그 진입점을 값으로 넣어 수집한 Log를 Loki로 전송한다.

Pipeline Configuration #

  • 마지막으로 위에서 정의한 Receiver, Processor, Exporter를 순서에 맞게 조합하는 Pipeline을 정의한다.

  • 특히 Processor 요소들의 배치 순서에 따라 Log를 가공하는 순서가 달라지기 때문에, 위의 순서를 준수하는 것이 중요하다.

  • Loki Receiver에서 Log 데이터를 수집해 k8sattributes, resource, memory_limiter, batch 순으로 가공한 뒤, Loki Exporter를 사용해 Loki backend로 전송한다.

Pipeline Configuration - Plan B #

filelog, k8s_events Receiver에서 Log 데이터를 수집해 k8sattributes, resource, memory_limiter, batch순으로 가공한 뒤, Loki Exporter를 사용해 Loki backend로 전송한다.

Node Collector(Daemonset) #

  • File Logs

  • Host metrics

  • Kubelet state metrics

  • 공식 문서에서 DaemonSet을 권장하는 receiver가 모인 collector이다.

Log | Filelog #

수집 대상은 stdout/stderr로 생성된 Kubernetes, app log로, 사실상 Fluentbit를 대체한다. 이를 위해 log scraping 및 전달뿐 아니라 Processors에서 언급한 다양한 processor 사용을 고려해야 한다.

Metric | Kubelet Stats #

node, pod, container, volume, filesystem, network I/O 및 error metrics 등 CPU, memory를 비롯한 infra resource에 관한 metric을 다루며, 각 노드의 kubelet이 노출하는 API에서 추출한다. 사실상 cAdvisor의 대체이다.

Metric | Host Metrics #

수집 대상은 node (cpu, disk, CPU load, filesystem, memory, network, paging, process..)의 metric으로, 사실상 Prometheus Node Exporter를 대체한다. Kubelet Stats Receiver와 일부 항목이 겹치므로 동시 운용 시 중복 처리가 필요하다.

  # Node-level collector: RBAC for the kubelet stats API plus a DaemonSet
  # collector that forwards kubelet metrics to Prometheus over OTLP/HTTP.
  # otel-node-collector service accounts are created automatically
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: otel-node-collector
  rules:
    - apiGroups: [""]
      resources: ["nodes/stats", "nodes/proxy"]
      verbs: ["get", "watch", "list"]
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: otel-node-collector
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: otel-node-collector
  subjects:
    # Must match the collector's ServiceAccount: <collector name>-collector
    # in the collector's namespace.
    - kind: ServiceAccount
      name: otel-node-collector
      namespace: cluster
  ---
  apiVersion: opentelemetry.io/v1beta1
  kind: OpenTelemetryCollector
  metadata:
    name: otel-node
    namespace: cluster
    labels:
      app: otel-node-collector
  spec:
    mode: daemonset
    resources:
      # requests:
      #   cpu: 10m
      #   memory: 10Mi
      limits:
        cpu: 500m
        memory: 1000Mi
    podAnnotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "8888"
    env:
      # Node name is used below to build the local kubelet endpoint.
      - name: NODE_NAME
        valueFrom:
          fieldRef:
            fieldPath: spec.nodeName
    # Host filesystem mount, only needed when the hostmetrics receiver is enabled.
    # volumes:
    #   - name: hostfs
    #     hostPath:
    #       path: /
    # volumeMounts:
    #   - name: hostfs
    #     mountPath: /hostfs
    #     readOnly: true
    #     mountPropagation: HostToContainer
    config:
      extensions:
        health_check: # for k8s liveness and readiness probes
          endpoint: 0.0.0.0:13133 # default

      processors:
        batch: # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
          send_batch_size: 10000
          timeout: 5s
        memory_limiter:
          check_interval: 1s # recommended by official README
          limit_percentage: 80 # in 1Gi memory environment, hard limit is 800Mi
          spike_limit_percentage: 25 # in 1Gi memory environment, soft limit is 550Mi (800 - 250 = 550Mi)

      service:
        extensions:
          - health_check

        telemetry:
          logs:
            level: INFO
          metrics:
            address: 0.0.0.0:8888

        pipelines:
          metrics:
            receivers:
              - kubeletstats
              # - hostmetrics
            processors:
              - memory_limiter
              - batch
            exporters:
              - otlphttp/prometheus

      receivers:
        kubeletstats:
          auth_type: serviceAccount
          endpoint: https://${env:NODE_NAME}:10250
          collection_interval: 10s
          insecure_skip_verify: true
          extra_metadata_labels:
            - k8s.volume.type
          k8s_api_config:
            auth_type: serviceAccount
          metric_groups:
            - node
            - pod
            - container
            - volume

        # hostmetrics:
        #   collection_interval: 10s
        #   root_path: /hostfs
        #   scrapers:
        #     cpu:        # CPU utilization metrics
        #     load:       # CPU load metrics
        #     memory:     # Memory utilization
        #     disk:       # Disk I/O metrics
        #     filesystem: # File System utilization metrics
        #     network:    # Network interface I/O metrics & TCP connection metrics
        #     paging:     # Paging/Swap space utilization and I/O metrics
        #     processes:  # Process count metrics
        #     process:    # Per process CPU, Memory, and Disk I/O metrics
        #       # The following settings can be used to handle the error to work hostmetrics: 2024-05-12T01:06:30.683Z        error   scraperhelper/scrapercontroller.go:197  Error scraping metrics  {"kind": "receiver", "name": "hostmetrics", "data_type": "metrics", "error": "error reading process executable for pid 1: readlink /hostfs/proc/1/exe: permission denied; error reading username for process \"systemd\" (pid 1): open /etc/passwd: no such file or directory;
        #       # refer: https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/28661
        #       mute_process_name_error: true
        #       mute_process_exe_error: true
        #       mute_process_io_error: true
        #       mute_process_user_error: true
        #       mute_process_cgroup_error: true

      exporters:
        # Defined for troubleshooting; not referenced by the pipeline above.
        debug:
          verbosity: basic # detailed, basic

        otlphttp/prometheus:
          metrics_endpoint: http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics
          tls:
            insecure: true

Cluster Collector(Single Pod) #

  • k8s events(log)
  • k8s objects(metrics)

단일 replica 사용이 권장되는 receiver를 대상으로 한다. 이들 receiver는 2개 이상의 instance 사용 시 중복이 발생할 수 있기 때문이라고 공식 문서에서 설명한다. 두 receiver 모두 cluster 관점에서 데이터를 추출하기 때문이다. 이에 따라 deployment type에 1개의 replica로 설정한다.

Log | Kubernetes Objects #

주로 Kubernetes event 수집용으로 Kubernetes API server 출처의 objects(전체 목록은 kubectl api-resources 로 확인) 수집에도 사용한다.

Metric | Kubernetes Cluster #

사실상 Kube State Metrics의 대체로 Kubernetes API server에서 cluster level의 metric과 entity events를 추출한다.

  # Cluster-level collector: RBAC for reading Kubernetes objects plus a
  # single-replica Deployment collector (k8sobjects logs, k8s_cluster metrics).
  #
  # No ServiceAccount manifest is declared here: the collector below does not
  # set spec.serviceAccount, so it runs as the operator-created
  # otel-cluster-collector account in its own namespace. The binding must
  # target that account, otherwise the receivers have no permissions.
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: otel-collector-opentelemetry-collector
  rules:
    - apiGroups:
        - ''
      resources:
        - events
        - namespaces
        - namespaces/status
        - nodes
        - nodes/spec
        - pods
        - pods/status
        - replicationcontrollers
        - replicationcontrollers/status
        - resourcequotas
        - services
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - apps
      resources:
        - daemonsets
        - deployments
        - replicasets
        - statefulsets
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - extensions
      resources:
        - daemonsets
        - deployments
        - replicasets
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - batch
      resources:
        - jobs
        - cronjobs
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - autoscaling
      resources:
        - horizontalpodautoscalers
      verbs:
        - get
        - list
        - watch
  ---
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: otel-collector-opentelemetry-collector
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: otel-collector-opentelemetry-collector
  subjects:
    # <collector name>-collector, in the collector's namespace.
    - kind: ServiceAccount
      name: otel-cluster-collector
      namespace: cluster
  ---
  # otel-cluster-collector service accounts are created automatically
  apiVersion: opentelemetry.io/v1beta1
  kind: OpenTelemetryCollector
  metadata:
    name: otel-cluster
    namespace: cluster
    labels:
      app: otel-cluster-collector
  spec:
    mode: deployment
    # Keep a single replica: both receivers read cluster-wide state, so extra
    # instances would emit duplicate data.
    replicas: 1
    podAnnotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "8888"
    config:
      extensions:
        health_check: # for k8s liveness and readiness probes
          endpoint: 0.0.0.0:13133 # default

      processors:
        batch: # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
          send_batch_size: 10000
          timeout: 5s
        memory_limiter:
          check_interval: 1s # recommended by official README
          limit_percentage: 80 # in 1Gi memory environment, hard limit is 800Mi
          spike_limit_percentage: 25 # in 1Gi memory environment, soft limit is 550Mi (800 - 250 = 550Mi)
        attributes:
          # `actions` is a list of action entries, not a single mapping.
          actions:
            - key: elasticsearch.index.prefix
              value: otel-k8sobject
              action: insert

      service:
        extensions:
          - health_check

        telemetry:
          logs:
            level: DEBUG
          metrics:
            address: 0.0.0.0:8888

        pipelines:
          logs:
            receivers:
              - k8sobjects
            processors:
              - memory_limiter
              - batch
              - attributes
            exporters:
              - debug
              - elasticsearch

          metrics:
            receivers:
              - k8s_cluster
            processors:
              - memory_limiter
              - batch
            exporters:
              - otlphttp/prometheus

      receivers:
        k8sobjects:
          objects:
            - name: pods
              mode: pull
            - name: events
              mode: watch
        k8s_cluster:
          collection_interval: 10s
          node_conditions_to_report:
            - Ready
            - MemoryPressure
          allocatable_types_to_report:
            - cpu
            - memory
            - ephemeral-storage
            - storage

      exporters:
        debug:
          verbosity: detailed # default is basic

        otlphttp/prometheus:
          metrics_endpoint: http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics
          tls:
            insecure: true

        elasticsearch:
          endpoints:
            - http://elasticsearch-es-http.cluster.svc.cluster.local:9200
          logs_index: ""
          logs_dynamic_index:
            enabled: true
          logstash_format:
            enabled: true
          # NOTE(review): plaintext credentials; prefer injecting them from a
          # Secret through environment variables instead of committing them.
          user: anyflow
          password: mycluster
 ---
 # Single-replica collector that forwards Kubernetes events to Loki.
 # NOTE(review): no RBAC is shown for this collector; its ServiceAccount
 # presumably needs get/list/watch on events for k8s_events to work - confirm.
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: otel-cluster-k8s-events
   namespace: cluster
   labels:
     app: otel-cluster-collector
 spec:
   mode: deployment
   replicas: 1
   config:
     receivers:
       k8s_events:
         auth_type: serviceAccount

     processors:
       # Explicit empty mapping: no overrides are set for the batch processor.
       batch: {}

     exporters:
       loki:
         # In-cluster Loki service. For a hosted Loki with basic auth use
         # https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
         endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

     service:
       pipelines:
         logs:
           receivers: [k8s_events]
           processors: [batch]
           exporters: [loki]

prometheus Collector(statefulset) #

  • prometheus metrics

OTLP Collector(Deployment) #

  • Traces(OTEL)
  • Generic OTEL Logs
  • Generic OTEL metrics

공용 receiver, exporter 모두 otlp 프로토콜을 사용하고 replica 개수 제약이 없는 signal 대상 collector로서, 제약이 없을 경우 가장 운용에 유리한 배포 패턴인 Deployment를 사용한다. MLT(Metrics, Logs, Traces) 모두를 대상으로 한다.

Trace | Generic OTEL trace #

Jaeger와 Grafana Tempo는 OTLP Receiver를 자체적으로 지원한다.

Metric | Generic OTEL metric #

앞서 논한 metric 이외의 app level metrics 등의 여타 metric 수집을 위한 endpoint이다.

Log | Generic OTEL log #

Istio의 OTel access log를 포함한 여타 log 수집을 위한 endpoint이다.

  # Generic OTLP gateway: receives traces, logs and metrics over OTLP
  # (gRPC 4317 / HTTP 4318) and fans them out to Jaeger, Elasticsearch and
  # Prometheus respectively.
  # otel-otlp-collector service accounts are created automatically
  apiVersion: opentelemetry.io/v1beta1
  kind: OpenTelemetryCollector
  metadata:
    name: otel-otlp
    namespace: cluster
    labels:
      app: otel-otlp-collector
  spec:
    mode: deployment
    # replicas: 1
    autoscaler:
      minReplicas: 1
      maxReplicas: 2
    resources:
      # requests:
      #   cpu: 10m
      #   memory: 10Mi
      limits:
        cpu: 500m
        memory: 1000Mi
    podAnnotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "8888"
    config:
      extensions:
        health_check: # for k8s liveness and readiness probes
          endpoint: 0.0.0.0:13133 # default

      processors:
        batch: # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
          send_batch_size: 10000
          timeout: 5s
        memory_limiter:
          check_interval: 1s # recommended by official README
          limit_percentage: 80 # in 1Gi memory environment, hard limit is 800Mi
          spike_limit_percentage: 25 # in 1Gi memory environment, soft limit is 550Mi (800 - 250 = 550Mi)

      service:
        extensions:
          - health_check

        telemetry:
          logs:
            level: INFO
          metrics:
            address: 0.0.0.0:8888

        pipelines:
          traces:
            receivers:
              - otlp
            processors:
              - memory_limiter
              - batch
            exporters:
              - debug
              - otlp/jaeger

          logs:
            receivers:
              - otlp
            processors:
              - memory_limiter
              - batch
            exporters:
              - debug
              - elasticsearch

          metrics:
            receivers:
              - otlp
            processors:
              - memory_limiter
              - batch
            exporters:
              - debug
              - otlphttp/prometheus

      receivers:
        otlp:
          protocols:
            grpc:
              endpoint: 0.0.0.0:4317
            http:
              endpoint: 0.0.0.0:4318

      exporters:
        debug:
          verbosity: basic # detailed, basic

        otlp/jaeger:
          endpoint: jaeger-collector.istio-system.svc.cluster.local:4317
          tls:
            insecure: true

        otlphttp/prometheus:
          metrics_endpoint: http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics
          tls:
            insecure: true

        elasticsearch:
          endpoints:
            - http://elasticsearch-es-http.cluster.svc.cluster.local:9200
          logs_index: "istio-access-log"
          logs_dynamic_index:
            enabled: true
          logstash_format:
            enabled: true
          # NOTE(review): plaintext credentials; prefer injecting them from a
          # Secret through environment variables instead of committing them.
          user: anyflow
          password: mycluster
Advertisement