[Kubernetes] OTel(OpenTelemetry) Collector - Logging

🔀 OTEL 예시 흐름: #

graph TD
  %% Telemetry leaves the application through the OTel SDK and reaches the
  %% Collector, which fans out per signal type to a dedicated backend.
  A[Application] --> B[OTel SDK] --> C[OTel Collector]
  C --> D[Logs] --> G[Loki]
  C --> E[Metrics] --> H[Prometheus]
  C --> F[Traces] --> I[Tempo/Jaeger]

Opentelemetry Collector #

아키텍처는 OTel Collector와 기존의 로그 수집 도구를 혼합해 구성한 Plan A와, OTel Collector만으로 구성한 Plan B로 나눌 수 있다.

OpenTelemetry Collector 배포 및 구성 #

/etc/rsyslog.conf 파일을 열어(예: vi /etc/rsyslog.conf) 아래의 설정을 추가한다.

# Forward every facility/priority over TCP to port 54527, where the collector's
# syslog receiver listens on the node. The destination is the loopback address:
# 0.0.0.0 is a bind address and is not a portable connect target.
# Messages are buffered in an in-memory linked-list queue (10000 entries) and
# delivery is retried up to 10 times while the receiver is unavailable.
*.* action(type="omfwd" target="127.0.0.1" port="54527" protocol="tcp" action.resumeRetryCount="10" queue.type="linkedList" queue.size="10000")

syslog 및 container log

  # Operator-managed collector that runs on every node and ships three log
  # sources to Loki: node syslog received over TCP, application log files under
  # /appdata/applog, and Kubernetes container logs under /var/log/pods.
  apiVersion: opentelemetry.io/v1beta1
  kind: OpenTelemetryCollector
  metadata:
    name: otel-log
  spec:
    mode: daemonset
    hostNetwork: true
    # NOTE(review): root is presumably required to read the host log files.
    podSecurityContext:
      runAsUser: 0
      runAsGroup: 0
    # Tolerate every taint so the DaemonSet is scheduled on all nodes.
    tolerations:
      - operator: Exists
    volumes:
      # Typically the collector will want access to pod logs and container logs
      - name: varlogpods
        hostPath:
          path: /var/log/pods
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      - name: applogs
        hostPath:
          path: /appdata/applog
    volumeMounts:
      # Mount the volumes to the collector container
      - name: varlogpods
        mountPath: /var/log/pods
        readOnly: true
      - name: varlibdockercontainers
        mountPath: /var/lib/docker/containers
        readOnly: true
      - name: applogs
        mountPath: /appdata/applog
        readOnly: true
    config:
      # This is a new configuration file - do not merge this with your metrics configuration!
      receivers:
        # Node syslog, forwarded to this port by rsyslog.
        syslog:
          tcp:
            listen_address: '0.0.0.0:54527'
          protocol: rfc3164
          # RFC 3164 timestamps carry no zone. Set exactly one timezone name:
          # the zone of the sending host (for example Asia/Seoul).
          location: UTC
          operators:
            - type: move
              from: attributes.message
              to: body
            - type: move
              from: attributes.hostname
              to: resource["hostname"]
            - type: move
              from: attributes.appname
              to: resource["daemon"]

        # Application log files laid out as <namespace>/<pod>/<file>.log.
        filelog/applog:
          include:
            - /appdata/applog/*/*/*.log
          # Required: the operators below read attributes["log.file.path"],
          # which is only populated when this flag is enabled.
          include_file_path: true
          operators:
            # Extract metadata from file path
            - type: regex_parser
              id: extract_metadata_from_filepath
              regex: '^.*\/(?P<namespace>\S+)\/(?P<pod_name>\S+)\/(?P<log_file_name>\S+)\.log$'
              parse_from: attributes["log.file.path"]
              cache:
                size: 128 # default maximum amount of Pods per Node is 110
            # Rename attributes
            - type: move
              from: attributes["log.file.path"]
              to: resource["filename"]
            - type: move
              from: attributes.namespace
              to: resource["namespace"]
            - type: move
              from: attributes.pod_name
              to: resource["pod"]
            - type: add
              field: resource["cluster"]
              value: 'your-cluster-name'

        # Kubernetes container logs written by the container runtime.
        filelog:
          include:
            - /var/log/pods/*/*/*.log
          exclude:
            # Exclude the collector's own logs. `otc-container` is the container
            # name the OpenTelemetry Operator assigns to the collector.
            - /var/log/pods/*/otel-collector/*.log
            - /var/log/pods/*/otc-container/*.log
          start_at: beginning
          include_file_path: true
          include_file_name: false
          operators:
            # Find out which format is used by kubernetes
            - type: router
              id: get-format
              routes:
                - output: parser-docker
                  expr: 'body matches "^\\{"'
                - output: parser-crio
                  expr: 'body matches "^[^ Z]+ "'
                - output: parser-containerd
                  expr: 'body matches "^[^ Z]+Z"'
            # Parse CRI-O format
            - type: regex_parser
              id: parser-crio
              regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
              output: extract_metadata_from_filepath
              timestamp:
                parse_from: attributes.time
                layout_type: gotime
                layout: '2006-01-02T15:04:05.999999999Z07:00'
            # Parse CRI-Containerd format
            - type: regex_parser
              id: parser-containerd
              regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
              output: extract_metadata_from_filepath
              timestamp:
                parse_from: attributes.time
                layout: '%Y-%m-%dT%H:%M:%S.%LZ'
            # Parse Docker format
            - type: json_parser
              id: parser-docker
              output: extract_metadata_from_filepath
              timestamp:
                parse_from: attributes.time
                layout: '%Y-%m-%dT%H:%M:%S.%LZ'
            # Extract metadata from file path
            - type: regex_parser
              id: extract_metadata_from_filepath
              # Pod UID is not always 36 characters long
              regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]{16,36})\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
              parse_from: attributes["log.file.path"]
              cache:
                size: 128 # default maximum amount of Pods per Node is 110
            # Rename attributes
            - type: move
              from: attributes["log.file.path"]
              to: resource["filename"]
            - type: move
              from: attributes.container_name
              to: resource["container"]
            - type: move
              from: attributes.namespace
              to: resource["namespace"]
            - type: move
              from: attributes.pod_name
              to: resource["pod"]
            - type: add
              field: resource["cluster"]
              value: 'your-cluster-name' # Set your cluster name here
            - type: move
              from: attributes.log
              to: body

      processors:
        # Label hint for the syslog records (hostname / daemon).
        attributes:
          actions:
            - action: insert
              key: loki.resource.labels
              value: hostname, daemon
        # Label hint and body format for the file-based records.
        resource:
          attributes:
            - action: insert
              key: loki.format
              value: raw
            - action: insert
              key: loki.resource.labels
              value: pod, namespace, container, cluster, filename

      exporters:
        loki:
          # Exactly one URL is allowed here. In-cluster Loki is shown; for a
          # hosted Loki with basic auth use instead:
          #   https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
          endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

      service:
        pipelines:
          logs:
            receivers: [syslog, filelog/applog, filelog]
            processors: [attributes, resource]
            exporters: [loki]

변경될 Container Log 수집 방법

 # Operator-managed DaemonSet collector that tails Kubernetes container logs
 # with the `container` parser operator and pushes them to Loki.
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: otel-log
 spec:
   mode: daemonset
   hostNetwork: true
   # NOTE(review): pod log files on the host are normally readable by root
   # only, so the collector is run as root here - confirm for your nodes.
   podSecurityContext:
     runAsUser: 0
     runAsGroup: 0
   volumes:
     # Typically the collector will want access to pod logs and container logs
     - name: varlogpods
       hostPath:
         path: /var/log/pods
     - name: varlibdockercontainers
       hostPath:
         path: /var/lib/docker/containers
   volumeMounts:
     # Mount the volumes to the collector container
     - name: varlogpods
       mountPath: /var/log/pods
       readOnly: true
     - name: varlibdockercontainers
       mountPath: /var/lib/docker/containers
       readOnly: true
   config:
     # This is a new configuration file - do not merge this with your metrics configuration!
     receivers:
       filelog:
         include_file_path: true
         include:
           - /var/log/pods/*/*/*.log
         operators:
           # Detects the runtime format and parses the record; it also derives
           # k8s.* resource attributes from the log file path.
           - id: container-parser
             type: container
           # The parser names its resource attributes k8s.pod.name,
           # k8s.namespace.name and k8s.container.name. Rename them (and the
           # file path) to the short names listed in loki.resource.labels,
           # otherwise none of those labels would exist on the records.
           - type: move
             from: resource["k8s.pod.name"]
             to: resource["pod"]
           - type: move
             from: resource["k8s.namespace.name"]
             to: resource["namespace"]
           - type: move
             from: resource["k8s.container.name"]
             to: resource["container"]
           - type: move
             from: attributes["log.file.path"]
             to: resource["filename"]
           - type: add
             field: resource["cluster"]
             value: 'your-cluster-name' # Set your cluster name here

     processors:
       resource:
         attributes:
           - action: insert
             key: loki.format
             value: raw
           - action: insert
             key: loki.resource.labels
             value: pod, namespace, container, cluster, filename

     exporters:
       loki:
         # Exactly one URL is allowed here. In-cluster Loki is shown; for a
         # hosted Loki with basic auth use instead:
         #   https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
         endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

     service:
       pipelines:
         logs:
           receivers: [filelog]
           processors: [resource]
           exporters: [loki]

참고: https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/

Receiver Configuration - Plan A #

Receiver는 Promtail 및 EventExporter로부터 Log 데이터를 받는 진입점을 위한 loki receiver를 사용한다.
loki receiver를 사용하면 Otel Collector에 기존의 Loki가 노출하는 endpoint를 동일하게 노출시켜 기존의 Log 수집 컴포넌트들이 동일한 방법으로 OTel Collector에 Log를 보낼 수 있도록 구성할 수 있다.

Receiver Configuration - Plan B #

Receiver는 Container Log 수집을 위한 filelog, System Log 수집을 위한 filelog, Kubernetes Event Log 수집을 위한 k8s_event 3개를 사용한다.
Container Log는 filelog receiver로 /var/log/pods/*/*/*.log 경로에서 수집하고, 수집한 파일들을 기반으로 Path 및 Body를 분석해 Container명, Pod명, Namespace명 등의 정보를 추출한다.
System Log는 별도의 filelog receiver로 /var/log 경로에서 수집한 dmesg, messages, secure 파일들에서 syslog_parser로 정보를 추출해 수집한다.
Kubernetes Event Log는 k8s_event receiver를 이용해 Kubernetes API로부터 수집한다.

Processor Configuration #

Processor는 Log에 Kubernetes Attribute를 부착하기 위한 k8sattributes, Loki Label을 구성하기 위한 resource, OOM 방지를 위한 memory_limiter, Log를 batch성으로 전송하기 위한 batch 4개를 사용한다.
k8sattributes Processor는 filelog로부터 수집한 Container log를 기반으로 이와 일치하는 Pod, Deployment, Cluster 등의 정보를 데이터에 부착한다.
resource Processor는 위에서 부착한 정보를 Loki의 indexing에 필요한 Label로 변환하는 작업을 수행한다.
memory_limiter Processor는 Collector의 메모리 사용량을 제한해 OOM을 방지하고, batch Processor는 가공한 Log 데이터를 일정 단위로 묶어 Exporter로 전달한다.

Exporter Configuration #

Exporter는 Log를 Loki로 전송하기 위한 loki exporter를 사용한다.
loki의 endpoint Attribute에 loki 주소의 /loki/api/v1/push Path를 붙여 로그 진입점을 값으로 넣어 수집한 Log를 Loki로 전송한다.

Pipeline Configuration #

마지막으로 위에서 정의한 Receiver, Processor, Exporter를 순서에 맞게 조합하는 Pipeline을 정의한다.
특히 Processor는 Pipeline에 나열한 순서대로 실행되어 배치 순서에 따라 Log를 가공하는 순서가 달라지기 때문에, 아래의 순서를 준수하는 것이 중요하다.
Plan A에서는 Loki Receiver에서 Log 데이터를 수집해 k8sattributes, resource, memory_limiter, batch 순으로 가공한 뒤, Loki Exporter를 사용해 Loki backend로 전송한다.

Pipeline Configuration - Plan B #

filelog, k8s_events Receiver에서 Log 데이터를 수집해 k8sattributes, resource, memory_limiter, batch순으로 가공한 뒤, Loki Exporter를 사용해 Loki backend로 전송한다.

Node Collector(Daemonset) #

File Logs
Host metrics
Kubelet state metrics
공식 문서에서 DaemonSet을 권장하는 receiver가 모인 collector이다.

Log | Filelog #

수집 대상은 stdout/stderr로 생성된 Kubernetes 및 app log로, 사실상 Fluent Bit를 대체한다. 이를 위해 log scraping 및 전달뿐 아니라 Processors에서 언급한 다양한 processor의 사용을 고려해야 한다.

Receiver: Filelog Receiver
Exporter: Loki exporter

Metric | Kubelet Stats #

node, pod, container, volume, filesystem, network I/O 및 error metric 등 CPU, memory와 같은 infra resource에 관한 metric을 다루며, 각 노드의 kubelet이 노출하는 API에서 추출한다. 사실상 cAdvisor를 대체한다.

Receiver: Kubelet Stats Receiver
Exporter: OTLP/HTTP Exporter

Metric | Host Metrics #

수집 대상은 node(cpu, disk, CPU load, filesystem, memory, network, paging, process 등)의 metric으로, 사실상 Prometheus Node Exporter를 대체한다. Kubelet Stats Receiver와 일부 항목이 겹치므로 동시 운용 시 중복 처리가 필요하다.

Receiver: Host Metrics Receiver
Exporter: OTLP/HTTP Exporter

# The otel-node-collector service account is created automatically
---
  # Cluster-wide permissions for the node collector's kubeletstats receiver.
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: otel-node-collector
  rules:
    # Kubelet stats and proxy endpoints of the nodes.
    - apiGroups: [""]
      resources: ["nodes/stats", "nodes/proxy"]
      verbs: ["get", "watch", "list"]
    # NOTE(review): the kubeletstats receiver in this listing enables
    # k8s_api_config together with the k8s.volume.type label, which resolves
    # PVC-backed volumes through the API server. That lookup needs read access
    # to these objects - confirm against the receiver documentation.
    - apiGroups: [""]
      resources: ["persistentvolumes", "persistentvolumeclaims"]
      verbs: ["get"]
---
 # Grants the otel-node-collector ClusterRole to the service account of the
 # same name in the `cluster` namespace.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: otel-node-collector
 subjects:
   - kind: ServiceAccount
     namespace: cluster
     name: otel-node-collector
 roleRef:
   kind: ClusterRole
   name: otel-node-collector
   apiGroup: rbac.authorization.k8s.io
---
 # Node-level collector (one pod per node): scrapes the local kubelet and
 # forwards the metrics to Prometheus over OTLP/HTTP. The hostmetrics receiver
 # and its host filesystem mount are kept below as commented-out options.
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: otel-node
   namespace: cluster
   labels:
     app: otel-node-collector
 spec:
   mode: daemonset
   resources:
     # requests:
     #   cpu: 10m
     #   memory: 10Mi
     limits:
       cpu: 500m
       memory: 1000Mi
   podAnnotations:
     prometheus.io/scrape: 'true'
     prometheus.io/port: '8888'
   env:
     # Node name of the pod, used to build the kubelet endpoint below.
     - name: NODE_NAME
       valueFrom:
         fieldRef:
           fieldPath: spec.nodeName
   # Host filesystem mount, only needed when hostmetrics is enabled:
   # volumes:
   #   - name: hostfs
   #     hostPath:
   #       path: /
   # volumeMounts:
   #   - name: hostfs
   #     mountPath: /hostfs
   #     readOnly: true
   #     mountPropagation: HostToContainer
   config:
     extensions:
       # for k8s liveness and readiness probes
       health_check:
         endpoint: '0.0.0.0:13133' # default

     receivers:
       kubeletstats:
         auth_type: serviceAccount
         endpoint: 'https://${env:NODE_NAME}:10250'
         collection_interval: 10s
         insecure_skip_verify: true
         metric_groups: [node, pod, container, volume]
         extra_metadata_labels: [k8s.volume.type]
         k8s_api_config:
           auth_type: serviceAccount

       # hostmetrics:
       #   collection_interval: 10s
       #   root_path: /hostfs
       #   scrapers:
       #     cpu:        # CPU utilization metrics
       #     load:       # CPU load metrics
       #     memory:     # Memory utilization
       #     disk:       # Disk I/O metrics
       #     filesystem: # File System utilization metrics
       #     network:    # Network interface I/O metrics & TCP connection metrics
       #     paging:     # Paging/Swap space utilization and I/O metrics
       #     processes:  # Process count metrics
       #     process:    # Per process CPU, Memory, and Disk I/O metrics
       #       # The mute_* settings below silence per-process scrape errors such as:
       #       #   error reading process executable for pid 1:
       #       #   readlink /hostfs/proc/1/exe: permission denied;
       #       #   error reading username for process "systemd" (pid 1):
       #       #   open /etc/passwd: no such file or directory
       #       # refer: https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/28661
       #       mute_process_name_error: true
       #       mute_process_exe_error: true
       #       mute_process_io_error: true
       #       mute_process_user_error: true
       #       mute_process_cgroup_error: true

     processors:
       # With the 1000Mi container limit: hard limit = 80% = 800Mi,
       # spike allowance = 25% = 250Mi, soft limit = 800Mi - 250Mi = 550Mi.
       memory_limiter:
         check_interval: 1s # recommended by official README
         limit_percentage: 80
         spike_limit_percentage: 25
       # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
       batch:
         send_batch_size: 10000
         timeout: 5s

     exporters:
       debug:
         verbosity: basic # detailed, basic

       otlphttp/prometheus:
         metrics_endpoint: 'http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics'
         tls:
           insecure: true

     service:
       extensions: [health_check]

       telemetry:
         logs:
           level: INFO
         metrics:
           address: '0.0.0.0:8888'

       pipelines:
         metrics:
           # add `hostmetrics` here after enabling the receiver above
           receivers: [kubeletstats]
           processors: [memory_limiter, batch]
           exporters: [otlphttp/prometheus]

Cluster Collector(Single Pod) #

k8s events(log)
k8s objects(metrics)

단일 replica 사용을 권장하는 receiver들을 대상으로 한다. 공식 문서에 따르면 두 receiver 모두 cluster 관점에서 데이터를 추출하므로, 2개 이상의 instance를 사용하면 중복이 발생할 수 있기 때문이다. 이에 따라 Deployment 타입에 replica를 1개로 설정한다.

Log | Kubernetes Objects #

주로 Kubernetes event 수집용으로 Kubernetes API server 출처의 objects(전체 목록은 kubectl api-resources 로 확인) 수집에도 사용한다.

Receiver: Kubernetes Objects Receiver
Exporter: Loki exporter

Metric | Kubernetes Cluster #

사실 상 Kube State Metrics의 대체로 Kubernetes API server에서 cluster level의 metric과 entity events를 추출한다.

Receiver: Kubernetes Cluster Receiver
Exporter: OTLP/HTTP Exporter

  # Explicit service account. No namespace is set, so it is created in whichever
  # namespace the manifest is applied to.
  kind: ServiceAccount
  apiVersion: v1
  metadata:
    name: otel-collector-opentelemetry-collector
---
  # Read-only (get/list/watch) access to the core, workload, batch and
  # autoscaling objects listed below.
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: otel-collector-opentelemetry-collector
  rules:
    # Core API group
    - apiGroups: ['']
      resources:
        - events
        - namespaces
        - namespaces/status
        - nodes
        - nodes/spec
        - pods
        - pods/status
        - replicationcontrollers
        - replicationcontrollers/status
        - resourcequotas
        - services
      verbs: [get, list, watch]
    # Workload controllers
    - apiGroups: [apps]
      resources: [daemonsets, deployments, replicasets, statefulsets]
      verbs: [get, list, watch]
    - apiGroups: [extensions]
      resources: [daemonsets, deployments, replicasets]
      verbs: [get, list, watch]
    # Jobs
    - apiGroups: [batch]
      resources: [jobs, cronjobs]
      verbs: [get, list, watch]
    # Autoscalers
    - apiGroups: [autoscaling]
      resources: [horizontalpodautoscalers]
      verbs: [get, list, watch]
---
 # Grants the cluster-read ClusterRole to the service accounts that run the
 # cluster collector.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: otel-collector-opentelemetry-collector
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
   name: otel-collector-opentelemetry-collector
 subjects:
   # Service account created automatically for the `otel-cluster` collector,
   # which is deployed in the `cluster` namespace. Without this subject the
   # collector pod has none of the permissions granted by the ClusterRole.
   - kind: ServiceAccount
     name: otel-cluster-collector
     namespace: cluster
   # Explicitly declared service account, kept for backward compatibility.
   - kind: ServiceAccount
     name: otel-collector-opentelemetry-collector
     namespace: default
---
 # otel-cluster-collector service accounts are created automatically
 #
 # Cluster-level collector, single replica: Kubernetes objects/events go to
 # Elasticsearch as logs, cluster metrics go to Prometheus over OTLP/HTTP.
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: otel-cluster
   namespace: cluster
   labels:
     app: otel-cluster-collector
 spec:
   mode: deployment
   replicas: 1
   podAnnotations:
     prometheus.io/scrape: "true"
     prometheus.io/port: "8888"
   config:
     extensions:
       health_check: # for k8s liveness and readiness probes
         endpoint: 0.0.0.0:13133 # default

     processors:
       batch: # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
         send_batch_size: 10000
         timeout: 5s
       # The percentages apply to the memory available to the container: the
       # hard limit is 80% of it and the soft limit is 25 points lower (55%).
       memory_limiter:
         check_interval: 1s # recommended by official README
         limit_percentage: 80
         spike_limit_percentage: 25
       # Tags every log record with the Elasticsearch index prefix.
       attributes:
         # `actions` must be a list of actions, not a single mapping.
         actions:
           - key: elasticsearch.index.prefix
             value: otel-k8sobject
             action: insert

     service:
       extensions:
         - health_check

       telemetry:
         logs:
           level: DEBUG
         metrics:
           address: 0.0.0.0:8888

       pipelines:
         logs:
           receivers:
             - k8sobjects
           processors:
             - memory_limiter
             - batch
             - attributes
           exporters:
             - debug
             - elasticsearch

         metrics:
           receivers:
             - k8s_cluster
           processors:
             - memory_limiter
             - batch
           exporters:
             - otlphttp/prometheus

     receivers:
       k8sobjects:
         objects:
           - name: pods
             mode: pull
           - name: events
             mode: watch
       k8s_cluster:
         collection_interval: 10s
         node_conditions_to_report:
           - Ready
           - MemoryPressure
         allocatable_types_to_report:
           - cpu
           - memory
           - ephemeral-storage
           - storage

     exporters:
       debug:
         verbosity: detailed # default is basic

       otlphttp/prometheus:
         metrics_endpoint: http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics
         tls:
           insecure: true

       elasticsearch:
         endpoints:
           - http://elasticsearch-es-http.cluster.svc.cluster.local:9200
         logs_index: ""
         logs_dynamic_index:
           enabled: true
         logstash_format:
           enabled: true
         # NOTE(review): credentials are hard-coded in the manifest; prefer
         # injecting them from a Secret instead of committing them.
         user: anyflow
         password: mycluster

 # Single-replica collector that receives Kubernetes events through the
 # k8s_events receiver and pushes them to Loki as logs.
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
   name: otel-cluster-k8s-events
   namespace: cluster
   labels:
     app: otel-cluster-collector
 spec:
   mode: deployment
   replicas: 1
   config:
     receivers:
       # NOTE(review): the collector's service account presumably needs RBAC
       # to read events; no such rule is part of this manifest - confirm.
       k8s_events:
         auth_type: serviceAccount

     processors:
       # Default batching settings.
       batch: {}

     exporters:
       loki:
         # Exactly one URL is allowed here. In-cluster Loki is shown; for a
         # hosted Loki with basic auth use instead:
         #   https://LOKI_USERNAME:ACCESS_POLICY_TOKEN@LOKI_URL/loki/api/v1/push
         endpoint: 'http://<Loki-svc>.<Loki-Namespace>.svc/loki/api/v1/push'

     service:
       pipelines:
         logs:
           receivers: [k8s_events]
           processors: [batch]
           exporters: [loki]

prometheus Collector(statefulset) #

prometheus metrics

OTLP Collector(Deployment) #

Traces(OTEL)
Generic OTEL Logs
Generic OTEL metrics

receiver와 exporter 모두 공통적으로 otlp 프로토콜을 사용하고 replica 개수 제약이 없는 signal을 대상으로 하는 collector로서, 제약이 없을 경우 가장 운용에 유리한 배포 패턴인 Deployment를 사용한다. MLT(Metrics, Logs, Traces) 모두를 대상으로 한다.

Trace | Generic OTEL trace #

Jaeger 및 Grafana Tempo는 OTLP Receiver를 자체적으로 지원한다.

Receiver: OTLP Receiver
Exporter: OTLP Exporter (gRPC)

Metric | Generic OTEL metric #

앞서 논한 metric 이외의 app level metrics 등의 여타 metric 수집을 위한 endpoint이다.

Receiver: OTLP Receiver
Exporter: OTLP/HTTP Exporter

Log | Generic OTEL log #

Istio의 OTel access log를 포함한 여타 log 수집을 위한 endpoint이다.

Receiver: OTLP Receiver
Exporter: Loki exporter

  # otel-otlp-collector service accounts are created automatically
  #
  # Generic OTLP gateway: accepts traces, logs and metrics over OTLP (gRPC and
  # HTTP) and forwards them to Jaeger, Elasticsearch and Prometheus.
  apiVersion: opentelemetry.io/v1beta1
  kind: OpenTelemetryCollector
  metadata:
    name: otel-otlp
    namespace: cluster
    labels:
      app: otel-otlp-collector
  spec:
    mode: deployment
    # replicas: 1
    autoscaler:
      minReplicas: 1
      maxReplicas: 2
    resources:
      # requests:
      #   cpu: 10m
      #   memory: 10Mi
      limits:
        cpu: 500m
        memory: 1000Mi
    podAnnotations:
      prometheus.io/scrape: 'true'
      prometheus.io/port: '8888'
    config:
      extensions:
        # for k8s liveness and readiness probes
        health_check:
          endpoint: '0.0.0.0:13133' # default

      receivers:
        otlp:
          protocols:
            grpc:
              endpoint: '0.0.0.0:4317'
            http:
              endpoint: '0.0.0.0:4318'

      processors:
        # With the 1000Mi container limit: hard limit = 80% = 800Mi,
        # spike allowance = 25% = 250Mi, soft limit = 800Mi - 250Mi = 550Mi.
        memory_limiter:
          check_interval: 1s # recommended by official README
          limit_percentage: 80
          spike_limit_percentage: 25
        # buffer up to 10000 spans, metric data points, log records for up to 5 seconds
        batch:
          send_batch_size: 10000
          timeout: 5s

      exporters:
        debug:
          verbosity: basic # detailed, basic

        otlp/jaeger:
          endpoint: 'jaeger-collector.istio-system.svc.cluster.local:4317'
          tls:
            insecure: true

        otlphttp/prometheus:
          metrics_endpoint: 'http://prometheus-server.cluster.svc.cluster.local:80/api/v1/otlp/v1/metrics'
          tls:
            insecure: true

        elasticsearch:
          endpoints:
            - 'http://elasticsearch-es-http.cluster.svc.cluster.local:9200'
          logs_index: 'istio-access-log'
          logs_dynamic_index:
            enabled: true
          logstash_format:
            enabled: true
          user: anyflow
          password: mycluster

      service:
        extensions: [health_check]

        telemetry:
          logs:
            level: INFO
          metrics:
            address: '0.0.0.0:8888'

        # Every pipeline shares the OTLP receiver and the same processor chain;
        # only the signal-specific exporter differs.
        pipelines:
          traces:
            receivers: [otlp]
            processors: [memory_limiter, batch]
            exporters: [debug, otlp/jaeger]

          logs:
            receivers: [otlp]
            processors: [memory_limiter, batch]
            exporters: [debug, elasticsearch]

          metrics:
            receivers: [otlp]
            processors: [memory_limiter, batch]
            exporters: [debug, otlphttp/prometheus]