欢迎阅读!

潇湘夜雨

当前位置: 主页 > 系统 > 云计算与云原生 >

promtail添加自定义指标和性能优化大杀器

时间:2026-04-03 10:35来源:未知 作者:华嵩阳 点击:
1、配置文件 server: log_level: info http_listen_port: 3101 clients: - url: http://loki-distributed-gateway/loki/api/v1/push positions: filename: /run/promtail/positions.yaml scrape_configs: - job_name: kubernetes-pods pipeline_stages:
1、配置文件
# Promtail's own HTTP server settings.
server:
  http_listen_port: 3101
  log_level: info

# Loki endpoint(s) that receive the collected log batches.
clients:
  - url: http://loki-distributed-gateway/loki/api/v1/push
    # Static labels added to every entry pushed through this client.
    external_labels:
      cluster: prod-common

# File in which promtail persists how far it has read in each log file.
positions:
  filename: /run/promtail/positions.yaml
 
scrape_configs:
  # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
  - job_name: kubernetes-pods
    pipeline_stages:
      - metrics:
          log_bytes_total:
            config:
              action: add
              count_entry_bytes: true
              match_all: true
            description: total bytes of log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            type: Counter
          log_lines_total:
            config:
              action: inc
              match_all: true
            description: total number of log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            type: Counter
      - regex:
          expression: (?i)\b(?P<error_flag>error|ERROR)\b
      - metrics:
          log_errors_total:
            config:
              action: inc
            description: Total number of error log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            source: error_flag
            type: Counter
      - docker: {}
      - match:
          selector: '{container="nginx-ingress-controller"}'
          stages:
          - json:
              expressions:
                bytes_sent: bytes_sent
                method: request_method
                path: path
                request_length: request_length
                request_time: request_time
                service: service
                status: status
                upstream_response_time: upstream_response_time
                vhost: vhost
          - template:
              source: should_collect
              template: '{{ if and (regexFind ".*(com|cn|io)$" .vhost) (regexFind "^(/api/visual/.*|/data-statistics/.*|/data-receiver/.*|/map-receiver/.*|/business-monitor/.*|/business-monitor/.*)"
                .path) (not (regexFind "(grafana|log).*" .vhost)) }}{{ .request_time }}{{
                end }}'
          - labels:
              path: ""
              service: ""
              status: ""
              vhost: ""
          - metrics:
              nginx_request_duration_seconds:
                config:
                  action: set
                description: Nginx request duration in seconds
                max_idle_duration: 5s
                prefix: my_promtail_custom_
                source: should_collect
                type: Gauge
          - labeldrop:
            - path
          - metrics:
              nginx_request_count:
                config:
                  action: inc
                  match_all: true
                description: Nginx request count
                max_idle_duration: 24h
                prefix: my_promtail_custom_
                type: Counter
      - match:
          selector: '{app="kubernetes-event-exporter"}'
          stages:
          - json:
              expressions:
                namespace: involvedObject.namespace
          - labels:
              namespace: ""
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels:
          - __meta_kubernetes_pod_controller_name
        regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
        action: replace
        target_label: __tmp_controller_name
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_name
          - __meta_kubernetes_pod_label_app
          - __tmp_controller_name
          - __meta_kubernetes_pod_name
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: app
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_instance
          - __meta_kubernetes_pod_label_release
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: instance
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_component
          - __meta_kubernetes_pod_label_component
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: component
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_node_name
        target_label: node_name
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: namespace
      - action: replace
        replacement: $1
        separator: /
        source_labels:
        - namespace
        - app
        target_label: job
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: pod
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container
      - action: replace
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
        - __meta_kubernetes_pod_uid
        - __meta_kubernetes_pod_container_name
        target_label: __path__
      - action: replace
        regex: true/(.*)
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
        - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
        - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
        - __meta_kubernetes_pod_container_name
        target_label: __path__
  
  
 
# No limits are overridden; the explicit empty mapping avoids a bare key that
# parses as null.
limits_config: {}
 

2、Prometheus配置服务发现

容器版本配置参考:

---
# ServiceMonitor selecting Services labelled app.kubernetes.io/name=promtail
# and scraping /metrics on their http-metrics port every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: promtail
  namespace: loki
  labels:
    app.kubernetes.io/name: promtail
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail
  endpoints:
    - port: http-metrics
      path: /metrics
      interval: 30s
      relabelings:
        # Use the node name of the scraped pod as the `instance` label.
        - action: replace
          sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: instance


---
# Service in front of the promtail pods so that the promtail ServiceMonitor
# can discover and scrape them.
apiVersion: v1
kind: Service
metadata:
  name: promtail
  namespace: loki
  labels:
    app.kubernetes.io/name: promtail
spec:
  selector:
    app.kubernetes.io/name: promtail
  ports:
    # The port name must match `port: http-metrics` in the ServiceMonitor
    # endpoint; 3101 is promtail's http_listen_port.
    - name: http-metrics
      port: 3101
      targetPort: 3101
      protocol: TCP
 
3、统计多天日志量对比
# Log lines per container over the dashboard range, for today, yesterday and
# two days ago. Each branch is tagged with a distinct `period` label so that
# `or` keeps all three result sets.
# increase() is used because the metric is a counter: unlike delta(), it
# compensates for counter resets.
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range])
  ),
  "period", "today", "", ".*"
)
or
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range] offset 1d)
  ),
  "period", "yesterday", "", ".*"
)
or
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range] offset 2d)
  ),
  "period", "2_days_ago", "", ".*"
)
 
日志收集指定名称空间筛选:
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace]
        regex: ^(name1|name2)
        action: keep
      - source_labels:
          - __meta_kubernetes_pod_controller_name
        regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
        action: replace
        target_label: __tmp_controller_name

采集压缩文件日志:(需为压缩日志单独新增一个job;如果直接在原有job上开启decompression,明文日志将不会被采集)

  - job_name: kubernetes-pods-gz
    decompression:
      enabled: true
      initial_delay: 5s
      format: "gz"
    pipeline_stages:
      - cri: {}
 
      - match:
          selector: '{app="kubernetes-event-exporter"}'
          stages:
          - json:
              expressions:
                namespace: involvedObject.namespace
          - labels:
              namespace: ""
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels:
          - __meta_kubernetes_pod_controller_name
        regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
        action: replace
        target_label: __tmp_controller_name
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_name
          - __meta_kubernetes_pod_label_app
          - __tmp_controller_name
          - __meta_kubernetes_pod_name
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: app
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_instance
          - __meta_kubernetes_pod_label_release
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: instance
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_component
          - __meta_kubernetes_pod_label_component
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: component
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_node_name
        target_label: node_name
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: namespace
      - action: replace
        replacement: $1
        separator: /
        source_labels:
        - namespace
        - app
        target_label: job
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: pod
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_container_name
        target_label: container
      - action: replace
        replacement: /var/log/pods/*$1/*.log.*.gz
        separator: /
        source_labels:
        - __meta_kubernetes_pod_uid
        - __meta_kubernetes_pod_container_name
        target_label: __path__
      - action: replace
        regex: true/(.*)
        replacement: /var/log/pods/*$1/*.log.*.gz
        separator: /
        source_labels:
        - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
        - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
        - __meta_kubernetes_pod_container_name
        target_label: __path__

日志文件正则匹配:

__path__ 使用的是 glob 通配符(并非正则表达式),匹配规则和 ls 命令的通配基本一样:
 ls -lh *.lo*[0-9g] #查看以数字或字母g结尾的文件(如 0.log、0.log.20260403-100542)
 
-rw-r----- 1 root root  55M Apr  3 02:43 0.log
-rw-r----- 1 root root 101M Apr  3 02:05 0.log.20260403-100542

promtail配置:
      # Excerpt of a __path__-style relabel rule; source_labels and
      # target_label are not shown in this fragment.
      # The glob *.lo*[0-9g] matches file names whose last character is a digit
      # or "g", e.g. 0.log and 0.log.20260403-100542.
      - action: replace
        regex: true/(.*)
        replacement: /var/log/pods/*$1/*.lo*[0-9g]
        separator: /


采集性能优化:
客户端参数优化:加大采集发送的数据量和发送的频率
# Client tuned for throughput: larger batches, flushed more often.
clients:
  - url: http://10.79.130.137:30828/loki/api/v1/push
    # Send a batch after at most this long, even if it is not full.
    batchwait: 100ms
    # Maximum batch size in bytes (about 50 MB).
    # NOTE(review): confirm the Loki side accepts requests this large.
    batchsize: 50000000
    # Retry policy for failed pushes.
    # NOTE(review): with only 3 retries starting at 500ms, max_period is
    # presumably never reached - confirm the low retry count is intentional.
    backoff_config:
      max_retries: 3
      min_period: 500ms
      max_period: 10m
大杀器优化:promtail没有提供调整并发能力的参数,但它是用Go语言编写的程序,增加CPU核心数可以大幅提高promtail的日志采集量。尤其是在容器环境中,CPU最好分配1核以上,否则每秒采集能力会低于1000行,无法支撑大日志量的采集。

(责任编辑:liangzh)
织梦二维码生成器
顶一下
(0)
0%
踩一下
(0)
0%
------分隔线----------------------------