|
1、配置文件
# Promtail configuration: HTTP server, Loki push client, positions file and
# the Kubernetes pod scrape job with its pipeline stages and relabel rules.
server:
  log_level: info
  http_listen_port: 3101

clients:
  - external_labels:
      cluster: prod-common
    url: http://loki-distributed-gateway/loki/api/v1/push

positions:
  filename: /run/promtail/positions.yaml

scrape_configs:
  # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
  - job_name: kubernetes-pods
    pipeline_stages:
      # Custom counters for every log entry: total bytes and total lines.
      - metrics:
          log_bytes_total:
            config:
              action: add
              count_entry_bytes: true
              match_all: true
            description: total bytes of log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            type: Counter
          log_lines_total:
            config:
              action: inc
              match_all: true
            description: total number of log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            type: Counter
      # Extract "error_flag" when the line contains the word "error"
      # (case-insensitive), then count those lines.
      - regex:
          expression: '(?i)\b(?P<error_flag>error|ERROR)\b'
      - metrics:
          log_errors_total:
            config:
              action: inc
            description: Total number of error log lines
            max_idle_duration: 24h
            prefix: my_promtail_custom_
            source: error_flag
            type: Counter
      - docker: {}
      # Stages applied only to nginx-ingress-controller container logs.
      - match:
          selector: '{container="nginx-ingress-controller"}'
          stages:
            - json:
                expressions:
                  bytes_sent: bytes_sent
                  method: request_method
                  path: path
                  request_length: request_length
                  request_time: request_time
                  service: service
                  status: status
                  upstream_response_time: upstream_response_time
                  vhost: vhost
            # should_collect is set to request_time only when vhost and path
            # match the filters below; otherwise it stays empty.
            - template:
                source: should_collect
                template: '{{ if and (regexFind ".*(com|cn|io)$" .vhost) (regexFind "^(/api/visual/.*|/data-statistics/.*|/data-receiver/.*|/map-receiver/.*|/business-monitor/.*|/business-monitor/.*)"
                  .path) (not (regexFind "(grafana|log).*" .vhost)) }}{{ .request_time }}{{
                  end }}'
            - labels:
                path: ""
                service: ""
                status: ""
                vhost: ""
            - metrics:
                nginx_request_duration_seconds:
                  config:
                    action: set
                  description: Nginx request duration in seconds
                  max_idle_duration: 5s
                  prefix: my_promtail_custom_
                  source: should_collect
                  type: Gauge
            # Drop the path label after the duration gauge has been recorded.
            - labeldrop:
                - path
            - metrics:
                nginx_request_count:
                  config:
                    action: inc
                    match_all: true
                  description: Nginx request count
                  max_idle_duration: 24h
                  prefix: my_promtail_custom_
                  type: Counter
      # Use the involved object's namespace as the namespace label for
      # kubernetes-event-exporter logs.
      - match:
          selector: '{app="kubernetes-event-exporter"}'
          stages:
            - json:
                expressions:
                  namespace: involvedObject.namespace
            - labels:
                namespace: ""
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      # Strip the trailing hash suffix from the controller name.
      - source_labels:
          - __meta_kubernetes_pod_controller_name
        regex: '([0-9a-z-.]+?)(-[0-9a-f]{8,10})?'
        action: replace
        target_label: __tmp_controller_name
      # app / instance / component: first non-empty value of the source labels.
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_name
          - __meta_kubernetes_pod_label_app
          - __tmp_controller_name
          - __meta_kubernetes_pod_name
        regex: '^;*([^;]+)(;.*)?$'
        action: replace
        target_label: app
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_instance
          - __meta_kubernetes_pod_label_release
        regex: '^;*([^;]+)(;.*)?$'
        action: replace
        target_label: instance
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_component
          - __meta_kubernetes_pod_label_component
        regex: '^;*([^;]+)(;.*)?$'
        action: replace
        target_label: component
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_node_name
        target_label: node_name
      - action: replace
        source_labels:
          - __meta_kubernetes_namespace
        target_label: namespace
      # job = "<namespace>/<app>"
      - action: replace
        replacement: $1
        separator: /
        source_labels:
          - namespace
          - app
        target_label: job
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_name
        target_label: pod
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_container_name
        target_label: container
      # Log file glob built from "<pod uid>/<container name>".
      - action: replace
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
          - __meta_kubernetes_pod_uid
          - __meta_kubernetes_pod_container_name
        target_label: __path__
      # When the config-hash annotation is present, build the glob from
      # "<config hash>/<container name>" instead.
      - action: replace
        regex: 'true/(.*)'
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
          - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
          - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
          - __meta_kubernetes_pod_container_name
        target_label: __path__

# No limits are configured in this example; written as an explicit empty
# mapping instead of a bare key.
limits_config: {}
2、Prometheus配置服务发现(容器版本配置参考):
# ServiceMonitor: tells the Prometheus Operator to scrape /metrics on the
# "http-metrics" port of Services labelled app.kubernetes.io/name=promtail.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/name: promtail
  name: promtail
  namespace: loki
spec:
  endpoints:
    - interval: 30s
      path: /metrics
      port: http-metrics
      relabelings:
        # Report the node name as the "instance" label.
        - action: replace
          sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: instance
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail
---
# Service selected by the ServiceMonitor above (label selector:
# app.kubernetes.io/name=promtail). Server-managed fields such as
# resourceVersion are omitted so the manifest can be applied as-is.
apiVersion: v1
kind: Service
metadata:
  name: promtail
  labels:
    app.kubernetes.io/name: promtail
  namespace: loki
spec:
  # The ServiceMonitor scrapes the port named "http-metrics"; 3101 is the
  # promtail http_listen_port used in this article's configuration.
  ports:
    - name: http-metrics
      port: 3101
      targetPort: 3101
  selector:
    app.kubernetes.io/name: promtail
# Log lines per container over the dashboard range for today and the two
# preceding days; each series is tagged with a "period" label.
# increase() is used instead of delta() because the metric is a counter:
# delta() is meant for gauges and does not account for counter resets.
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range])
  ),
  "period", "today", "", ".*"
)
or
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range] offset 1d)
  ),
  "period", "yesterday", "", ".*"
)
or
label_replace(
  sum by (exported_container) (
    increase(my_promtail_custom_log_lines_total{namespace="$namespace", container="promtail",cluster="$cluster",exported_namespace=~"$exported_namespace",exported_container=~"$exported_container"}[$__range] offset 2d)
  ),
  "period", "2_days_ago", "", ".*"
)
按指定名称空间(namespace)筛选要采集的日志:
# Fragment of a scrape job: keep only pods whose namespace matches the regex,
# placed before the existing relabel rules.
kubernetes_sd_configs:
  - role: pod
relabel_configs:
  # Targets whose namespace does not match are dropped.
  - source_labels: [__meta_kubernetes_namespace]
    regex: '^(name1|name2)'
    action: keep
  - source_labels:
      - __meta_kubernetes_pod_controller_name
    regex: '([0-9a-z-.]+?)(-[0-9a-f]{8,10})?'
    action: replace
    target_label: __tmp_controller_name
采集压缩文件日志:(需为压缩日志单独新增一个job;若直接修改原有job,则不会再采集明文日志)
# Additional scrape job (an entry under scrape_configs) that reads rotated,
# gzip-compressed pod log files.
- job_name: kubernetes-pods-gz
  decompression:
    enabled: true
    initial_delay: 5s
    format: "gz"
  pipeline_stages:
    - cri: {}
    # Use the involved object's namespace as the namespace label for
    # kubernetes-event-exporter logs.
    - match:
        selector: '{app="kubernetes-event-exporter"}'
        stages:
          - json:
              expressions:
                namespace: involvedObject.namespace
          - labels:
              namespace: ""
  kubernetes_sd_configs:
    - role: pod
  relabel_configs:
    # Strip the trailing hash suffix from the controller name.
    - source_labels:
        - __meta_kubernetes_pod_controller_name
      regex: '([0-9a-z-.]+?)(-[0-9a-f]{8,10})?'
      action: replace
      target_label: __tmp_controller_name
    # app / instance / component: first non-empty value of the source labels.
    - source_labels:
        - __meta_kubernetes_pod_label_app_kubernetes_io_name
        - __meta_kubernetes_pod_label_app
        - __tmp_controller_name
        - __meta_kubernetes_pod_name
      regex: '^;*([^;]+)(;.*)?$'
      action: replace
      target_label: app
    - source_labels:
        - __meta_kubernetes_pod_label_app_kubernetes_io_instance
        - __meta_kubernetes_pod_label_release
      regex: '^;*([^;]+)(;.*)?$'
      action: replace
      target_label: instance
    - source_labels:
        - __meta_kubernetes_pod_label_app_kubernetes_io_component
        - __meta_kubernetes_pod_label_component
      regex: '^;*([^;]+)(;.*)?$'
      action: replace
      target_label: component
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_node_name
      target_label: node_name
    - action: replace
      source_labels:
        - __meta_kubernetes_namespace
      target_label: namespace
    # job = "<namespace>/<app>"
    - action: replace
      replacement: $1
      separator: /
      source_labels:
        - namespace
        - app
      target_label: job
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_name
      target_label: pod
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_container_name
      target_label: container
    # Only rotated, compressed files (*.log.*.gz) are matched by this job.
    - action: replace
      replacement: /var/log/pods/*$1/*.log.*.gz
      separator: /
      source_labels:
        - __meta_kubernetes_pod_uid
        - __meta_kubernetes_pod_container_name
      target_label: __path__
    - action: replace
      regex: 'true/(.*)'
      replacement: /var/log/pods/*$1/*.log.*.gz
      separator: /
      source_labels:
        - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
        - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
        - __meta_kubernetes_pod_container_name
      target_label: __path__
日志文件路径匹配: __path__ 使用的是glob通配符(并非正则表达式),匹配规则和ls命令的通配符基本一样:
ls -lh *.lo*[0-9g] #查看文件名包含“.lo”且以数字或字母g结尾的文件(如 0.log、0.log.20260403-100542)
-rw-r----- 1 root root 55M Apr 3 02:43 0.log
-rw-r----- 1 root root 101M Apr 3 02:05 0.log.20260403-100542
promtail配置:
# Partial __path__ relabel rule: only the fields relevant to the glob are
# shown here (source_labels / target_label are not part of this excerpt).
# The glob matches both the current log file and rotated files whose names
# end in a digit.
- action: replace
  regex: 'true/(.*)'
  replacement: '/var/log/pods/*$1/*.lo*[0-9g]'
  separator: /
采集性能优化: 客户端参数优化:加大单次发送的数据量(batchsize),并缩短发送等待时间(batchwait)以提高发送频率
# Client tuning: larger batches, shorter batch wait, bounded retry backoff.
clients:
  - url: http://10.79.130.137:30828/loki/api/v1/push
    batchsize: 50000000
    batchwait: 100ms
    backoff_config:
      min_period: 500ms
      max_period: 10m
      max_retries: 3
大杀器优化:promtail没有用于调整并发能力的参数,但它是Go语言编写的程序,增加CPU核心数可以大幅提高promtail的日志采集量。尤其是在容器环境中,CPU最好分配1核以上,否则每秒采集日志的能力会低于1000行,无法支撑大日志量的采集。 (责任编辑:liangzh) |
