LPG Log Collection Framework

The framework is composed of Loki, Promtail, and Grafana (LPG).

Promtail collects the log data, Loki receives, parses, and stores it, and Grafana presents it.

All of the services run under docker-compose (started with docker compose up -d).

Below is docker-compose.yaml:

services:
  # Metrics collection
  prometheus:
    image: prom/prometheus:v3.9.0
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /docker-data/prometheus/config:/etc/prometheus
      - /docker-data/prometheus/data:/prometheus
      - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    command:
      - "--web.config.file=/etc/prometheus/web.yml"
      - "--config.file=/etc/prometheus/prometheus.yml"
      # Prometheus does not read retention from environment variables;
      # it must be passed as a command-line flag
      - "--storage.tsdb.retention.time=60d"
    privileged: true
    network_mode: bridge
    user: root
    environment:
      - TZ=Asia/Shanghai
    hostname: prometheus

  # Log collection
  promtail:
    image: grafana/promtail:3.6.4-amd64
    container_name: promtail
    restart: always
    volumes:
      # Promtail's positions ("cursor") file, so logs are not re-collected after a restart
      - /docker-data/promtail/positions:/tmp
      # Mount the host directory containing the logs to collect into the Promtail container
      - /opt/logs/:/var/log/
      - /docker-data/promtail/promtail.yml:/etc/promtail/promtail.yml:ro
      - /etc/localtime:/etc/localtime:ro
    # Override Promtail's default config file path
    command: -config.file=/etc/promtail/promtail.yml
    network_mode: bridge
    hostname: promtail
    environment:
      - TZ=Asia/Shanghai

  # Log storage and indexing
  loki:
    image: grafana/loki:3.6.4-amd64
    container_name: loki
    restart: always
    volumes:
      - /docker-data/loki/data:/loki
      - /docker-data/loki/loki.yml:/etc/loki/loki.yml:ro
      - /etc/localtime:/etc/localtime:ro
    command: -config.file=/etc/loki/loki.yml
    ports:
      - "2900:2900"
    network_mode: bridge
    hostname: loki
    environment:
      - TZ=Asia/Shanghai

  # Visualization
  grafana:
    image: grafana/grafana:12.3.2
    container_name: grafana
    restart: always
    privileged: true
    user: root
    ports:
      - "3000:3000"
    volumes:
      - /docker-data/grafana/config:/etc/grafana
      - /docker-data/grafana/data:/var/lib/grafana
      - /etc/localtime:/etc/localtime:ro
    network_mode: bridge
    environment:
      - TZ=Asia/Shanghai
      - HTTP_PROXY=socks5://192.168.1.5:1089
      - HTTPS_PROXY=socks5://192.168.1.5:1089
      # - GF_INSTALL_PLUGINS=https://storage.googleapis.com/integration-artifacts/grafana-lokiexplore-app/release/main/any/grafana-lokiexplore-app-main.zip;grafana-lokiexplore-app
    hostname: grafana
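
The compose file passes --web.config.file=/etc/prometheus/web.yml to Prometheus, but that file's contents are not shown here. A minimal sketch that enables HTTP basic auth (the admin user and the hash placeholder are illustrative, not from the original setup; an empty web.yml is also valid if no auth or TLS is needed):

basic_auth_users:
  # bcrypt hash; one way to generate it: htpasswd -nBC 10 "" | tr -d ':\n'
  admin: "<bcrypt-hash>"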

Access Grafana at IP:3000 and add a Loki data source under Data sources pointing to http://IP:2900. Loki's default port is 3100; it was changed here because of a port conflict.
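
Instead of clicking through the UI, the data source can also be provisioned from a file. Since /docker-data/grafana/config is mounted at /etc/grafana, a sketch placed at /docker-data/grafana/config/provisioning/datasources/loki.yaml (the file name is arbitrary) could look like:

apiVersion: 1
datasources:
  - name: Loki
    type: loki
    access: proxy
    # Loki listens on the non-default port 2900, as configured below
    url: http://192.168.1.5:2900
    isDefault: true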

The configuration files follow.

loki.yml

auth_enabled: false

server:
  http_listen_port: 2900

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

pattern_ingester:
  enabled: true

limits_config:
  # Settings required by Grafana's Log Drilldown
  allow_structured_metadata: true
  volume_enabled: true
  # Raise the ingestion rate limits
  ingestion_rate_mb: 16 # raised from the 4MB default
  ingestion_burst_size_mb: 32 # burst size raised to 32MB
  max_entries_limit_per_query: 10000
  retention_period: 744h # 31-day retention (only enforced by the compactor; see the sketch below)
  # Accept logs up to 365 days old (not recommended to set this high in production)
  reject_old_samples: true
  reject_old_samples_max_age: 8760h # 365 days; the default is 7 days (168h)
  # Accept timestamps up to 12 hours in the future (covers the 8-hour gap between Beijing time and UTC, plus a buffer)
  creation_grace_period: 12h

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

ruler:
  alertmanager_url: http://localhost:9093

# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us performance
# levels for most users. This helps us prioritize features and documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
#analytics:
#  reporting_enabled: false
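
Note that retention_period in limits_config is only enforced when the compactor runs with retention enabled, and the config above has no compactor block. A sketch of what would need to be added (the working directory is an assumption consistent with the /loki volume mount):

compactor:
  working_directory: /loki/compactor
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 2h
  # Loki 3.x requires a delete request store when retention is enabled
  delete_request_store: filesystem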

promtail.yml

Multiple job_name entries may be configured under scrape_configs (a sketch of a second job follows the file below). __path__ refers to the /var/log directory inside the container; the host's log directory was mounted there when the container was started, so Promtail can scan the container path directly.

server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://192.168.1.5:2900/loki/api/v1/push
    tenant_id: fake # with auth_enabled: false, Loki stores data under the built-in "fake" tenant

    # Batch tuning
    batchsize: 1024 # smaller batches (the Promtail default is 1MB)
    batchwait: 1s # maximum wait before flushing a batch
    timeout: 30s # push request timeout, raised from the 10s default

    # Retry configuration
    backoff_config:
      min_period: 100ms
      max_period: 10s
      max_retries: 5

scrape_configs:
- job_name: ycm-logs
  static_configs:
    - targets:
        - localhost
      labels:
        job: ycm-logs
        __path__: /var/log/YCM/**/*.log

  pipeline_stages:
  # Merge multi-line entries: supports the three timestamp formats below
  - multiline:
      firstline: '^((\[[a-f0-9]+\]\s+)?\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(\.\d{3})?\]|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})'
      max_wait_time: 3s
      max_lines: 500

  # ========== Extract the timestamp (three formats) ==========
  # Format 1: [cf5dbb2b329e] [2026-02-04 16:53:11.346] or [2026-02-04 17:00:00.107]
  # Named capture groups: if timestamp1 does not match, fall back to timestamp2
  - regex:
      expression: '^(?:(?P<timestamp1>(?:\[[a-f0-9]+\]\s+)?\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\.\d{3})?\])|(?P<timestamp2>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}))'

  # Merge the two capture results into a single timestamp field
  - template:
      source: timestamp
      template: '{{ if .timestamp1 }}{{ .timestamp1 }}{{ else }}{{ .timestamp2 }}{{ end }}'

  # Strip the brackets and requestId, leaving the bare time
  - regex:
      source: timestamp
      expression: '(?:\[[a-f0-9]+\]\s+)?\[?(?P<clean_time>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\.\d{3})?)\]?'

  - timestamp:
      source: clean_time
      format: '2006-01-02 15:04:05.000'
      fallback_formats:
        - '2006-01-02 15:04:05'
      location: Asia/Shanghai

  # Extract the log level with a regex
  - regex:
      expression: '\[(?P<level>DEBUG|INFO|WARN|ERROR)\]'

  # Default to INFO when no level was captured
  - template:
      source: level
      template: '{{ if .Value }}{{ .Value }}{{ else }}INFO{{ end }}'

  # Promote level to a Loki label
  - labels:
      level:
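
As noted above, scrape_configs can hold multiple jobs. A sketch of a second job (the name and path are hypothetical) for another directory mounted into the container:

- job_name: nginx-logs
  static_configs:
    - targets:
        - localhost
      labels:
        job: nginx-logs
        __path__: /var/log/nginx/*.log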

By matching these timestamp formats in the Promtail pipeline, each entry is ingested with the time printed in the log line itself rather than the scrape time, which makes reviewing logs much more convenient. This only works together with the Loki settings above (reject_old_samples_max_age and creation_grace_period), since Loki would otherwise reject entries whose timestamps fall outside its accepted window. With the level label promoted, logs can then be filtered in Grafana with a query such as {job="ycm-logs", level="ERROR"}.
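
For example, given these (hypothetical) raw lines:

[cf5dbb2b329e] [2026-02-04 16:53:11.346] [ERROR] failed to connect
java.net.ConnectException: Connection refused

the multiline stage merges both lines into one entry (the second line does not match firstline), the timestamp stage sets the entry time to 2026-02-04 16:53:11.346 in Asia/Shanghai, and the level label becomes ERROR.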