通過helm部署EFK收集應用日誌,ingress-nginx日誌解析。

guanfengasd發表於2021-07-01

前段時間看了馬哥的k8s新書,最後幾章講了下EFK,嘗試部署了下,很多問題, 這裡改進下,寫個筆記記錄下吧。

準備工作

所有元件都通過helm3部署,先新增幾個倉庫。

# Register the Bitnami chart repository (source of the bitnami/elasticsearch and bitnami/kibana charts installed below).
helm repo add bitnami https://charts.bitnami.com/bitnami
# Register the Fluent chart repository (source of the fluent/fluent-bit chart installed below).
helm repo add fluent https://fluent.github.io/helm-charts

儲存使用基於NFS的StorageClass(下文values檔案中名為managed-nfs-storage),請自行準備。

三個4G記憶體工作節點的k8s叢集。

部署es

其中動態pv, prometheus 根據實際情況修改,沒裝prometheus也沒什麼事。
 

[root@bjzb-lecar-ops-jenkins-master-33 cluster-log]# cat bitnami-elasticsearch-values.yaml
# Kubernetes cluster domain.
clusterDomain: cluster.local
# Name of the Elasticsearch cluster.
name: elasticsearch

# Settings for the master-eligible nodes.
master:
  name: master
  # Number of instances.
  replicas: 2
  # Heap memory size.
  heapSize: 512m
  resources:
    requests:
      memory: 512Mi
      cpu: 200m
    limits: {}
    #   cpu: 1000m
    #   memory: 2048Mi
  # Persistent volume settings.
  persistence:
    # When disabled, an emptyDir volume is used automatically.
    enabled: true
    # Storage class from which the PV is dynamically provisioned.
    storageClass: 'managed-nfs-storage'
    # existingClaim: my-persistent-volume-claim   # use an existing PVC
    # existingVolume: my-persistent-volume    # use an existing PV
    size: 8Gi
    accessModes:
      - ReadWriteOnce
  # Service settings.
  service:
    # Port used for inter-node transport traffic.
    port: 9300
    type: ClusterIP

# Settings for the coordinating-only nodes.
coordinating:
  # Number of instances.
  replicas: 2
  heapSize: 128m
  resources:
    requests:
      memory: 512Mi
      cpu: 250m
  # Service of the coordinating-only nodes; this is also the entry point that
  # receives requests from Elasticsearch clients.
  service:
    port: 9200
    type: ClusterIP
    # nodePort:
    # loadBalancerIP:

# Settings for the data nodes.
data:
  name: data
  replicas: 2
  heapSize: 512m
  # Data nodes are CPU- and IO-intensive; set resource requests and limits
  # with care.
  resources:
    requests:
      memory: 512Mi
      cpu: 250m
    limits: {}
    #   cpu: 100m
    #   memory: 2176Mi
  persistence:
    enabled: true
    storageClass: 'managed-nfs-storage'
    # existingClaim: my-persistent-volume-claim
    # existingVolume: my-persistent-volume
    size: 10Gi
    accessModes:
      - ReadWriteOnce

# Settings for the ingest nodes.
ingest:
  # Disabled by default.
  enabled: false
  name: ingest
  replicas: 2
  heapSize: 128m
  resources:
    requests:
      memory: 512Mi
      cpu: 500m
    limits: {}
    #   cpu: 100m
    #   memory: 384Mi
  service:
    port: 9300
    type: ClusterIP

# Settings for curator.
curator:
  enabled: false
  name: curator
  # Run schedule and related settings.
  cronjob:
    # At 01:00 every day
    schedule: '0 1 * * *'
    jobRestartPolicy: Never
    concurrencyPolicy: ''
    failedJobsHistoryLimit: ''
    successfulJobsHistoryLimit: ''

# Exporter used to expose Elasticsearch metrics.
metrics:
  enabled: true
  name: metrics
  service:
    type: ClusterIP
    # Annotations used for metrics scraping.
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "9114"
  resources:
    limits: {}
    #   cpu: 100m
    #   memory: 128Mi
    requests:
      cpu: 100m
      memory: 128Mi
  # Pod annotations used for metrics scraping. The port is kept identical to
  # the service annotation above (9114); the previous value "8080" pointed
  # annotation-based pod discovery at a different port than the service.
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9114"
  # ServiceMonitor settings.
  serviceMonitor:
    enabled: false
    namespace: monitoring
    interval: 10s
    scrapeTimeout: 10s
# Install Elasticsearch as release "es"; --create-namespace creates the "logging" namespace if it does not exist yet (Helm 3 does not create it implicitly).
helm install es -f bitnami-elasticsearch-values.yaml bitnami/elasticsearch -n logging --create-namespace

哎,這一步各種問題,會遇到映象下載慢,k8s叢集資源不夠(我已經把yml裡申請的資源調的很低了),儲存許可權問題,反正大家注意點就行。

 

部署fluent-bit

[root@bj-k8s-master efk]# cat fluent-fluent-bit-values.yaml 
# Workload kind: DaemonSet or Deployment.
kind: DaemonSet

image:
  pullPolicy: IfNotPresent
  repository: fluent/fluent-bit

# Port 2020 is the same port the [SERVICE] section sets as HTTP_Port.
service:
  port: 2020
  type: ClusterIP
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/port: '2020'
    prometheus.io/path: '/api/v1/metrics/prometheus'

# No requests or limits are set; sample values are left commented out.
resources: {}
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Tolerate the NoSchedule taint keyed node-role.kubernetes.io/master.
tolerations:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master

config:
  # Fluent Bit [SERVICE] section, passed as a literal block scalar.
  # It sets Flush to 3, runs in the foreground (Daemon Off), logs at info
  # level, loads custom_parsers.conf and parsers.conf, and enables the
  # built-in HTTP server on 0.0.0.0:2020.
  # NOTE(review): custom_parsers.conf is presumably rendered by the chart from
  # the customParsers key of this file — confirm against the chart templates.
  service: |
    [SERVICE]
        Flush 3
        Daemon Off
        Log_Level info
        #Log_Level debug
        Parsers_File custom_parsers.conf
        Parsers_File parsers.conf
        HTTP_Server On
        HTTP_Listen 0.0.0.0
        HTTP_Port 2020

  # Three tail inputs, each assigning its own tag so that every application
  # can be routed to a separate Elasticsearch index:
  #   kube.*                      - all container logs except the two below
  #   nginx-demo.*                - the nginx-demo test application
  #   ingress-nginx-controller.*  - the ingress-nginx controller
  # The catch-all input excludes the files claimed by the two dedicated
  # inputs; without Exclude_Path those files match both globs and every line
  # is tailed and indexed twice.
  inputs: |
    [INPUT]
        Name tail
        Path /var/log/containers/*.log
        Exclude_Path /var/log/containers/nginx-demo*.log,/var/log/containers/ingress-nginx-controller*.log
        Parser docker
        Tag kube.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval  10
    [INPUT]
        Name tail
        Path /var/log/containers/nginx-demo*.log
        Parser docker
        Tag nginx-demo.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval  10
    [INPUT]
        Name tail
        Path /var/log/containers/ingress-nginx-controller*.log
        Parser docker
        Tag ingress-nginx-controller.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval  10

  # Kubernetes metadata filters, one per tag family that needs enrichment.
  # Kube_Tag_Prefix must equal the tag prefix produced by the matching tail
  # input (its Tag with "*" expanded to the dotted file path), so the second
  # filter uses "ingress-nginx-controller.var.log.containers." rather than
  # the "kube." prefix. The second filter also applies the ingress-nginx
  # parser (Merge_Parser) to split the access-log line into fields.
  filters: |
    [FILTER]
        Name                kubernetes
        Match               kube.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix     kube.var.log.containers.
        Merge_Log           On
        Keep_Log            Off
        K8S-Logging.Exclude On
        K8S-Logging.Parser On
    [FILTER]
        Name                kubernetes
        Match               ingress-nginx-controller.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix     ingress-nginx-controller.var.log.containers.
        Merge_Log           On
        Merge_Parser        ingress-nginx
        Keep_Log            Off
        K8S-Logging.Exclude On
        K8S-Logging.Parser On


  # Elasticsearch outputs, one per tag family. All three send to the same
  # coordinating-only service host in the "logging" namespace and differ only
  # in Match and Logstash_Prefix (k8s-cluster, nginx-demo,
  # ingress-nginx-controller), which keeps each application's logs under its
  # own index prefix.
  # The Host value must be changed if Elasticsearch is installed in a
  # different namespace or under a different release name.
  outputs: |
    [OUTPUT]
        Name es
        Match kube.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix k8s-cluster
        Type  flb_type
        Replace_Dots On

    [OUTPUT]
        Name es
        Match nginx-demo.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix nginx-demo
        Type  flb_type
        Replace_Dots On
    [OUTPUT]
        Name es
        Match ingress-nginx-controller.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix ingress-nginx-controller
        Type  flb_type
        Replace_Dots On


  # Custom parser definitions.
  #   docker_no_time - JSON parser with a "time" key; not referenced by any
  #                    input or filter in this values file.
  #   ingress-nginx  - regex parser for the ingress-nginx access-log line;
  #                    referenced by Merge_Parser in the ingress-nginx
  #                    kubernetes filter. It captures the whole line as
  #                    "message" plus the individual request/upstream fields,
  #                    and takes the timestamp from the bracketed "time" field.
  customParsers: |
    [PARSER]
        Name docker_no_time
        Format json
        Time_Keep Off
        Time_Key time
        Time_Format %Y-%m-%dT%H:%M:%S.%L

    [PARSER]
        Name        ingress-nginx
        Format      regex
        Regex       ^(?<message>(?<remote>[^ ]*) - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] \[(?<proxy_alternative_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<req_id>[^ ]*).*)$
        Time_Key    time
        Time_Format %d/%b/%Y:%H:%M:%S %z
# Install Fluent Bit as release "fb" into the "logging" namespace using the values file above.
helm install fb fluent/fluent-bit --namespace logging --values fluent-fluent-bit-values.yaml

其中nginx-demo是我自己部署的一個測試應用。注意es的Host地址,如果你的namespace跟我的不一樣,記得修改,我在這裡吃過虧。書裡提供的配置把所有日誌都寫進同一個index,而且不解析ingress-nginx的日誌;fluent-bit我研究了半天,目前大概做到這個程度:依照/var/log/containers下不同的日誌檔名打上不同的tag,這樣每個應用在es裡都有獨立的index。當然,日誌量不大時全部寫進一個index,再通過label欄位查詢某個應用的日誌也行。

 

部署kibana

[root@bj-k8s-master efk]# cat bitnami-kibana-values.yaml 
# Single Kibana replica.
replicaCount: 1

updateStrategy:
  type: RollingUpdate

# Plugins to install, given as download URLs.
plugins:
  - https://github.com/pjhampton/kibana-prometheus-exporter/releases/download/7.8.1/kibana-prometheus-exporter-7.8.1.zip

# Persistent storage for Kibana, provisioned from the named storage class.
persistence:
  enabled: true
  storageClass: 'managed-nfs-storage'
  # existingClaim: your-claim
  size: 10Gi
  accessMode: ReadWriteOnce

# Kibana service settings.
service:
  type: ClusterIP
  port: 5601
  # nodePort:
  externalTrafficPolicy: Cluster
  annotations: {}
  # loadBalancerIP:
  # extraPorts:

# Expose Kibana through the nginx ingress class at kibana.ilinux.io, without TLS.
ingress:
  enabled: true
  hostname: kibana.ilinux.io
  path: /
  annotations:
    kubernetes.io/ingress.class: nginx
  certManager: false
  tls: false
      # tlsHosts:
      #   - www.kibana.local
      #   - kibana.local
      # tlsSecret: kibana.local-tls

configuration:
  server:
    rewriteBasePath: false
    basePath: ''

# Prometheus scraping hints for Kibana.
metrics:
  enabled: true
  service:
    annotations:
      prometheus.io/scrape: "true"
      # Must match the port Kibana is served on. This values file sets
      # service.port to 5601, so the previous value "80" pointed scrapers at
      # a port that is not exposed.
      prometheus.io/port: "5601"
      prometheus.io/path: "_prometheus/metrics"

  serviceMonitor:
    enabled: false
    # namespace: monitoring
    # interval: 10s
    # scrapeTimeout: 10s
    # selector:
    #   prometheus: my-prometheus

# Elasticsearch endpoint Kibana connects to; change the host if Elasticsearch
# runs in a different namespace or under a different release name.
elasticsearch:
  port: 9200
  hosts:
    - es-elasticsearch-coordinating-only.logging.svc.cluster.local.
    # - elasticsearch-2
# Install Kibana as release "kib" into the "logging" namespace using the values file above.
helm install kib bitnami/kibana --namespace logging --values bitnami-kibana-values.yaml

同上,如果你的namespace跟我的不一樣,記得修改es地址。這地方書裡有坑:我把chart pull下來看了下,values.yaml檔案跟書裡的有出入,估計是寫書時用的chart版本不一樣導致的。

 

配置kibana

自己改下hosts解析到ingress地址,訪問kibana

新增匹配的索引

 

 

每個應用日誌獨立的index

 

 

ingress-nginx日誌已解析

 

 

fluentbit官方文件

https://docs.fluentbit.io/

線上正則匹配

https://rubular.com/

 

相關文章