上次已经部署完了 Prometheus,这次为它配置并启用告警功能。

首先,配置 Alertmanager 的配置文件。

#当前目录
[root@master231 kube-prometheus-0.11.0]# pwd 
/kubernetes/manifests/add-ons/kube-prometheus-0.11.0
#配置manifests/alertmanager-secret.yaml文件(就是alertmanager配置文件)

cat manifests/alertmanager-secret.yaml
# Secret named alertmanager-main in the monitoring namespace. Its
# alertmanager.yaml key carries the complete Alertmanager configuration:
# global SMTP settings, the routing tree, the receivers and the location of
# the notification templates.
apiVersion: v1
kind: Secret
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
  name: alertmanager-main
  namespace: monitoring
stringData:
  alertmanager.yaml: |-
    "global":
      "resolve_timeout": "5m"
      "smtp_from": "xxx@qq.com"  # sender address
      "smtp_smarthost": "smtp.qq.com:465"
      "smtp_auth_username": "xxx@qq.com"  # SMTP login account
      # SMTP authorization code. Placeholder only: never commit or publish a
      # real credential; replace it with your own code before applying.
      "smtp_auth_password": "xxxxxxxxxxxxxxxx"
      "smtp_require_tls": false
      "smtp_hello": "qq.com"
    "route":
      "group_by":
      - "alertname"
      "group_wait": "5s"
      "group_interval": "5s"
      "repeat_interval": "5m"
      "receiver": "sre_system"
      # Every receiver referenced below must be defined under "receivers"
      # with exactly the same name, and receiver names must be unique.
      "routes":
      - "receiver": "zhiyang"
        "match_re":
          "job": "yinzhengjie_dba_exporter"
        "continue": true
      - "receiver": "wupeixin"
        "match_re":
          "job": "yinzhengjie_devops_exporter"
        "continue": true
      - "receiver": "wanglei"
        "match_re":
          "job": "oldboyedu-etcd-cluster"
        "continue": true
      - "receiver": "sre_system"
        "match_re":
          "job": ".*"
        "continue": true
    "receivers":
    - "name": "zhiyang"
      "email_configs":
      - "to": "xxx@qq.com"  # recipient address
        "send_resolved": true
        "headers":
          "Subject": "[WARN] LINUX报警邮件"
        # "Linux" is the template name declared by {{ define "Linux" }} in linux.tmpl.
        "html": '{{ template "Linux" . }}'
    - "name": "wupeixin"
      "email_configs":
      - "to": "xxx@qq.com"  # recipient address
        "send_resolved": true
        "headers":
          "Subject": "[WARN] LINUX报警邮件"
        "html": '{{ template "Linux" . }}'
    - "name": "wanglei"
      "email_configs":
      - "to": "xxx@qq.com"  # recipient address
        "send_resolved": true
        "headers":
          "Subject": "[WARN] LINUX报警邮件"
        "html": '{{ template "Linux" . }}'
    - "name": "sre_system"
      "email_configs":
      - "to": "xxx@qq.com"  # recipient address
        "send_resolved": true
        "headers":
          "Subject": "[WARN] LINUX报警邮件"
        "html": '{{ template "Linux" . }}'
    # Must point at the directory where the template ConfigMap is mounted in
    # the Alertmanager pod (/linux/softwares/alertmanager/tmpl).
    "templates":
    - "/linux/softwares/alertmanager/tmpl/*.tmpl"
type: Opaque

创建 ConfigMap(cm)资源,用来存放告警邮件的模板文件。模板文件内容如下:

cat linux.tmpl

{{/*
  Alertmanager HTML e-mail template named "Linux".
  Renders a "firing" or "resolved" notification depending on .Status and
  lists, for every alert in .Alerts, its alertname, severity, instance,
  summary/description annotations and start/end times.
*/}}
{{ define "Linux" }}
<!DOCTYPE html>
<html>
<head>
  <title>{{ if eq .Status "firing" }}&#128680; 告警触发{{ else }}&#9989; 告警恢复{{ end }}</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  <style>
    @font-face {
      font-family: "EmojiFont";
      src: local("Apple Color Emoji"),
           local("Segoe UI Emoji"),
           local("Noto Color Emoji");
    }
    :root {
      --color-critical: #ff4444;
      --color-warning: #ffbb33;
      --color-resolved: #00c851;
      --color-info: #33b5e5;
    }
    body {
      font-family: 'Segoe UI', system-ui, sans-serif, "EmojiFont";
      line-height: 1.6;
      color: #333;
      max-width: 800px;
      margin: 20px auto;
      padding: 0 20px;
    }
    .header {
      text-align: center;
      padding: 30px;
      border-radius: 15px;
      margin-bottom: 30px;
      background: {{ if eq .Status "firing" }}#fff0f0{{ else }}#f0fff4{{ end }};
      border: 2px solid {{ if eq .Status "firing" }}var(--color-critical){{ else }}var(--color-resolved){{ end }};
    }
    .status-badge {
      padding: 8px 16px;
      border-radius: 20px;
      font-weight: bold;
      display: inline-block;
    }
    .alert-table {
      width: 100%;
      border-collapse: separate;
      border-spacing: 0;
      background: white;
      border-radius: 10px;
      overflow: hidden;
      box-shadow: 0 2px 6px rgba(0,0,0,0.1);
      margin: 20px 0;
    }
    .alert-table th {
      background: #f8f9fa;
      padding: 16px;
      text-align: left;
      width: 130px;
      border-right: 2px solid #e9ecef;
    }
    .alert-table td {
      padding: 16px;
      border-bottom: 1px solid #e9ecef;
    }
    .timeline {
      display: flex;
      justify-content: space-between;
      margin: 15px 0;
    }
    .timeline-item {
      flex: 1;
      text-align: center;
      padding: 10px;
      background: #f8f9fa;
      border-radius: 8px;
      margin: 0 5px;
    }
    .alert-image {
      text-align: center;
      margin: 30px 0;
    }
    .alert-image img {
      width: {{ if eq .Status "firing" }}140px{{ else }}100px{{ end }};
      opacity: 0.9;
      transition: all 0.3s ease;
    }
    .emoji {
      font-family: "EmojiFont", sans-serif;
      font-size: 1.3em;
    }
    .severity-critical { color: var(--color-critical); }
    .severity-warning { color: var(--color-warning); }
  </style>
</head>
<body>
  <div class="header">
    <h1>
      {{ if eq .Status "firing" }}
      <span class="emoji">&#128680;</span> 告警触发通知
      {{ else }}
      <span class="emoji">&#9989;</span> 告警恢复通知
      {{ end }}
    </h1>
  </div>

  {{ if eq .Status "firing" }}
  <!-- 告警触发内容 -->
  <table class="alert-table">
    <tr>
      <th><span class="emoji">&#128683;</span> 告警名称</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128227;</span> {{ .Labels.alertname }}{{ end }}</td>
    </tr>
    <tr>
      <th><span class="emoji">&#9888;&#65039;</span> 严重等级</th>
      <td class="severity-{{ range .Alerts }}{{ .Labels.severity }}{{ end }}">
        {{ range .Alerts }}<span class="emoji">&#9210;</span> {{ .Labels.severity | toUpper }}{{ end }}
      </td>
    </tr>
    <tr>
      <th><span class="emoji">&#128346;</span> 触发时间</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128337;</span> {{ .StartsAt.Format "2006-01-02 15:04:05" }}{{ end }}</td>
    </tr>
  </table>
  {{ else }}
  <!-- 告警恢复内容 -->
  <table class="alert-table">
    <tr>
      <th><span class="emoji">&#128227;</span> 恢复告警</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128272;</span> {{ .Labels.alertname }}{{ end }}</td>
    </tr>
    <tr>
      <th><span class="emoji">&#9203;</span> 持续时间</th>
      <td>
        {{ range .Alerts }}
          {{ .StartsAt.Format "15:04:05" }} - {{ .EndsAt.Format "15:04:05" }}
          {{/* Sub yields a time.Duration; convert it to minutes (float64) before applying the %.0f verb. */}}
          ({{ (.EndsAt.Sub .StartsAt).Minutes | printf "%.0f" }} 分钟)
        {{ end }}
      </td>
    </tr>
    <tr>
      <th><span class="emoji">&#9989;</span> 恢复时间</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128338;</span> {{ .EndsAt.Format "2006-01-02 15:04:05" }}{{ end }}</td>
    </tr>
  </table>
  {{ end }}

  <!-- 公共信息部分 -->
  <table class="alert-table">
    <tr>
      <th><span class="emoji">&#128187;&#65039;</span> 实例信息</th>
      <td>{{ range .Alerts }}<span class="emoji">&#127991;</span> {{ .Labels.instance }}{{ end }}</td>
    </tr>
    <tr>
      <th><span class="emoji">&#128221;</span> 告警详情</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128204;</span> {{ .Annotations.summary }}{{ end }}</td>
    </tr>
    <tr>
      <th><span class="emoji">&#128196;</span> 详细描述</th>
      <td>{{ range .Alerts }}<span class="emoji">&#128209;</span> {{ .Annotations.description }}{{ end }}</td>
    </tr>
  </table>

  <div class="alert-image">
    {{ if eq .Status "firing" }}
    <img src="https://img95.699pic.com/element/40114/9548.png_860.png" alt="告警图标">
    {{ else }}
    <img src="https://tse2-mm.cn.bing.net/th/id/OIP-C.n7AyZv_wWXqFCc1mtlGhFgHaHa?rs=1&pid=ImgDetMain" alt="恢复图标">
    {{ end }}
  </div>

  <div class="timeline">
    <div class="timeline-item">
      <div class="emoji">&#128678; 当前状态</div>
      {{ range .Alerts }}
      <strong>{{ if eq .Status "firing" }}<span class="emoji">&#128293;</span> FIRING{{ else }}<span class="emoji">&#9989;</span> RESOLVED{{ end }}</strong>
      {{ end }}
    </div>
    <div class="timeline-item">
      <div class="emoji">&#128204; 触发次数</div>
      <strong>{{ len .Alerts }} 次</strong>
    </div>
  </div>
</body>
</html>
{{ end }}

启用

# Load the notification template into the cm-alertmanager ConfigMap.
# The key must be linux.tmpl: the Alertmanager resource projects exactly that
# key into the pod, and linux.tmpl is the template file created above.
kubectl create configmap cm-alertmanager -n monitoring --from-file=linux.tmpl=linux.tmpl

在 manifests/alertmanager-alertmanager.yaml 中引用上面创建的 Secret 资源(Alertmanager 配置文件)和 ConfigMap 资源(模板文件):

cat manifests/alertmanager-alertmanager.yaml 
# Alertmanager custom resource "main" in the monitoring namespace.
# It reads its configuration from the alertmanager-main Secret and mounts the
# linux.tmpl key of the cm-alertmanager ConfigMap under
# /linux/softwares/alertmanager/tmpl.
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: main
  namespace: monitoring
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.24.0
spec:
  image: quay.io/prometheus/alertmanager:v0.24.0
  version: 0.24.0
  replicas: 3
  serviceAccountName: alertmanager-main
  # Reference to the Secret that holds the Alertmanager configuration.
  configSecret: alertmanager-main
  # Template files: ConfigMap key linux.tmpl is projected as file linux.tmpl.
  volumes:
  - name: data
    configMap:
      name: cm-alertmanager
      items:
      - key: linux.tmpl
        path: linux.tmpl
  volumeMounts:
  - name: data
    mountPath: /linux/softwares/alertmanager/tmpl
  nodeSelector:
    kubernetes.io/os: linux
  podMetadata:
    labels:
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/instance: main
      app.kubernetes.io/name: alertmanager
      app.kubernetes.io/part-of: kube-prometheus
      app.kubernetes.io/version: 0.24.0
  resources:
    requests:
      cpu: 4m
      memory: 100Mi
    limits:
      cpu: 100m
      memory: 100Mi
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
    fsGroup: 2000

配置告警规则。这里使用之前自定义的 Go API(goapi)所暴露的指标来触发告警:

cat manifests/prometheus-prometheusRule.yaml
# PrometheusRule with a single alert on the yinzhengjie_application_login_api
# metric: fires with severity "warning" once the value has stayed above 30
# for 2 minutes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.36.1
    prometheus: k8s
    role: alert-rules
  name: prometheus-k8s-prometheus-rules
  namespace: monitoring
spec:
  groups:
  - name: prometheus
    rules:
    # Add your own rules here.
    - alert: ApplicationLoginAPIHigh
      expr: yinzhengjie_application_login_api > 30
      for: 2m
      labels:
        severity: warning
        team: application
      annotations:
        summary: "应用登录API调用次数过高"
        # The threshold quoted here must stay in sync with the one in expr.
        description: "应用登录API调用次数当前值为 {{ $value }},超过阈值 30"

应用所有资源

# Apply everything under manifests/ in one go.
kubectl apply -f manifests/
# Fallback when the command above fails: apply manifests/setup server-side,
# wait until all CustomResourceDefinitions are Established, then apply again.
kubectl apply --server-side -f manifests/setup
kubectl wait \
  --for=condition=Established \
  --all CustomResourceDefinition \
  --namespace=monitoring
kubectl apply -f manifests/

展示效果

Q.E.D.