Alerts

Inactive (6) Pending (0) Firing (1)

/etc/alerts.d/node_alerting_rules.yml > container_cpu_usage_is_high

POD_CPU_IS_HIGH (2 active)

alert: POD_CPU_IS_HIGH
expr: sum by(container, pod, namespace) (rate(container_cpu_usage_seconds_total{container!=""}[5m])) * 100 > 90
for: 1m
labels:
  severity: critical
annotations:
  description: Container {{ $labels.container }} CPU usage inside POD {{ $labels.pod}} is high in {{ $labels.namespace}}
  summary: POD {{ $labels.pod}} CPU Usage is high in {{ $labels.namespace}}

Labels	State	Active Since	Value
alertname="POD_CPU_IS_HIGH" container="alpha" namespace="redica-sep-final" pod="dgraph-0" severity="critical"	firing	2026-01-30 02:51:39.87619941 +0000 UTC	143.80444667364236
Annotations
description Container alpha CPU usage inside POD dgraph-0 is high in redica-sep-final summary POD dgraph-0 CPU Usage is high in redica-sep-final
alertname="POD_CPU_IS_HIGH" container="alpha" namespace="nov-striim" pod="dgraph-0" severity="critical"	firing	2026-01-30 02:09:39.87619941 +0000 UTC	98.73926583427757
Annotations
description Container alpha CPU usage inside POD dgraph-0 is high in nov-striim summary POD dgraph-0 CPU Usage is high in nov-striim

/etc/alerts.d/node_alerting_rules.yml > container_memory_usage_is_high

POD_MEMORY_USAGE_IS_HIGH (0 active)

alert: POD_MEMORY_USAGE_IS_HIGH
expr: (sum by(container, pod, namespace) (container_memory_working_set_bytes{container!=""}) / sum by(container, pod, namespace) (container_spec_memory_limit_bytes > 0) * 100) > 80
for: 1m
labels:
  severity: critical
annotations:
  description: |-
    Container Memory usage is above 80%
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Container {{ $labels.container }} Memory usage inside POD {{ $labels.pod}} is high in {{ $labels.namespace}}

/etc/alerts.d/node_alerting_rules.yml > node_cpu_greater_than_80

NODE_CPU_IS_HIGH (0 active)

alert: NODE_CPU_IS_HIGH
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
for: 1m
labels:
  severity: critical
annotations:
  description: node {{ $labels.kubernetes_node }} cpu is high
  summary: node cpu is greater than 80 precent

/etc/alerts.d/node_alerting_rules.yml > node_disk_space_too_low

NODE_DISK_SPACE_IS_LOW (0 active)

alert: NODE_DISK_SPACE_IS_LOW
expr: (100 * ((node_filesystem_avail_bytes{fstype!="rootfs",mountpoint="/"}) / (node_filesystem_size_bytes{fstype!="rootfs",mountpoint="/"}))) < 10
for: 1m
labels:
  severity: critical
annotations:
  description: node {{ $labels.node }} disk space is only {{ printf "%0.2f" $value }}% free.
  summary: node disk space remaining is less than 10 percent

/etc/alerts.d/node_alerting_rules.yml > node_down

NODE_DOWN (0 active)

alert: NODE_DOWN
expr: up{component="kubernetes-nodes"} == 0
for: 3m
labels:
  severity: warning
annotations:
  description: '{{ $labels.job }} job failed to scrape instance {{ $labels.instance }} for more than 3 minutes. Node Seems to be down'
  summary: Node {{ $labels.kubernetes_node }} is down

/etc/alerts.d/node_alerting_rules.yml > node_memory_left_lessser_than_10

NODE_MEMORY_LESS_THAN_10% (0 active)

alert: NODE_MEMORY_LESS_THAN_10%
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 1m
labels:
  severity: critical
annotations:
  description: node {{ $labels.kubernetes_node }} memory left is low
  summary: node memory left is lesser than 10 precent

/etc/alerts.d/node_alerting_rules.yml > prometheus-job-down

prometheus-job-down (0 active)

alert: prometheus-job-down
expr: up{job="prometheus"} == 0
for: 1m
labels:
  severity: warning
annotations:
  description: Default Prometheus Job is Down LABELS = {{ $labels }}
  summary: The Default Prometheus Job is Down (job {{ $labels.job}})