#!/bin/bash
#
# Nagios-style check for Docker containers.
#
# Lists the containers whose name matches --filter, reports each one as running
# or not running, and compares the CPU and memory usage of the running ones
# with warning/critical thresholds. The CPU percentage reported by
# `docker stats` is divided by the number of processors before it is compared
# and printed.
#
# Output: a single line "<status messages>|<perfdata>". The status messages are
# joined with a literal two-character "\n" sequence (echo is used without -e).
#
# Exit codes:
#   0  everything is OK
#   1  at least one WARNING value (also used for command-line errors)
#   2  at least one CRITICAL value or one container that is not running
#   3  docker could not be queried

# Default threshold values.
cpu_threshold_warning=25
cpu_threshold_critical=50
mem_threshold_warning=50
mem_threshold_critical=75
filter=''

# Prints the usage text on stdout.
print_help() {
  echo "usage: ${0} [OPTIONS]"
  echo "options:"
  echo -e " -c, --cpu <warning:critical>\t\tthreshold values for cpu usage"
  echo -e " -f, --filter <regex>\t\t\tcontainers to check"
  echo -e " -h, --help\t\t\t\tprint this help"
  echo -e " -m, --memory <warning:critical>\tthreshold values for memory usage"
}

# Aborts with a parse error unless the option in $1 is followed by a value.
# Arguments: the remaining command-line arguments ("$@").
# Without this guard `shift 2` fails when only the option itself is left,
# shifts nothing, and the parsing loop never terminates.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Error while parsing argument '$1': a value is required."
    exit 1
  fi
}

# Prints one field of a "warning:critical" string as a number.
# Arguments: $1 - threshold string, $2 - field index (1 or 2).
# A missing or non-numeric field is printed as 0 (awk numeric coercion).
threshold_field() {
  /usr/bin/awk -F ':' -v field="$2" '{print $field + 0}' <<< "$1"
}

# Prints "UNKNOWN: <message>" and exits with status 3.
# Arguments: $1 - message.
unknown() {
  echo "UNKNOWN: $1"
  exit 3
}

# Prints the names of the containers matching ${filter}, one per line.
# Arguments: extra options forwarded to `docker ps` (e.g. --all).
# --quiet is intentionally not passed: it is redundant with a names-only
# format. NOTE(review): newer docker CLI releases appear to ignore the custom
# format when both are given - confirm.
list_containers() {
  sudo /usr/bin/docker ps --format '{{.Names}}' --filter "name=${filter}" \
    --no-trunc "$@"
}

# Raises the global exit_code according to a status keyword; never lowers it.
# Arguments: $1 - OK, WARNING, CRITICAL or NOK.
escalate() {
  case "$1" in
    WARNING)
      if [[ $exit_code -lt 1 ]]; then
        exit_code=1
      fi
      ;;
    CRITICAL|NOK)
      exit_code=2
      ;;
  esac
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -c|--cpu)
      require_value "$@"
      cpu_threshold_warning=$(threshold_field "$2" 1)
      cpu_threshold_critical=$(threshold_field "$2" 2)
      shift 2
      ;;
    -f|--filter)
      require_value "$@"
      filter=$2
      shift 2
      ;;
    -m|--memory)
      require_value "$@"
      mem_threshold_warning=$(threshold_field "$2" 1)
      mem_threshold_critical=$(threshold_field "$2" 2)
      shift 2
      ;;
    --help|-h)
      print_help
      exit 0
      ;;
    *)
      echo "Error while parsing argument '$1'."
      exit 1
      ;;
  esac
done

# Gathering facts.
# A docker failure is reported as UNKNOWN instead of an empty "all fine" result.
all_containers=$(list_containers --all) \
  || unknown 'unable to list docker containers'
running_containers=$(list_containers) \
  || unknown 'unable to list running docker containers'

running_names=()
while IFS= read -r container; do
  if [[ -n "$container" ]]; then
    running_names+=("$container")
  fi
done <<< "$running_containers"

# `docker stats` takes container names as arguments; its documented options do
# not include --filter. With no names it would report every running container,
# so it is skipped when nothing matched.
running_containers_stats=''
if [[ ${#running_names[@]} -gt 0 ]]; then
  running_containers_stats=$(sudo /usr/bin/docker stats \
    --format '{{.Name}}:{{.CPUPerc}}:{{.MemPerc}}' --no-stream \
    "${running_names[@]}") \
    || unknown 'unable to read docker container statistics'
fi

# comm requires both inputs sorted in the same collation order; `docker ps`
# output is not sorted by name.
not_running_containers=$(
  LC_ALL=C comm -23 \
    <(LC_ALL=C sort <<< "$all_containers") \
    <(LC_ALL=C sort <<< "$running_containers")
)

# The processor count does not change between containers: query it once.
proc_number=$(nproc --all)
if [[ ! "$proc_number" =~ ^[1-9][0-9]*$ ]]; then
  # Avoid a division by zero below if nproc is unavailable.
  proc_number=1
fi

# Format data.
output=''
perf=''
exit_code=0
cpu_scale=";${cpu_threshold_warning};${cpu_threshold_critical};0;100"
mem_scale=";${mem_threshold_warning};${mem_threshold_critical};0;100"

# Running containers.
for container in "${running_names[@]}"; do
  output+="OK: ${container} status is running\n"
done

# Not running containers.
while IFS= read -r container; do
  if [[ -z "$container" ]]; then
    continue
  fi
  output+="NOK: ${container} status is not running\n"
  escalate 'NOK'
done <<< "$not_running_containers"

# CPU and memory statistics for running containers.
# A single awk pass turns every "name:cpu%:mem%" line into
# "name cpu mem cpu_status mem_status". Values are compared numerically, so
# thresholds do not have to be integers.
while read -r name cpu mem cpu_status mem_status; do
  if [[ -z "$name" ]]; then
    continue
  fi
  output+="${cpu_status}: ${name} cpu is ${cpu}%\n"
  output+="${mem_status}: ${name} memory is ${mem}%\n"
  perf+="${name}_cpu=${cpu}%${cpu_scale} "
  perf+="${name}_mem=${mem}%${mem_scale} "
  escalate "$cpu_status"
  escalate "$mem_status"
done < <(
  /usr/bin/awk -F ':' \
    -v procs="$proc_number" \
    -v cpu_warning="$cpu_threshold_warning" \
    -v cpu_critical="$cpu_threshold_critical" \
    -v mem_warning="$mem_threshold_warning" \
    -v mem_critical="$mem_threshold_critical" '
    function level(value, warning, critical) {
      if (value >= critical) return "CRITICAL"
      if (value >= warning) return "WARNING"
      return "OK"
    }
    NF >= 3 {
      cpu = sprintf("%.2f", ($2 + 0) / procs)
      mem = $3 + 0
      print $1, cpu, mem, \
        level(cpu + 0, cpu_warning + 0, cpu_critical + 0), \
        level(mem, mem_warning + 0, mem_critical + 0)
    }' <<< "$running_containers_stats"
)

# Plugin output: drop the trailing literal "\n" and the trailing space.
output=${output%'\n'}
perf=${perf% }
echo "${output}|${perf}"
exit "$exit_code"