diff --git a/templates/templates.cfg.j2 b/templates/templates.cfg.j2 index 7979f4600fba8f1f5dc8b81deb56f300447435e8..30ae6b4616efe8dd4dbad92d473f4db8d097fddc 100644 --- a/templates/templates.cfg.j2 +++ b/templates/templates.cfg.j2 @@ -119,6 +119,12 @@ define servicegroup{ members {{ groups.all | default([]) | map('extract', hostvars, ['inventory_hostname']) | sort | join(',Charge RAM,') }},Charge RAM } +define servicegroup{ + servicegroup_name psi + alias PSI + members {{ groups.full_maintenance | default([]) | map('extract', hostvars, ['inventory_hostname']) | sort | join(',PSI CPU,') }},PSI CPU,{{ groups.full_maintenance | default([]) | map('extract', hostvars, ['inventory_hostname']) | sort | join(',PSI IO,') }},PSI IO,{{ groups.full_maintenance | default([]) | map('extract', hostvars, ['inventory_hostname']) | sort | join(',PSI memory,') }},PSI memory + } + {% endif %} {% if 'full_maintenance' in groups %} define servicegroup{ @@ -200,6 +206,31 @@ define service{ register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! 
} +define service{ + name often-checked-service ; The 'name' of this service template + active_checks_enabled 1 ; Active service checks are enabled + passive_checks_enabled 1 ; Passive service checks are enabled/accepted + parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems) + obsess_over_service 1 ; We should obsess over this service (if necessary) + check_freshness 0 ; Default is to NOT check service 'freshness' + notifications_enabled 1 ; Service notifications are enabled + event_handler_enabled 1 ; Service event handler is enabled + flap_detection_enabled 1 ; Flap detection is enabled + process_perf_data 1 ; Process performance data + retain_status_information 1 ; Retain status information across program restarts + retain_nonstatus_information 1 ; Retain non-status information across program restarts + is_volatile 0 ; The service is not volatile + check_period 24x7 ; The service can be checked at any time of the day + max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state + check_interval 1 ; Check the service every minute under normal conditions + retry_interval 2 ; Re-check the service every two minutes until a hard state can be determined (NOTE(review): longer than check_interval - confirm this is intended) + contact_groups admins ; Notifications get sent out to everyone in the 'admins' group + notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events + notification_interval 120 ; Re-notify about service problems every two hours + notification_period heures-ouvres ; Notifications can be sent out at any time during working hours + register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! + } + # Daily service definition template - This is NOT a real service, just a template! 
diff --git a/templates/vps.cfg.j2 b/templates/vps.cfg.j2 index 6964e2655a9e89028b22d4d6cf5e72408bc9c679..77a21afd1a1de015bb2b41cf96f1196716557eb8 100644 --- a/templates/vps.cfg.j2 +++ b/templates/vps.cfg.j2 @@ -211,6 +211,30 @@ define service{ check_command check_nrpe!check_mem } +# Define a service to check CPU PSI, via the check_nrpe command, on every host of the full_maintenance group. +define service{ + use often-checked-service,graphed-service + host_name {{ groups['full_maintenance'] | map('extract', hostvars, ['inventory_hostname']) | sort | join(',') }} + service_description PSI CPU + check_command check_nrpe!check_psi_cpu + } + +# Define a service to check IO PSI, via the check_nrpe command, on every host of the full_maintenance group. +define service{ + use often-checked-service,graphed-service + host_name {{ groups['full_maintenance'] | map('extract', hostvars, ['inventory_hostname']) | sort | join(',') }} + service_description PSI IO + check_command check_nrpe!check_psi_io + } + +# Define a service to check memory PSI, via the check_nrpe command, on every host of the full_maintenance group. +define service{ + use often-checked-service,graphed-service + host_name {{ groups['full_maintenance'] | map('extract', hostvars, ['inventory_hostname']) | sort | join(',') }} + service_description PSI memory + check_command check_nrpe!check_psi_memory + } + # SSH define service{ use generic-service