add UPS monitoring capability v0.6.0
author Katelyn Schiesser <kschiesser@laika.com>
Thu, 18 Apr 2019 11:49:09 -0700
changeset 20 00a38d493f2c
parent 19 77084121952b
child 21 58e70e52f817
add UPS monitoring capability
README.md
Rakefile
lib/arborist/monitor/snmp.rb
lib/arborist/monitor/snmp/ups.rb
lib/arborist/monitor/snmp/ups/battery.rb
lib/arborist/snmp.rb
--- a/README.md	Mon Jul 23 09:42:02 2018 -0700
+++ b/README.md	Thu Apr 18 11:49:09 2019 -0700
@@ -23,6 +23,7 @@
  - System CPU utilization
  - Memory and swap usage
  - Running process checks
+ - UPS battery checks
 
 It tries to provide sane defaults, while allowing fine grained settings
 per resource node.  Both Windows and UCD-SNMP systems are supported.
@@ -75,6 +76,10 @@
     memory:
       physical_warn_at: 
       swap_warn_at: 60
+    ups:
+      battery:
+        capacity_warn_at: 60
+        temperature_warn_at: 50
 ```
 
 The `warn_at` keys imply usage capacity as a percentage. ie:  "Warn me
@@ -144,6 +149,20 @@
 current number of running processes.
 
 
+#### UPS
+
+##### Battery
+
+  * **capacity_warn_at**: Set the node to a `warning` state when the battery capacity is at or below this percentage.
+  * **temperature_warn_at**: Set the node to a `warning` state when the battery temperature is at or above this
+                             temperature (in Celsius).
+
+This check warns when the UPS is on battery, and reports the UPS' own
+estimated time remaining.
+
+A warning state is also triggered when the battery status is low,
+depleted, or unknown.
+
 Examples
 --------
 
@@ -254,7 +273,7 @@
 
 ## License
 
-Copyright (c) 2016-2018 Michael Granger and Mahlon E. Smith
+Copyright (c) 2016-2019 Michael Granger and Mahlon E. Smith
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
--- a/Rakefile	Mon Jul 23 09:42:02 2018 -0700
+++ b/Rakefile	Thu Apr 18 11:49:09 2019 -0700
@@ -116,3 +116,5 @@
 lib/arborist/monitor/snmp/process.rb
 lib/arborist/monitor/snmp/memory.rb
 lib/arborist/monitor/snmp/cpu.rb
+lib/arborist/monitor/snmp/ups.rb
+lib/arborist/monitor/snmp/ups/battery.rb
--- a/lib/arborist/monitor/snmp.rb	Mon Jul 23 09:42:02 2018 -0700
+++ b/lib/arborist/monitor/snmp.rb	Thu Apr 18 11:49:09 2019 -0700
@@ -135,4 +135,6 @@
 require 'arborist/monitor/snmp/disk'
 require 'arborist/monitor/snmp/process'
 require 'arborist/monitor/snmp/memory'
+require 'arborist/monitor/snmp/ups'
+require 'arborist/monitor/snmp/ups/battery'
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/arborist/monitor/snmp/ups.rb	Thu Apr 18 11:49:09 2019 -0700
@@ -0,0 +1,10 @@
+# -*- ruby -*-
+# vim: set noet nosta sw=4 ts=4 :
+
+require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP )
+
+# Namespace for UPS check classes (e.g. UPS::Battery); mixes in the SNMP monitor module.
+class Arborist::Monitor::SNMP::UPS
+	include Arborist::Monitor::SNMP
+
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/arborist/monitor/snmp/ups/battery.rb	Thu Apr 18 11:49:09 2019 -0700
@@ -0,0 +1,136 @@
+# -*- ruby -*-
+# vim: set noet nosta sw=4 ts=4 :
+
+require 'arborist/monitor/snmp/ups' unless defined?( Arborist::Monitor::SNMP::UPS )
+
+# Checks for UPS battery health.
+#
+# Checks the available battery percentage, if the UPS is on battery,
+# and the temperature of the battery.
+#
+class Arborist::Monitor::SNMP::UPS::Battery
+	include Arborist::Monitor::SNMP
+
+	extend Configurability, Loggability
+	log_to :arborist_snmp
+
+	# OIDS for discovering ups status.
+	#
+	OIDS = {
+		battery_status:        '.1.3.6.1.2.1.33.1.2.1.0', # 1 - unk, 2 - normal, 3 - low, 4 - depleted
+		seconds_on_battery:    '.1.3.6.1.2.1.33.1.2.2.0',
+		est_minutes_remaining: '.1.3.6.1.2.1.33.1.2.3.0',
+		est_charge_remaining:  '.1.3.6.1.2.1.33.1.2.4.0', # in percent
+		battery_voltage:       '.1.3.6.1.2.1.33.1.2.5.0', # in 0.1v DC
+		battery_current:       '.1.3.6.1.2.1.33.1.2.6.0', # in 0.1a DC
+		battery_temperature:   '.1.3.6.1.2.1.33.1.2.7.0'  # in Celcius
+	}
+
+	# Human-readable translations for battery status OID.
+	#
+	BATTERY_STATUS = {
+		1 => "Battery status is Unknown.",
+		2 => "Battery is OK.",
+		3 => "Battery is Low.",
+		4 => "Battery is Depleted."
+	}
+
+	# Global defaults for instances of this monitor
+	#
+	configurability( 'arborist.snmp.ups.battery' ) do
+		# What battery percentage qualifies as a warning
+		setting :capacity_warn_at, default: 60
+
+		# What battery temperature qualifies as a warning, in C
+		setting :temperature_warn_at, default: 50
+	end
+
+
+	### Return the properties used by this monitor.
+	###
+	def self::node_properties
+		return USED_PROPERTIES
+	end
+
+
+	### Class #run creates a new instance and immediately runs it.
+	###
+	def self::run( nodes )
+		return new.run( nodes )
+	end
+
+
+	### Perform the monitoring checks.
+	###
+	def run( nodes )
+		super do |host, snmp|
+			self.check_battery( host, snmp )
+		end
+	end
+
+
+	#########
+	protected
+	#########
+
+	### Query SNMP and format information into a hash.
+	###
+	def format_battery( snmp )
+		info = {}
+
+		# basic info that's always available
+		info[ :status ] = snmp.get( oid: OIDS[:battery_status] )
+		info[ :capacity ] = snmp.get( oid: OIDS[:est_charge_remaining] )
+		info[ :temperature ] = snmp.get( oid: OIDS[:battery_temperature] )
+		info[ :minutes_remaining ]  = snmp.get( oid: OIDS[:est_minutes_remaining] )
+
+		# don't report voltage if the UPS doesn't
+		voltage = snmp.get( oid: OIDS[:battery_voltage] ) rescue nil
+		info[ :voltage ] = voltage / 10 unless voltage.nil?
+
+		# don't report current if the UPS doesn't
+		current = snmp.get( oid: OIDS[:battery_current] ) rescue nil
+		info[ :current ] = current/10 unless current.nil?
+
+		# see if we are on battery
+		info[ :seconds_on_battery ] = snmp.get( oid: OIDS[:seconds_on_battery] ) rescue 0
+		info[ :in_use ] = ( info[ :seconds_on_battery ] != 0 )
+
+		return { battery: info }
+	end
+
+	### Parse SNMP-provided information and alert based on thresholds.
+	###
+	def check_battery( host, snmp )
+		info = self.format_battery( snmp )
+
+		config    = identifiers[ host ].last || {}
+		cap_warn  = config[ 'capacity_warn_at' ] || self.class.capacity_warn_at
+		temp_warn = config[ 'temperature_warn_at' ] || self.class.temperature_warn_at
+
+		in_use      = info.dig( :battery, :in_use )
+		status      = info.dig( :battery, :status )
+		capacity    = info.dig( :battery, :capacity )
+		temperature = info.dig( :battery, :temperature )
+		warnings	= []
+
+		if in_use
+			mins = info.dig( :battery, :minutes_remaining )
+			warnings << "UPS on battery - %s minute(s) remaning." % [ mins ]
+		end
+
+		warnings << BATTERY_STATUS[ status ] if status != 2
+
+		warnings << "Battery remaining capacity %0.1f%% less than %0.1f percent" %
+			[ capacity, cap_warn ] if capacity <= cap_warn
+
+		warnings << "Battery temperature %dC greater than %dC" %
+			[ temperature, temp_warn ] if temperature >= temp_warn
+
+		info[ :warning ] = warnings.join( "\n" ) unless warnings.empty?
+		self.results[ host ] = info
+
+	end
+
+end # class Arborist::Monitor::UPS::Battery
+
--- a/lib/arborist/snmp.rb	Mon Jul 23 09:42:02 2018 -0700
+++ b/lib/arborist/snmp.rb	Thu Apr 18 11:49:09 2019 -0700
@@ -14,7 +14,7 @@
 
 
 	# Package version
-	VERSION = '0.5.0'
+	VERSION = '0.6.0'
 
 	# Version control revision
 	REVISION = %q$Revision$