# -*- ruby -*-
# vim: set noet nosta sw=4 ts=4 :

require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP )

# Machine load/cpu checks.
#
# Sets current 1, 5, and 15 minute loads under the 'load' attribute,
# and calculates/warns on cpu overutilization.
#
class Arborist::Monitor::SNMP::CPU
	include Arborist::Monitor::SNMP

	extend Configurability, Loggability
	log_to :arborist_snmp

	# OIDS for discovering system load.
	#
	OIDS = {
		load: '1.3.6.1.4.1.2021.10.1.3',
		cpu:  '1.3.6.1.2.1.25.3.3.1.2'
	}.freeze

	# When walking load OIDS, the iterator count matches
	# these labels.
	#
	LOADKEYS = {
		1 => :load1,
		2 => :load5,
		3 => :load15
	}.freeze


	# Global defaults for instances of this monitor
	#
	configurability( 'arborist.snmp.cpu' ) do
		# What utilization percentage qualifies as a warning
		setting :warn_at, default: 80
	end


	### Return the properties used by this monitor.
	###
	### NOTE(review): USED_PROPERTIES is not defined in this file; it is
	### presumably supplied by the included Arborist::Monitor::SNMP module.
	###
	def self::node_properties
		return USED_PROPERTIES
	end


	### Class #run creates a new instance and immediately runs it.
	###
	def self::run( nodes )
		return new.run( nodes )
	end


	### Perform the monitoring checks against +nodes+.
	###
	### Delegates to the inherited #run, handing it a block that collects
	### load information for each +host+ / +snmp+ pair it yields.
	###
	def run( nodes )
		super do |host, snmp|
			self.find_load( host, snmp )
		end
	end


	#########
	protected
	#########

	### Return system CPU data: the result of walking the per-processor
	### OID over the +snmp+ connection.
	###
	def cpu( snmp )
		return snmp.walk( OIDS[:cpu] )
	end


	### Find load data, add additional niceties for reporting.
	###
	### Returns a Hash with these keys:
	###
	### cpu::     a Hash holding the processor +:count+ and, when it could
	###           be calculated, the +:usage+ percentage.
	### load::    a Hash of +:load1+, +:load5+, and +:load15+ Floats
	###           (populated only for non-Windows systems).
	### message:: a human readable summary (present only when usage could
	###           be calculated).
	###
	def format_load( snmp )
		info  = { cpu: {}, load: {} }
		cpus  = self.cpu( snmp )
		count = cpus.size

		info[ :cpu ][ :count ] = count

		# Windows SNMP doesn't have a concept of "load" over time,
		# so we have to just use the current averaged CPU usage.
		#
		# This means that windows machines will very likely want to
		# adjust the default "overutilization" number, considering
		# it's really just how much of the CPU is used at the time of
		# the monitor run, along with liberal use of the Observer "only
		# alert after X events" pragmas.
		#
		if self.system =~ /windows\s+/i
			# Averaging over zero processors would yield NaN.
			if count.zero?
				self.log.warn "No processor entries found, unable to calculate CPU usage."
				return info
			end

			info[ :cpu ][ :usage ] = cpus.values.inject( :+ ).to_f / count
			info[ :message ] = "System is %0.1f%% in use." % [ info[ :cpu ][ :usage ] ]

		# UCDavis stuff is better for alerting only after there has been
		# an extended load event.  Use the 5 minute average to avoid
		# state changes on transient spikes.
		#
		else
			snmp.walk( OIDS[:load] ).each_with_index do |(_, value), idx|
				key = LOADKEYS[ idx + 1 ]
				next unless key
				info[ :load ][ key ] = value.to_f
			end

			load5 = info[ :load ][ :load5 ]

			# Without a 5 minute average or a processor count there is
			# nothing sane to normalize; leave usage unset rather than
			# raising on nil or reporting an Infinity percentage.
			unless load5 && count.positive?
				self.log.warn "Missing load average or processor count, unable to calculate CPU usage."
				return info
			end

			# Percentage above (positive) or below (negative) full
			# utilization of every processor.
			percentage = (( ( load5 / count ) - 1 ) * 100 ).round( 1 )

			if percentage < 0
				info[ :message ] = "System is %0.1f%% idle." % [ percentage.abs ]
			else
				info[ :message ] = "System is %0.1f%% overloaded." % [ percentage ]
			end

			# Usage is always the absolute utilization (100 == every
			# processor fully loaded), so an overloaded host reports more
			# than 100 instead of wrapping back around to a small number
			# that would slip under the warning threshold.
			info[ :cpu ][ :usage ] = percentage + 100
		end

		return info
	end


	### Collect the load information for +host+ from an existing
	### (and open) +snmp+ connection.
	###
	### The warning threshold is the host's own 'warn_at' config value when
	### present, falling back to the class-wide +warn_at+ setting.  The
	### collected info is stored in the results under +host+.
	###
	def find_load( host, snmp )
		info = self.format_load( snmp )

		config  = identifiers[ host ].last || {}
		warn_at = config[ 'warn_at' ] || self.class.warn_at

		# Usage is absent when it couldn't be calculated; treat as zero.
		usage = info.dig( :cpu, :usage ) || 0

		if usage >= warn_at
			info[ :warning ] = "%0.1f utilization exceeds %0.1f percent" % [ usage, warn_at ]
		end

		self.results[ host ] = info
	end

end # class Arborist::Monitor::SNMP::CPU