# HG changeset patch # User Mahlon E. Smith # Date 1473287022 25200 # Node ID e6eb11b1e00de25dfb08c1776e3df8ce370af16f # Parent d46ca2b52efe7fac5d2090eebb6190a6f2562c24 Refactor. Move all SNMP "sections" to their own classes. diff -r d46ca2b52efe -r e6eb11b1e00d README.md --- a/README.md Tue Sep 06 10:57:38 2016 -0700 +++ b/README.md Wed Sep 07 15:23:42 2016 -0700 @@ -39,8 +39,9 @@ Arborist::Host( 'example' ) do description "Example host" address '10.6.0.169' - resource 'load', description: 'machine load' do - config load_error_at: 5 + resource 'load', description: 'machine load' + resource 'disk' do + config include: [ '/', '/mnt' ] end end @@ -56,15 +57,26 @@ include_down true use :addresses - snmp = Arborist::Monitor::SNMP.new( mode: 'load', load_error_at: 10 ) + snmp = Arborist::Monitor::SNMP::Load.new( error_at: 10 ) exec( snmp ) end + Arborist::Monitor 'mount capacity check' do + every 30.seconds + match type: 'resource', category: 'disk' + include_down true + use :addresses, :config + + exec( Arborist::Monitor::SNMP::Disk ) + end + + Please see the rdoc for all the monitor types and error_at options. Per node "config" vars override global defaults when instantiating the monitor. + ## License Copyright (c) 2016, Michael Granger and Mahlon E. 
Smith diff -r d46ca2b52efe -r e6eb11b1e00d Rakefile --- a/Rakefile Tue Sep 06 10:57:38 2016 -0700 +++ b/Rakefile Wed Sep 07 15:23:42 2016 -0700 @@ -110,4 +110,8 @@ ######################################################################## __END__ lib/arborist/monitor/snmp.rb - +lib/arborist/monitor/snmp/swap.rb +lib/arborist/monitor/snmp/disk.rb +lib/arborist/monitor/snmp/process.rb +lib/arborist/monitor/snmp/memory.rb +lib/arborist/monitor/snmp/load.rb diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp.rb --- a/lib/arborist/monitor/snmp.rb Tue Sep 06 10:57:38 2016 -0700 +++ b/lib/arborist/monitor/snmp.rb Wed Sep 07 15:23:42 2016 -0700 @@ -16,225 +16,69 @@ using Arborist::TimeRefinements -# SNMP specific monitors and monitor logic. +# Shared SNMP monitor logic. # -class Arborist::Monitor::SNMP +module Arborist::Monitor::SNMP extend Loggability log_to :arborist # The version of this library. - VERSION = '0.2.1' - - # "Modes" that this monitor understands. - VALID_MODES = %i[ disk load memory swap process ] - - # The OID that returns the system environment. - IDENTIFICATION_OID = '1.3.6.1.2.1.1.1.0' - - # For net-snmp systems, ignore mount types that match - # this regular expression. This includes null/union mounts - # and NFS, currently. - STORAGE_IGNORE = %r{25.3.9.(?:2|14)$} - - # The OID that matches a local windows hard disk. Anything else - # is a remote (SMB) mount. - WINDOWS_DEVICE = '1.3.6.1.2.1.25.2.1.4' - - # OIDS required to pull disk information from net-snmp. - # - STORAGE_NET_SNMP = [ - '1.3.6.1.4.1.2021.9.1.2', # paths - '1.3.6.1.2.1.25.3.8.1.4', # types - '1.3.6.1.4.1.2021.9.1.9' # percents - ] - - # OIDS required to pull disk information from Windows. - # - STORAGE_WINDOWS = [ - '1.3.6.1.2.1.25.2.3.1.2', # types - '1.3.6.1.2.1.25.2.3.1.3', # paths - '1.3.6.1.2.1.25.2.3.1.5', # totalsize - '1.3.6.1.2.1.25.2.3.1.6' # usedsize - ] - - # OIDS for discovering memory usage. 
- # - MEMORY = { - swap_total: '1.3.6.1.4.1.2021.4.3.0', - swap_avail: '1.3.6.1.4.1.2021.4.4.0', - mem_avail: '1.3.6.1.4.1.2021.4.6.0' - } - - # OIDS for discovering system load. - # - LOAD = { - five_min: '1.3.6.1.4.1.2021.10.1.3.2' - } - - # OIDS for discovering running processes. - # - PROCESS = { - list: '1.3.6.1.2.1.25.4.2.1.4', - args: '1.3.6.1.2.1.25.4.2.1.5' - } - + VERSION = '0.3.0' # Global defaults for instances of this monitor # DEFAULT_OPTIONS = { - timeout: 2, - retries: 1, - community: 'public', - port: 161, - storage_error_at: 95, # in percent full - storage_include: [], # if non-empty, only these paths are included in checks - storage_exclude: [], # paths to exclude from checks - load_error_at: 7, - swap_error_at: 25, # in percent remaining - mem_error_at: 51200, # in kilobytes - processes: [] # list of procs to match + timeout: 2, + retries: 1, + community: 'public', + port: 161 } - ### This monitor is complex enough to require creating an instance from the caller. - ### Provide a friendlier error message the class was provided to exec() directly. - ### - def self::run( nodes ) - self.log.error "Please use %s via an instance." % [ self.name ] - return {} - end - - - ### Create a new instance of this monitor. - ### - def initialize( options=DEFAULT_OPTIONS ) - options = DEFAULT_OPTIONS.merge( options || {} ) - %i[ storage_include storage_exclude processes ].each do |opt| - options[ opt ] = Array( options[opt] ) - end - - options.each do |name, value| - self.public_send( "#{name.to_s}=", value ) - end - end - - - # The mode (section) that this SMMP instance should check. - # Must be a +VALID_MODES+ mode. - attr_reader :mode - - # Mapping of node addresses back to the node identifier. - attr_reader :identifiers - - # The results from the SNMP daemons, keyed by address. - attr_reader :results - - # A timeout in seconds if the SNMP server isn't responding. - attr_accessor :timeout - - # Retry with the timeout this many times. Defaults to 1. 
- attr_accessor :retries - - # The SNMP UDP port, if running on non default. - attr_accessor :port - - # The community string to connect with. - attr_accessor :community - - # Set an error if mount points are above this percentage. - attr_accessor :storage_error_at - - # Only check these specific mount points. - attr_accessor :storage_include - - # Exclude these mount points (array of paths) from checks. - attr_accessor :storage_exclude - - # Set an error if the 5 minute load average exceeds this. - attr_accessor :load_error_at - - # Set an error if used swap exceeds this percentage. - attr_accessor :swap_error_at - - # Set an error if memory used is below this many kilobytes. - attr_accessor :mem_error_at - - # Set an error if processes in this array aren't running. - attr_accessor :processes - - - ### Set the SNMP mode, after validation. - ### - def mode=( mode ) - unless VALID_MODES.include?( mode.to_sym ) - self.log.error "Unknown SNMP mode: %s" % [ mode ] - return nil - end - - @mode = mode.to_sym - @results = {} - end - - - ### Perform the monitoring checks. + ### Connect to the SNMP daemon of each node and yield the open connection and host address. ### def run( nodes ) self.log.debug "Got nodes to SNMP check: %p" % [ nodes ] - - # Sanity check. - # - unless self.mode - self.log.error "You must set the 'mode' for the SNMP monitor. (%s)" % [ VALID_MODES.join( ', ' ) ] - return {} - end + defaults = Arborist::Monitor::SNMP::DEFAULT_OPTIONS # Create mapping of addresses back to node identifiers, - # and retain any custom configs per node. + # and retain any custom (overrides) config per node. # @identifiers = {} + @results = {} + nodes.each_pair do |(identifier, props)| next unless props.key?( 'addresses' ) address = props[ 'addresses' ].first - self.identifiers[ address ] = [ identifier, props['config'] ] + @identifiers[ address ] = [ identifier, props['config'] ] end # Perform the work! 
# threads = [] - self.identifiers.keys.each do |host| + @identifiers.keys.each do |host| thr = Thread.new do Thread.current.abort_on_exception = true - config = self.identifiers[host].last || {} + config = @identifiers[host].last || {} opts = { host: host, - port: config[ 'port' ] || self.port, - community: config[ 'community' ] || self.community, - timeout: config[ 'timeout' ] || self.timeout, - retries: config[ 'retries' ] || self.retries + port: config[ 'port' ] || defaults[ :port ], + community: config[ 'community' ] || defaults[ :community ], + timeout: config[ 'timeout' ] || defaults[ :timeout ], + retries: config[ 'retries' ] || defaults[ :retries ] } begin SNMP::Manager.open( opts ) do |snmp| - case self.mode - when :disk - self.gather_disks( snmp, host ) - when :load - self.gather_load( snmp, host ) - when :memory - self.gather_free_memory( snmp, host ) - when :swap - self.gather_swap( snmp, host ) - when :process - self.gather_processlist( snmp, host ) - end + yield( snmp, host ) end rescue SNMP::RequestTimeout - self.results[ host ] = { + @results[ host ] = { error: "Host is not responding to SNMP requests." } rescue StandardError => err - self.results[ host ] = { + @results[ host ] = { error: "Network is not accessible. (%s: %s)" % [ err.class.name, err.message ] } end @@ -247,184 +91,23 @@ # Map everything back to identifier -> attribute(s), and send to the manager. # - reply = self.results.each_with_object({}) do |(address, results), hash| - identifier = self.identifiers[ address ] or next + reply = @results.each_with_object({}) do |(address, results), hash| + identifier = @identifiers[ address ] or next hash[ identifier.first ] = results end self.log.debug "Sending to manager: %p" % [ reply ] return reply - end - - ######### - protected - ######### - - ### Collect the load information for +host+ from an existing - ### (and open) +snmp+ connection. 
- ### - def gather_load( snmp, host ) - self.log.debug "Getting system load for: %s" % [ host ] - load5 = snmp.get( SNMP::ObjectId.new( LOAD[:five_min] ) ).varbind_list.first.value.to_f - self.log.debug " Load on %s: %0.2f" % [ host, load5 ] - - config = self.identifiers[ host ].last || {} - error_at = config[ 'load_error_at' ] || self.load_error_at - if load5 >= error_at - self.results[ host ] = { - error: "Load has exceeded %0.2f over a 5 minute average" % [ error_at ], - load5: load5 - } - else - self.results[ host ] = { load5: load5 } - end - end - - - ### Collect available memory information for +host+ from an existing - ### (and open) +snmp+ connection. - ### - def gather_free_memory( snmp, host ) - self.log.debug "Getting available memory for: %s" % [ host ] - mem_avail = snmp.get( SNMP::ObjectId.new( MEMORY[:mem_avail] ) ).varbind_list.first.value.to_f - self.log.debug " Available memory on %s: %0.2f" % [ host, mem_avail ] - - config = self.identifiers[ host ].last || {} - error_at = config['mem_error_at'] || self.mem_error_at - if mem_avail <= error_at - self.results[ host ] = { - error: "Available memory is under %0.1fMB" % [ error_at.to_f / 1024 ], - available_memory: mem_avail - } - else - self.results[ host ] = { available_memory: mem_avail } - end - end - - - ### Collect used swap information for +host+ from an existing (and - ### open) +snmp+ connection. 
- ### - def gather_swap( snmp, host ) - self.log.debug "Getting used swap for: %s" % [ host ] - - swap_total = snmp.get( SNMP::ObjectId.new(MEMORY[:swap_total]) ).varbind_list.first.value.to_f - swap_avail = snmp.get( SNMP::ObjectId.new(MEMORY[:swap_avail]) ).varbind_list.first.value.to_f - swap_used = ( "%0.2f" % ((swap_avail / swap_total.to_f * 100 ) - 100).abs ).to_f - self.log.debug " Swap in use on %s: %0.2f" % [ host, swap_used ] - - config = self.identifiers[ host ].last || {} - if swap_used >= ( config['swap_error_at'] || self.swap_error_at ) - self.results[ host ] = { - error: "%0.2f%% swap in use" % [ swap_used ], - swap_used: swap_used - } - else - self.results[ host ] = { swap_used: swap_used } - end + ensure + @identifiers = {} + @results = {} end - - ### Collect mount point usage for +host+ from an existing (and open) - #### +snmp+ connection. - ### - def gather_disks( snmp, host ) - self.log.debug "Getting disk information for %s" % [ host ] - errors = [] - results = {} - mounts = self.get_disk_percentages( snmp ) - config = self.identifiers[ host ].last || {} - - includes = config[ 'storage_include' ] || self.storage_include - excludes = config[ 'storage_exclude' ] || self.storage_exclude - - mounts.each_pair do |path, percentage| - next if excludes.include?( path ) - next if ! includes.empty? && ! includes.include?( path ) - if percentage >= ( config[ 'storage_error_at' ] || self.storage_error_at ) - errors << "%s at %d%% capacity" % [ path, percentage ] - end - end - - results[ :mounts ] = mounts - results[ :error ] = errors.join( ', ' ) unless errors.empty? - - self.results[ host ] = results - end - - - ### Collect running processes on +host+ from an existing (and open) - #### +snmp+ connection. 
- ### - def gather_processlist( snmp, host ) - self.log.debug "Getting running process list for %s" % [ host ] - config = self.identifiers[ host ].last || {} - procs = [] - errors = [] - - snmp.walk([ PROCESS[:list], PROCESS[:args] ]) do |list| - process = list[0].value.to_s - args = list[1].value.to_s - procs << "%s %s " % [ process, args ] - end +end # Arborist::Monitor::SNMP - # Check against the running stuff, setting an error if - # one isn't found. - # - Array( config['processes'] || self.processes ).each do |process| - process_r = Regexp.new( process ) - found = procs.find{|p| p.match(process_r) } - errors << "Process '%s' is not running" % [ process, host ] unless found - end - - self.log.debug " %d running processes" % [ procs.length ] - if errors.empty? - self.results[ host ] = {} - else - self.results[ host ] = { error: errors.join( ', ' ) } - end - end - - - ### Given a SNMP object, return a hash of: - ### - ### device path => percentage full - ### - def get_disk_percentages( snmp ) - - # Does this look like a windows system, or a net-snmp based one? - system_type = snmp.get( SNMP::ObjectId.new( IDENTIFICATION_OID ) ).varbind_list.first.value - disks = {} +require 'arborist/monitor/snmp/disk' +require 'arborist/monitor/snmp/load' +require 'arborist/monitor/snmp/memory' +require 'arborist/monitor/snmp/process' +require 'arborist/monitor/snmp/swap' - # Windows has it's own MIBs. - # - if system_type =~ /windows/i - snmp.walk( STORAGE_WINDOWS ) do |list| - next unless list[0].value.to_s == WINDOWS_DEVICE - disks[ list[1].value.to_s ] = ( list[3].value.to_f / list[2].value.to_f ) * 100 - end - return disks - end - - # Everything else. 
- # - snmp.walk( STORAGE_NET_SNMP ) do |list| - mount = list[0].value.to_s - next if mount == 'noSuchInstance' - - next if list[2].value.to_s == 'noSuchInstance' - used = list[2].value.to_i - - unless list[1].value.to_s == 'noSuchInstance' - typeoid = list[1].value.join('.').to_s - next if typeoid =~ STORAGE_IGNORE - end - next if mount =~ /\/(?:dev|proc)$/ - - disks[ mount ] = used - end - - return disks - end -end # class Arborist::Monitor::SNMP - diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp/disk.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/arborist/monitor/snmp/disk.rb Wed Sep 07 15:23:42 2016 -0700 @@ -0,0 +1,167 @@ +# -*- ruby -*- +# vim: set noet nosta sw=4 ts=4 : + +require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) + +# SNMP Disk capacity checks. +# Returns all mounts with their current usage percentage in a "mount" attribute. +# +class Arborist::Monitor::SNMP::Disk + include Arborist::Monitor::SNMP + + extend Loggability + log_to :arborist + + # The OID that returns the system environment. + IDENTIFICATION_OID = '1.3.6.1.2.1.1.1.0' + + # For net-snmp systems, ignore mount types that match + # this regular expression. This includes null/union mounts + # and NFS, currently. + STORAGE_IGNORE = %r{25.3.9.(?:2|14)$} + + # The OID that matches a local windows hard disk. Anything else + # is a remote (SMB) mount. + WINDOWS_DEVICE = '1.3.6.1.2.1.25.2.1.4' + + # OIDS required to pull disk information from net-snmp. + # + STORAGE_NET_SNMP = [ + '1.3.6.1.4.1.2021.9.1.2', # paths + '1.3.6.1.2.1.25.3.8.1.4', # types + '1.3.6.1.4.1.2021.9.1.9' # percents + ] + + # OIDS required to pull disk information from Windows. 
+ # + STORAGE_WINDOWS = [ + '1.3.6.1.2.1.25.2.3.1.2', # types + '1.3.6.1.2.1.25.2.3.1.3', # paths + '1.3.6.1.2.1.25.2.3.1.5', # totalsize + '1.3.6.1.2.1.25.2.3.1.6' # usedsize + ] + + # Global defaults for instances of this monitor + # + DEFAULT_OPTIONS = { + error_at: 95, # in percent full + include: [], # if non-empty, only these paths are included in checks + exclude: [] # paths to exclude from checks + } + + + ### Class-level entry point, for when the class itself is handed to exec(). + ### Builds an instance with the default options and runs it against +nodes+. + ### + def self::run( nodes ) + return new.run( nodes ) + end + + + ### Create a new instance of this monitor. + ### + def initialize( options=DEFAULT_OPTIONS ) + options = DEFAULT_OPTIONS.merge( options || {} ) + %i[ include exclude ].each do |opt| + options[ opt ] = Array( options[opt] ) + end + + options.each do |name, value| + self.public_send( "#{name.to_s}=", value ) + end + end + + # Set an error if mount points are above this percentage. + attr_accessor :error_at + + # Only check these specific mount points. + attr_accessor :include + + # Exclude these mount points (array of paths) from checks. + attr_accessor :exclude + + + ### Perform the monitoring checks. + ### + def run( nodes ) + super do |snmp, host| + self.gather_disks( snmp, host ) + end + end + + + ######### + protected + ######### + + ### Collect mount point usage for +host+ from an existing (and open) + #### +snmp+ connection. + ### + def gather_disks( snmp, host ) + self.log.debug "Getting disk information for %s" % [ host ] + errors = [] + results = {} + mounts = self.get_disk_percentages( snmp ) + config = @identifiers[ host ].last || {} + + includes = config[ 'include' ] || self.include + excludes = config[ 'exclude' ] || self.exclude + + mounts.each_pair do |path, percentage| + next if excludes.include?( path ) + next if ! includes.empty? && ! 
includes.include?( path ) + if percentage >= ( config[ 'error_at' ] || self.error_at ) + errors << "%s at %d%% capacity" % [ path, percentage ] + end + end + + results[ :mounts ] = mounts + results[ :error ] = errors.join( ', ' ) unless errors.empty? + + @results[ host ] = results + end + + + ### Given an SNMP object, return a hash of: + ### + ### device path => percentage full + ### + def get_disk_percentages( snmp ) + + # Does this look like a windows system, or a net-snmp based one? + system_type = snmp.get( SNMP::ObjectId.new( IDENTIFICATION_OID ) ).varbind_list.first.value + disks = {} + + # Windows has its own MIBs. + # + if system_type =~ /windows/i + snmp.walk( STORAGE_WINDOWS ) do |list| + next unless list[0].value.to_s == WINDOWS_DEVICE + disks[ list[1].value.to_s ] = ( list[3].value.to_f / list[2].value.to_f ) * 100 + end + return disks + end + + # Everything else. + # + snmp.walk( STORAGE_NET_SNMP ) do |list| + mount = list[0].value.to_s + next if mount == 'noSuchInstance' + + next if list[2].value.to_s == 'noSuchInstance' + used = list[2].value.to_i + + unless list[1].value.to_s == 'noSuchInstance' + typeoid = list[1].value.join('.').to_s + next if typeoid =~ STORAGE_IGNORE + end + next if mount =~ /\/(?:dev|proc)$/ + + disks[ mount ] = used + end + + return disks + end + +end # class Arborist::Monitor::SNMP::Disk + diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp/load.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/arborist/monitor/snmp/load.rb Wed Sep 07 15:23:42 2016 -0700 @@ -0,0 +1,82 @@ +# -*- ruby -*- +# vim: set noet nosta sw=4 ts=4 : + +require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) + +# SNMP 5 minute load checks. +# Sets current 5 minute load as a 'load5' attribute. +# +class Arborist::Monitor::SNMP::Load + include Arborist::Monitor::SNMP + + extend Loggability + log_to :arborist + + # OIDS for discovering system load. 
+ # + LOAD = { + five_min: '1.3.6.1.4.1.2021.10.1.3.2' + } + + # Global defaults for instances of this monitor + # + DEFAULT_OPTIONS = { + error_at: 7 + } + + + ### Class #run creates a new instance. + ### + def self::run( nodes ) + return new.run( nodes ) + end + + + ### Create a new instance of this monitor. + ### + def initialize( options=DEFAULT_OPTIONS ) + options = DEFAULT_OPTIONS.merge( options || {} ) + options.each do |name, value| + self.public_send( "#{name.to_s}=", value ) + end + end + + # Set an error if the 5 minute load average reaches or exceeds this. + attr_accessor :error_at + + + ### Perform the monitoring checks. + ### + def run( nodes ) + super do |snmp, host| + self.gather_load( snmp, host ) + end + end + + + ######### + protected + ######### + + ### Collect the load information for +host+ from an existing + ### (and open) +snmp+ connection. + ### + def gather_load( snmp, host ) + self.log.debug "Getting system load for: %s" % [ host ] + load5 = snmp.get( SNMP::ObjectId.new( LOAD[:five_min] ) ).varbind_list.first.value.to_f + self.log.debug " Load on %s: %0.2f" % [ host, load5 ] + + config = @identifiers[ host ].last || {} + error_at = config[ 'error_at' ] || self.error_at + if load5 >= error_at + @results[ host ] = { + error: "Load has exceeded %0.2f over a 5 minute average" % [ error_at ], + load5: load5 + } + else + @results[ host ] = { load5: load5 } + end + end + +end # class Arborist::Monitor::SNMP::Load + diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp/memory.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/arborist/monitor/snmp/memory.rb Wed Sep 07 15:23:42 2016 -0700 @@ -0,0 +1,83 @@ +# -*- ruby -*- +# vim: set noet nosta sw=4 ts=4 : + +require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) + +# SNMP memory availability checks. +# Returns total available memory in Kb to the 'available_memory' attribute. 
+# +class Arborist::Monitor::SNMP::Memory + include Arborist::Monitor::SNMP + + extend Loggability + log_to :arborist + + # OIDS for discovering memory usage. + # + MEMORY = { + mem_avail: '1.3.6.1.4.1.2021.4.6.0' + } + + # Global defaults for instances of this monitor + # + DEFAULT_OPTIONS = { + error_at: 51200, # in kilobytes of available memory + } + + + ### Class-level entry point, for when the class itself is handed to exec(). + ### Builds an instance with the default options and runs it against +nodes+. + ### + def self::run( nodes ) + return new.run( nodes ) + end + + + ### Create a new instance of this monitor. + ### + def initialize( options=DEFAULT_OPTIONS ) + options = DEFAULT_OPTIONS.merge( options || {} ) + options.each do |name, value| + self.public_send( "#{name.to_s}=", value ) + end + end + + # Set an error if available memory is at or below this many kilobytes. + attr_accessor :error_at + + + ### Perform the monitoring checks. + ### + def run( nodes ) + super do |snmp, host| + self.gather_free_memory( snmp, host ) + end + end + + + ######### + protected + ######### + + ### Collect available memory information for +host+ from an existing + ### (and open) +snmp+ connection. 
+ ### + def gather_free_memory( snmp, host ) + self.log.debug "Getting available memory for: %s" % [ host ] + mem_avail = snmp.get( SNMP::ObjectId.new( MEMORY[:mem_avail] ) ).varbind_list.first.value.to_f + self.log.debug " Available memory on %s: %0.2f" % [ host, mem_avail ] + + config = @identifiers[ host ].last || {} + error_at = config['error_at'] || self.error_at + if mem_avail <= error_at + @results[ host ] = { + error: "Available memory is under %0.1fMB" % [ error_at.to_f / 1024 ], + available_memory: mem_avail + } + else + @results[ host ] = { available_memory: mem_avail } + end + end + +end # class Arborist::Monitor::SNMP::Memory + diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp/process.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/arborist/monitor/snmp/process.rb Wed Sep 07 15:23:42 2016 -0700 @@ -0,0 +1,99 @@ +# -*- ruby -*- +# vim: set noet nosta sw=4 ts=4 : + +require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) + +# SNMP running process checks. +# +class Arborist::Monitor::SNMP::Process + include Arborist::Monitor::SNMP + + extend Loggability + log_to :arborist + + # OIDS for discovering running processes. + # + PROCESS = { + list: '1.3.6.1.2.1.25.4.2.1.4', + args: '1.3.6.1.2.1.25.4.2.1.5' + } + + # Global defaults for instances of this monitor + # + DEFAULT_OPTIONS = { + processes: [] # list of procs to match + } + + + ### Class-level entry point, for when the class itself is handed to exec(). + ### Builds an instance with the default options and runs it against +nodes+. + ### + def self::run( nodes ) + return new.run( nodes ) + end + + + ### Create a new instance of this monitor. 
+ ### + def initialize( options=DEFAULT_OPTIONS ) + options = DEFAULT_OPTIONS.merge( options || {} ) + %i[ processes ].each do |opt| + options[ opt ] = Array( options[opt] ) + end + + options.each do |name, value| + self.public_send( "#{name.to_s}=", value ) + end + end + + # Set an error if processes in this array aren't running. + attr_accessor :processes + + + ### Perform the monitoring checks. + ### + def run( nodes ) + super do |snmp, host| + self.gather_processlist( snmp, host ) + end + end + + + ######### + protected + ######### + + ### Collect running processes on +host+ from an existing (and open) + #### +snmp+ connection. + ### + def gather_processlist( snmp, host ) + self.log.debug "Getting running process list for %s" % [ host ] + config = @identifiers[ host ].last || {} + procs = [] + errors = [] + + snmp.walk([ PROCESS[:list], PROCESS[:args] ]) do |list| + process = list[0].value.to_s + args = list[1].value.to_s + procs << "%s %s " % [ process, args ] + end + + # Check against the running stuff, setting an error if + # one isn't found. + # + Array( config['processes'] || self.processes ).each do |process| + process_r = Regexp.new( process ) + found = procs.find{|p| p.match(process_r) } + errors << "Process '%s' is not running" % [ process ] unless found + end + + self.log.debug " %d running processes" % [ procs.length ] + if errors.empty? + @results[ host ] = {} + else + @results[ host ] = { error: errors.join( ', ' ) } + end + end + +end # class Arborist::Monitor::SNMP::Process + diff -r d46ca2b52efe -r e6eb11b1e00d lib/arborist/monitor/snmp/swap.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/arborist/monitor/snmp/swap.rb Wed Sep 07 15:23:42 2016 -0700 @@ -0,0 +1,86 @@ +# -*- ruby -*- +# vim: set noet nosta sw=4 ts=4 : + +require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) + +# SNMP swap usage checks. +# Returns swap used in a 'swap_in_use' attribute. 
+# +class Arborist::Monitor::SNMP::Swap + include Arborist::Monitor::SNMP + + extend Loggability + log_to :arborist + + # Global defaults for instances of this monitor + # + DEFAULT_OPTIONS = { + error_at: 95, # in percent of swap in use + } + + # OIDS for discovering memory usage. + # + MEMORY = { + swap_total: '1.3.6.1.4.1.2021.4.3.0', + swap_avail: '1.3.6.1.4.1.2021.4.4.0', + } + + + ### Class-level entry point, for when the class itself is handed to exec(). + ### Builds an instance with the default options and runs it against +nodes+. + ### + def self::run( nodes ) + return new.run( nodes ) + end + + + ### Create a new instance of this monitor. + ### + def initialize( options=DEFAULT_OPTIONS ) + options = DEFAULT_OPTIONS.merge( options || {} ) + options.each do |name, value| + self.public_send( "#{name.to_s}=", value ) + end + end + + # Set an error if used swap reaches or exceeds this percentage. + attr_accessor :error_at + + + ### Perform the monitoring checks. + ### + def run( nodes ) + super do |snmp, host| + self.gather_swap( snmp, host ) + end + end + + + ######### + protected + ######### + + ### Collect used swap information for +host+ from an existing (and + ### open) +snmp+ connection. + ### + def gather_swap( snmp, host ) + self.log.debug "Getting used swap for: %s" % [ host ] + + swap_total = snmp.get( SNMP::ObjectId.new(MEMORY[:swap_total]) ).varbind_list.first.value.to_f + swap_avail = snmp.get( SNMP::ObjectId.new(MEMORY[:swap_avail]) ).varbind_list.first.value.to_f + swap_in_use = (( swap_avail.to_f / swap_total * 100 ) - 100 ).abs + self.log.debug " Swap in use on %s: %0.1f%%" % [ host, swap_in_use ] + + config = @identifiers[ host ].last || {} + if swap_in_use >= ( config['error_at'] || self.error_at ) + @results[ host ] = { + error: "%0.1f%% swap in use" % [ swap_in_use ], + swap_in_use: swap_in_use + } + else + @results[ host ] = { swap_in_use: swap_in_use } + end + end + +end # class Arborist::Monitor::SNMP::Swap +