diff -r 4548e58c8c66 -r e0b7c95a154f lib/arborist/monitor/snmp/disk.rb --- a/lib/arborist/monitor/snmp/disk.rb Wed Aug 30 13:55:02 2017 -0700 +++ b/lib/arborist/monitor/snmp/disk.rb Wed Apr 04 11:00:35 2018 -0700 @@ -3,89 +3,89 @@ require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) -# SNMP Disk capacity checks. -# Returns all mounts with their current usage percentage in a "mount" attribute. +# Disk capacity checks. +# +# Sets all configured mounts with their current usage percentage +# in an attribute named "mounts". # class Arborist::Monitor::SNMP::Disk include Arborist::Monitor::SNMP - extend Loggability - log_to :arborist - - # The OID that returns the system environment. - IDENTIFICATION_OID = '1.3.6.1.2.1.1.1.0' - - # For net-snmp systems, ignore mount types that match - # this regular expression. This includes null/union mounts - # and NFS, currently. - STORAGE_IGNORE = %r{25.3.9.(?:2|14)$} - - # The OID that matches a local windows hard disk. Anything else - # is a remote (SMB) mount. - WINDOWS_DEVICE = '1.3.6.1.2.1.25.2.1.4' + extend Configurability, Loggability + log_to :arborist_snmp # OIDS required to pull disk information from net-snmp. # - STORAGE_NET_SNMP = [ - '1.3.6.1.4.1.2021.9.1.2', # paths - '1.3.6.1.2.1.25.3.8.1.4', # types - '1.3.6.1.4.1.2021.9.1.9' # percents + STORAGE_NET_SNMP = { + path: '1.3.6.1.4.1.2021.9.1.2', + percent: '1.3.6.1.4.1.2021.9.1.9', + type: '1.3.6.1.2.1.25.3.8.1.4' + } + + # The OID that matches a local windows hard disk. + # + WINDOWS_DEVICES = [ + '1.3.6.1.2.1.25.2.1.4', # local disk + '1.3.6.1.2.1.25.2.1.7' # removables, but we have to include them for iscsi mounts ] # OIDS required to pull disk information from Windows. # - STORAGE_WINDOWS = [ - '1.3.6.1.2.1.25.2.3.1.2', # types - '1.3.6.1.2.1.25.2.3.1.3', # paths - '1.3.6.1.2.1.25.2.3.1.5', # totalsize - '1.3.6.1.2.1.25.2.3.1.6' # usedsize - ] - - # Global defaults for instances of this monitor - # - DEFAULT_OPTIONS = { - error_at: 95, # in percent full - include: [], # if non-empty, only these paths are included in checks - exclude: [] # paths to exclude from checks + STORAGE_WINDOWS = { + type: '1.3.6.1.2.1.25.2.3.1.2', + path: '1.3.6.1.2.1.25.2.3.1.3', + total: '1.3.6.1.2.1.25.2.3.1.5', + used: '1.3.6.1.2.1.25.2.3.1.6' } + # The fallback warning capacity. + WARN_AT = 90 - ### This monitor is complex enough to require creating an instance from the caller. - ### Provide a friendlier error message the class was provided to exec() directly. + + # Configurability API + # + configurability( 'arborist.snmp.disk' ) do + # What percentage qualifies as a warning + setting :warn_at, default: WARN_AT + + # If non-empty, only these paths are included in checks. + # + setting :include do |val| + if val + mounts = Array( val ).map{|m| Regexp.new(m) } + Regexp.union( mounts ) + end + end + + # Paths to exclude from checks + # + setting :exclude, + default: [ '^/dev(/.+)?$', '^/net(/.+)?$', '^/proc$', '^/run$', '^/sys/' ] do |val| + mounts = Array( val ).map{|m| Regexp.new(m) } + Regexp.union( mounts ) + end + end + + + ### Return the properties used by this monitor. + ### + def self::node_properties + return USED_PROPERTIES + end + + + ### Class #run creates a new instance and immediately runs it. ### def self::run( nodes ) return new.run( nodes ) end - ### Create a new instance of this monitor. - ### - def initialize( options=DEFAULT_OPTIONS ) - options = DEFAULT_OPTIONS.merge( options || {} ) - %i[ include exclude ].each do |opt| - options[ opt ] = Array( options[opt] ) - end - - options.each do |name, value| - self.public_send( "#{name.to_s}=", value ) - end - end - - # Set an error if mount points are above this percentage. - attr_accessor :error_at - - # Only check these specific mount points. - attr_accessor :include - - # Exclude these mount points (array of paths) from checks. - attr_accessor :exclude - - ### Perform the monitoring checks. ### def run( nodes ) - super do |snmp, host| - self.gather_disks( snmp, host ) + super do |host, snmp| + self.gather_disks( host, snmp ) end end @@ -95,73 +95,89 @@ ######### ### Collect mount point usage for +host+ from an existing (and open) - #### +snmp+ connection. + ### +snmp+ connection. ### - def gather_disks( snmp, host ) - self.log.debug "Getting disk information for %s" % [ host ] - errors = [] - results = {} - mounts = self.get_disk_percentages( snmp ) - config = @identifiers[ host ].last || {} + def gather_disks( host, snmp ) + mounts = self.system =~ /windows\s+/i ? self.windows_disks( snmp ) : self.unix_disks( snmp ) + config = self.identifiers[ host ].last || {} + warn_at = config[ 'warn_at' ] || self.class.warn_at + + includes = self.format_mounts( config, 'include' ) || self.class.include + excludes = self.format_mounts( config, 'exclude' ) || self.class.exclude + + mounts.reject! do |path, percentage| + excludes.match( path ) || ( includes && ! includes.match( path ) ) + end - includes = config[ 'include' ] || self.include - excludes = config[ 'exclude' ] || self.exclude - + errors = [] + warnings = [] mounts.each_pair do |path, percentage| - next if excludes.include?( path ) - next if ! includes.empty? && ! includes.include?( path ) - if percentage >= ( config[ 'error_at' ] || self.error_at ) - errors << "%s at %d%% capacity" % [ path, percentage ] + + warn = begin + if warn_at.is_a?( Hash ) + warn_at[ path ] || WARN_AT + else + warn_at + end + end + + self.log.debug "%s:%s -> at %d, warn at %d" % [ host, path, percentage, warn ] + + if percentage >= warn.to_i + if percentage >= 100 + errors << "%s at %d%% capacity" % [ path, percentage ] + else + warnings << "%s at %d%% capacity" % [ path, percentage ] + end end end - results[ :mounts ] = mounts - results[ :error ] = errors.join( ', ' ) unless errors.empty? - - @results[ host ] = results + self.results[ host ] = { mounts: mounts } + self.results[ host ][ :error ] = errors.join(', ') unless errors.empty? + self.results[ host ][ :warning ] = warnings.join(', ') unless warnings.empty? end - ### Given a SNMP object, return a hash of: - ### - ### device path => percentage full + ### Return a single regexp for the 'include' or 'exclude' section of + ### resource node's +config+, or nil if nonexistent. ### - def get_disk_percentages( snmp ) + def format_mounts( config, section ) + list = config[ section ] || return + mounts = Array( list ).map{|m| Regexp.new(m) } + return Regexp.union( mounts ) + end - # Does this look like a windows system, or a net-snmp based one? - system_type = snmp.get( SNMP::ObjectId.new( IDENTIFICATION_OID ) ).varbind_list.first.value - disks = {} - # Windows has it's own MIBs. - # - if system_type =~ /windows/i - snmp.walk( STORAGE_WINDOWS ) do |list| - next unless list[0].value.to_s == WINDOWS_DEVICE - disks[ list[1].value.to_s ] = ( list[3].value.to_f / list[2].value.to_f ) * 100 - end - return disks - end + ### Fetch information for Windows systems. + ### + def windows_disks( snmp ) + raw = snmp.get_bulk([ + STORAGE_WINDOWS[:path], + STORAGE_WINDOWS[:type], + STORAGE_WINDOWS[:total], + STORAGE_WINDOWS[:used] + ]).varbinds.map( &:value ) - # Everything else. - # - snmp.walk( STORAGE_NET_SNMP ) do |list| - mount = list[0].value.to_s - next if mount == 'noSuchInstance' - - next if list[2].value.to_s == 'noSuchInstance' - used = list[2].value.to_i - - unless list[1].value.to_s == 'noSuchInstance' - typeoid = list[1].value.join('.').to_s - next if typeoid =~ STORAGE_IGNORE - end - next if mount =~ /\/(?:dev|proc)$/ - - disks[ mount ] = used + disks = {} + raw.each_slice( 4 ) do |device| + next unless device[1].respond_to?( :oid ) && WINDOWS_DEVICES.include?( device[1].oid ) + next if device[2].zero? + disks[ device[0] ] = (( device[3].to_f / device[2] ) * 100).round( 1 ) end return disks end + + ### Fetch information for Unix/MacOS systems. + ### + def unix_disks( snmp ) + raw = snmp.get_bulk([ + STORAGE_NET_SNMP[:path], + STORAGE_NET_SNMP[:percent] ]).varbinds.map( &:value ) + + return Hash[ *raw ] + end + end # class Arborist::Monitor::SNMP::Disk