lib/arborist/monitor/snmp/disk.rb
changeset 8 e0b7c95a154f
parent 4 e6eb11b1e00d
child 14 d5cb8bd33170
equal deleted inserted replaced
7:4548e58c8c66 8:e0b7c95a154f
     1 # -*- ruby -*-
     1 # -*- ruby -*-
     2 # vim: set noet nosta sw=4 ts=4 :
     2 # vim: set noet nosta sw=4 ts=4 :
     3 
     3 
     4 require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP )
     4 require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP )
     5 
     5 
     6 # SNMP Disk capacity checks.
     6 # Disk capacity checks.
     7 # Returns all mounts with their current usage percentage in a "mount" attribute.
     7 #
       
     8 # Sets all configured mounts with their current usage percentage
       
     9 # in an attribute named "mounts".
     8 #
    10 #
     9 class Arborist::Monitor::SNMP::Disk
    11 class Arborist::Monitor::SNMP::Disk
    10 	include Arborist::Monitor::SNMP
    12 	include Arborist::Monitor::SNMP
    11 
    13 
    12 	extend Loggability
    14 	extend Configurability, Loggability
    13 	log_to :arborist
    15 	log_to :arborist_snmp
    14 
       
    15 	# The OID that returns the system environment.
       
    16 	IDENTIFICATION_OID = '1.3.6.1.2.1.1.1.0'
       
    17 
       
    18 	# For net-snmp systems, ignore mount types that match
       
    19 	# this regular expression.  This includes null/union mounts
       
    20 	# and NFS, currently.
       
    21 	STORAGE_IGNORE = %r{25.3.9.(?:2|14)$}
       
    22 
       
    23 	# The OID that matches a local windows hard disk.  Anything else
       
    24 	# is a remote (SMB) mount.
       
    25 	WINDOWS_DEVICE = '1.3.6.1.2.1.25.2.1.4'
       
    26 
    16 
    27 	# OIDS required to pull disk information from net-snmp.
    17 	# OIDS required to pull disk information from net-snmp.
    28 	#
    18 	#
    29 	STORAGE_NET_SNMP = [
    19 	STORAGE_NET_SNMP = {
    30 		'1.3.6.1.4.1.2021.9.1.2', # paths
    20 		path: '1.3.6.1.4.1.2021.9.1.2',
    31 		'1.3.6.1.2.1.25.3.8.1.4', # types
    21 		percent: '1.3.6.1.4.1.2021.9.1.9',
    32 		'1.3.6.1.4.1.2021.9.1.9'  # percents
    22 		type: '1.3.6.1.2.1.25.3.8.1.4'
       
    23 	}
       
    24 
       
    25 	# The OID that matches a local windows hard disk.
       
    26 	#
       
    27 	WINDOWS_DEVICES = [
       
    28 		'1.3.6.1.2.1.25.2.1.4', # local disk
       
    29 		'1.3.6.1.2.1.25.2.1.7'  # removables, but we have to include them for iscsi mounts
    33 	]
    30 	]
    34 
    31 
    35 	# OIDS required to pull disk information from Windows.
    32 	# OIDS required to pull disk information from Windows.
    36 	#
    33 	#
    37 	STORAGE_WINDOWS = [
    34 	STORAGE_WINDOWS = {
    38 		'1.3.6.1.2.1.25.2.3.1.2', # types
    35 		type: '1.3.6.1.2.1.25.2.3.1.2',
    39 		'1.3.6.1.2.1.25.2.3.1.3', # paths
    36 		path: '1.3.6.1.2.1.25.2.3.1.3',
    40 		'1.3.6.1.2.1.25.2.3.1.5', # totalsize
    37 		total: '1.3.6.1.2.1.25.2.3.1.5',
    41 		'1.3.6.1.2.1.25.2.3.1.6'  # usedsize
    38 		used: '1.3.6.1.2.1.25.2.3.1.6'
    42 	]
       
    43 
       
    44 	# Global defaults for instances of this monitor
       
    45 	#
       
    46 	DEFAULT_OPTIONS = {
       
    47 		error_at: 95, # in percent full
       
    48 		include:  [], # if non-empty, only these paths are included in checks
       
    49 		exclude:  []  # paths to exclude from checks
       
    50 	}
    39 	}
    51 
    40 
       
    41 	# The fallback warning capacity.
       
    42 	WARN_AT = 90
    52 
    43 
    53 	### This monitor is complex enough to require creating an instance from the caller.
    44 
    54 	### Provide a friendlier error message the class was provided to exec() directly.
    45 	# Configurability API
       
    46 	#
       
    47 	configurability( 'arborist.snmp.disk' ) do
       
    48 		# What percentage qualifies as a warning
       
    49 		setting :warn_at, default: WARN_AT
       
    50 
       
    51 		# If non-empty, only these paths are included in checks.
       
    52 		#
       
    53 		setting :include do |val|
       
    54 			if val
       
    55 				mounts = Array( val ).map{|m| Regexp.new(m) }
       
    56 				Regexp.union( mounts )
       
    57 			end
       
    58 		end
       
    59 
       
    60 		# Paths to exclude from checks
       
    61 		#
       
    62 		setting :exclude,
       
    63 			default: [ '^/dev(/.+)?$', '^/net(/.+)?$', '^/proc$', '^/run$', '^/sys/' ] do |val|
       
    64 			mounts = Array( val ).map{|m| Regexp.new(m) }
       
    65 			Regexp.union( mounts )
       
    66 		end
       
    67 	end
       
    68 
       
    69 
       
    70 	### Return the properties used by this monitor.
       
    71 	###
       
    72 	def self::node_properties
       
    73 		return USED_PROPERTIES
       
    74 	end
       
    75 
       
    76 
       
    77 	### Class #run creates a new instance and immediately runs it.
    55 	###
    78 	###
    56 	def self::run( nodes )
    79 	def self::run( nodes )
    57 		return new.run( nodes )
    80 		return new.run( nodes )
    58 	end
    81 	end
    59 
    82 
    60 
    83 
    61 	### Create a new instance of this monitor.
       
    62 	###
       
    63 	def initialize( options=DEFAULT_OPTIONS )
       
    64 		options = DEFAULT_OPTIONS.merge( options || {} )
       
    65 		%i[ include exclude ].each do |opt|
       
    66 			options[ opt ] = Array( options[opt] )
       
    67 		end
       
    68 
       
    69 		options.each do |name, value|
       
    70 			self.public_send( "#{name.to_s}=", value )
       
    71 		end
       
    72 	end
       
    73 
       
    74 	# Set an error if mount points are above this percentage.
       
    75 	attr_accessor :error_at
       
    76 
       
    77 	# Only check these specific mount points.
       
    78 	attr_accessor :include
       
    79 
       
    80 	# Exclude these mount points (array of paths) from checks.
       
    81 	attr_accessor :exclude
       
    82 
       
    83 
       
    84 	### Perform the monitoring checks.
    84 	### Perform the monitoring checks.
    85 	###
    85 	###
    86 	def run( nodes )
    86 	def run( nodes )
    87 		super do |snmp, host|
    87 		super do |host, snmp|
    88 			self.gather_disks( snmp, host )
    88 			self.gather_disks( host, snmp )
    89 		end
    89 		end
    90 	end
    90 	end
    91 
    91 
    92 
    92 
    93 	#########
    93 	#########
    94 	protected
    94 	protected
    95 	#########
    95 	#########
    96 
    96 
    97 	### Collect mount point usage for +host+ from an existing (and open)
    97 	### Collect mount point usage for +host+ from an existing (and open)
    98 	#### +snmp+ connection.
    98 	### +snmp+ connection.
    99 	###
    99 	###
   100 	def gather_disks( snmp, host )
   100 	def gather_disks( host, snmp )
   101 		self.log.debug "Getting disk information for %s" % [ host ]
   101 		mounts  =  self.system =~ /windows\s+/i ? self.windows_disks( snmp ) : self.unix_disks( snmp )
   102 		errors  = []
   102 		config  = self.identifiers[ host ].last || {}
   103 		results = {}
   103 		warn_at = config[ 'warn_at' ] || self.class.warn_at
   104 		mounts  = self.get_disk_percentages( snmp )
       
   105 		config  = @identifiers[ host ].last || {}
       
   106 
   104 
   107 		includes = config[ 'include' ] || self.include
   105 		includes = self.format_mounts( config, 'include' ) || self.class.include
   108 		excludes = config[ 'exclude' ] || self.exclude
   106 		excludes = self.format_mounts( config, 'exclude' ) || self.class.exclude
   109 
   107 
       
   108 		mounts.reject! do |path, percentage|
       
   109 			excludes.match( path ) || ( includes && ! includes.match( path ) )
       
   110 		end
       
   111 
       
   112 		errors   = []
       
   113 		warnings = []
   110 		mounts.each_pair do |path, percentage|
   114 		mounts.each_pair do |path, percentage|
   111 			next if excludes.include?( path )
   115 
   112 			next if ! includes.empty? && ! includes.include?( path )
   116 			warn = begin
   113 			if percentage >= ( config[ 'error_at' ] || self.error_at )
   117 			  if warn_at.is_a?( Hash )
   114 				errors << "%s at %d%% capacity" % [ path, percentage ]
   118 				  warn_at[ path ] || WARN_AT
       
   119 			  else
       
   120 				  warn_at
       
   121 			  end
       
   122 			end
       
   123 
       
   124 			self.log.debug "%s:%s -> at %d, warn at %d" % [ host, path, percentage, warn ]
       
   125 
       
   126 			if percentage >= warn.to_i
       
   127 				if percentage >= 100
       
   128 					errors << "%s at %d%% capacity" % [ path, percentage ]
       
   129 				else
       
   130 					warnings << "%s at %d%% capacity" % [ path, percentage ]
       
   131 				end
   115 			end
   132 			end
   116 		end
   133 		end
   117 
   134 
   118 		results[ :mounts ] = mounts
   135 		self.results[ host ] = { mounts: mounts }
   119 		results[ :error ] = errors.join( ', ' ) unless errors.empty?
   136 		self.results[ host ][ :error ]   = errors.join(', ')   unless errors.empty?
   120 
   137 		self.results[ host ][ :warning ] = warnings.join(', ') unless warnings.empty?
   121 		@results[ host ] = results
       
   122 	end
   138 	end
   123 
   139 
   124 
   140 
   125 	### Given a SNMP object, return a hash of:
   141 	### Return a single regexp for the 'include' or 'exclude' section of
       
   142 	### resource node's +config+, or nil if nonexistent.
   126 	###
   143 	###
   127 	###    device path => percentage full
   144 	def format_mounts( config, section )
       
   145 		list = config[ section ] || return
       
   146 		mounts = Array( list ).map{|m| Regexp.new(m) }
       
   147 		return Regexp.union( mounts )
       
   148 	end
       
   149 
       
   150 
       
   151 	### Fetch information for Windows systems.
   128 	###
   152 	###
   129 	def get_disk_percentages( snmp )
   153 	def windows_disks( snmp )
       
   154 		raw = snmp.get_bulk([
       
   155 			STORAGE_WINDOWS[:path],
       
   156 			STORAGE_WINDOWS[:type],
       
   157 			STORAGE_WINDOWS[:total],
       
   158 			STORAGE_WINDOWS[:used]
       
   159 		]).varbinds.map( &:value )
   130 
   160 
   131 		# Does this look like a windows system, or a net-snmp based one?
       
   132 		system_type = snmp.get( SNMP::ObjectId.new( IDENTIFICATION_OID ) ).varbind_list.first.value
       
   133 		disks = {}
   161 		disks = {}
   134 
   162 		raw.each_slice( 4 ) do |device|
   135 		# Windows has it's own MIBs.
   163 			next unless device[1].respond_to?( :oid ) && WINDOWS_DEVICES.include?( device[1].oid )
   136 		#
   164 			next if device[2].zero?
   137 		if system_type =~ /windows/i
   165 			disks[ device[0] ] = (( device[3].to_f / device[2] ) * 100).round( 1 )
   138 			snmp.walk( STORAGE_WINDOWS ) do |list|
       
   139 				next unless list[0].value.to_s == WINDOWS_DEVICE
       
   140 				disks[ list[1].value.to_s ] = ( list[3].value.to_f / list[2].value.to_f ) * 100
       
   141 			end
       
   142 			return disks
       
   143 		end
       
   144 
       
   145 		# Everything else.
       
   146 		#
       
   147 		snmp.walk( STORAGE_NET_SNMP ) do |list|
       
   148 			mount = list[0].value.to_s
       
   149 			next if mount == 'noSuchInstance'
       
   150 
       
   151 			next if list[2].value.to_s == 'noSuchInstance'
       
   152 			used = list[2].value.to_i
       
   153 
       
   154 			unless list[1].value.to_s == 'noSuchInstance'
       
   155 				typeoid = list[1].value.join('.').to_s
       
   156 				next if typeoid =~ STORAGE_IGNORE
       
   157 			end
       
   158 			next if mount =~ /\/(?:dev|proc)$/
       
   159 
       
   160 			disks[ mount ] = used
       
   161 		end
   166 		end
   162 
   167 
   163 		return disks
   168 		return disks
   164 	end
   169 	end
   165 
   170 
       
   171 
       
   172 	### Fetch information for Unix/MacOS systems.
       
   173 	###
       
   174 	def unix_disks( snmp )
       
   175 		raw = snmp.get_bulk([
       
   176 			STORAGE_NET_SNMP[:path],
       
   177 			STORAGE_NET_SNMP[:percent] ]).varbinds.map( &:value )
       
   178 
       
   179 		return Hash[ *raw ]
       
   180 	end
       
   181 
   166 end # class Arborist::Monitor::SNMP::Disk
   182 end # class Arborist::Monitor::SNMP::Disk
   167 
   183