1 # -*- ruby -*- |
1 # -*- ruby -*- |
2 # vim: set noet nosta sw=4 ts=4 : |
2 # vim: set noet nosta sw=4 ts=4 : |
3 |
3 |
4 require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) |
4 require 'arborist/monitor/snmp' unless defined?( Arborist::Monitor::SNMP ) |
5 |
5 |
6 # SNMP Disk capacity checks. |
6 # Disk capacity checks. |
7 # Returns all mounts with their current usage percentage in a "mount" attribute. |
7 # |
|
8 # Sets all configured mounts with their current usage percentage |
|
9 # in an attribute named "mounts". |
8 # |
10 # |
9 class Arborist::Monitor::SNMP::Disk |
11 class Arborist::Monitor::SNMP::Disk |
10 include Arborist::Monitor::SNMP |
12 include Arborist::Monitor::SNMP |
11 |
13 |
12 extend Loggability |
14 extend Configurability, Loggability |
13 log_to :arborist |
15 log_to :arborist_snmp |
14 |
|
15 # The OID that returns the system environment. |
|
16 IDENTIFICATION_OID = '1.3.6.1.2.1.1.1.0' |
|
17 |
|
18 # For net-snmp systems, ignore mount types that match |
|
19 # this regular expression. This includes null/union mounts |
|
20 # and NFS, currently. |
|
21 STORAGE_IGNORE = %r{25.3.9.(?:2|14)$} |
|
22 |
|
23 # The OID that matches a local windows hard disk. Anything else |
|
24 # is a remote (SMB) mount. |
|
25 WINDOWS_DEVICE = '1.3.6.1.2.1.25.2.1.4' |
|
26 |
16 |
27 # OIDS required to pull disk information from net-snmp. |
17 # OIDS required to pull disk information from net-snmp. |
28 # |
18 # |
29 STORAGE_NET_SNMP = [ |
19 STORAGE_NET_SNMP = { |
30 '1.3.6.1.4.1.2021.9.1.2', # paths |
20 path: '1.3.6.1.4.1.2021.9.1.2', |
31 '1.3.6.1.2.1.25.3.8.1.4', # types |
21 percent: '1.3.6.1.4.1.2021.9.1.9', |
32 '1.3.6.1.4.1.2021.9.1.9' # percents |
22 type: '1.3.6.1.2.1.25.3.8.1.4' |
|
23 } |
|
24 |
|
25 # The OID that matches a local windows hard disk. |
|
26 # |
|
27 WINDOWS_DEVICES = [ |
|
28 '1.3.6.1.2.1.25.2.1.4', # local disk |
|
29 '1.3.6.1.2.1.25.2.1.7' # removables, but we have to include them for iscsi mounts |
33 ] |
30 ] |
34 |
31 |
35 # OIDS required to pull disk information from Windows. |
32 # OIDS required to pull disk information from Windows. |
36 # |
33 # |
37 STORAGE_WINDOWS = [ |
34 STORAGE_WINDOWS = { |
38 '1.3.6.1.2.1.25.2.3.1.2', # types |
35 type: '1.3.6.1.2.1.25.2.3.1.2', |
39 '1.3.6.1.2.1.25.2.3.1.3', # paths |
36 path: '1.3.6.1.2.1.25.2.3.1.3', |
40 '1.3.6.1.2.1.25.2.3.1.5', # totalsize |
37 total: '1.3.6.1.2.1.25.2.3.1.5', |
41 '1.3.6.1.2.1.25.2.3.1.6' # usedsize |
38 used: '1.3.6.1.2.1.25.2.3.1.6' |
42 ] |
|
43 |
|
44 # Global defaults for instances of this monitor |
|
45 # |
|
46 DEFAULT_OPTIONS = { |
|
47 error_at: 95, # in percent full |
|
48 include: [], # if non-empty, only these paths are included in checks |
|
49 exclude: [] # paths to exclude from checks |
|
50 } |
39 } |
51 |
40 |
|
41 # The fallback warning capacity. |
|
42 WARN_AT = 90 |
52 |
43 |
53 ### This monitor is complex enough to require creating an instance from the caller. |
44 |
54 ### Provide a friendlier error message the class was provided to exec() directly. |
45 # Configurability API |
|
46 # |
|
47 configurability( 'arborist.snmp.disk' ) do |
|
48 # What percentage qualifies as a warning |
|
49 setting :warn_at, default: WARN_AT |
|
50 |
|
51 # If non-empty, only these paths are included in checks. |
|
52 # |
|
53 setting :include do |val| |
|
54 if val |
|
55 mounts = Array( val ).map{|m| Regexp.new(m) } |
|
56 Regexp.union( mounts ) |
|
57 end |
|
58 end |
|
59 |
|
60 # Paths to exclude from checks |
|
61 # |
|
62 setting :exclude, |
|
63 default: [ '^/dev(/.+)?$', '^/net(/.+)?$', '^/proc$', '^/run$', '^/sys/' ] do |val| |
|
64 mounts = Array( val ).map{|m| Regexp.new(m) } |
|
65 Regexp.union( mounts ) |
|
66 end |
|
67 end |
|
68 |
|
69 |
|
70 ### Return the properties used by this monitor. |
|
71 ### |
|
72 def self::node_properties |
|
73 return USED_PROPERTIES |
|
74 end |
|
75 |
|
76 |
|
77 ### Class #run creates a new instance and immediately runs it. |
55 ### |
78 ### |
56 def self::run( nodes ) |
79 def self::run( nodes ) |
57 return new.run( nodes ) |
80 return new.run( nodes ) |
58 end |
81 end |
59 |
82 |
60 |
83 |
61 ### Create a new instance of this monitor. |
|
62 ### |
|
63 def initialize( options=DEFAULT_OPTIONS ) |
|
64 options = DEFAULT_OPTIONS.merge( options || {} ) |
|
65 %i[ include exclude ].each do |opt| |
|
66 options[ opt ] = Array( options[opt] ) |
|
67 end |
|
68 |
|
69 options.each do |name, value| |
|
70 self.public_send( "#{name.to_s}=", value ) |
|
71 end |
|
72 end |
|
73 |
|
74 # Set an error if mount points are above this percentage. |
|
75 attr_accessor :error_at |
|
76 |
|
77 # Only check these specific mount points. |
|
78 attr_accessor :include |
|
79 |
|
80 # Exclude these mount points (array of paths) from checks. |
|
81 attr_accessor :exclude |
|
82 |
|
83 |
|
84 ### Perform the monitoring checks. |
84 ### Perform the monitoring checks. |
85 ### |
85 ### |
86 def run( nodes ) |
86 def run( nodes ) |
87 super do |snmp, host| |
87 super do |host, snmp| |
88 self.gather_disks( snmp, host ) |
88 self.gather_disks( host, snmp ) |
89 end |
89 end |
90 end |
90 end |
91 |
91 |
92 |
92 |
93 ######### |
93 ######### |
94 protected |
94 protected |
95 ######### |
95 ######### |
96 |
96 |
97 ### Collect mount point usage for +host+ from an existing (and open) |
97 ### Collect mount point usage for +host+ from an existing (and open) |
98 #### +snmp+ connection. |
98 ### +snmp+ connection. |
99 ### |
99 ### |
100 def gather_disks( snmp, host ) |
100 def gather_disks( host, snmp ) |
101 self.log.debug "Getting disk information for %s" % [ host ] |
101 mounts = self.system =~ /windows\s+/i ? self.windows_disks( snmp ) : self.unix_disks( snmp ) |
102 errors = [] |
102 config = self.identifiers[ host ].last || {} |
103 results = {} |
103 warn_at = config[ 'warn_at' ] || self.class.warn_at |
104 mounts = self.get_disk_percentages( snmp ) |
|
105 config = @identifiers[ host ].last || {} |
|
106 |
104 |
107 includes = config[ 'include' ] || self.include |
105 includes = self.format_mounts( config, 'include' ) || self.class.include |
108 excludes = config[ 'exclude' ] || self.exclude |
106 excludes = self.format_mounts( config, 'exclude' ) || self.class.exclude |
109 |
107 |
|
108 mounts.reject! do |path, percentage| |
|
109 excludes.match( path ) || ( includes && ! includes.match( path ) ) |
|
110 end |
|
111 |
|
112 errors = [] |
|
113 warnings = [] |
110 mounts.each_pair do |path, percentage| |
114 mounts.each_pair do |path, percentage| |
111 next if excludes.include?( path ) |
115 |
112 next if ! includes.empty? && ! includes.include?( path ) |
116 warn = begin |
113 if percentage >= ( config[ 'error_at' ] || self.error_at ) |
117 if warn_at.is_a?( Hash ) |
114 errors << "%s at %d%% capacity" % [ path, percentage ] |
118 warn_at[ path ] || WARN_AT |
|
119 else |
|
120 warn_at |
|
121 end |
|
122 end |
|
123 |
|
124 self.log.debug "%s:%s -> at %d, warn at %d" % [ host, path, percentage, warn ] |
|
125 |
|
126 if percentage >= warn.to_i |
|
127 if percentage >= 100 |
|
128 errors << "%s at %d%% capacity" % [ path, percentage ] |
|
129 else |
|
130 warnings << "%s at %d%% capacity" % [ path, percentage ] |
|
131 end |
115 end |
132 end |
116 end |
133 end |
117 |
134 |
118 results[ :mounts ] = mounts |
135 self.results[ host ] = { mounts: mounts } |
119 results[ :error ] = errors.join( ', ' ) unless errors.empty? |
136 self.results[ host ][ :error ] = errors.join(', ') unless errors.empty? |
120 |
137 self.results[ host ][ :warning ] = warnings.join(', ') unless warnings.empty? |
121 @results[ host ] = results |
|
122 end |
138 end |
123 |
139 |
124 |
140 |
125 ### Given a SNMP object, return a hash of: |
141 ### Return a single regexp for the 'include' or 'exclude' section of |
|
142 ### resource node's +config+, or nil if nonexistent. |
126 ### |
143 ### |
127 ### device path => percentage full |
144 def format_mounts( config, section ) |
|
145 list = config[ section ] || return |
|
146 mounts = Array( list ).map{|m| Regexp.new(m) } |
|
147 return Regexp.union( mounts ) |
|
148 end |
|
149 |
|
150 |
|
151 ### Fetch information for Windows systems. |
128 ### |
152 ### |
129 def get_disk_percentages( snmp ) |
153 def windows_disks( snmp ) |
|
154 raw = snmp.get_bulk([ |
|
155 STORAGE_WINDOWS[:path], |
|
156 STORAGE_WINDOWS[:type], |
|
157 STORAGE_WINDOWS[:total], |
|
158 STORAGE_WINDOWS[:used] |
|
159 ]).varbinds.map( &:value ) |
130 |
160 |
131 # Does this look like a windows system, or a net-snmp based one? |
|
132 system_type = snmp.get( SNMP::ObjectId.new( IDENTIFICATION_OID ) ).varbind_list.first.value |
|
133 disks = {} |
161 disks = {} |
134 |
162 raw.each_slice( 4 ) do |device| |
135 # Windows has it's own MIBs. |
163 next unless device[1].respond_to?( :oid ) && WINDOWS_DEVICES.include?( device[1].oid ) |
136 # |
164 next if device[2].zero? |
137 if system_type =~ /windows/i |
165 disks[ device[0] ] = (( device[3].to_f / device[2] ) * 100).round( 1 ) |
138 snmp.walk( STORAGE_WINDOWS ) do |list| |
|
139 next unless list[0].value.to_s == WINDOWS_DEVICE |
|
140 disks[ list[1].value.to_s ] = ( list[3].value.to_f / list[2].value.to_f ) * 100 |
|
141 end |
|
142 return disks |
|
143 end |
|
144 |
|
145 # Everything else. |
|
146 # |
|
147 snmp.walk( STORAGE_NET_SNMP ) do |list| |
|
148 mount = list[0].value.to_s |
|
149 next if mount == 'noSuchInstance' |
|
150 |
|
151 next if list[2].value.to_s == 'noSuchInstance' |
|
152 used = list[2].value.to_i |
|
153 |
|
154 unless list[1].value.to_s == 'noSuchInstance' |
|
155 typeoid = list[1].value.join('.').to_s |
|
156 next if typeoid =~ STORAGE_IGNORE |
|
157 end |
|
158 next if mount =~ /\/(?:dev|proc)$/ |
|
159 |
|
160 disks[ mount ] = used |
|
161 end |
166 end |
162 |
167 |
163 return disks |
168 return disks |
164 end |
169 end |
165 |
170 |
|
171 |
|
172 ### Fetch information for Unix/MacOS systems. |
|
173 ### |
|
174 def unix_disks( snmp ) |
|
175 raw = snmp.get_bulk([ |
|
176 STORAGE_NET_SNMP[:path], |
|
177 STORAGE_NET_SNMP[:percent] ]).varbinds.map( &:value ) |
|
178 |
|
179 return Hash[ *raw ] |
|
180 end |
|
181 |
166 end # class Arborist::Monitor::SNMP::Disk |
182 end # class Arborist::Monitor::SNMP::Disk |
167 |
183 |