# HG changeset patch # User Mahlon E. Smith # Date 1523236309 25200 # Node ID d5cb8bd33170c94f779d25325dc76057e6d1ac7b # Parent 6723f3b07536beadd9002dcdeccede0b4bb640ae Although faster, once aiming a few thousand nodes at net-snmp2 it leaks memory like a sieve. Use 'netsnmp' instead. Additionally, fix a shared thread variable. diff -r 6723f3b07536 -r d5cb8bd33170 .gems --- a/.gems Wed Apr 04 13:29:56 2018 -0700 +++ b/.gems Sun Apr 08 18:11:49 2018 -0700 @@ -1,2 +1,2 @@ -net-snmp2 -v0.3.1 - +netsnmp -v0.1.4 +xorcist -v1.1.1 diff -r 6723f3b07536 -r d5cb8bd33170 README.md --- a/README.md Wed Apr 04 13:29:56 2018 -0700 +++ b/README.md Sun Apr 08 18:11:49 2018 -0700 @@ -32,7 +32,6 @@ ------------- * Ruby 2.3 or better - * Net-SNMP libraries Installation @@ -85,7 +84,7 @@ * **timeout**: How long to wait for an SNMP response, in seconds? * **retries**: If an error occurs during SNMP communication, try again this many times before giving up. * **community**: The SNMP community name for reading data. - * **version**: The SNMP protocol version. 1 and 2c are supported. + * **version**: The SNMP protocol version. v1, v2c, and v3 are supported. * **port**: The UDP port SNMP is listening on. * **batchsize**: How many hosts to gather SNMP data on simultaneously. diff -r 6723f3b07536 -r d5cb8bd33170 Rakefile --- a/Rakefile Wed Apr 04 13:29:56 2018 -0700 +++ b/Rakefile Sun Apr 08 18:11:49 2018 -0700 @@ -45,7 +45,8 @@ s.required_ruby_version = '>= 2' s.add_dependency 'arborist', "~> 0.1" - s.add_dependency 'net-snmp2', "~> 0.3" + s.add_dependency 'netsnmp', "~> 0.1" + s.add_dependency 'xorcist', "~> 1.1" end Gem::PackageTask.new( spec ) do |pkg| diff -r 6723f3b07536 -r d5cb8bd33170 lib/arborist/monitor/snmp.rb --- a/lib/arborist/monitor/snmp.rb Wed Apr 04 13:29:56 2018 -0700 +++ b/lib/arborist/monitor/snmp.rb Sun Apr 08 18:11:49 2018 -0700 @@ -3,7 +3,7 @@ #encoding: utf-8 require 'arborist/monitor' unless defined?( Arborist::Monitor ) -require 'net-snmp2' +require 'netsnmp' # SNMP checks for Arborist. Requires an SNMP agent to be installed # on target machine, and the various "pieces" enabled for your platform. @@ -38,13 +38,6 @@ setting :batchsize, default: 25 end - # Indicate to FFI that we're using threads. - Net::SNMP.thread_safe = true - - - # The system type, as advertised. - attr_reader :system - # The mapping of addresses back to node identifiers. attr_reader :identifiers @@ -86,7 +79,7 @@ thr = Thread.new do config = self.identifiers[ host ].last || {} opts = { - peername: host, + host: host, port: config[ 'port' ] || Arborist::Monitor::SNMP.port, version: config[ 'version' ] || Arborist::Monitor::SNMP.version, community: config[ 'community' ] || Arborist::Monitor::SNMP.community, @@ -94,22 +87,17 @@ retries: config[ 'retries' ] || Arborist::Monitor::SNMP.retries } - snmp = Net::SNMP::Session.open( opts ) begin - @system = snmp.get( IDENTIFICATION_OID ).varbinds.first.value - yield( host, snmp ) + NETSNMP::Client.new( opts ) do |snmp| + Thread.current[ :system ] = snmp.get( oid: IDENTIFICATION_OID ) + yield( host, snmp ) + end - rescue Net::SNMP::TimeoutError, Net::SNMP::Error => err - self.log.error "%s: %s %s" % [ host, err.message, snmp.error_message ] + rescue => err + self.log.error "%s: %s\n%s" % [ host, err.message, err.backtrace.join("\n ") ] self.results[ host ] = { - error: "%s" % [ snmp.error_message ] + error: "Exception (%s: %s)" % [ err.class.name, err.message ] } - rescue => err - self.results[ host ] = { - error: "Uncaught exception. (%s: %s)" % [ err.class.name, err.message ] - } - ensure - snmp.close end end @@ -135,6 +123,12 @@ @results = {} end + + ### Return the current SNMP connection system string. + def system + return Thread.current[ :system ] + end + end # Arborist::Monitor::SNMP require 'arborist/monitor/snmp/cpu' diff -r 6723f3b07536 -r d5cb8bd33170 lib/arborist/monitor/snmp/cpu.rb --- a/lib/arborist/monitor/snmp/cpu.rb Wed Apr 04 13:29:56 2018 -0700 +++ b/lib/arborist/monitor/snmp/cpu.rb Sun Apr 08 18:11:49 2018 -0700 @@ -66,18 +66,13 @@ protected ######### - ### Return system CPU data. - ### - def cpu( snmp ) - return snmp.walk( OIDS[:cpu] ) - end - - ### Find load data, add additional niceties for reporting. ### def format_load( snmp ) info = { cpu: {}, load: {} } - cpus = self.cpu( snmp ) + cpus = snmp.walk( oid: OIDS[:cpu] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end info[ :cpu ][ :count ] = cpus.size @@ -91,20 +86,21 @@ # alert after X events" pragmas. # if self.system =~ /windows\s+/i - info[ :cpu ][ :usage ] = cpus.values.inject( :+ ).to_f / cpus.size + info[ :cpu ][ :usage ] = cpus.inject( :+ ).to_f / cpus.size info[ :message ] = "System is %0.1f%% in use." % [ info[ :cpu ][ :usage ] ] + # UCDavis stuff is better for alerting only after there has been # an extended load event. Use the 5 minute average to avoid # state changes on transient spikes. # else - snmp.walk( OIDS[:load] ).each_with_index do |(_, value), idx| + snmp.walk( oid: OIDS[:load] ).each_with_index do |(_, value), idx| next unless LOADKEYS[ idx + 1 ] info[ :load ][ LOADKEYS[idx + 1] ] = value.to_f end - percentage = (( ( info[:load][ :load5 ] / cpus.size ) - 1 ) * 100 ).round( 1 ) + percentage = (( ( info[:load][ :load5 ] / cpus.size) - 1 ) * 100 ).round( 1 ) if percentage < 0 info[ :message ] = "System is %0.1f%% idle." % [ percentage.abs ] diff -r 6723f3b07536 -r d5cb8bd33170 lib/arborist/monitor/snmp/disk.rb --- a/lib/arborist/monitor/snmp/disk.rb Wed Apr 04 13:29:56 2018 -0700 +++ b/lib/arborist/monitor/snmp/disk.rb Sun Apr 08 18:11:49 2018 -0700 @@ -106,6 +106,7 @@ excludes = self.format_mounts( config, 'exclude' ) || self.class.exclude mounts.reject! do |path, percentage| + path = path.to_s excludes.match( path ) || ( includes && ! includes.match( path ) ) end @@ -151,18 +152,32 @@ ### Fetch information for Windows systems. ### def windows_disks( snmp ) - raw = snmp.get_bulk([ + oids = [ STORAGE_WINDOWS[:path], STORAGE_WINDOWS[:type], STORAGE_WINDOWS[:total], STORAGE_WINDOWS[:used] - ]).varbinds.map( &:value ) + ] + + paths = snmp.walk( oid: oids[0] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + types = snmp.walk( oid: oids[1] ).each_with_object( [] ) do |(_, value), acc| + acc << WINDOWS_DEVICES.include?( value ) + end + totals = snmp.walk( oid: oids[2] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + used = snmp.walk( oid: oids[3] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end disks = {} - raw.each_slice( 4 ) do |device| - next unless device[1].respond_to?( :oid ) && WINDOWS_DEVICES.include?( device[1].oid ) - next if device[2].zero? - disks[ device[0] ] = (( device[3].to_f / device[2] ) * 100).round( 1 ) + paths.each_with_index do |path, idx| + next if totals[ idx ].zero? + next unless types[ idx ] + disks[ path ] ||= {} + disks[ path ] = (( used[idx].to_f / totals[idx] ) * 100).round( 1 ) end return disks @@ -172,11 +187,16 @@ ### Fetch information for Unix/MacOS systems. ### def unix_disks( snmp ) - raw = snmp.get_bulk([ - STORAGE_NET_SNMP[:path], - STORAGE_NET_SNMP[:percent] ]).varbinds.map( &:value ) + oids = [ STORAGE_NET_SNMP[:path], STORAGE_NET_SNMP[:percent] ] + paths = snmp.walk( oid: oids.first ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + capacities = snmp.walk( oid: oids.last ).each_with_object( [] ) do |(_, value), acc| + acc << value + end - return Hash[ *raw ] + pairs = paths.zip( capacities ) + return Hash[ *pairs.flatten ] end end # class Arborist::Monitor::SNMP::Disk diff -r 6723f3b07536 -r d5cb8bd33170 lib/arborist/monitor/snmp/memory.rb --- a/lib/arborist/monitor/snmp/memory.rb Wed Apr 04 13:29:56 2018 -0700 +++ b/lib/arborist/monitor/snmp/memory.rb Sun Apr 08 18:11:49 2018 -0700 @@ -131,7 +131,7 @@ info = { memory: {}, swap: {} } mem_idx, swap_idx = nil - snmp.walk( MEMORY[:windows][:label] ).each_with_index do |(_, val), i| + snmp.walk( oid: MEMORY[:windows][:label] ).each_with_index do |(_, val), i| mem_idx = i + 1 if val =~ /physical memory/i swap_idx = i + 1 if val =~ /virtual memory/i end @@ -148,8 +148,8 @@ ### def calc_memory( snmp, oids ) info = { usage: 0, available: 0 } - avail = snmp.get( oids[:avail] ).varbinds.first.value.to_f - total = snmp.get( oids[:total] ).varbinds.first.value.to_f + avail = snmp.get( oid: oids[:avail] ).to_f + total = snmp.get( oid: oids[:total] ).to_f used = total - avail return info if avail.zero? @@ -166,9 +166,9 @@ info = { usage: 0, available: 0 } return info unless idx - units = snmp.get( MEMORY[:windows][:units] + ".#{idx}" ).varbinds.first.value - total = snmp.get( MEMORY[:windows][:total] + ".#{idx}" ).varbinds.first.value.to_f * units - used = snmp.get( MEMORY[:windows][:used] + ".#{idx}" ).varbinds.first.value.to_f * units + units = snmp.get( oid: MEMORY[:windows][:units] + ".#{idx}" ) + total = snmp.get( oid: MEMORY[:windows][:total] + ".#{idx}" ).to_f * units + used = snmp.get( oid: MEMORY[:windows][:used] + ".#{idx}" ).to_f * units info[ :usage ] = (( used / total ) * 100 ).round( 2 ) info[ :available ] = (( total - used ) / 1024 / 1024 ).round( 2 ) diff -r 6723f3b07536 -r d5cb8bd33170 lib/arborist/monitor/snmp/process.rb --- a/lib/arborist/monitor/snmp/process.rb Wed Apr 04 13:29:56 2018 -0700 +++ b/lib/arborist/monitor/snmp/process.rb Sun Apr 08 18:11:49 2018 -0700 @@ -95,14 +95,24 @@ ### def get_windows( snmp ) oids = [ PROCESS[:windows][:path], PROCESS[:windows][:list], PROCESS[:windows][:args] ] - return snmp.walk( oids ).each_slice( 3 ). each_with_object( [] ) do |vals, acc| - path, process, args = vals[0][1], vals[1][1], vals[2][1] - next if path.empty? - process = "%s%s" % [ path, process ] - process << " %s" % [ args ] unless args.empty? - acc << process + paths = snmp.walk( oid: oids[0] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + procs = snmp.walk( oid: oids[1] ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + args = snmp.walk( oid: oids[2] ).each_with_object( [] ) do |(_, value), acc| + acc << value end + + return paths.zip( procs, args ).collect do |(path, process, arg)| + next unless path && process + next if path.empty? + path << process unless process.empty? + path << " %s" % [ arg.to_s ] if arg && ! arg.empty? + path + end.compact end @@ -110,13 +120,19 @@ ### def get_procs( snmp ) oids = [ PROCESS[:netsnmp][:list], PROCESS[:netsnmp][:args] ] - return snmp.walk( oids ).each_slice( 2 ).each_with_object( [] ) do |vals, acc| - process, args = vals[0][1], vals[1][1] - next if process.empty? - process << " %s" % [ args ] unless args.empty? - acc << process + procs = snmp.walk( oid: oids.first ).each_with_object( [] ) do |(_, value), acc| + acc << value + end + args = snmp.walk( oid: oids.last ).each_with_object( [] ) do |(_, value), acc| + acc << value end + + return procs.zip( args ).collect do |(process, arg)| + next if process.empty? + process << " %s" % [ arg.to_s ] unless arg.empty? + process + end.compact end end # class Arborist::Monitor::SNMP::Process