Although faster, net-snmp2 leaks memory like a sieve once it is aimed at a few thousand nodes. Use 'netsnmp' instead.
author Mahlon E. Smith <mahlon@martini.nu>
Sun, 08 Apr 2018 18:11:49 -0700
changeset 14 d5cb8bd33170
parent 13 6723f3b07536
child 15 d4776d62b193
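Although faster, net-snmp2 leaks memory like a sieve once it is aimed at a few thousand nodes. Use 'netsnmp' instead. Additionally, fix the system identification string being stored in an instance variable shared across polling threads; it is now kept per-thread.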
.gems
README.md
Rakefile
lib/arborist/monitor/snmp.rb
lib/arborist/monitor/snmp/cpu.rb
lib/arborist/monitor/snmp/disk.rb
lib/arborist/monitor/snmp/memory.rb
lib/arborist/monitor/snmp/process.rb
--- a/.gems	Wed Apr 04 13:29:56 2018 -0700
+++ b/.gems	Sun Apr 08 18:11:49 2018 -0700
@@ -1,2 +1,2 @@
-net-snmp2 -v0.3.1
-
+netsnmp -v0.1.4
+xorcist -v1.1.1
--- a/README.md	Wed Apr 04 13:29:56 2018 -0700
+++ b/README.md	Sun Apr 08 18:11:49 2018 -0700
@@ -32,7 +32,6 @@
 -------------
 
   * Ruby 2.3 or better
-  * Net-SNMP libraries
 
 
 Installation
@@ -85,7 +84,7 @@
   * **timeout**: How long to wait for an SNMP response, in seconds?
   * **retries**: If an error occurs during SNMP communication, try again this many times before giving up.
   * **community**: The SNMP community name for reading data.
-  * **version**: The SNMP protocol version.  1 and 2c are supported.
+  * **version**: The SNMP protocol version.  v1, v2c, and v3 are supported.
   * **port**: The UDP port SNMP is listening on.
   * **batchsize**: How many hosts to gather SNMP data on simultaneously.
 
--- a/Rakefile	Wed Apr 04 13:29:56 2018 -0700
+++ b/Rakefile	Sun Apr 08 18:11:49 2018 -0700
@@ -45,7 +45,8 @@
 	s.required_ruby_version = '>= 2'
 
 	s.add_dependency 'arborist', "~> 0.1"
-	s.add_dependency 'net-snmp2', "~> 0.3"
+	s.add_dependency 'netsnmp', "~> 0.1"
+	s.add_dependency 'xorcist', "~> 1.1"
 end
 
 Gem::PackageTask.new( spec ) do |pkg|
--- a/lib/arborist/monitor/snmp.rb	Wed Apr 04 13:29:56 2018 -0700
+++ b/lib/arborist/monitor/snmp.rb	Sun Apr 08 18:11:49 2018 -0700
@@ -3,7 +3,7 @@
 #encoding: utf-8
 
 require 'arborist/monitor' unless defined?( Arborist::Monitor )
-require 'net-snmp2'
+require 'netsnmp'
 
 # SNMP checks for Arborist.  Requires an SNMP agent to be installed
 # on target machine, and the various "pieces" enabled for your platform.
@@ -38,13 +38,6 @@
 		setting :batchsize, default: 25
 	end
 
-	# Indicate to FFI that we're using threads.
-	Net::SNMP.thread_safe = true
-
-
-	# The system type, as advertised.
-	attr_reader :system
-
 	# The mapping of addresses back to node identifiers.
 	attr_reader :identifiers
 
@@ -86,7 +79,7 @@
 				thr = Thread.new do
 					config = self.identifiers[ host ].last || {}
 					opts = {
-						peername:  host,
+						host:      host,
 						port:      config[ 'port' ]      || Arborist::Monitor::SNMP.port,
 						version:   config[ 'version' ]   || Arborist::Monitor::SNMP.version,
 						community: config[ 'community' ] || Arborist::Monitor::SNMP.community,
@@ -94,22 +87,17 @@
 						retries:   config[ 'retries' ]   || Arborist::Monitor::SNMP.retries
 					}
 
-					snmp = Net::SNMP::Session.open( opts )
 					begin
-						@system = snmp.get( IDENTIFICATION_OID ).varbinds.first.value
-						yield( host, snmp )
+						NETSNMP::Client.new( opts ) do |snmp|
+							Thread.current[ :system ] = snmp.get( oid: IDENTIFICATION_OID )
+							yield( host, snmp )
+						end
 
-					rescue Net::SNMP::TimeoutError, Net::SNMP::Error => err
-						self.log.error "%s: %s %s" % [ host, err.message, snmp.error_message ]
+					rescue => err
+						self.log.error "%s: %s\n%s" % [ host, err.message, err.backtrace.join("\n  ") ]
 						self.results[ host ] = {
-							error: "%s" % [ snmp.error_message ]
+							error: "Exception (%s: %s)" % [ err.class.name, err.message ]
 						}
-					rescue => err
-						self.results[ host ] = {
-							error: "Uncaught exception. (%s: %s)" % [ err.class.name, err.message ]
-						}
-					ensure
-						snmp.close
 					end
 				end
 
@@ -135,6 +123,12 @@
 		@results     = {}
 	end
 
+
+	### Return the current SNMP connection system string.
+	def system
+		return Thread.current[ :system ]
+	end
+
 end # Arborist::Monitor::SNMP
 
 require 'arborist/monitor/snmp/cpu'
--- a/lib/arborist/monitor/snmp/cpu.rb	Wed Apr 04 13:29:56 2018 -0700
+++ b/lib/arborist/monitor/snmp/cpu.rb	Sun Apr 08 18:11:49 2018 -0700
@@ -66,18 +66,13 @@
 	protected
 	#########
 
-	### Return system CPU data.
-	###
-	def cpu( snmp )
-		return snmp.walk( OIDS[:cpu] )
-	end
-
-
 	### Find load data, add additional niceties for reporting.
 	###
 	def format_load( snmp )
 		info = { cpu: {}, load: {} }
-		cpus = self.cpu( snmp )
+		cpus = snmp.walk( oid: OIDS[:cpu] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
 
 		info[ :cpu ][ :count ] = cpus.size
 
@@ -91,20 +86,21 @@
 		# alert after X events" pragmas.
 		#
 		if self.system =~ /windows\s+/i
-			info[ :cpu ][ :usage ] = cpus.values.inject( :+ ).to_f / cpus.size
+			info[ :cpu ][ :usage ] = cpus.inject( :+ ).to_f / cpus.size
 			info[ :message ] = "System is %0.1f%% in use." % [ info[ :cpu ][ :usage ] ]
 
+
 		# UCDavis stuff is better for alerting only after there has been
 		# an extended load event.  Use the 5 minute average to avoid
 		# state changes on transient spikes.
 		#
 		else
-			snmp.walk( OIDS[:load] ).each_with_index do |(_, value), idx|
+			snmp.walk( oid: OIDS[:load] ).each_with_index do |(_, value), idx|
 				next unless LOADKEYS[ idx + 1 ]
 				info[ :load ][ LOADKEYS[idx + 1] ] = value.to_f
 			end
 
-			percentage = (( ( info[:load][ :load5 ] / cpus.size ) - 1 ) * 100 ).round( 1 )
+			percentage = (( ( info[:load][ :load5 ] / cpus.size) - 1 ) * 100 ).round( 1 )
 
 			if percentage < 0
 				info[ :message ] = "System is %0.1f%% idle." % [ percentage.abs ]
--- a/lib/arborist/monitor/snmp/disk.rb	Wed Apr 04 13:29:56 2018 -0700
+++ b/lib/arborist/monitor/snmp/disk.rb	Sun Apr 08 18:11:49 2018 -0700
@@ -106,6 +106,7 @@
 		excludes = self.format_mounts( config, 'exclude' ) || self.class.exclude
 
 		mounts.reject! do |path, percentage|
+			path = path.to_s
 			excludes.match( path ) || ( includes && ! includes.match( path ) )
 		end
 
@@ -151,18 +152,32 @@
 	### Fetch information for Windows systems.
 	###
 	def windows_disks( snmp )
-		raw = snmp.get_bulk([
+		oids = [
 			STORAGE_WINDOWS[:path],
 			STORAGE_WINDOWS[:type],
 			STORAGE_WINDOWS[:total],
 			STORAGE_WINDOWS[:used]
-		]).varbinds.map( &:value )
+		]
+
+		paths = snmp.walk( oid: oids[0] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		types = snmp.walk( oid: oids[1] ).each_with_object( [] ) do |(_, value), acc|
+			acc << WINDOWS_DEVICES.include?( value )
+		end
+		totals = snmp.walk( oid: oids[2] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		used = snmp.walk( oid: oids[3] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
 
 		disks = {}
-		raw.each_slice( 4 ) do |device|
-			next unless device[1].respond_to?( :oid ) && WINDOWS_DEVICES.include?( device[1].oid )
-			next if device[2].zero?
-			disks[ device[0] ] = (( device[3].to_f / device[2] ) * 100).round( 1 )
+		paths.each_with_index do |path, idx|
+			next if totals[ idx ].zero?
+			next unless types[ idx ]
+			disks[ path ] ||= {}
+			disks[ path ] = (( used[idx].to_f / totals[idx] ) * 100).round( 1 )
 		end
 
 		return disks
@@ -172,11 +187,16 @@
 	### Fetch information for Unix/MacOS systems.
 	###
 	def unix_disks( snmp )
-		raw = snmp.get_bulk([
-			STORAGE_NET_SNMP[:path],
-			STORAGE_NET_SNMP[:percent] ]).varbinds.map( &:value )
+		oids = [ STORAGE_NET_SNMP[:path], STORAGE_NET_SNMP[:percent] ]
+		paths = snmp.walk( oid: oids.first ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		capacities = snmp.walk( oid: oids.last ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
 
-		return Hash[ *raw ]
+		pairs = paths.zip( capacities )
+		return Hash[ *pairs.flatten ]
 	end
 
 end # class Arborist::Monitor::SNMP::Disk
--- a/lib/arborist/monitor/snmp/memory.rb	Wed Apr 04 13:29:56 2018 -0700
+++ b/lib/arborist/monitor/snmp/memory.rb	Sun Apr 08 18:11:49 2018 -0700
@@ -131,7 +131,7 @@
 		info  = { memory: {}, swap: {} }
 		mem_idx, swap_idx = nil
 
-		snmp.walk( MEMORY[:windows][:label] ).each_with_index do |(_, val), i|
+		snmp.walk( oid: MEMORY[:windows][:label] ).each_with_index do |(_, val), i|
 			mem_idx  = i + 1 if val =~ /physical memory/i
 			swap_idx = i + 1 if val =~ /virtual memory/i
 		end
@@ -148,8 +148,8 @@
 	###
 	def calc_memory( snmp, oids )
 		info = { usage: 0, available: 0 }
-		avail = snmp.get( oids[:avail] ).varbinds.first.value.to_f
-		total = snmp.get( oids[:total] ).varbinds.first.value.to_f
+		avail = snmp.get( oid: oids[:avail] ).to_f
+		total = snmp.get( oid: oids[:total] ).to_f
 		used  = total - avail
 
 		return info if avail.zero?
@@ -166,9 +166,9 @@
 		info = { usage: 0, available: 0 }
 		return info unless idx
 
-		units = snmp.get( MEMORY[:windows][:units] + ".#{idx}" ).varbinds.first.value
-		total = snmp.get( MEMORY[:windows][:total] + ".#{idx}" ).varbinds.first.value.to_f * units
-		used  = snmp.get( MEMORY[:windows][:used] + ".#{idx}" ).varbinds.first.value.to_f * units
+		units = snmp.get( oid: MEMORY[:windows][:units] + ".#{idx}" )
+		total = snmp.get( oid: MEMORY[:windows][:total] + ".#{idx}" ).to_f * units
+		used  = snmp.get( oid: MEMORY[:windows][:used] + ".#{idx}" ).to_f * units
 
 		info[ :usage ]     = (( used / total ) * 100 ).round( 2 )
 		info[ :available ] = (( total - used ) / 1024 / 1024 ).round( 2 )
--- a/lib/arborist/monitor/snmp/process.rb	Wed Apr 04 13:29:56 2018 -0700
+++ b/lib/arborist/monitor/snmp/process.rb	Sun Apr 08 18:11:49 2018 -0700
@@ -95,14 +95,24 @@
 	###
 	def get_windows( snmp )
 		oids = [ PROCESS[:windows][:path], PROCESS[:windows][:list], PROCESS[:windows][:args] ]
-		return snmp.walk( oids ).each_slice( 3 ). each_with_object( [] ) do |vals, acc|
-			path, process, args = vals[0][1], vals[1][1], vals[2][1]
-			next if path.empty?
 
-			process = "%s%s" % [ path, process ]
-			process << " %s" % [ args ] unless args.empty?
-			acc << process
+		paths = snmp.walk( oid: oids[0] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		procs = snmp.walk( oid: oids[1] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		args = snmp.walk( oid: oids[2] ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
 		end
+
+		return paths.zip( procs, args ).collect do |(path, process, arg)|
+			next unless path && process
+			next if path.empty?
+			path << process unless process.empty?
+			path << " %s" % [ arg.to_s ] if arg && ! arg.empty?
+			path
+		end.compact
 	end
 
 
@@ -110,13 +120,19 @@
 	###
 	def get_procs( snmp )
 		oids = [ PROCESS[:netsnmp][:list], PROCESS[:netsnmp][:args] ]
-		return snmp.walk( oids ).each_slice( 2 ).each_with_object( [] ) do |vals, acc|
-			process, args = vals[0][1], vals[1][1]
-			next if process.empty?
 
-			process << " %s" % [ args ] unless args.empty?
-			acc << process
+		procs = snmp.walk( oid: oids.first ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
+		end
+		args = snmp.walk( oid: oids.last ).each_with_object( [] ) do |(_, value), acc|
+			acc << value
 		end
+
+		return procs.zip( args ).collect do |(process, arg)|
+			next if process.empty?
+			process << " %s" % [ arg.to_s ] unless arg.empty?
+			process
+		end.compact
 	end
 
 end # class Arborist::Monitor::SNMP::Process