Initial commit of chunker, a ruby module to aid with data blocks. ruby-modules
authormahlon
Sat, 08 Nov 2008 18:59:05 +0000
branchruby-modules
changeset 1 9e127bf6e84f
parent 0 83c0eed6db19
child 2 e5c705047540
Initial commit of chunker, a ruby module to aid with data blocks.
chunker/README
chunker/Rakefile
chunker/lib/chunker.rb
chunker/spec/chunker_spec.rb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/README	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,7 @@
+
+The DATA constant
+
+The problem
+
+A workaround
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/Rakefile	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,202 @@
+#!/usr/bin/env rake
+#
+# Chunker Rakefile
+#
+
+require 'rubygems'
+require 'pathname'
+
+require 'rake'
+require 'rake/gempackagetask'
+require 'spec/rake/spectask'
+
+
+######################################################################
+### P A T H S
+######################################################################
+# All paths are relative to the working directory. SPEC_FILES omits anything inside a .svn directory.
+BASEDIR    = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
+SPECDIR    = BASEDIR + 'spec'
+LIBDIR     = BASEDIR + 'lib'
+SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).reject {|f| f.to_s =~ %r{(^|/)\.svn(/|$)} }
+
+######################################################################
+### H E L P E R S
+######################################################################
+
+### Given a +file+ path, find the first captured match of +pattern+,
+### or the string 'UNKNOWN' if not found. (easy to notice something is wrong.)
+###
+def find_pattern( file, pattern )
+	ver = nil
+	File.open( file ) do |f|
+		ver = f.each do |line|
+			break $1 if line =~ pattern
+		end
+	end
+	return ver.is_a?( String ) ? ver : 'UNKNOWN'
+end
+
+######################################################################
+### P A C K A G E   C O N S T A N T S
+######################################################################
+
+# Gem/package name.
+PKG_NAME      = 'chunker'
+
+# Version string, read from the VERSION constant in lib/chunker.rb
+# ('UNKNOWN' if it can't be found.)
+PKG_VERSION   = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d\.\d(?:\/\d)?)['"]/ )
+
+# SVN revision, read from the expanded SVNRev keyword in lib/chunker.rb
+# ('UNKNOWN' while the keyword is unexpanded.)
+PKG_REVISION  = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )
+
+# Release label and package base name, both derived from the version.
+RELEASE_NAME  = "REL #{PKG_VERSION}"
+PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
+
+
+######################################################################
+### T A S K S
+######################################################################
+
+task :default => [:test]
+
+
+### Task: run rspec tests
+###
+desc "Run tests"
+Spec::Rake::SpecTask.new('test') do |task|
+	task.spec_files = FileList['spec/**/*.rb']
+	task.spec_opts  = %w{ -c -fs }
+end
+
+
+### Task: generate ctags
+### This assumes exuberant ctags, since ctags 'native' doesn't support ruby anyway.
+###
+desc "Generate a ctags 'tags' file from Chunker source"
+task :ctags do
+	sh "ctags -R #{LIBDIR}"
+end
+
+
+### Task: Create gem from source
+###
+gem = Gem::Specification.new do |gem|
+end
+
+Rake::GemPackageTask.new( gem ) do |pkg|
+	pkg.need_zip = true
+	pkg.need_tar = true
+end
+
+
+
+__END__
+
+  spec = Gem::Specification.new do |s|
+    s.platform = Gem::Platform::RUBY
+    s.summary = "Ruby based make-like utility."
+    s.name = 'rake'
+    s.version = PKG_VERSION
+    s.requirements << 'none'
+    s.require_path = 'lib'
+    s.autorequire = 'rake'
+    s.files = PKG_FILES
+    s.description = <<EOF
+  Rake is a Make-like program implemented in Ruby. Tasks
+  and dependencies are specified in standard Ruby syntax.
+  EOF
+  end
+
+  Rake::GemPackageTask.new(spec) do |pkg|
+    pkg.need_zip = true
+    pkg.need_tar = true
+  end
+
+
+
+
+
+
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+
+### Task: gem
+gemspec = Gem::Specification.new do |gem|
+	pkg_build = get_svn_rev( BASEDIR ) || 0
+	
+	gem.name    	= PKG_NAME
+	gem.version 	= "%s.%s" % [ PKG_VERSION, pkg_build ]
+
+	gem.summary     = "ThingFish - A highly-accessable network datastore"
+	gem.description = "ThingFish is a network-accessable, searchable, extensible " +
+	                  "datastore. It can be used to store chunks of data on the " +
+	                  "network in an application-independent way, associate the chunks " +
+	                  "with other chunks through metadata, and then search for the chunk " +
+	                  "you need later and fetch it again, all through a REST API over HTTP."	
+
+	gem.authors  	= "Michael Granger and Mahlon E. Smith"
+	gem.email  		= "mgranger@laika.com, mahlon@laika.com"
+	gem.homepage 	= "http://opensource.laika.com/wiki/ThingFish"
+
+	gem.rubyforge_project = 'laika'
+
+	gem.has_rdoc 	= true
+
+	gem.files      	= RELEASE_FILES.
+		collect {|f| f.relative_path_from(BASEDIR).to_s }
+	gem.test_files 	= SPEC_FILES.
+		collect {|f| f.relative_path_from(BASEDIR).to_s }
+	gem.executables = BIN_FILES	.
+		collect {|f| f.relative_path_from(BINDIR).to_s }
+
+  	gem.add_dependency( 'uuidtools', '>= 1.0.0' )
+  	gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
+end
+Rake::GemPackageTask.new( gemspec ) do |task|
+	task.gem_spec = gemspec
+	task.need_tar = false
+	task.need_tar_gz = true
+	task.need_tar_bz2 = true
+	task.need_zip = true
+end
+
+
+desc "Build the ThingFish gem and gems for all the standard plugins"
+task :gems => [:gem] do
+	log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
+	PLUGINS.each do |plugindir|
+		log plugindir.basename
+		cp BASEDIR + 'LICENSE', plugindir
+		Dir.chdir( plugindir ) do
+			system 'rake', 'gem'
+		end
+		
+		fail unless $?.success?
+		
+		pkgdir = plugindir + 'pkg'
+		gems = Pathname.glob( pkgdir + '*.gem' )
+		cp gems, PKGDIR
+	end
+end
+
+
+### Task: install
+task :install_gem => [:package] do
+	$stderr.puts 
+	installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
+	installer.install
+end
+
+### Task: uninstall
+task :uninstall_gem => [:clean] do
+	uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
+	uninstaller.uninstall
+end
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/lib/chunker.rb	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,125 @@
+#
+# Chunker!
+#
+#	Mahlon E. Smith <mahlon@martini.nu>
+#
+
+
+### Namespace for the datablock parser.
+###
+module Chunker
+
+	require 'strscan'
+	require 'stringio'
+
+	# SVN Revision
+	#
+	SVNRev = %q$Rev$
+
+	# SVN Id
+	#
+	SVNId = %q$Id$
+
+	# Package version
+	#
+	VERSION = '0.1'
+
+
+	### Parser class for __END__ data blocks.
+	### Find each __MARKER__ within the __END__, and put each into a
+	### DATA_MARKER constant within the namespace that included us.
+	###
+	class DataParser
+
+		# The mark for a DATA block.
+		#
+		END_MARKER = /^__END__\r?\n/
+
+		# The mark for a 'sub' block.
+		#
+		CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/
+
+
+		### Constructor: Given a +klass+ and an +io+ to the class file,
+		### extract the data blocks and install constants.
+		###
+		def initialize( klass, io )
+			io.open if io.closed?
+			end_string = io.read.split( END_MARKER, 2 ).last
+
+			@klass   = klass
+			@scanner = StringScanner.new( end_string )
+			io.close
+
+			if @scanner.check_until( CHUNK_MARKER )
+				# put each chunk into its own constant
+				self.extract_blocks
+			else
+				# no sub blocks, put the whole mess into DATA_END
+				@klass.const_set( :DATA_END, StringIO.new( end_string ) )
+			end
+		end
+
+
+		#########
+		protected
+		#########
+
+		### Parse the current +io+ for data blocks, set contents to
+		### IO constants in the including class.
+		###
+		def extract_blocks
+			label = nil
+
+			while @scanner.scan_until( CHUNK_MARKER ) and ! @scanner.eos?
+				data = ''
+
+				# First pass, __END__ contents (until next marker, instead
+				# of entire data block.)
+				#
+				if label.nil?
+					label = 'END'
+					data  = @scanner.pre_match
+
+					@scanner.pos = self.next_position
+				else
+					label = @scanner[1]
+
+					if data = @scanner.scan_until( CHUNK_MARKER )
+						# Pull the next marker text out of the data, set up the next pass
+						#
+						data         = data[ 0, data.length - @scanner[0].length ]
+						@scanner.pos = self.next_position
+					else
+						# No additional blocks
+						#
+						data = @scanner.rest
+					end
+				end
+
+				# Add the IO constant to the class that included me.
+				#
+				@klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
+			end
+		end
+
+
+		### Return the next scanner position for searching.
+		###
+		def next_position
+			return @scanner.pos - @scanner[0].length
+		end
+	end
+
+
+	### Included hook: Find the file path for how we arrived here, and open
+	### it as an IO object. __FILE__ won't work, so we find it via caller().
+	### Start parsing this file for data blocks.
+	###
+    def self.included( klass )
+		# klass.instance_eval{ __FILE__ }   awww, nope.
+		io = File.open( caller(1).last.sub(/:.*?$/, ''), 'r' )
+		DataParser.new( klass, io )
+    end
+end
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/spec/chunker_spec.rb	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,117 @@
+#!/usr/bin/env ruby
+
+BEGIN {
+	require 'pathname'
+	basedir = Pathname.new( __FILE__ ).dirname.parent
+	libdir = basedir + "lib"
+
+	$LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+}
+
+require 'chunker'
+require 'rubygems'
+require 'spec'
+
+ENDSTUFF = <<ENDSTUFF
+Stuff within the end block.
+
+Content of the END block
+Content of the END block
+Content of the END block
+Content of the END block
+ENDSTUFF
+
+HURGADURGA = <<HURGADURGA
+
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+
+HURGADURGA
+
+HURRRRG = <<HURRRRG
+	123123123 123123123 123123123
+	123123123 123123123 123123123
+	123123123 123123123 123123123
+HURRRRG
+
+POOP = <<POOP
+Content of the POOP block
+POOP
+
+FILE_TEXT = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+EO_FILE_TEXT
+
+FILE_TEXT_MULTIPLE = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+__POOP__
+#{POOP}
+__HURRRRG__
+#{HURRRRG}
+__HURGADURGA__
+#{HURGADURGA}
+EO_FILE_TEXT
+
+
+describe Chunker::DataParser do
+
+	it "doesn't include content above the __END__ marker" do
+		klass = Class.new
+		dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT_MULTIPLE ))
+		dp.instance_variable_get( :@scanner ).string.
+			should_not =~ /This is stuff we shouldn't see/
+	end
+
+	it "doesn't contain the __END__ marker itself" do
+		klass = Class.new
+		dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT ))
+		dp.instance_variable_get( :@scanner ).string.should_not =~ /^__END__/
+	end
+end
+
+
+describe 'A class that includes Chunker' do
+
+	it "has all content in DATA_END if there are no sub blocks" do
+		File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
+		klass = Class.new { include Chunker }
+
+		klass.constants.should_not include( 'DATA_POOP' )
+		klass.constants.should_not include( 'DATA_HURRRRG' )
+		klass.constants.should_not include( 'DATA_HURGADURGA' )
+		klass.constants.should include( 'DATA_END' )
+	end
+
+	it "separates data sub blocks into individual constants" do
+		File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+		klass = Class.new { include Chunker }
+
+		klass.constants.should include( 'DATA_END' )
+		klass.constants.should include( 'DATA_POOP' )
+		klass.constants.should include( 'DATA_HURRRRG' )
+		klass.constants.should include( 'DATA_HURGADURGA' )
+	end
+
+	it "has IO constants that contain the data block contents" do
+		File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+		klass = Class.new { include Chunker }
+
+		klass.const_get( :DATA_END ).read.chomp.should        == ENDSTUFF
+		klass.const_get( :DATA_POOP ).read.chomp.should       == POOP
+		klass.const_get( :DATA_HURRRRG ).read.chomp.should    == HURRRRG
+		klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
+	end
+end
+