Initial commit of chunker, a Ruby module to aid with data blocks.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/README Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,7 @@
+
+The DATA constant
+
+The problem
+
+A workaround
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/Rakefile Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,202 @@
+#!/usr/bin/env rake
+#
+# Chunker Rakefile
+#
+
+require 'rubygems'
+require 'pathname'
+
+require 'rake'
+require 'rake/gempackagetask'
+require 'spec/rake/spectask'
+
+
+######################################################################
+### P A T H S
+######################################################################
+
+BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
+SPECDIR = BASEDIR + 'spec'
+LIBDIR = BASEDIR + 'lib'
+# Pathname does not define =~, so test the path string for a .svn component.
+SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).reject {|f| f.to_s =~ %r{(?:\A|/)\.svn(?:/|\z)} }
+######################################################################
+### H E L P E R S
+######################################################################
+
+### Given a +file+ path, find the first captured match of +pattern+,
+### or the string 'UNKNOWN' if not found. (easy to notice something is wrong.)
+###
+def find_pattern( file, pattern )
+ ver = nil
+ File.open( file ) do |f|
+ ver = f.each do |line|
+ break $1 if line =~ pattern
+ end
+ end
+ return ver.is_a?( String ) ? ver : 'UNKNOWN'
+end
+
+######################################################################
+### P A C K A G E C O N S T A N T S
+######################################################################
+
+# Package name, also the prefix of every generated package file.
+PKG_NAME = 'chunker'
+
+# Version string scraped from the library's VERSION constant, or 'UNKNOWN'.
+# Assigned exactly once through find_pattern; assigning the constant a second
+# time only produces an "already initialized constant" warning.
+PKG_VERSION = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d\.\d(?:\/\d)?)['"]/ )
+
+# Revision digits scraped from the library's SVNRev constant, or 'UNKNOWN'.
+PKG_REVISION = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )
+
+# Release label and package base name, both derived from the version.
+RELEASE_NAME = "REL #{PKG_VERSION}"
+PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
+
+
+######################################################################
+### T A S K S
+######################################################################
+
+task :default => [:test]
+
+
+### Task: run rspec tests
+###
+desc "Run tests"
+Spec::Rake::SpecTask.new('test') do |task|
+ task.spec_files = FileList['spec/**/*.rb']
+ task.spec_opts = %w{ -c -fs }
+end
+
+
+### Task: generate ctags
+### This assumes exuberant ctags, since ctags 'native' doesn't support ruby anyway.
+###
+desc "Generate a ctags 'tags' file from Chunker source"
+task :ctags do
+ sh "ctags -R #{LIBDIR}"
+end
+
+
+### Task: Create gem from source
+###
+gem = Gem::Specification.new do |gem|
+end
+
+Rake::GemPackageTask.new( gem ) do |pkg|
+ pkg.need_zip = true
+ pkg.need_tar = true
+end
+
+
+
+__END__
+
+ spec = Gem::Specification.new do |s|
+ s.platform = Gem::Platform::RUBY
+ s.summary = "Ruby based make-like utility."
+ s.name = 'rake'
+ s.version = PKG_VERSION
+ s.requirements << 'none'
+ s.require_path = 'lib'
+ s.autorequire = 'rake'
+ s.files = PKG_FILES
+ s.description = <<EOF
+ Rake is a Make-like program implemented in Ruby. Tasks
+ and dependencies are specified in standard Ruby syntax.
+ EOF
+ end
+
+ Rake::GemPackageTask.new(spec) do |pkg|
+ pkg.need_zip = true
+ pkg.need_tar = true
+ end
+
+
+
+
+
+
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+
+### Task: gem
+gemspec = Gem::Specification.new do |gem|
+ pkg_build = get_svn_rev( BASEDIR ) || 0
+
+ gem.name = PKG_NAME
+ gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ]
+
+ gem.summary = "ThingFish - A highly-accessable network datastore"
+ gem.description = "ThingFish is a network-accessable, searchable, extensible " +
+ "datastore. It can be used to store chunks of data on the " +
+ "network in an application-independent way, associate the chunks " +
+ "with other chunks through metadata, and then search for the chunk " +
+ "you need later and fetch it again, all through a REST API over HTTP."
+
+ gem.authors = "Michael Granger and Mahlon E. Smith"
+ gem.email = "mgranger@laika.com, mahlon@laika.com"
+ gem.homepage = "http://opensource.laika.com/wiki/ThingFish"
+
+ gem.rubyforge_project = 'laika'
+
+ gem.has_rdoc = true
+
+ gem.files = RELEASE_FILES.
+ collect {|f| f.relative_path_from(BASEDIR).to_s }
+ gem.test_files = SPEC_FILES.
+ collect {|f| f.relative_path_from(BASEDIR).to_s }
+ gem.executables = BIN_FILES .
+ collect {|f| f.relative_path_from(BINDIR).to_s }
+
+ gem.add_dependency( 'uuidtools', '>= 1.0.0' )
+ gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
+end
+Rake::GemPackageTask.new( gemspec ) do |task|
+ task.gem_spec = gemspec
+ task.need_tar = false
+ task.need_tar_gz = true
+ task.need_tar_bz2 = true
+ task.need_zip = true
+end
+
+
+desc "Build the ThingFish gem and gems for all the standard plugins"
+task :gems => [:gem] do
+ log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
+ PLUGINS.each do |plugindir|
+ log plugindir.basename
+ cp BASEDIR + 'LICENSE', plugindir
+ Dir.chdir( plugindir ) do
+ system 'rake', 'gem'
+ end
+
+ fail unless $?.success?
+
+ pkgdir = plugindir + 'pkg'
+ gems = Pathname.glob( pkgdir + '*.gem' )
+ cp gems, PKGDIR
+ end
+end
+
+
+### Task: install
+task :install_gem => [:package] do
+ $stderr.puts
+ installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
+ installer.install
+end
+
+### Task: uninstall
+task :uninstall_gem => [:clean] do
+ uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
+ uninstaller.uninstall
+end
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/lib/chunker.rb Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,125 @@
+#
+# Chunker!
+#
+# Mahlon E. Smith <mahlon@martini.nu>
+#
+
+
+### Namespace for the datablock parser.
+###
+module Chunker
+
+ require 'strscan'
+ require 'stringio'
+
+ # SVN Revision
+ #
+ SVNRev = %q$Rev$
+
+ # SVN Id
+ #
+ SVNId = %q$Id$
+
+ # Package version
+ #
+ VERSION = '0.1'
+
+
+ ### Parser class for __END__ data blocks.
+ ### Find each __MARKER__ within the __END__, and put each into a
+ ### DATA_MARKER constant within the namespace that included us.
+ ###
+ class DataParser
+
+ # The mark for a DATA block.
+ #
+ END_MARKER = /^__END__\r?\n/
+
+ # The mark for a 'sub' block.
+ #
+ CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/
+
+
+ ### Constructor: Given a +klass+ and an +io+ to the class file,
+ ### extract the data blocks and install constants.
+ ###
+ def initialize( klass, io )
+ io.open if io.closed?
+ end_string = io.read.split( END_MARKER, 2 ).last
+
+ @klass = klass
+ @scanner = StringScanner.new( end_string )
+ io.close
+
+ if @scanner.check_until( CHUNK_MARKER )
+ # put each chunk into its own constant
+ self.extract_blocks
+ else
+ # no sub blocks, put the whole mess into DATA_END
+ @klass.const_set( :DATA_END, StringIO.new( end_string ) )
+ end
+ end
+
+
+ #########
+ protected
+ #########
+
+ ### Parse the current +io+ for data blocks, set contents to
+ ### IO constants in the including class.
+ ###
+ def extract_blocks
+ label = nil
+
+ while @scanner.scan_until( CHUNK_MARKER ) and ! @scanner.eos?
+ data = ''
+
+ # First pass, __END__ contents (until next marker, instead
+ # of entire data block.)
+ #
+ if label.nil?
+ label = 'END'
+ data = @scanner.pre_match
+
+ @scanner.pos = self.next_position
+ else
+ label = @scanner[1]
+
+ if data = @scanner.scan_until( CHUNK_MARKER )
+ # Pull the next marker text out of the data, set up the next pass
+ #
+ data = data[ 0, data.length - @scanner[0].length ]
+ @scanner.pos = self.next_position
+ else
+ # No additional blocks
+ #
+ data = @scanner.rest
+ end
+ end
+
+ # Add the IO constant to the class that included me.
+ #
+ @klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
+ end
+ end
+
+
+ ### Return the next scanner position for searching.
+ ###
+ def next_position
+ return @scanner.pos - @scanner[0].length
+ end
+ end
+
+
+ ### Included hook: Find the file path for how we arrived here, and open
+ ### it as an IO object. __FILE__ won't work, so we find it via caller().
+ ### Start parsing this file for data blocks.
+ ###
+ def self.included( klass )
+ # klass.instance_eval{ __FILE__ } awww, nope.
+ io = File.open( caller(1).last.sub(/:.*?$/, ''), 'r' )
+ DataParser.new( klass, io )
+ end
+end
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/spec/chunker_spec.rb Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,117 @@
+#!/usr/bin/env ruby
+
+BEGIN {
+ require 'pathname'
+ basedir = Pathname.new( __FILE__ ).dirname.parent
+ libdir = basedir + "lib"
+
+ $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+}
+
+require 'chunker'
+require 'rubygems'
+require 'spec'
+
+ENDSTUFF = <<ENDSTUFF
+Stuff within the end block.
+
+Content of the END block
+Content of the END block
+Content of the END block
+Content of the END block
+ENDSTUFF
+
+HURGADURGA = <<HURGADURGA
+
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+
+HURGADURGA
+
+HURRRRG = <<HURRRRG
+ 123123123 123123123 123123123
+ 123123123 123123123 123123123
+ 123123123 123123123 123123123
+HURRRRG
+
+POOP = <<POOP
+Content of the POOP block
+POOP
+
+FILE_TEXT = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+EO_FILE_TEXT
+
+FILE_TEXT_MULTIPLE = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+__POOP__
+#{POOP}
+__HURRRRG__
+#{HURRRRG}
+__HURGADURGA__
+#{HURGADURGA}
+EO_FILE_TEXT
+
+
+describe Chunker::DataParser do
+
+ it "doesn't include content above the __END__ marker" do
+ klass = Class.new
+ dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT_MULTIPLE ))
+ dp.instance_variable_get( :@scanner ).string.
+ should_not =~ /This is stuff we shouldn't see/
+ end
+
+ it "doesn't contain the __END__ marker itself" do
+ klass = Class.new
+ dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT ))
+ dp.instance_variable_get( :@scanner ).string.should_not =~ /^__END__/
+ end
+end
+
+
+describe 'A class that includes Chunker' do
+
+ it "has all content in DATA_END if there are no sub blocks" do
+ File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
+ klass = Class.new { include Chunker }
+
+ klass.constants.should_not include( 'DATA_POOP' )
+ klass.constants.should_not include( 'DATA_HURRRRG' )
+ klass.constants.should_not include( 'DATA_HURGADURGA' )
+ klass.constants.should include( 'DATA_END' )
+ end
+
+ it "separates data sub blocks into individual constants" do
+ File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+ klass = Class.new { include Chunker }
+
+ klass.constants.should include( 'DATA_END' )
+ klass.constants.should include( 'DATA_POOP' )
+ klass.constants.should include( 'DATA_HURRRRG' )
+ klass.constants.should include( 'DATA_HURGADURGA' )
+ end
+
+ it "has IO constants that contain the data block contents" do
+ File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+ klass = Class.new { include Chunker }
+
+ klass.const_get( :DATA_END ).read.chomp.should == ENDSTUFF
+ klass.const_get( :DATA_POOP ).read.chomp.should == POOP
+ klass.const_get( :DATA_HURRRRG ).read.chomp.should == HURRRRG
+ klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
+ end
+end
+