# HG changeset patch
# User mahlon
# Date 1226170745 0
# Node ID 9e127bf6e84fd0858ea96278b91cc032a10abf29
# Parent 83c0eed6db1938b3219cdc3a1f6b2b7110775fd9
Initial commit of chunker, a ruby module to aid with data blocks.

diff -r 83c0eed6db19 -r 9e127bf6e84f chunker/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/README	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,7 @@
+
+The DATA constant
+
+The problem
+
+A workaround
+
diff -r 83c0eed6db19 -r 9e127bf6e84f chunker/Rakefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/Rakefile	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,202 @@
+#!/usr/bin/env rake
+#
+# Chunker Rakefile
+#
+
+require 'rubygems'
+require 'pathname'
+
+require 'rake'
+require 'rake/gempackagetask'
+require 'spec/rake/spectask'
+
+
+######################################################################
+### P A T H S
+######################################################################
+
+BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
+SPECDIR = BASEDIR + 'spec'
+LIBDIR = BASEDIR + 'lib'
+# Pathname has no =~ of its own, so match against the string form of each path.
+SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).reject {|f| f.to_s =~ %r{(^|/)\.svn/} }
+
+######################################################################
+### H E L P E R S
+######################################################################
+
+### Given a +file+ path, find the first captured match of +pattern+,
+### or the string 'UNKNOWN' if not found. (easy to notice something is wrong.)
+###
+def find_pattern( file, pattern )
+  File.open( file ) do |f|
+    f.each do |line|
+      # Returning from inside the block still closes the file.
+      return $1 if line =~ pattern
+    end
+  end
+  return 'UNKNOWN'
+end
+
+######################################################################
+### P A C K A G E C O N S T A N T S
+######################################################################
+
+PKG_NAME = 'chunker'
+PKG_VERSION = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d+\.\d+(?:\.\d+)?)['"]/ )
+PKG_REVISION = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )
+RELEASE_NAME = "REL #{PKG_VERSION}"
+PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
+
+
+######################################################################
+### T A S K S
+######################################################################
+
+task :default => [:test]
+
+
+### Task: run rspec tests
+###
+desc "Run tests"
+Spec::Rake::SpecTask.new('test') do |task|
+  task.spec_files = SPEC_FILES.collect {|f| f.to_s }
+  task.spec_opts = %w{ -c -fs }
+end
+
+
+### Task: generate ctags
+### This assumes exuberant ctags, since ctags 'native' doesn't support ruby anyway.
+###
+desc "Generate a ctags 'tags' file from Chunker source"
+task :ctags do
+  sh "ctags -R #{LIBDIR}"
+end
+
+
+### Task: Create gem from source
+### The name and version come from the package constants above.
+gemspec = Gem::Specification.new do |s|
+  s.name = PKG_NAME
+  s.version = PKG_VERSION
+  s.summary = 'A ruby module to aid with data blocks.'
+  s.authors = [ 'Mahlon E. Smith' ]
+  s.email = 'mahlon@martini.nu'
+  s.require_path = 'lib'
+  s.files = Pathname.glob( LIBDIR + '**/*.rb' ).collect {|f| f.to_s }
+  s.test_files = SPEC_FILES.collect {|f| f.to_s }
+end
+
+Rake::GemPackageTask.new( gemspec ) do |pkg|
+  pkg.need_zip = true
+  pkg.need_tar = true
+end
+
+
+
+__END__
+
+  spec = Gem::Specification.new do |s|
+    s.platform = Gem::Platform::RUBY
+    s.summary = "Ruby based make-like utility."
+    s.name = 'rake'
+    s.version = PKG_VERSION
+    s.requirements << 'none'
+    s.require_path = 'lib'
+    s.autorequire = 'rake'
+    s.files = PKG_FILES
+    s.description = <<EOF
+    Rake is a Make-like program implemented in Ruby. Tasks
+    and dependencies are specified in standard Ruby syntax.
+    EOF
+  end
+
+  Rake::GemPackageTask.new(spec) do |pkg|
+    pkg.need_zip = true
+    pkg.need_tar = true
+  end
+
+
+
+
+
+
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+
+### Task: gem
+gemspec = Gem::Specification.new do |gem|
+  pkg_build = get_svn_rev( BASEDIR ) || 0
+
+  gem.name = PKG_NAME
+  gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ]
+
+  gem.summary = "ThingFish - A highly-accessable network datastore"
+  gem.description = "ThingFish is a network-accessable, searchable, extensible " +
+    "datastore. It can be used to store chunks of data on the " +
+    "network in an application-independent way, associate the chunks " +
+    "with other chunks through metadata, and then search for the chunk " +
+    "you need later and fetch it again, all through a REST API over HTTP."
+
+  gem.authors = "Michael Granger and Mahlon E. Smith"
+  gem.email = "mgranger@laika.com, mahlon@laika.com"
+  gem.homepage = "http://opensource.laika.com/wiki/ThingFish"
+
+  gem.rubyforge_project = 'laika'
+
+  gem.has_rdoc = true
+
+  gem.files = RELEASE_FILES.
+    collect {|f| f.relative_path_from(BASEDIR).to_s }
+  gem.test_files = SPEC_FILES.
+    collect {|f| f.relative_path_from(BASEDIR).to_s }
+  gem.executables = BIN_FILES .
+    collect {|f| f.relative_path_from(BINDIR).to_s }
+
+  gem.add_dependency( 'uuidtools', '>= 1.0.0' )
+  gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
+end
+Rake::GemPackageTask.new( gemspec ) do |task|
+  task.gem_spec = gemspec
+  task.need_tar = false
+  task.need_tar_gz = true
+  task.need_tar_bz2 = true
+  task.need_zip = true
+end
+
+
+desc "Build the ThingFish gem and gems for all the standard plugins"
+task :gems => [:gem] do
+  log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
+  PLUGINS.each do |plugindir|
+    log plugindir.basename
+    cp BASEDIR + 'LICENSE', plugindir
+    Dir.chdir( plugindir ) do
+      system 'rake', 'gem'
+    end
+
+    fail unless $?.success?
+
+    pkgdir = plugindir + 'pkg'
+    gems = Pathname.glob( pkgdir + '*.gem' )
+    cp gems, PKGDIR
+  end
+end
+
+
+### Task: install
+task :install_gem => [:package] do
+  $stderr.puts
+  installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
+  installer.install
+end
+
+### Task: uninstall
+task :uninstall_gem => [:clean] do
+  uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
+  uninstaller.uninstall
+end
+
+
+
+
diff -r 83c0eed6db19 -r 9e127bf6e84f chunker/lib/chunker.rb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/lib/chunker.rb	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,114 @@
+#
+# Chunker!
+#
+# Mahlon E. Smith <mahlon@martini.nu>
+#
+
+
+### Namespace for the datablock parser.
+###
+module Chunker
+
+  require 'strscan'
+  require 'stringio'
+
+  # SVN Revision
+  #
+  SVNRev = %q$Rev$
+
+  # SVN Id
+  #
+  SVNId = %q$Id$
+
+  # Package version
+  #
+  VERSION = '0.1'
+
+
+  ### Parser class for __END__ data blocks.
+  ### Find each __MARKER__ within the __END__, and put each into a
+  ### DATA_MARKER constant within the namespace that included us.
+  ###
+  class DataParser
+
+    # The mark for a DATA block.
+    #
+    END_MARKER = /^__END__\r?\n/
+
+    # The mark for a 'sub' block.
+    #
+    CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/
+
+
+    ### Constructor: Given a +klass+ and an +io+ to the class file,
+    ### extract the data blocks and install constants. The +io+ is
+    ### always closed afterwards. If the file has no __END__ marker,
+    ### there is no data block and no constants are installed.
+    ###
+    def initialize( klass, io )
+      @klass = klass
+
+      begin
+        contents = io.read
+      ensure
+        io.close
+      end
+
+      # With a limit of 2, everything after the first marker is the second
+      # element, and it is nil when the marker never appears.
+      end_string = contents.split( END_MARKER, 2 )[1]
+      @scanner = StringScanner.new( end_string || '' )
+
+      self.extract_blocks unless end_string.nil?
+    end
+
+
+    #########
+    protected
+    #########
+
+    ### Walk the data from marker to marker, installing each chunk as a
+    ### StringIO constant in the including class. Text before the first
+    ### marker (or all of it, when there are no markers) becomes DATA_END;
+    ### text following a __LABEL__ marker becomes DATA_LABEL.
+    ###
+    def extract_blocks
+      label = 'END'
+
+      loop do
+        if chunk = @scanner.scan_until( CHUNK_MARKER )
+          # scan_until returns the text up to and including the marker,
+          # so trim the marker off and remember its label for the next pass.
+          data = chunk[ 0, chunk.length - @scanner.matched.length ]
+          next_label = @scanner[1]
+        else
+          # No additional markers, the remainder belongs to this label.
+          #
+          data = @scanner.rest
+          next_label = nil
+        end
+
+        # Add the IO constant to the class that included me.
+        #
+        @klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
+
+        break if next_label.nil?
+        label = next_label
+      end
+    end
+  end
+
+
+  ### Included hook: Find the path of the file that included us, and open
+  ### it as an IO object. __FILE__ won't work, so we find it via caller().
+  ### Start parsing this file for data blocks.
+  ###
+  def self.included( klass )
+    # klass.instance_eval{ __FILE__ } awww, nope.
+    # The first frame above this hook is the 'include' call site. Strip
+    # only the trailing ":line[:in ...]" so paths containing colons survive.
+    path = caller(1).first.sub( /:\d+(?::in .*)?$/, '' )
+    DataParser.new( klass, File.open( path, 'r' ) )
+  end
+end
+
diff -r 83c0eed6db19 -r 9e127bf6e84f chunker/spec/chunker_spec.rb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chunker/spec/chunker_spec.rb	Sat Nov 08 18:59:05 2008 +0000
@@ -0,0 +1,117 @@
+#!/usr/bin/env ruby
+
+BEGIN {
+  require 'pathname'
+  basedir = Pathname.new( __FILE__ ).dirname.parent
+  libdir = ( basedir + "lib" ).to_s
+
+  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+}
+
+require 'chunker'
+require 'rubygems'
+require 'spec'
+
+ENDSTUFF = <<ENDSTUFF
+Stuff within the end block.
+
+Content of the END block
+Content of the END block
+Content of the END block
+Content of the END block
+ENDSTUFF
+
+HURGADURGA = <<HURGADURGA
+
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+Content of the HURGADURGA block
+
+HURGADURGA
+
+HURRRRG = <<HURRRRG
+ 123123123 123123123 123123123
+ 123123123 123123123 123123123
+ 123123123 123123123 123123123
+HURRRRG
+
+POOP = <<POOP
+Content of the POOP block
+POOP
+
+FILE_TEXT = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+EO_FILE_TEXT
+
+FILE_TEXT_MULTIPLE = <<EO_FILE_TEXT
+
+This is stuff we shouldn't see or care about.
+You know, stuff like code, presumably.
+
+__END__
+#{ENDSTUFF}
+__POOP__
+#{POOP}
+__HURRRRG__
+#{HURRRRG}
+__HURGADURGA__
+#{HURGADURGA}
+EO_FILE_TEXT
+
+
+describe Chunker::DataParser do
+
+  it "doesn't include content above the __END__ marker" do
+    klass = Class.new
+    dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT_MULTIPLE ))
+    dp.instance_variable_get( :@scanner ).string.
+      should_not =~ /This is stuff we shouldn't see/
+  end
+
+  it "doesn't contain the __END__ marker itself" do
+    klass = Class.new
+    dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT ))
+    dp.instance_variable_get( :@scanner ).string.should_not =~ /^__END__/
+  end
+end
+
+
+# Constant names are compared as strings below because Module#constants
+# returns Strings on ruby 1.8 but Symbols on 1.9 and later.
+describe 'A class that includes Chunker' do
+  it "has all content in DATA_END if there are no sub blocks" do
+    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
+    klass = Class.new { include Chunker }
+
+    klass.constants.collect {|c| c.to_s }.should_not include( 'DATA_POOP' )
+    klass.constants.collect {|c| c.to_s }.should_not include( 'DATA_HURRRRG' )
+    klass.constants.collect {|c| c.to_s }.should_not include( 'DATA_HURGADURGA' )
+    klass.constants.collect {|c| c.to_s }.should include( 'DATA_END' )
+  end
+
+  it "separates data sub blocks into individual constants" do
+    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+    klass = Class.new { include Chunker }
+
+    klass.constants.collect {|c| c.to_s }.should include( 'DATA_END' )
+    klass.constants.collect {|c| c.to_s }.should include( 'DATA_POOP' )
+    klass.constants.collect {|c| c.to_s }.should include( 'DATA_HURRRRG' )
+    klass.constants.collect {|c| c.to_s }.should include( 'DATA_HURGADURGA' )
+  end
+
+  it "has IO constants that contain the data block contents" do
+    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
+    klass = Class.new { include Chunker }
+
+    klass.const_get( :DATA_END ).read.chomp.should == ENDSTUFF
+    klass.const_get( :DATA_POOP ).read.chomp.should == POOP
+    klass.const_get( :DATA_HURRRRG ).read.chomp.should == HURRRRG
+    klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
+  end
+end
+