Initial commit of chunker, a ruby module to aid with data blocks.

This commit is contained in:
mahlon 2008-11-08 18:59:05 +00:00
parent 3654007126
commit f4051c5a35
4 changed files with 451 additions and 0 deletions

7
chunker/README Normal file
View file

@ -0,0 +1,7 @@
The DATA constant
The problem
A workaround

202
chunker/Rakefile Normal file
View file

@ -0,0 +1,202 @@
#!/usr/bin/env rake
#
# Chunker Rakefile
#
require 'rubygems'
require 'pathname'
require 'rake'
require 'rake/gempackagetask'
require 'spec/rake/spectask'
######################################################################
### P A T H S
######################################################################
# Project root, expressed relative to the current working directory, and the
# spec/ and lib/ directories beneath it.
BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
SPECDIR = BASEDIR + 'spec'
LIBDIR = BASEDIR + 'lib'

# Every *_spec.rb file under SPECDIR, minus anything that lives inside a
# Subversion metadata directory.  Pathname does not implement =~, so each path
# is matched as a String, and the pattern accepts a .svn component at any
# depth rather than only at the very start of the path.
SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).
  reject {|f| f.to_s =~ %r{(?:\A|/)\.svn(?:/|\z)} }
######################################################################
### H E L P E R S
######################################################################
### Read +file+ line by line and return the first capture group of the first
### line that matches +pattern+.  If no line matches, or the matching line
### produced no capture, return the string 'UNKNOWN' instead (a value that is
### easy to notice when something is wrong).
###
def find_pattern( file, pattern )
  File.open( file ) do |io|
    io.each do |line|
      next unless line =~ pattern

      # Only the first matching line is ever considered.
      captured = $1
      return captured.is_a?( String ) ? captured : 'UNKNOWN'
    end
  end

  return 'UNKNOWN'
end
######################################################################
### P A C K A G E C O N S T A N T S
######################################################################
# Package name.
PKG_NAME = 'chunker'

# Version and SVN revision are scraped out of the library source with
# find_pattern, which yields 'UNKNOWN' when the pattern is not found.  Each
# constant is assigned exactly once; assigning PKG_VERSION a second time would
# only produce an "already initialized constant" warning.
PKG_VERSION = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d\.\d(?:\/\d)?)['"]/ )
PKG_REVISION = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )

# Names derived from the version.
RELEASE_NAME = "REL #{PKG_VERSION}"
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
######################################################################
### T A S K S
######################################################################
# Running rake with no arguments runs the test task.
task :default => :test

### Task: test
### Runs every Ruby file found under spec/ through the RSpec rake task,
### passing the -c and -fs options to the spec runner.
###
desc "Run tests"
Spec::Rake::SpecTask.new( 'test' ) do |spec|
  spec.spec_files = FileList[ 'spec/**/*.rb' ]
  spec.spec_opts = [ '-c', '-fs' ]
end

### Task: ctags
### Shells out to ctags, recursing through LIBDIR.  This assumes exuberant
### ctags, since ctags 'native' doesn't support ruby anyway.
###
desc "Generate a ctags 'tags' file from Chunker source"
task :ctags do
  sh "ctags -R #{LIBDIR}"
end
### Task: Create gem from source
### Describes the package with the constants declared in this Rakefile and
### hands the specification to the gem package task, which also builds zip and
### tar archives.
###
gemspec = Gem::Specification.new do |s|
  s.name = PKG_NAME

  # find_pattern reports 'UNKNOWN' when no version could be scraped; that is
  # not a legal gem version, so fall back to a placeholder that still lets the
  # Rakefile load and the other tasks run.
  s.version = PKG_VERSION == 'UNKNOWN' ? '0.0.0' : PKG_VERSION

  s.summary = 'A ruby module to aid with data blocks.'
  s.authors = [ 'Mahlon E. Smith' ]
  s.email = 'mahlon@martini.nu'
  s.require_path = 'lib'
  s.files = FileList[ 'README', 'Rakefile', 'lib/**/*.rb', 'spec/**/*.rb' ].to_a
  s.test_files = FileList[ 'spec/**/*_spec.rb' ].to_a
end

Rake::GemPackageTask.new( gemspec ) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end
__END__
spec = Gem::Specification.new do |s|
s.platform = Gem::Platform::RUBY
s.summary = "Ruby based make-like utility."
s.name = 'rake'
s.version = PKG_VERSION
s.requirements << 'none'
s.require_path = 'lib'
s.autorequire = 'rake'
s.files = PKG_FILES
s.description = <<EOF
Rake is a Make-like program implemented in Ruby. Tasks
and dependencies are specified in standard Ruby syntax.
EOF
end
Rake::GemPackageTask.new(spec) do |pkg|
pkg.need_zip = true
pkg.need_tar = true
end
require 'rake/packagetask'
require 'rake/gempackagetask'
### Task: gem
gemspec = Gem::Specification.new do |gem|
pkg_build = get_svn_rev( BASEDIR ) || 0
gem.name = PKG_NAME
gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ]
gem.summary = "ThingFish - A highly-accessable network datastore"
gem.description = "ThingFish is a network-accessable, searchable, extensible " +
"datastore. It can be used to store chunks of data on the " +
"network in an application-independent way, associate the chunks " +
"with other chunks through metadata, and then search for the chunk " +
"you need later and fetch it again, all through a REST API over HTTP."
gem.authors = "Michael Granger and Mahlon E. Smith"
gem.email = "mgranger@laika.com, mahlon@laika.com"
gem.homepage = "http://opensource.laika.com/wiki/ThingFish"
gem.rubyforge_project = 'laika'
gem.has_rdoc = true
gem.files = RELEASE_FILES.
collect {|f| f.relative_path_from(BASEDIR).to_s }
gem.test_files = SPEC_FILES.
collect {|f| f.relative_path_from(BASEDIR).to_s }
gem.executables = BIN_FILES .
collect {|f| f.relative_path_from(BINDIR).to_s }
gem.add_dependency( 'uuidtools', '>= 1.0.0' )
gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
end
Rake::GemPackageTask.new( gemspec ) do |task|
task.gem_spec = gemspec
task.need_tar = false
task.need_tar_gz = true
task.need_tar_bz2 = true
task.need_zip = true
end
desc "Build the ThingFish gem and gems for all the standard plugins"
task :gems => [:gem] do
log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
PLUGINS.each do |plugindir|
log plugindir.basename
cp BASEDIR + 'LICENSE', plugindir
Dir.chdir( plugindir ) do
system 'rake', 'gem'
end
fail unless $?.success?
pkgdir = plugindir + 'pkg'
gems = Pathname.glob( pkgdir + '*.gem' )
cp gems, PKGDIR
end
end
### Task: install
task :install_gem => [:package] do
$stderr.puts
installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
installer.install
end
### Task: uninstall
task :uninstall_gem => [:clean] do
uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
uninstaller.uninstall
end

125
chunker/lib/chunker.rb Normal file
View file

@ -0,0 +1,125 @@
#
# Chunker!
#
# Mahlon E. Smith <mahlon@martini.nu>
#
### Namespace for the datablock parser.
###
### Including Chunker in a class opens the file named by the outermost frame
### of the call stack, takes everything after its __END__ line, and installs
### that text as StringIO constants on the including class: DATA_END for the
### leading portion, plus one DATA_<NAME> constant for each __NAME__ marker
### line found inside it.
###
module Chunker

  require 'strscan'
  require 'stringio'

  # SVN Revision
  #
  SVNRev = %q$Rev$

  # SVN Id
  #
  SVNId = %q$Id$

  # Package version
  #
  VERSION = '0.1'


  ### Parser class for __END__ data blocks.
  ### Find each __MARKER__ within the __END__, and put each into a
  ### DATA_MARKER constant within the namespace that included us.
  ###
  class DataParser

    # The mark for a DATA block.
    #
    END_MARKER = /^__END__\r?\n/

    # The mark for a 'sub' block.
    #
    CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/


    ### Constructor: Given a +klass+ and an +io+ to the class file,
    ### extract the data blocks and install constants on +klass+.
    ###
    ### +io+ is read to its end and then closed.  A closed +io+ is re-opened
    ### from its path when it has one; otherwise an IOError is raised.
    ###
    def initialize( klass, io )
      @klass   = klass
      @scanner = StringScanner.new( read_data( io ) )

      extract_blocks
    end


    #########
    protected
    #########

    ### Read all of +io+ and return the text that follows the first __END__
    ### line.  Returns an empty string when the source has no __END__ line (or
    ### is empty), so code above the marker is never mistaken for data.
    ### The IO is closed afterwards, even if reading fails.
    ###
    def read_data( io )
      if io.closed?
        unless io.respond_to?( :path ) && io.path
          raise IOError, "cannot re-open a closed #{io.class} to read its data blocks"
        end
        io = File.open( io.path, 'r' )
      end

      # With a limit of 2 the split yields [ code, data ]; the second element
      # is missing whenever the marker is.
      return io.read.split( END_MARKER, 2 )[1] || ''
    ensure
      io.close unless io.closed?
    end


    ### Walk the data text and set one StringIO constant per block in the
    ### including class.  Text before the first marker (or all of it, when
    ### there are no markers) becomes DATA_END; text following a __NAME__
    ### marker becomes DATA_NAME.  A marker with nothing after it still gets a
    ### constant, holding an empty StringIO.
    ###
    def extract_blocks
      label = 'END'

      while label
        if chunk = @scanner.scan_until( CHUNK_MARKER )
          # The scanned text ends with the next marker line: strip it from
          # the data and remember its name for the following pass.
          data       = chunk[ 0, chunk.length - @scanner.matched.length ]
          next_label = @scanner[1]
        else
          # No additional markers, the remainder belongs to this block.
          data       = @scanner.rest
          next_label = nil
        end

        # Add the IO constant to the class that included me.
        @klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
        label = next_label
      end
    end
  end


  ### Included hook: Find the file path for how we arrived here, and open
  ### it as an IO object.  __FILE__ won't work, so we find it via caller().
  ### Start parsing this file for data blocks.
  ###
  def self.included( klass )
    # klass.instance_eval{ __FILE__ }   awww, nope.
    # The last caller entry is the outermost frame; everything from the first
    # colon onward (line number, method name) is stripped to leave the path.
    io = File.open( caller(1).last.sub(/:.*?$/, ''), 'r' )

    DataParser.new( klass, io )
  end
end

View file

@ -0,0 +1,117 @@
#!/usr/bin/env ruby
# Put the lib/ directory that sits beside this file's directory at the front
# of the load path before anything is required.  The entry is stored and
# compared as a String: $LOAD_PATH holds Strings, so checking it against a
# Pathname would never match and the guard below would be useless.
BEGIN {
  require 'pathname'
  basedir = Pathname.new( __FILE__ ).dirname.parent
  libdir = ( basedir + "lib" ).to_s
  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
}
require 'chunker'
require 'rubygems'
require 'spec'
# Fixture text for the specs below.  The first four constants hold the
# expected contents of individual data blocks; the last two interpolate them
# into complete fake source files.

# Expected contents of the __END__ block.
ENDSTUFF = <<ENDSTUFF
Stuff within the end block.
Content of the END block
Content of the END block
Content of the END block
Content of the END block
ENDSTUFF
# Expected contents of the __HURGADURGA__ sub block.
HURGADURGA = <<HURGADURGA
Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block
HURGADURGA
# Expected contents of the __HURRRRG__ sub block.
HURRRRG = <<HURRRRG
123123123 123123123 123123123
123123123 123123123 123123123
123123123 123123123 123123123
HURRRRG
# Expected contents of the __POOP__ sub block.
POOP = <<POOP
Content of the POOP block
POOP
# A fake source file: some text above an __END__ marker, followed by a single
# data block with no sub block markers.
FILE_TEXT = <<EO_FILE_TEXT
This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.
__END__
#{ENDSTUFF}
EO_FILE_TEXT
# A fake source file whose __END__ section is divided into three further sub
# blocks by __POOP__, __HURRRRG__ and __HURGADURGA__ marker lines.
FILE_TEXT_MULTIPLE = <<EO_FILE_TEXT
This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.
__END__
#{ENDSTUFF}
__POOP__
#{POOP}
__HURRRRG__
#{HURRRRG}
__HURGADURGA__
#{HURGADURGA}
EO_FILE_TEXT
# Specs for the parser itself: what it hands to its internal scanner after
# reading a source file.
describe Chunker::DataParser do

  it "doesn't include content above the __END__ marker" do
    parser = Chunker::DataParser.new( Class.new, StringIO.new( FILE_TEXT_MULTIPLE ) )
    scanned_text = parser.instance_variable_get( :@scanner ).string

    scanned_text.should_not =~ /This is stuff we shouldn't see/
  end

  it "doesn't contain the __END__ marker itself" do
    parser = Chunker::DataParser.new( Class.new, StringIO.new( FILE_TEXT ) )
    scanned_text = parser.instance_variable_get( :@scanner ).string

    scanned_text.should_not =~ /^__END__/
  end
end
# Specs for the include hook.  File.open is stubbed in every example so the
# hook parses fixture text instead of a real source file.
#
# Constant names are converted to Strings before being checked, because
# Module#constants returns Strings on Ruby 1.8 but Symbols on 1.9 and later.
describe 'A class that includes Chunker' do

  it "has all content in DATA_END if there are no sub blocks" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
    klass = Class.new { include Chunker }
    names = klass.constants.collect {|c| c.to_s }

    names.should_not include( 'DATA_POOP' )
    names.should_not include( 'DATA_HURRRRG' )
    names.should_not include( 'DATA_HURGADURGA' )
    names.should include( 'DATA_END' )
  end

  it "separates data sub blocks into individual constants" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }
    names = klass.constants.collect {|c| c.to_s }

    names.should include( 'DATA_END' )
    names.should include( 'DATA_POOP' )
    names.should include( 'DATA_HURRRRG' )
    names.should include( 'DATA_HURGADURGA' )
  end

  it "has IO constants that contain the data block contents" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }

    # Each fixture is followed by one extra newline inside the fake file,
    # hence the chomp before comparing.
    klass.const_get( :DATA_END ).read.chomp.should == ENDSTUFF
    klass.const_get( :DATA_POOP ).read.chomp.should == POOP
    klass.const_get( :DATA_HURRRRG ).read.chomp.should == HURRRRG
    klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
  end
end