Initial commit of chunker, a ruby module to aid with data blocks.
This commit is contained in:
parent
3654007126
commit
f4051c5a35
4 changed files with 451 additions and 0 deletions
7
chunker/README
Normal file
7
chunker/README
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
The DATA constant
|
||||||
|
|
||||||
|
The problem
|
||||||
|
|
||||||
|
A workaround
|
||||||
|
|
||||||
202
chunker/Rakefile
Normal file
202
chunker/Rakefile
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
#!/usr/bin/env rake
|
||||||
|
#
|
||||||
|
# Chunker Rakefile
|
||||||
|
#
|
||||||
|
|
||||||
|
require 'rubygems'
|
||||||
|
require 'pathname'
|
||||||
|
|
||||||
|
require 'rake'
|
||||||
|
require 'rake/gempackagetask'
|
||||||
|
require 'spec/rake/spectask'
|
||||||
|
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
### P A T H S
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
# Directory containing this Rakefile, expressed relative to the current
# working directory, and the spec/ and lib/ directories beneath it.
BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
SPECDIR = BASEDIR + 'spec'
LIBDIR  = BASEDIR + 'lib'

# Every *_spec.rb under SPECDIR, minus anything that sits inside a '.svn'
# directory. The test is done on the String form of the path, one component
# at a time: Pathname defines no =~ of its own, so matching the Pathname
# object directly either never excludes anything or raises NoMethodError,
# depending on the Ruby version.
SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).reject do |f|
  f.to_s.split( File::SEPARATOR ).include?( '.svn' )
end
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
### H E L P E R S
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
### Given a +file+ path, return the first capture group of the first line
### that matches +pattern+.
###
### Returns the string 'UNKNOWN' when no line matches, or when the matching
### line produced no first capture (easy to notice something is wrong.)
### Errors from opening +file+ are not rescued here.
###
def find_pattern( file, pattern )
  File.open( file ) do |f|
    f.each_line do |line|
      # Only the first matching line is considered. Returning from inside
      # the block still closes the file on the way out.
      return $1 || 'UNKNOWN' if line =~ pattern
    end
  end

  'UNKNOWN'
end
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
### P A C K A G E C O N S T A N T S
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
# Package identity. The version and revision are scraped out of
# lib/chunker.rb with find_pattern, so either one reads 'UNKNOWN' when the
# corresponding line cannot be found there.
#
# PKG_VERSION is assigned exactly once: a second, inline copy of the same
# scan would re-assign the constant and make Ruby warn about it.
PKG_NAME      = 'chunker'
PKG_VERSION   = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d\.\d(?:\/\d)?)['"]/ )
PKG_REVISION  = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )

# Names derived from the version.
RELEASE_NAME  = "REL #{PKG_VERSION}"
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
|
||||||
|
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
### T A S K S
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
task :default => [:test]


### Task: run rspec tests
###
desc "Run tests"
Spec::Rake::SpecTask.new('test') do |task|
  task.spec_files = FileList['spec/**/*.rb']
  task.spec_opts = %w{ -c -fs }
end


### Task: generate ctags
### This assumes exuberant ctags, since ctags 'native' doesn't support ruby anyway.
###
desc "Generate a ctags 'tags' file from Chunker source"
task :ctags do
  sh "ctags -R #{LIBDIR}"
end


### Task: Create gem from source
###
### The specification carries at least a name and a version, which the
### package task uses to name what it builds. The local is called +gemspec+
### (and the block parameter +s+) so that neither one shadows Kernel#gem or
### the other.
###
gemspec = Gem::Specification.new do |s|
  s.name         = PKG_NAME
  s.version      = PKG_VERSION
  s.platform     = Gem::Platform::RUBY
  s.summary      = 'Parser for __END__ data blocks'
  s.authors      = [ 'Mahlon E. Smith' ]
  s.email        = 'mahlon@martini.nu'
  s.require_path = 'lib'

  # Paths are handed over as plain Strings rather than Pathname objects.
  s.files        = Pathname.glob( LIBDIR + '**/*.rb' ).collect {|f| f.to_s }
  s.test_files   = SPEC_FILES.collect {|f| f.to_s }
end

Rake::GemPackageTask.new( gemspec ) do |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__END__
|
||||||
|
|
||||||
|
spec = Gem::Specification.new do |s|
|
||||||
|
s.platform = Gem::Platform::RUBY
|
||||||
|
s.summary = "Ruby based make-like utility."
|
||||||
|
s.name = 'rake'
|
||||||
|
s.version = PKG_VERSION
|
||||||
|
s.requirements << 'none'
|
||||||
|
s.require_path = 'lib'
|
||||||
|
s.autorequire = 'rake'
|
||||||
|
s.files = PKG_FILES
|
||||||
|
s.description = <<EOF
|
||||||
|
Rake is a Make-like program implemented in Ruby. Tasks
|
||||||
|
and dependencies are specified in standard Ruby syntax.
|
||||||
|
EOF
|
||||||
|
end
|
||||||
|
|
||||||
|
Rake::GemPackageTask.new(spec) do |pkg|
|
||||||
|
pkg.need_zip = true
|
||||||
|
pkg.need_tar = true
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
require 'rake/packagetask'
|
||||||
|
require 'rake/gempackagetask'
|
||||||
|
|
||||||
|
### Task: gem
|
||||||
|
gemspec = Gem::Specification.new do |gem|
|
||||||
|
pkg_build = get_svn_rev( BASEDIR ) || 0
|
||||||
|
|
||||||
|
gem.name = PKG_NAME
|
||||||
|
gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ]
|
||||||
|
|
||||||
|
gem.summary = "ThingFish - A highly-accessable network datastore"
|
||||||
|
gem.description = "ThingFish is a network-accessable, searchable, extensible " +
|
||||||
|
"datastore. It can be used to store chunks of data on the " +
|
||||||
|
"network in an application-independent way, associate the chunks " +
|
||||||
|
"with other chunks through metadata, and then search for the chunk " +
|
||||||
|
"you need later and fetch it again, all through a REST API over HTTP."
|
||||||
|
|
||||||
|
gem.authors = "Michael Granger and Mahlon E. Smith"
|
||||||
|
gem.email = "mgranger@laika.com, mahlon@laika.com"
|
||||||
|
gem.homepage = "http://opensource.laika.com/wiki/ThingFish"
|
||||||
|
|
||||||
|
gem.rubyforge_project = 'laika'
|
||||||
|
|
||||||
|
gem.has_rdoc = true
|
||||||
|
|
||||||
|
gem.files = RELEASE_FILES.
|
||||||
|
collect {|f| f.relative_path_from(BASEDIR).to_s }
|
||||||
|
gem.test_files = SPEC_FILES.
|
||||||
|
collect {|f| f.relative_path_from(BASEDIR).to_s }
|
||||||
|
gem.executables = BIN_FILES .
|
||||||
|
collect {|f| f.relative_path_from(BINDIR).to_s }
|
||||||
|
|
||||||
|
gem.add_dependency( 'uuidtools', '>= 1.0.0' )
|
||||||
|
gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
|
||||||
|
end
|
||||||
|
Rake::GemPackageTask.new( gemspec ) do |task|
|
||||||
|
task.gem_spec = gemspec
|
||||||
|
task.need_tar = false
|
||||||
|
task.need_tar_gz = true
|
||||||
|
task.need_tar_bz2 = true
|
||||||
|
task.need_zip = true
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
desc "Build the ThingFish gem and gems for all the standard plugins"
|
||||||
|
task :gems => [:gem] do
|
||||||
|
log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
|
||||||
|
PLUGINS.each do |plugindir|
|
||||||
|
log plugindir.basename
|
||||||
|
cp BASEDIR + 'LICENSE', plugindir
|
||||||
|
Dir.chdir( plugindir ) do
|
||||||
|
system 'rake', 'gem'
|
||||||
|
end
|
||||||
|
|
||||||
|
fail unless $?.success?
|
||||||
|
|
||||||
|
pkgdir = plugindir + 'pkg'
|
||||||
|
gems = Pathname.glob( pkgdir + '*.gem' )
|
||||||
|
cp gems, PKGDIR
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
### Task: install
|
||||||
|
task :install_gem => [:package] do
|
||||||
|
$stderr.puts
|
||||||
|
installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
|
||||||
|
installer.install
|
||||||
|
end
|
||||||
|
|
||||||
|
### Task: uninstall
|
||||||
|
task :uninstall_gem => [:clean] do
|
||||||
|
uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
|
||||||
|
uninstaller.uninstall
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
125
chunker/lib/chunker.rb
Normal file
125
chunker/lib/chunker.rb
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
#
|
||||||
|
# Chunker!
|
||||||
|
#
|
||||||
|
# Mahlon E. Smith <mahlon@martini.nu>
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
### Namespace for the datablock parser.
###
### Including this module in a class runs Chunker.included, which opens a
### source file located through caller() and hands it to a DataParser. The
### parser installs StringIO constants on the including class: DATA_END for
### the text between the __END__ line and the first __MARKER__ line, and
### DATA_<MARKER> for the text following each __MARKER__ line.
###
module Chunker

  require 'strscan'
  require 'stringio'

  # SVN Revision
  #
  SVNRev = %q$Rev$

  # SVN Id
  #
  SVNId = %q$Id$

  # Package version
  # (The Rakefile extracts this value with a regexp, so the assignment is
  # kept in the plain  VERSION = 'x.y'  form.)
  #
  VERSION = '0.1'


  ### Parser class for __END__ data blocks.
  ### Find each __MARKER__ within the __END__, and put each into a
  ### DATA_MARKER constant within the namespace that included us.
  ###
  class DataParser

    # The mark for a DATA block.
    #
    END_MARKER = /^__END__\r?\n/

    # The mark for a 'sub' block.
    #
    CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/


    ### Constructor: Given a +klass+ and an +io+ to the class file,
    ### extract the data blocks and install constants.
    ###
    ### +io+ is read to the end and then closed, also when reading fails.
    ### An +io+ that arrives closed is reopened from its #path when it has
    ### one; otherwise the read raises as usual for a closed stream.
    ###
    ### Input without an __END__ line yields an empty DATA_END, so content
    ### above (or in the absence of) the marker never ends up in a constant.
    ###
    def initialize( klass, io )
      if io.closed? && io.respond_to?( :path ) && io.path
        io = File.open( io.path, 'r' )
      end

      begin
        parts = io.read.split( END_MARKER, 2 )
      ensure
        io.close unless io.closed?
      end

      # With a limit of 2, split returns two elements exactly when the
      # marker was found (the second may be an empty string).
      end_string = parts.length == 2 ? parts.last : ''

      @klass   = klass
      @scanner = StringScanner.new( end_string )

      # Without sub blocks this puts the whole mess into DATA_END.
      extract_blocks
    end


    #########
    protected
    #########

    ### Walk the text after __END__ and install one IO constant per block
    ### on the including class. Text before the first marker goes to
    ### DATA_END; text after a __LABEL__ line goes to DATA_LABEL, up to the
    ### next marker or the end of the text. A marker with nothing after it
    ### still gets its (empty) constant.
    ###
    def extract_blocks
      label = 'END'

      while ( chunk = @scanner.scan_until( CHUNK_MARKER ) )
        # Grab the match data right away, before anything else can
        # disturb the scanner's last-match state.
        marker     = @scanner[0]
        next_label = @scanner[1]

        # +chunk+ runs up to and including the marker line; drop the marker.
        install_chunk( label, chunk[ 0, chunk.length - marker.length ] )
        label = next_label
      end

      # No additional blocks: whatever remains belongs to the last label.
      install_chunk( label, @scanner.rest )
    end


    ### Add the IO constant for +label+ to the class that included me.
    ###
    def install_chunk( label, data )
      @klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
    end


    ### Return the scanner position of the start of the last match.
    ### Not used by extract_blocks; kept so existing subclasses that call
    ### it keep working. Only meaningful right after a successful match.
    ###
    def next_position
      return @scanner.pos - @scanner[0].length
    end
  end


  ### Included hook: Find the file path for how we arrived here, and open
  ### it as an IO object.  __FILE__ won't work, so we find it via caller().
  ### Start parsing this file for data blocks.
  ###
  ### NOTE(review): caller(1).last is the oldest frame on the call stack.
  ### Whether that is always the file containing the `include` (rather than
  ### the file that started the program) should be confirmed.
  ###
  def self.included( klass )
    # klass.instance_eval{ __FILE__ }   awww, nope.

    # Strip the trailing ":<line>" / ":<line>:in ..." from the frame, and
    # nothing else, so a path that itself contains a colon survives.
    path = caller(1).last.sub( /:\d+(?::in .*)?$/, '' )

    # The parser closes the handle once it has read it.
    io = File.open( path, 'r' )
    DataParser.new( klass, io )
  end
end
|
||||||
|
|
||||||
117
chunker/spec/chunker_spec.rb
Normal file
117
chunker/spec/chunker_spec.rb
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
#!/usr/bin/env ruby

# Put the project's lib/ directory on the load path before anything is
# required. The entry is added as a String so that the include? guard can
# actually recognise it on a later pass.
BEGIN {
  require 'pathname'
  basedir = Pathname.new( __FILE__ ).dirname.parent
  libdir = ( basedir + "lib" ).to_s

  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
}

require 'chunker'
require 'rubygems'
require 'spec'

# Fixture: expected contents of the block directly after __END__.
ENDSTUFF = <<ENDSTUFF
Stuff within the end block.

Content of the END block
Content of the END block
Content of the END block
Content of the END block
ENDSTUFF

# Fixture: expected contents of the __HURGADURGA__ sub block (note the
# leading and trailing blank lines).
HURGADURGA = <<HURGADURGA

Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block

HURGADURGA

# Fixture: expected contents of the __HURRRRG__ sub block.
HURRRRG = <<HURRRRG
123123123 123123123 123123123
123123123 123123123 123123123
123123123 123123123 123123123
HURRRRG

# Fixture: expected contents of the __POOP__ sub block.
POOP = <<POOP
Content of the POOP block
POOP

# Fixture: a "source file" with an __END__ section and no sub blocks.
FILE_TEXT = <<EO_FILE_TEXT

This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.

__END__
#{ENDSTUFF}
EO_FILE_TEXT

# Fixture: a "source file" whose __END__ section contains three sub blocks.
FILE_TEXT_MULTIPLE = <<EO_FILE_TEXT

This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.

__END__
#{ENDSTUFF}
__POOP__
#{POOP}
__HURRRRG__
#{HURRRRG}
__HURGADURGA__
#{HURGADURGA}
EO_FILE_TEXT


describe Chunker::DataParser do

  it "doesn't include content above the __END__ marker" do
    klass = Class.new
    dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT_MULTIPLE ))
    dp.instance_variable_get( :@scanner ).string.
      should_not =~ /This is stuff we shouldn't see/
  end

  it "doesn't contain the __END__ marker itself" do
    klass = Class.new
    dp = Chunker::DataParser.new( klass, StringIO.new( FILE_TEXT ))
    dp.instance_variable_get( :@scanner ).string.should_not =~ /^__END__/
  end
end


describe 'A class that includes Chunker' do

  # Constant names are compared as Strings, so the expectations hold
  # whether Module#constants returns Strings or Symbols.

  it "has all content in DATA_END if there are no sub blocks" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
    klass = Class.new { include Chunker }
    constant_names = klass.constants.map {|c| c.to_s }

    constant_names.should_not include( 'DATA_POOP' )
    constant_names.should_not include( 'DATA_HURRRRG' )
    constant_names.should_not include( 'DATA_HURGADURGA' )
    constant_names.should include( 'DATA_END' )
  end

  it "separates data sub blocks into individual constants" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }
    constant_names = klass.constants.map {|c| c.to_s }

    constant_names.should include( 'DATA_END' )
    constant_names.should include( 'DATA_POOP' )
    constant_names.should include( 'DATA_HURRRRG' )
    constant_names.should include( 'DATA_HURGADURGA' )
  end

  it "has IO constants that contain the data block contents" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }

    # Each block is followed by one extra newline in the fixture file,
    # hence the chomp.
    klass.const_get( :DATA_END ).read.chomp.should == ENDSTUFF
    klass.const_get( :DATA_POOP ).read.chomp.should == POOP
    klass.const_get( :DATA_HURRRRG ).read.chomp.should == HURRRRG
    klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
  end
end
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue