Initial commit of chunker, a ruby module to aid with data blocks.
This commit is contained in:
parent
3654007126
commit
f4051c5a35
4 changed files with 451 additions and 0 deletions
7
chunker/README
Normal file
7
chunker/README
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
|
||||
The DATA constant
|
||||
|
||||
The problem
|
||||
|
||||
A workaround
|
||||
|
||||
202
chunker/Rakefile
Normal file
202
chunker/Rakefile
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
#!/usr/bin/env rake
|
||||
#
|
||||
# Chunker Rakefile
|
||||
#
|
||||
|
||||
require 'rubygems'
|
||||
require 'pathname'
|
||||
|
||||
require 'rake'
|
||||
require 'rake/gempackagetask'
|
||||
require 'spec/rake/spectask'
|
||||
|
||||
|
||||
######################################################################
|
||||
### P A T H S
|
||||
######################################################################
|
||||
|
||||
# Project directories, expressed relative to the current working directory.
BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
SPECDIR = BASEDIR + 'spec'
LIBDIR  = BASEDIR + 'lib'

# Every *_spec.rb below SPECDIR, minus anything inside a .svn directory.
# The test runs against the path *string*: Pathname has no =~ of its own, so
# matching the Pathname object directly never filtered anything.
SPEC_FILES = Pathname.glob( SPECDIR + '**/*_spec.rb' ).
  reject {|f| f.to_s =~ %r{(?:\A|/)\.svn(?:/|\z)} }
|
||||
|
||||
######################################################################
|
||||
### H E L P E R S
|
||||
######################################################################
|
||||
|
||||
### Scan the lines of +file+ (a path String or Pathname) for the first line
### matching +pattern+, and return that match's first capture group.
###
### Returns the string 'UNKNOWN' when no line matches, or when the first
### matching line yields no first capture -- a value that is easy to spot
### when something is wrong. Only the first matching line is considered.
###
def find_pattern( file, pattern )
  File.open( file ) do |io|
    io.each_line do |line|
      next unless line =~ pattern

      # Stop at the first matching line, even if the capture is empty.
      return Regexp.last_match( 1 ) || 'UNKNOWN'
    end
  end

  'UNKNOWN'
end
|
||||
|
||||
######################################################################
|
||||
### P A C K A G E C O N S T A N T S
|
||||
######################################################################
|
||||
|
||||
# Name of the package.
PKG_NAME      = 'chunker'

# Version and SVN revision, both read out of the library source. Each is
# assigned exactly once; find_pattern yields 'UNKNOWN' when the pattern
# cannot be found.
PKG_VERSION   = find_pattern( LIBDIR + 'chunker.rb', /VERSION = ['"](\d\.\d(?:\/\d)?)['"]/ )
PKG_REVISION  = find_pattern( LIBDIR + 'chunker.rb', /SVNRev = .+Rev: (\d+)/ )

# Names derived from the version.
RELEASE_NAME  = "REL #{PKG_VERSION}"
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
|
||||
|
||||
|
||||
######################################################################
|
||||
### T A S K S
|
||||
######################################################################
|
||||
|
||||
# Running plain `rake` runs the test task.
task :default => [ :test ]


### Task: test
### Run every ruby file under spec/ through rspec, using the -c and -fs options.
###
desc "Run tests"
Spec::Rake::SpecTask.new( 'test' ) do |spec_task|
  spec_task.spec_opts  = [ '-c', '-fs' ]
  spec_task.spec_files = FileList[ 'spec/**/*.rb' ]
end


### Task: ctags
### Shell out to ctags, recursing over the library directory.
### This assumes exuberant ctags, since ctags 'native' doesn't support ruby anyway.
###
desc "Generate a ctags 'tags' file from Chunker source"
task :ctags do
  sh "ctags -R #{LIBDIR}"
end


### Task: packaging
### Builds a gem specification (no attributes are set in its block) and
### hands it to the gem package task, requesting tar and zip archives too.
###
gemspec = Gem::Specification.new do |spec|
end

Rake::GemPackageTask.new( gemspec ) do |package|
  package.need_tar = true
  package.need_zip = true
end
|
||||
|
||||
|
||||
|
||||
__END__
|
||||
|
||||
spec = Gem::Specification.new do |s|
|
||||
s.platform = Gem::Platform::RUBY
|
||||
s.summary = "Ruby based make-like utility."
|
||||
s.name = 'rake'
|
||||
s.version = PKG_VERSION
|
||||
s.requirements << 'none'
|
||||
s.require_path = 'lib'
|
||||
s.autorequire = 'rake'
|
||||
s.files = PKG_FILES
|
||||
s.description = <<EOF
|
||||
Rake is a Make-like program implemented in Ruby. Tasks
|
||||
and dependencies are specified in standard Ruby syntax.
|
||||
EOF
|
||||
end
|
||||
|
||||
Rake::GemPackageTask.new(spec) do |pkg|
|
||||
pkg.need_zip = true
|
||||
pkg.need_tar = true
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
require 'rake/packagetask'
|
||||
require 'rake/gempackagetask'
|
||||
|
||||
### Task: gem
|
||||
gemspec = Gem::Specification.new do |gem|
|
||||
pkg_build = get_svn_rev( BASEDIR ) || 0
|
||||
|
||||
gem.name = PKG_NAME
|
||||
gem.version = "%s.%s" % [ PKG_VERSION, pkg_build ]
|
||||
|
||||
gem.summary = "ThingFish - A highly-accessable network datastore"
|
||||
gem.description = "ThingFish is a network-accessable, searchable, extensible " +
|
||||
"datastore. It can be used to store chunks of data on the " +
|
||||
"network in an application-independent way, associate the chunks " +
|
||||
"with other chunks through metadata, and then search for the chunk " +
|
||||
"you need later and fetch it again, all through a REST API over HTTP."
|
||||
|
||||
gem.authors = "Michael Granger and Mahlon E. Smith"
|
||||
gem.email = "mgranger@laika.com, mahlon@laika.com"
|
||||
gem.homepage = "http://opensource.laika.com/wiki/ThingFish"
|
||||
|
||||
gem.rubyforge_project = 'laika'
|
||||
|
||||
gem.has_rdoc = true
|
||||
|
||||
gem.files = RELEASE_FILES.
|
||||
collect {|f| f.relative_path_from(BASEDIR).to_s }
|
||||
gem.test_files = SPEC_FILES.
|
||||
collect {|f| f.relative_path_from(BASEDIR).to_s }
|
||||
gem.executables = BIN_FILES .
|
||||
collect {|f| f.relative_path_from(BINDIR).to_s }
|
||||
|
||||
gem.add_dependency( 'uuidtools', '>= 1.0.0' )
|
||||
gem.add_dependency( 'pluginfactory', '>= 1.0.3' )
|
||||
end
|
||||
Rake::GemPackageTask.new( gemspec ) do |task|
|
||||
task.gem_spec = gemspec
|
||||
task.need_tar = false
|
||||
task.need_tar_gz = true
|
||||
task.need_tar_bz2 = true
|
||||
task.need_zip = true
|
||||
end
|
||||
|
||||
|
||||
desc "Build the ThingFish gem and gems for all the standard plugins"
|
||||
task :gems => [:gem] do
|
||||
log "Building gems for plugins in: %s" % [PLUGINS.join(', ')]
|
||||
PLUGINS.each do |plugindir|
|
||||
log plugindir.basename
|
||||
cp BASEDIR + 'LICENSE', plugindir
|
||||
Dir.chdir( plugindir ) do
|
||||
system 'rake', 'gem'
|
||||
end
|
||||
|
||||
fail unless $?.success?
|
||||
|
||||
pkgdir = plugindir + 'pkg'
|
||||
gems = Pathname.glob( pkgdir + '*.gem' )
|
||||
cp gems, PKGDIR
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
### Task: install
|
||||
task :install_gem => [:package] do
|
||||
$stderr.puts
|
||||
installer = Gem::Installer.new( %{pkg/#{PKG_FILE_NAME}.gem} )
|
||||
installer.install
|
||||
end
|
||||
|
||||
### Task: uninstall
|
||||
task :uninstall_gem => [:clean] do
|
||||
uninstaller = Gem::Uninstaller.new( PKG_FILE_NAME )
|
||||
uninstaller.uninstall
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
125
chunker/lib/chunker.rb
Normal file
125
chunker/lib/chunker.rb
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
#
|
||||
# Chunker!
|
||||
#
|
||||
# Mahlon E. Smith <mahlon@martini.nu>
|
||||
#
|
||||
|
||||
|
||||
### Namespace for the datablock parser.
###
### Including this module in a class opens a source file located via
### caller(), and installs the text that follows its __END__ marker as
### StringIO constants on the including class: DATA_END, plus one
### DATA_<LABEL> constant for every __LABEL__ marker found in that text.
###
module Chunker

  require 'strscan'
  require 'stringio'

  # SVN Revision
  #
  SVNRev = %q$Rev$

  # SVN Id
  #
  SVNId = %q$Id$

  # Package version
  #
  VERSION = '0.1'


  ### Parser class for __END__ data blocks.
  ### Find each __MARKER__ within the __END__, and put each into a
  ### DATA_MARKER constant within the namespace that included us.
  ###
  class DataParser

    # The mark for a DATA block.
    #
    END_MARKER = /^__END__\r?\n/

    # The mark for a 'sub' block.  The captured group is the block label.
    #
    CHUNK_MARKER = /^__([A-Z\_0-9]+)__\r?\n/


    ### Constructor: Given a +klass+ and an +io+ to the class file,
    ### extract the data blocks and install constants on +klass+.
    ###
    ### +io+ must be open for reading (an IOError is raised by the read
    ### otherwise); it is always closed before this method returns.
    ###
    ### If the source has no __END__ marker there is no data section, so
    ### no constants are installed and the scanner holds an empty string.
    ###
    def initialize( klass, io )
      @klass = klass

      contents = begin
        io.read
      ensure
        # Close even when the read raised; never close twice.
        io.close unless io.closed?
      end

      # With a limit of 2, split yields exactly two parts only when the
      # marker was present (the second part may be empty).
      parts      = contents.split( END_MARKER, 2 )
      found_end  = ( parts.length == 2 )
      end_string = found_end ? parts.last : ''

      @scanner = StringScanner.new( end_string )
      self.extract_blocks if found_end
    end


    #########
    protected
    #########

    ### Walk the data section, installing one StringIO constant per block
    ### in the including class.
    ###
    ### Text before the first marker becomes DATA_END; text after a
    ### __LABEL__ marker, up to the next marker or the end of the data,
    ### becomes DATA_LABEL.  With no markers at all, the whole data
    ### section ends up in DATA_END.  A marker with nothing after it
    ### produces an empty block.
    ###
    def extract_blocks
      label = 'END'

      while ( chunk = @scanner.scan_until( CHUNK_MARKER ) )
        next_label    = @scanner[1]
        marker_length = @scanner.matched.length

        # The scanned chunk ends with the marker line that opens the
        # *next* block; strip it before storing the current block.
        self.install_block( label, chunk[ 0, chunk.length - marker_length ] )
        label = next_label
      end

      # Whatever follows the final marker belongs to the last label.
      self.install_block( label, @scanner.rest )
    end


    ### Set the constant DATA_<label> on the target class to a StringIO
    ### wrapping +data+.
    ###
    def install_block( label, data )
      @klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
    end
  end


  ### Included hook: Find the file path for how we arrived here, and open
  ### it as an IO object.  __FILE__ won't work, so we find it via caller().
  ### Start parsing this file for data blocks.
  ###
  ### The path is taken from the *last* entry of caller(1), with everything
  ### from the first colon onwards removed.
  ### NOTE(review): the last entry is the outermost frame, presumably the
  ### script ruby was started with rather than the file containing the
  ### include -- confirm that this is intended.
  ###
  def self.included( klass )
    # klass.instance_eval{ __FILE__ }   awww, nope.
    io = File.open( caller(1).last.sub(/:.*?$/, ''), 'r' )
    DataParser.new( klass, io )
  end
end
|
||||
|
||||
117
chunker/spec/chunker_spec.rb
Normal file
117
chunker/spec/chunker_spec.rb
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Put the project's lib/ directory at the front of the load path before
# anything else in this file runs.
BEGIN {
  require 'pathname'
  basedir = Pathname.new( __FILE__ ).dirname.parent

  # $LOAD_PATH holds Strings, so compare and insert the String form; a
  # Pathname would never equal an existing entry.
  libdir = ( basedir + "lib" ).to_s

  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
}
|
||||
|
||||
require 'chunker'
|
||||
require 'rubygems'
|
||||
require 'spec'
|
||||
|
||||
# Expected contents of the main (END) data block.
ENDSTUFF = <<END_OF_ENDSTUFF
Stuff within the end block.

Content of the END block
Content of the END block
Content of the END block
Content of the END block
END_OF_ENDSTUFF

# Expected contents of the HURGADURGA sub block (padded with blank lines).
HURGADURGA = <<END_OF_HURGADURGA

Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block
Content of the HURGADURGA block

END_OF_HURGADURGA

# Expected contents of the HURRRRG sub block.
HURRRRG = <<END_OF_HURRRRG
123123123 123123123 123123123
123123123 123123123 123123123
123123123 123123123 123123123
END_OF_HURRRRG

# Expected contents of the POOP sub block.
POOP = <<END_OF_POOP
Content of the POOP block
END_OF_POOP

# A fake source file: some "code", then a data section with no sub blocks.
FILE_TEXT = <<END_OF_SINGLE_BLOCK_FILE

This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.

__END__
#{ENDSTUFF}
END_OF_SINGLE_BLOCK_FILE

# A fake source file whose data section also carries three sub blocks.
FILE_TEXT_MULTIPLE = <<END_OF_MULTI_BLOCK_FILE

This is stuff we shouldn't see or care about.
You know, stuff like code, presumably.

__END__
#{ENDSTUFF}
__POOP__
#{POOP}
__HURRRRG__
#{HURRRRG}
__HURGADURGA__
#{HURGADURGA}
END_OF_MULTI_BLOCK_FILE
|
||||
|
||||
|
||||
describe Chunker::DataParser do

  ### Parse +text+ on behalf of a throwaway class and return the string
  ### held by the parser's internal scanner.
  ###
  def scanned_text( text )
    parser = Chunker::DataParser.new( Class.new, StringIO.new( text ) )
    scanner = parser.instance_variable_get( :@scanner )
    return scanner.string
  end

  it "doesn't include content above the __END__ marker" do
    scanned_text( FILE_TEXT_MULTIPLE ).should_not =~ /This is stuff we shouldn't see/
  end

  it "doesn't contain the __END__ marker itself" do
    scanned_text( FILE_TEXT ).should_not =~ /^__END__/
  end
end
|
||||
|
||||
|
||||
describe 'A class that includes Chunker' do

  ### Names of the constants defined on +klass+, always as Strings.
  ### Module#constants returns Strings on Ruby 1.8 but Symbols on later
  ### versions, so normalize before comparing against String literals.
  ###
  def constant_names( klass )
    klass.constants.map {|name| name.to_s }
  end

  it "has all content in DATA_END if there are no sub blocks" do
    # File.open is stubbed so the include hook parses the fixture text.
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT ))
    klass = Class.new { include Chunker }
    names = constant_names( klass )

    names.should_not include( 'DATA_POOP' )
    names.should_not include( 'DATA_HURRRRG' )
    names.should_not include( 'DATA_HURGADURGA' )
    names.should include( 'DATA_END' )
  end

  it "separates data sub blocks into individual constants" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }
    names = constant_names( klass )

    names.should include( 'DATA_END' )
    names.should include( 'DATA_POOP' )
    names.should include( 'DATA_HURRRRG' )
    names.should include( 'DATA_HURGADURGA' )
  end

  it "has IO constants that contain the data block contents" do
    File.stub!( :open ).and_return( StringIO.new( FILE_TEXT_MULTIPLE ))
    klass = Class.new { include Chunker }

    # Each block in the fixture is followed by one extra newline, which
    # chomp removes before comparing.
    klass.const_get( :DATA_END ).read.chomp.should == ENDSTUFF
    klass.const_get( :DATA_POOP ).read.chomp.should == POOP
    klass.const_get( :DATA_HURRRRG ).read.chomp.should == HURRRRG
    klass.const_get( :DATA_HURGADURGA ).read.chomp.should == HURGADURGA
  end
end
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue