chunker/lib/chunker.rb
author Mahlon E. Smith <mahlon@martini.nu>
Sat, 22 Jan 2011 01:20:16 -0800
branchruby-modules
changeset 5 3d9956beee25
parent 4 01a3332bfe0a
permissions -rw-r--r--
Bug metadata has been initialized.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4
01a3332bfe0a Bump Chunker to 1.0.0 (release), updates for rspec 2.
Mahlon E. Smith <mahlon@martini.nu>
parents: 2
diff changeset
     1
# vim: set nosta noet ts=4 sw=4:
01a3332bfe0a Bump Chunker to 1.0.0 (release), updates for rspec 2.
Mahlon E. Smith <mahlon@martini.nu>
parents: 2
diff changeset
     2
01a3332bfe0a Bump Chunker to 1.0.0 (release), updates for rspec 2.
Mahlon E. Smith <mahlon@martini.nu>
parents: 2
diff changeset
     3
require 'strscan'
01a3332bfe0a Bump Chunker to 1.0.0 (release), updates for rspec 2.
Mahlon E. Smith <mahlon@martini.nu>
parents: 2
diff changeset
     4
require 'stringio'
01a3332bfe0a Bump Chunker to 1.0.0 (release), updates for rspec 2.
Mahlon E. Smith <mahlon@martini.nu>
parents: 2
diff changeset
     5
1
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
     6
#
2
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
     7
# Chunker: A convenience library for parsing __END__ tokens consistently.
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
     8
#
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
     9
# == Version
1
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    10
#
2
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    11
#	$Id$
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    12
#
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    13
# == Author
1
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    14
#
2
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    15
# * Mahlon E. Smith <mahlon@martini.nu>
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    16
#
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    17
# :include: LICENSE
e5c705047540 * Rename 'markers' to 'token'
mahlon
parents: 1
diff changeset
    18
#
1
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    19
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    20
### Namespace for the datablock parser.
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    21
###
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
    22
module Chunker

	# VCS Revision
	VCSRev = %q$Rev$

	# VCS Id
	VCSId = %q$Id$

	# Package version
	VERSION = '1.0.0'


	### Parser class for __END__ data blocks.
	### Find each __TOKEN__ within the __END__, and put each into a
	### DATA_TOKEN constant within the namespace that included us.
	###
	class DataParser

		# The mark for a DATA block.
		END_TOKEN = /^__END__\r?\n/

		# The mark for a 'sub' block.
		CHUNK_TOKEN = /^__([A-Z\_0-9]+)__\r?\n/


		### Constructor: Given a +klass+ and an +io+ to the class file,
		### extract the data blocks and install constants.
		###
		### The +io+ is read in full and is always closed afterwards, even
		### if reading fails.  A closed +io+ is reopened first when possible
		### (an IOError is raised when it is not).
		###
		### If the content has no __END__ line there is no data section, and
		### no constants are installed on +klass+.
		###
		def initialize( klass, io )
			@klass   = klass
			@scanner = nil

			io = self.reopen_stream( io ) if io.closed?

			begin
				source = io.read
			ensure
				io.close
			end

			# Only the text following the first __END__ line is data.
			# Without an __END__ line there is nothing to install.
			end_marker = END_TOKEN.match( source )
			return if end_marker.nil?

			@scanner = StringScanner.new( end_marker.post_match )
			self.extract_blocks
		end


		#########
		protected
		#########

		### Parse the data section for blocks, set contents to
		### IO constants in the including class.
		###
		### The text before the first __TOKEN__ line (or the whole section,
		### if there are no tokens) becomes DATA_END.  The text after each
		### __TOKEN__ line, up to the next token line or the end of the
		### data, becomes DATA_TOKEN.  A token with no content still gets
		### its constant, holding an empty StringIO.
		###
		def extract_blocks
			label = 'END'

			while label
				if ( chunk = @scanner.scan_until(CHUNK_TOKEN) )

					# +chunk+ runs from the previous scan position through
					# the end of the token line that was just matched; trim
					# the token line off to leave the pending block's data.
					data       = chunk[ 0, chunk.length - @scanner.matched.length ]
					next_label = @scanner[1]

				# No additional blocks: the remainder belongs to the
				# pending label.
				#
				else
					data       = @scanner.rest
					next_label = nil
				end

				# Add the IO constant to the class that included me.
				@klass.const_set( "DATA_#{label}".to_sym, StringIO.new( data ) )
				label = next_label
			end
		end


		### Return the scanner position at the start of the most recent
		### match.  Only meaningful after a successful match.  No longer
		### called by extract_blocks; retained for subclasses.
		###
		def next_position
			return @scanner.pos - @scanner[0].length
		end


		### Return a readable replacement for the closed stream +io+.
		### File-like objects are reopened from their path, StringIO-like
		### objects from their underlying string.  Raises IOError when
		### neither is available.
		###
		def reopen_stream( io )
			return File.open( io.path, 'r' ) if io.respond_to?( :path ) && io.path
			return StringIO.new( io.string ) if io.respond_to?( :string )

			raise IOError, "unable to reopen closed stream %p" % [ io ]
		end
	end


	### Hook included: Find the file path for how we arrived here, and open
	### it as an IO object.  Parse the IO for data block tokens.
	###
	def self.included( klass )
		# klass.instance_eval{ __FILE__ }   awww, nope.
		# __FILE__ won't work here, so we find the filename via caller().
		#
		# NOTE(review): caller(1).last is the outermost frame of the call
		# stack, not necessarily the file containing the 'include'.
		# Confirm that this is the intended frame.
		path = path_from_frame( caller(1).last )
		DataParser.new( klass, File.open(path, 'r') )
	end


	### Strip the trailing ":LINE" or ":LINE:in ..." suffix from a caller()
	### +frame+ string, leaving only the file path.  Colons elsewhere in
	### the path (e.g. a drive letter) are left alone.
	###
	def self.path_from_frame( frame )
		return frame.sub( /:\d+(?::in .*)?\z/, '' )
	end
	private_class_method :path_from_frame
end
9e127bf6e84f Initial commit of chunker, a ruby module to aid with data blocks.
mahlon
parents:
diff changeset
   133