Get the ragel line parser properly tokenizing the input lines. Add a
author Mahlon E. Smith <mahlon@martini.nu>
Mon, 17 Oct 2011 09:12:00 -0700
changeset 10 d07309450285
parent 9 bdf20e6eefd7
child 11 9aa5114326e8
Get the ragel line parser properly tokenizing the input lines. Add a "lines per second" timer. General cleanup and memory management.
LICENSE
Makefile
README
accept_loop.c
db.c
main.c
parser.rl
process.c
sql/1.sql
util.c
volta.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,28 @@
+
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
--- a/Makefile	Wed Sep 28 09:04:16 2011 -0700
+++ b/Makefile	Mon Oct 17 09:12:00 2011 -0700
@@ -1,7 +1,7 @@
 UNAME       := $(shell uname)
 DEPS         = sqlite3
 DEPS_DEBUG   = sqlite3 libprofiler
-CFLAGS       = $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS)) -O2
+CFLAGS       = -O2 -ansi $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS))
 LIBS         = $(shell pkg-config --libs-only-l $(DEPS))
 #OBJS         = $(patsubst %.c,%.o,$(wildcard *.c)) parser.o
 OBJS         = accept_loop.o db.o main.o parser.o process.o util.o
@@ -31,10 +31,10 @@
 # proftools doesn't currently register a .pc file on Ubuntu, hence these
 # Makefile gymnastics
 ifeq ($(UNAME), Linux)
-debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"'
+debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"'
 debug: LIBS = -lsqlite3 -lprofiler
 else
-debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"'\
+debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"'\
 	$(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG))
 debug: LIBS = $(shell pkg-config --libs-only-l $(DEPS_DEBUG))
 endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,8 @@
+
+Volta
+=====
+
+Why "volta"?
+Configuring squid
+Using volta
+
--- a/accept_loop.c	Wed Sep 28 09:04:16 2011 -0700
+++ b/accept_loop.c	Mon Oct 17 09:12:00 2011 -0700
@@ -31,7 +31,7 @@
 #include "volta.h"
 
 /*
- * Accept lines from squid and pass to the parser.
+ * Accept and process lines from squid.
  */
 int
 accept_loop( void )
@@ -47,7 +47,6 @@
 
 	debug( 1, LOC, "Waiting for input...\n" );
 	while ( fgets( buf, LINE_BUFSIZE, stdin ) != NULL ) {
-
 		bufsize = strlen( buf );
 
 		/* Common case, or last iteration of loop:
@@ -55,18 +54,17 @@
 		 */
 		if ( bufsize + 1 < LINE_BUFSIZE ) {
 			/* line wasn't concatenated onto in previous loops,
-			 * just pass it directly to parse() */
+			 * just pass it directly to process() */
 			if ( line == NULL ) {
-				parse( buf );
+				process( buf );
 			}
 
 			/* memory was previously allocated to contain the line, 
-			 * append the final chunk, pass to parse(), and cleanup. */
+			 * append the final chunk, pass to process(), and cleanup. */
 			else {
 				if ( (line = extend_line( line, buf )) == NULL ) continue;
-				parse( line );
-				free( line );
-				line = NULL;
+				process( line );
+				free( line ), line = NULL;
 			}
 		}
 
@@ -87,15 +85,15 @@
 			 * (within the current line) that still need to be appended.
 			 */
 			if ( buf[ bufsize - 1 ] == '\n' ) {
-				parse( line );
-				free( line );
-				line = NULL;
+				process( line );
+				free( line ), line = NULL;
 			}
 		}
 	}
 
 	/* stdin closed */
 	debug( 1, LOC, "End of stream\n" );
+	report_speed();
 	return( 0 );
 }
 
--- a/db.c	Wed Sep 28 09:04:16 2011 -0700
+++ b/db.c	Mon Oct 17 09:12:00 2011 -0700
@@ -106,8 +106,7 @@
 					(i == 1 ? "initalizing" : "upgrading"), sqlite3_errmsg(v.db) );
 			return( sqlite3_errcode(v.db) );
 		}
-		free( upgrade_sql );
-		upgrade_sql = NULL;
+		free( upgrade_sql ), upgrade_sql = NULL;
 
 		/* update version metadata in DB if update was successful */
 		current_version = i;
--- a/main.c	Wed Sep 28 09:04:16 2011 -0700
+++ b/main.c	Mon Oct 17 09:12:00 2011 -0700
@@ -104,10 +104,16 @@
 	argc -= optind;
 	argv += optind;
 
+	/* set timer vars for lines/sec counter */
+	if ( v.debugmode > 2 ) {
+		v.timer.start = time( NULL );
+		v.timer.lines = 0;
+	}
+
 	/* get the initial database handle or bomb immediately. */
 	if ( db_attach() != SQLITE_OK ) exit( 1 );
 
 	/* enter stdin parsing loop */
-	return accept_loop();
+	return( accept_loop() );
 }
 
--- a/parser.rl	Wed Sep 28 09:04:16 2011 -0700
+++ b/parser.rl	Mon Oct 17 09:12:00 2011 -0700
@@ -28,122 +28,258 @@
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-/*
-Squid docs:
----------------------------------------------------------------------------
-TAG: url_rewrite_program
-Specify the location of the executable for the URL rewriter.
-Since they can perform almost any function there isn't one included.
-
-For each requested URL rewriter will receive on line with the format
-
-URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
-
-In the future, the rewriter interface will be extended with
-key=value pairs ("kvpairs" shown above).  Rewriter programs
-should be prepared to receive and possibly ignore additional
-whitespace-separated tokens on each input line.
-
-And the rewriter may return a rewritten URL. The other components of
-the request line does not need to be returned (ignored if they are).
-
-The rewriter can also indicate that a client-side redirect should
-be performed to the new URL. This is done by prefixing the returned
-URL with "301:" (moved permanently) or 302: (moved temporarily).
-
-By default, a URL rewriter is not used.
----------------------------------------------------------------------------
-*/
-
 #include "volta.h"
 
 %%{
 	machine redirector;
 
-	action parse_error {
-		debug( 2, LOC, "parse error\n" );
-		return( NULL );
-	}
+	action success { valid = 1; }
+	action error   { valid = 0; }
 
-	action yay {
-		printf( "I saw: %s", p+1 );
+	action channel_id_found  {
+		debug( 1, LOC, "Channel ID found in redirector input.  Set 'url_rewrite_concurrency' to '0' in squid.\n" );
+		fbreak;
 	}
 
-	# http://, ftp://, https://, etc
-	proto = alpha{3,5} . '://';
+	action scheme_start  { p_request->tokens.scheme_start  = fpc; }
+	action scheme_finish { p_request->tokens.scheme_length = fpc - ( *pe + p_request->tokens.scheme_start ); }
+	action scheme_error  { debug( 3, LOC, "Unable to parse scheme.\n" ); }
+
+	action host_start  { p_request->tokens.host_start  = fpc; }
+	action host_finish { p_request->tokens.host_length = fpc - ( *pe + p_request->tokens.host_start ); }
+	action host_error  { debug( 3, LOC, "Unable to parse hostname.\n" ); }
+
+	action port_start  { p_request->tokens.port_start  = fpc; }
+	action port_finish { p_request->tokens.port_length = fpc - ( *pe + p_request->tokens.port_start ); }
+
+	action path_start  { p_request->tokens.path_start  = fpc; }
+	action path_finish { p_request->tokens.path_length = fpc - ( *pe + p_request->tokens.path_start ); }
+
+	action meth_start  { p_request->tokens.meth_start  = fpc; }
+	action meth_finish { p_request->tokens.meth_length = fpc - ( *pe + p_request->tokens.meth_start ); }
+	action meth_error  { debug( 3, LOC, "Unable to parse method.\n" ); }
+
+	action c_ip_start  { p_request->tokens.c_ip_start  = fpc; }
+	action c_ip_finish { p_request->tokens.c_ip_length = fpc - ( *pe + p_request->tokens.c_ip_start ); }
+	action c_ip_error  { debug( 3, LOC, "Unable to parse the client IP address.\n" ); }
 
-	# http://mahlon:password@example.com or http://mahlon@example.com
-    #       username              optional password
-	creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
+    # 
+    # Squid line: URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+    # 
+    # URI Syntax (RFC 3986) misc notes:
+    #
+    # - Scheme isn't passed to redirectors on CONNECT method requests
+    #
+    # - Hostname segments aren't supposed to be individually greater than 63 chars,
+    #   and the hostname in total shouldn't exceed 255.  They also shouldn't be entirely
+    #   made up of digits, or contain underscores.  In practice, these rules appear to
+    #   be violated constantly by some pretty big sites. I'm looking at you, facebook.
+    #   (( alnum ) | ( alnum . [a-zA-Z0-9\-]{0,63} . alnum )) & !( digit+ );
+    #
+    # - ipv6 has some utterly insane rules (RFC 5952) in the name of "shortcuts", which
+    #   only seem like shortcuts to someone writing IP addresses by hand.  Anyone that
+    #   has to parse (or even just read) them has a bunch of seemingly arbitrary work
+    #   dumped in their lap.  Heck, it's impossible to even search for an ipv6 address
+    #   that contains zeros in a text editor, because you have no idea how it might
+    #   be represented.  Rad!
+    #
+    #   The parser just trusts any ipv6 squid hands us as being valid, without
+    #   any real parsing/validation, other than it consists of hex digits and colons.
+    #
+    # - This parser originally validated path/query/fragment as well, but there were
+    #   enough inconsistencies with unescaped chars and other real-life RFC deviations
+    #   that I opted to just accept what we get from squid.
+	#
+	# - Redirectors aren't handed any userinfo (http://mahlon:password@example.com),
+    #   so no need to check for that.
+    #
+
+	host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
+	pchar          = ( alnum | [\-._~!$%&'()*+,;=] );
+	path_segment   = '/' ( any - space )*;
 
-	main  := ( proto . creds ) | proto @yay '\n';
+	hostname       = host_component ( '.' host_component )* '.'?;
+	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+	ipv6           = ( xdigit | ':' )+;
+
+	channel_id     = ( digit+ space )      %channel_id_found;
+	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish @!scheme_error;
+	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
+	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
+	path           = path_segment*         >path_start   %path_finish;
+	client_ip      = ipv4                  >c_ip_start   %c_ip_finish   @!c_ip_error;
+	method         = upper+                >meth_start   %meth_finish   @!meth_error;
+
+	Line = (
+ 		start: (
+			channel_id? -> Url
+		),
+
+		Url: (
+		   	scheme? host port? path? space -> Client
+ 		),
+
+		Client: (
+			client_ip '/' ( hostname | '-' ) space -> User
+		),
+
+		User: (
+			pchar+ space -> Method
+		),
+
+		Method: (
+			method -> KVPairs
+		),
+
+		KVPairs: (
+			( space any+ )? -> final
+		)
+ 	) %success @!error;
+
+
+	main := Line '\n';
 }%%
 %% write data;
 
-/*
-%%{
-	machine redirector;
-
-	action yay {
-		printf( "I saw: %s", p+1 );
-	}
-
-	# http://, ftp://, https://, etc
-	proto = alpha{3,5} . '://';
-
-	# http://mahlon:password@example.com or http://mahlon@example.com
-    #       username              optional password
-	creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
-
-	main := ( proto . creds ) | proto @yay '\n';
-}%%
-%% write data;
+/* 
+ * Tokenize an incoming line from squid, returning a parsed and populated
+ * structure to make redirection decisions against.  This pointer should
+ * be freed using cleanup_request() after use.
+ * 
+ * Squid documentation about redirectors:
+ * ---------------------------------------------------------------------------
+ * TAG: url_rewrite_program
+ * Specify the location of the executable for the URL rewriter.
+ * Since they can perform almost any function there isn't one included.
+ * 
+ * For each requested URL rewriter will receive on line with the format
+ * 
+ * URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+ * 
+ * In the future, the rewriter interface will be extended with
+ * key=value pairs ("kvpairs" shown above).  Rewriter programs
+ * should be prepared to receive and possibly ignore additional
+ * whitespace-separated tokens on each input line.
+ * 
+ * And the rewriter may return a rewritten URL. The other components of
+ * the request line does not need to be returned (ignored if they are).
+ * 
+ * The rewriter can also indicate that a client-side redirect should
+ * be performed to the new URL. This is done by prefixing the returned
+ * URL with "301:" (moved permanently) or 302: (moved temporarily).
+ * 
+ * By default, a URL rewriter is not used.
+ * ---------------------------------------------------------------------------
 */
-
-
-/*
-%%{
-	machine foo;
-
-	OPEN = 0;
-	CLOSE = 1;
+request *
+parse( char *line )
+{
+	/* Variables the ragel-generated code expects to find in scope. */
+	int  cs   = 0;
+	char *p   = line;
+	char *pe  = p + strlen(p);
+	char *eof = NULL;
+
+	/* Set to 1 by the machine's 'success' action, 0 by its 'error' action. */
+	unsigned char valid = 0;
+
+	/* the client request pointer */
+	request *p_request = init_request();
+
+	/* The machine's actions store token positions through p_request,
+	 * so the machine can't be entered without one.  init_request()
+	 * has already logged the allocation failure. */
+	if ( p_request == NULL ) return( NULL );
+
+	/* enter state machine */
+	%% write init;
+	%% write exec;
+
+	/* If we were given an invalid line, bail early.  Nothing has been
+	 * copied into the request yet, so only the struct itself is freed. */
+	if ( valid == 0 ) {
+		free( p_request ), p_request = NULL;
+		debug( 3, LOC, "Invalid line (%lu), skipped\n", v.timer.lines + 1 );
+		debug( 4, LOC, "%s", line );
+		return( NULL );
+	}
+
+	/* Copy the located tokens out of the line into the request. */
+	(void)populate_request( p_request );
+	return( p_request );
+}
 
+
+/*
+ * Initialize and return a pointer to a new request object.
+ *
+ */
+request *
+init_request( void )
+{
+	request *req = malloc( sizeof(request) );
+
+	if ( req == NULL ) {
+		debug( 1, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) );
+		return( NULL );
+	}
+
+	/* Parsed values: nothing is allocated for these yet. */
+	req->scheme = req->host = req->port = req->path = NULL;
+	req->user   = req->method = NULL;
+	req->client_ip = NULL;
+
+	/* Token start positions within the input line: none located yet. */
+	req->tokens.scheme_start = req->tokens.host_start = NULL;
+	req->tokens.port_start   = req->tokens.path_start = NULL;
+	req->tokens.meth_start   = req->tokens.c_ip_start = NULL;
+
+	/* Token lengths, in bytes. */
+	req->tokens.scheme_length = req->tokens.host_length = 0;
+	req->tokens.port_length   = req->tokens.path_length = 0;
+	req->tokens.meth_length   = req->tokens.c_ip_length = 0;
+
+	return req;
+}
+
+
+/*
+ * Take the previously parsed token locations and copy them into the request struct.
+ *
+ */
+void
+populate_request( struct request *p_request )
+{
+	/* String tokens: each member receives a newly allocated,
+	 * NUL-terminated copy, or NULL if the token has no start
+	 * position or a zero length. */
+	p_request->scheme = copy_string_token(
+			p_request->tokens.scheme_start, p_request->tokens.scheme_length );
+	p_request->host = copy_string_token(
+			p_request->tokens.host_start, p_request->tokens.host_length );
+	p_request->port = copy_string_token(
+			p_request->tokens.port_start, p_request->tokens.port_length );
+	p_request->path = copy_string_token(
+			p_request->tokens.path_start, p_request->tokens.path_length );
+	p_request->method = copy_string_token(
+			p_request->tokens.meth_start, p_request->tokens.meth_length );
+
+	/* The client address is stored as an in_addr rather than as text. */
+	p_request->client_ip = copy_ipv4_token(
+			p_request->tokens.c_ip_start, p_request->tokens.c_ip_length );
+}
+
+
+/*
+ * Release memory used by request struct.
+ *
+ */
+void
+cleanup_request( struct request *p_request )
+{
+	/* As with free(), a NULL request is a no-op. */
+	if ( p_request == NULL ) return;
+
+	/* free(NULL) is harmless, so members that were never populated
+	 * need no special casing.  Every pointer member of the struct is
+	 * released here, including 'user'. */
+	free( p_request->scheme );
+	free( p_request->host );
+	free( p_request->port );
+	free( p_request->path );
+	free( p_request->user );
+	free( p_request->method );
+	free( p_request->client_ip );
+
+	/* The pointer is passed by value, so the caller's copy is left
+	 * dangling and must not be used again. */
+	free( p_request );
+
+	return;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/process.c	Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,56 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "volta.h"
+#include "db.h"
+
+/*
+ * Parse a single line from squid and write the response to stdout.
+ * A line that fails to parse is answered with a blank line.
+ */
+void
+process( char *line )
+{
+	request *p_request = parse( line );
+
+	/* count lines in debugmode */
+	if ( v.debugmode > 2 ) v.timer.lines++;
+
+	/* If parsing failed for some reason, return a blank line to squid. */
+	if ( p_request == NULL ) {
+		printf( "\n" );
+		return;
+	}
+
+	/* Any of these members is NULL when its token was absent from the
+	 * line (scheme, port and path are all optional in the grammar) or
+	 * when its copy could not be allocated.  Handing NULL to a "%s"
+	 * conversion is undefined, so print an empty string instead. */
+	printf( "* %s", line );
+	printf( "%s%s%s%s\n\n",
+			( p_request->scheme ? p_request->scheme : "" ),
+			( p_request->host   ? p_request->host   : "" ),
+			( p_request->port   ? p_request->port   : "" ),
+			( p_request->path   ? p_request->path   : "" ) );
+
+	/* TODO: everything */
+
+	cleanup_request( p_request );
+	return;
+}
+
--- a/sql/1.sql	Wed Sep 28 09:04:16 2011 -0700
+++ b/sql/1.sql	Mon Oct 17 09:12:00 2011 -0700
@@ -1,3 +1,5 @@
+--- vim: set noet nosta sw=4 ts=4 ft=sql:
+
 BEGIN;
 
 DROP TABLE IF EXISTS rules;
--- a/util.c	Wed Sep 28 09:04:16 2011 -0700
+++ b/util.c	Mon Oct 17 09:12:00 2011 -0700
@@ -31,6 +31,7 @@
 
 #include "volta.h"
 
+
 /*
  * Output basic usage information.
  */
@@ -69,8 +70,8 @@
 
 	va_list args;
 	va_start( args, fmt );
-	fprintf( stderr, "%s [%s] #%d (%s:%04d): ",
-			 PROG, timestamp, getpid(), file, line );
+	fprintf( stderr, "%s [%s] #%d %d (%s:%d): ",
+			 PROG, timestamp, getpid(), level, file, line );
 	vfprintf( stderr, fmt, args );
 	va_end( args );
 
@@ -88,7 +89,6 @@
 extend_line( char *line, const char *buf )
 {
 	char *line_realloc;
-
 	unsigned short int offset;
 	size_t new_len;
 
@@ -103,12 +103,12 @@
 		new_len = offset + LINE_BUFSIZE;
 	}
 
-	debug( 3, LOC, "Extending line to %d bytes at offset %d...\n", new_len, offset );
-	if ( (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) {
-		/* cleanup on allocation errors */
-		debug( 3, LOC, "Ignoring line, error while allocating memory: %s\n", strerror(errno) );
-		if ( line != NULL ) free( line );
-		line = NULL;
+	debug( 4, LOC, "Extending line %d to %d bytes at offset %d\n", v.timer.lines+1, new_len, offset );
+	if ( new_len > LINE_MAX || (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) {
+		debug( 1, LOC, "Ignoring line, error while allocating memory: %s\n",
+				(line_realloc == NULL ? strerror(errno) : "Line too large") );
+		if ( line != NULL ) free( line ), line = NULL;
+		printf( "\n" );
 	}
 	else {
 		line = line_realloc;
@@ -120,7 +120,7 @@
 
 
 /*
- * Read an entire file into memory, returning a pointer the contents.
+ * Read an entire file into memory, returning a pointer to the contents.
  * Returns NULL on error.
  *
  */
@@ -149,7 +149,7 @@
 		return( NULL );
 	}
 
-	if ( fread( contents, sizeof(char), sb.st_size, fh ) != sb.st_size ) {
+	if ( fread( contents, sizeof(char), sb.st_size, fh ) != (unsigned int)sb.st_size ) {
 		debug( 1, LOC, "Short read for file '%s'?: %s\n", file );
 		fclose( fh );
 		return( NULL );
@@ -160,4 +160,79 @@
 }
 
 
+/* 
+ * Allocate memory and copy +length+ bytes (plus 1 for null) from the given
+ * +string+ into a new string, returning a pointer to it.
+ * 
+ */
+char *
+copy_string_token( char *string, unsigned short int length )
+{
+	char *copy;
+
+	/* No token to copy. */
+	if ( string == NULL || length == 0 ) return ( NULL );
+
+	copy = calloc( length + 1, sizeof(char) );
+	if ( copy == NULL ) {
+		debug( 1, LOC, "Unable to allocate memory for token: %s\n", strerror(errno) );
+		return( NULL );
+	}
+
+	/* calloc() zeroed the buffer, so the extra byte past the copied
+	 * data already serves as the terminator. */
+	(void)memcpy( copy, string, length );
+
+	return( copy );
+}
+
+
+/* 
+ * Allocate memory and copy +length+ bytes from the given dotted quad style
+ * +ip_string+ into an in_addr struct, returning a pointer to it.
+ * 
+ */
+struct in_addr *
+copy_ipv4_token( char *ip_string, unsigned short int length )
+{
+	struct in_addr *alloc_ptr = NULL;
+	char c_ip[ INET_ADDRSTRLEN ];
+	int  rv;
+
+	if ( ip_string == NULL ) return ( NULL );
+
+	/* The token is copied into a fixed-size buffer and terminated at
+	 * c_ip[ length ], so it has to leave room for the terminator. */
+	if ( length >= INET_ADDRSTRLEN ) {
+		debug( 1, LOC, "Client ip token is too long (%d bytes)\n", (int)length );
+		return( NULL );
+	}
+
+	/* The token isn't NUL-terminated within the line; make a
+	 * terminated copy for inet_pton(). */
+	(void)memcpy( c_ip, ip_string, length );
+	c_ip[ length ] = '\0';
+
+	/* A single in_addr is all that's needed, regardless of how long
+	 * the textual form was. */
+	if ( (alloc_ptr = calloc( 1, sizeof(struct in_addr) )) == NULL ) {
+		debug( 1, LOC, "Unable to allocate memory for ip '%s': %s\n",
+				c_ip, strerror(errno) );
+		return( NULL );
+	}
+
+	/* inet_pton() returns 0 for an unparsable address without setting
+	 * errno; only a negative return carries a meaningful errno. */
+	rv = inet_pton( AF_INET, c_ip, alloc_ptr );
+	if ( rv < 1 ) {
+		debug( 1, LOC, "Unable to create in_addr struct for client ip '%s': %s\n",
+				c_ip, ( rv == 0 ? "Invalid address" : strerror(errno) ) );
+		free( alloc_ptr ), alloc_ptr = NULL;
+	}
+
+	return( alloc_ptr );
+}
+
+
+/*
+ * Report how many lines were processed per second.
+ *
+ */
+void
+report_speed( void )
+{
+	time_t end_time;
+	double elapsed;
+
+	/* Nothing to report below debug level 3. */
+	if ( v.debugmode < 3 ) return;
+
+	end_time = time( NULL );
+	elapsed  = difftime( end_time, v.timer.start );
+
+	if ( elapsed > 0 )  {
+		debug( 3, LOC, "Processed %lu lines in %0.1f seconds. (%0.1f lines/sec)\n",
+				v.timer.lines, elapsed, v.timer.lines/elapsed );
+		return;
+	}
+
+	/* Start and end times are the same second: skip the rate, which
+	 * would otherwise divide by zero. */
+	debug( 3, LOC, "Processed %lu lines in under a second.\n", v.timer.lines );
+}
+
--- a/volta.h	Wed Sep 28 09:04:16 2011 -0700
+++ b/volta.h	Mon Oct 17 09:12:00 2011 -0700
@@ -44,6 +44,7 @@
 #include <unistd.h>
 #include <time.h>
 #include <sys/stat.h>
+#include <signal.h>
 
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -56,28 +57,60 @@
 
 /* Default line size we accept from squid, longer lines (huge URLs?) malloc. */
 #define LINE_BUFSIZE 2048
+/* Ceiling for how many bytes can be allocated at once for a single line. */
+#define LINE_MAX 256000 /* 250k */
+
+/* Redirect types */
+#define REDIR_TEMPORARY   0
+#define REDIR_PERMANENT   1
+#define REDIR_TRANSPARENT 2
 
 /* Aid debugging */
 #define LOC __FILE__, __LINE__
 
-/* a global struct for easy access to common vars */
+/*
+ * a global struct for easy access to common vars 
+ *
+ */
 struct v_globals {
 	unsigned short int debugmode; /* debug level */
 	char dbname[128];             /* path to database file */
 	struct sqlite3 *db;           /* database handle */
+
+	struct {
+		time_t start;            /* start time */
+		unsigned long int lines; /* line count for determining speed */
+	} timer;
 };
 extern struct v_globals v;        /* defined in main.c */
 
-/* The parsed attributes from the request line, as given to us by squid.
- * URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL> */
+/*
+ * The parsed attributes from the request line, as given to us by squid.
+ *
+ */
 typedef struct request {
-	char   *url;
+	char   *scheme;
 	char   *host;
-	struct sockaddr_in ip;
-	char   *ip_fqdn;
+	char   *path;
+	char   *port;
+	struct in_addr *client_ip;
 	char   *user;
 	char   *method;
-	char   *kvpairs;
+
+	struct {
+		char *scheme_start;
+		char *host_start;
+		char *port_start;
+		char *path_start;
+		char *meth_start;
+		char *c_ip_start;
+		unsigned short int scheme_length;
+		unsigned short int host_length;
+		unsigned short int port_length;
+		unsigned short int path_length;
+		unsigned short int meth_length;
+		unsigned short int c_ip_length;
+	} tokens;
 } request;
 
 /*
@@ -87,13 +120,20 @@
  */
 int getopt( int, char * const [], const char *);
 
-void usage( char *prg );
-void debug( int level, char *file, int line, const char *fmt, ... );
-char *slurp_file( char *file );
-char *extend_line( char *line, const char *buf );
+void usage( char * );
+void debug( int, char *, int, const char *, ... );
+void report_speed( void );
+char *slurp_file( char * );
+char *extend_line( char *, const char * );
+char *copy_string_token( char *, unsigned short int );
+struct in_addr *copy_ipv4_token( char *, unsigned short int );
 
 int accept_loop( void );
 struct request *parse( char *p );
+void process( char * );
+/* Request lifecycle helpers, defined alongside parse().  init_request()
+ * is called by parse() ahead of its own definition, so it needs a
+ * prototype here. */
+struct request *init_request( void );
+void populate_request( struct request * );
+void cleanup_request( struct request * );
 
 #endif