Ensure that parsing can't be subverted by requests larger than the default line buffer maximum size.
author Mahlon E. Smith <mahlon@martini.nu>
Tue, 13 Sep 2011 22:13:02 -0700
changeset 2 8c88756f81b0
parent 1 823d42546cea
child 3 97f767832c52
Ensure that parsing can't be subverted by requests larger than the default line buffer maximum size. If a line fits in the stack, great. If not, allocate additional memory for it instead of truncating. Add google-proftools to debug builds. Don't clean parser.c by default, so distributions don't require ragel to build. Add BSD licensing. Move helper functions out to util.c. Rename files to match function names. Start playing with sqlite API.
Makefile
accept_loop.c
accept_loop.o
database.c
main.c
main.o
parser.c
parser.o
parser.rl
util.c
util.o
volta.c
volta.h
--- a/Makefile	Sat Sep 03 14:12:06 2011 -0700
+++ b/Makefile	Tue Sep 13 22:13:02 2011 -0700
@@ -1,11 +1,13 @@
 
 CFLAGS       = -O2
+CFLAGS_DEBUG = -Wall -L /usr/lib -L /opt/local/lib -DDEBUG -DPROG='"volta (debugmode)"'
 LIBS         = -lsqlite3
+LIBS_DEBUG   = -lprofiler  # requires proftools
 #OBJS         = $(patsubst %.c,%.o,$(wildcard *.c)) parser.o
-OBJS         =  volta.o parser.o
+OBJS         = accept_loop.o database.o main.o parser.o util.o
 
 ########################################################################
-### M A I N
+### P R O D U C T I O N
 ########################################################################
 
 volta: $(OBJS)
@@ -14,7 +16,9 @@
 
 $(OBJS): volta.h
 
-parser.c: parser.rl
+# don't actually depend on parser.rl, so distributions don't require ragel
+# ragel -C -T0 parser.rl -o $@
+parser.c:
 	ragel -L -C -e -G2 parser.rl -o $@
 
 
@@ -22,20 +26,34 @@
 ### D E B U G
 ########################################################################
 
-debug: CFLAGS = -Wall -DDEBUG -DPROG='"volta (debugmode)"'
-debug: volta parser_state.xml parser_state.png parser_state.dot
+debug: CFLAGS += $(CFLAGS_DEBUG)
+debug: LIBS   += $(LIBS_DEBUG)
+debug: volta-debug parser_graph.xml parser_graph.png parser_graph.dot
+
+volta-debug: $(OBJS)
+	$(CC) $(CFLAGS) $(LIBS) -o volta $(OBJS)
 
-parser_state.xml parser_state.png parser_state.dot: parser.rl
-	ragel -Vp parser.rl > parser_state.dot
-	ragel -C -e -G2 -x parser.rl -o parser_state.xml
-	dot -Tpng parser_state.dot > parser_state.png
+parser_graph.xml parser_graph.png parser_graph.dot: parser.rl
+	ragel -Vp parser.rl > parser_graph.dot
+	ragel -C -e -G2 -x parser.rl -o parser_graph.xml
+	dot -Tpng parser_graph.dot > parser_graph.png
+
+# export CPUPROFILE="cpu.prof" before running volta for cpu profiling
+# export CPUPROFILE_FREQUENCY=100 (default)
+profile:
+	pprof --dot ./volta $(CPUPROFILE) | dot -Tpng > $(CPUPROFILE).png
+	pprof --text ./volta $(CPUPROFILE)
 
 
 ########################################################################
 ### U T I L
 ########################################################################
 
-.PHONY : clean
+.PHONY : clean cleanall
+
+cleanall: clean
+	rm -f parser.c
+
 clean:
-	-rm -f volta volta_debug* parser.c parser_state.* *.o
+	-rm -f volta volta_debug* parser_graph.* *.o *.prof*
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/accept_loop.c	Tue Sep 13 22:13:02 2011 -0700
@@ -0,0 +1,101 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "volta.h"
+
+/*
+ * Accept lines from squid and pass each complete line to the parser.
+ *
+ * A line that fits in LINE_BUFSIZE is parsed straight from the stack
+ * buffer.  A longer one is rebuilt on the heap, chunk by chunk, so a
+ * request can't thwart parsing simply by exceeding LINE_BUFSIZE.
+ *
+ * Returns 0 once stdin is closed.
+ */
+int
+accept_loop( void )
+{
+	/* incoming chunk (stack) */
+	char buf[ LINE_BUFSIZE ];
+
+	/* oversized line being rebuilt (heap), NULL when none is pending */
+	char *line = NULL;
+
+	/* the length of the incoming chunk */
+	size_t bufsize;
+
+	/* true when the current chunk is the end of its line */
+	int complete;
+
+	/* true while discarding the rest of a line we couldn't store */
+	int skipping = 0;
+
+	debug( 1, LOC, "Waiting for input...\n" );
+	while ( fgets( buf, LINE_BUFSIZE, stdin ) != NULL ) {
+		bufsize = strlen( buf );
+
+		/* A chunk ends its line if it didn't fill the buffer, or if it
+		 * filled it exactly and still ends in a newline. */
+		complete = ( bufsize + 1 < LINE_BUFSIZE )
+			|| ( buf[ bufsize - 1 ] == '\n' );
+
+		/* An earlier allocation failed: drop what is left of that
+		 * line instead of parsing its tail as a brand new request. */
+		if ( skipping ) {
+			if ( complete ) skipping = 0;
+			continue;
+		}
+
+		/* Common case: the whole line fit in the stack buffer. */
+		if ( line == NULL && complete ) {
+			parse( buf );
+			continue;
+		}
+
+		/* Oversized line: append this chunk to the heap copy.
+		 * extend_line() has already freed the line if it failed. */
+		if ( (line = extend_line( line, buf )) == NULL ) {
+			skipping = !complete;
+			continue;
+		}
+
+		if ( !complete ) continue;
+		parse( line );
+		free( line );
+		line = NULL;
+	}
+
+	/* stdin closed mid-line: parse what we have rather than leak it */
+	if ( line != NULL ) { parse( line ); free( line ); }
+
+	debug( 1, LOC, "End of stream\n" );
+	return( 0 );
+}
+
Binary file accept_loop.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/database.c	Tue Sep 13 22:13:02 2011 -0700
@@ -0,0 +1,67 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "volta.h"
+
+
+/*
+ * Create the "testing.db" database and its single table, foo.
+ *
+ * Returns SQLITE_OK on success, otherwise the sqlite error code of the
+ * failed call.  The statement and handle are released on every path.
+ */
+int
+db_initialize( void )
+{
+	struct sqlite3 *db = NULL;
+	struct sqlite3_stmt *stmt = NULL;
+	int rc = SQLITE_OK;
+
+	debug( 1, LOC, "init! init! init!\n" );
+	if ( sqlite3_open( "testing.db", &db ) != SQLITE_OK ) {
+		debug( 1, LOC, "Error when initializing database: %s\n", sqlite3_errmsg(db) );
+		rc = sqlite3_errcode( db );
+	}
+	else if ( sqlite3_prepare_v2( db, "create table foo(bar int);", -1, &stmt, NULL ) != SQLITE_OK ) {
+		debug( 1, LOC, "Error preparing statement: %s\n", sqlite3_errmsg(db) );
+		rc = sqlite3_errcode( db );
+	}
+	else if ( sqlite3_step( stmt ) != SQLITE_DONE ) {
+		debug( 1, LOC, "Error executing statement: %s\n", sqlite3_errmsg(db) );
+		rc = sqlite3_errcode( db );
+	}
+	else debug( 1, LOC, "okay! okay! okay!\n" );
+
+	/* sqlite documents both of these as harmless no-ops on NULL */
+	sqlite3_finalize( stmt );
+	sqlite3_close( db );
+	return( rc );
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main.c	Tue Sep 13 22:13:02 2011 -0700
@@ -0,0 +1,130 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * TODO
+ *
+ * empty struct not necessary?
+ * inet_pton( AF_INET, *char src, dest )
+ * an option to run the DB out of memory?
+ * PRAGMA user_version = 1;
+ *
+ */
+
+#include "volta.h"
+unsigned short int debugmode;
+
+
+/*
+ * Parse command line options, perform actions, and enter accept loop.
+ * Returns 1 on a bad option or failed init, else 0 or accept_loop()'s result.
+ */
+int
+main( int argc, char *argv[] )
+{
+	/* opt action flags */
+	struct {
+		unsigned char init;
+		unsigned char dump;
+	} actions = {0};
+
+#ifdef DEBUG
+	/* debugmode set at compile time,
+	 * default to display everything */
+	debugmode = 99;
+#else
+	debugmode = 0;
+#endif
+
+	/* get_opt vars */
+	int opt = 0;
+	opterr  = 0;
+
+	/* parse options */
+	while ( (opt = getopt( argc, argv, "a:d:hv" )) != -1 ) {
+		switch ( opt ) {
+
+			/* action */
+			case 'a':
+				if ( strcmp( optarg, "init" ) == 0 ) actions.init = 1;
+				if ( strcmp( optarg, "dump" ) == 0 ) actions.dump = 1;
+				break;
+
+			/* debug */
+			case 'd':
+				/* "-d -x": getopt took the next option as our argument.
+				 * Use level 1 and rewind so "-x" is parsed next; an
+				 * attached "-d-x" can't be rewound without looping. */
+				if ( optarg[0] == '-' && optarg == argv[optind - 1] ) {
+					debugmode = 1;
+					optind--;
+				}
+				else sscanf( optarg, "%hu", &debugmode );
+				break;
+
+			/* help */
+			case 'h':
+				usage( argv[0] );
+				return( 0 );
+
+			/* version */
+			case 'v':
+				printf( "%s %s\n", PROG, VERSION );
+				return( 0 );
+
+			/* unknown option or option argument missing */
+			case '?':
+				switch( optopt ) {
+					case 'd': /* no debug argument, default to level 1 */
+						debugmode = 1;
+						break;
+					default:
+						usage( argv[0] );
+						return( 1 );
+				}
+				break;
+			default:
+				break;
+		}
+	}
+
+	/* perform any requested actions */
+	if ( actions.init ) {
+		return( db_initialize() == SQLITE_OK ? 0 : 1 );
+	}
+	if ( actions.dump ) {
+		debug( 1, LOC, "dump.\n" );
+		return( 0 );
+	}
+
+	/* enter stdin parsing loop */
+	return accept_loop();
+}
+
Binary file main.o has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parser.c	Tue Sep 13 22:13:02 2011 -0700
@@ -0,0 +1,323 @@
+
+/* #line 1 "parser.rl" */
+/* vim: set noet nosta sw=4 ts=4 ft=ragel : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Squid docs:
+---------------------------------------------------------------------------
+TAG: url_rewrite_program
+Specify the location of the executable for the URL rewriter.
+Since they can perform almost any function there isn't one included.
+
+For each requested URL rewriter will receive on line with the format
+
+URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+
+In the future, the rewriter interface will be extended with
+key=value pairs ("kvpairs" shown above).  Rewriter programs
+should be prepared to receive and possibly ignore additional
+whitespace-separated tokens on each input line.
+
+And the rewriter may return a rewritten URL. The other components of
+the request line does not need to be returned (ignored if they are).
+
+The rewriter can also indicate that a client-side redirect should
+be performed to the new URL. This is done by prefixing the returned
+URL with "301:" (moved permanently) or 302: (moved temporarily).
+
+By default, a URL rewriter is not used.
+---------------------------------------------------------------------------
+*/
+
+#include "volta.h"
+
+
+/* #line 80 "parser.rl" */
+
+
+/* #line 67 "parser.c" */
+static const int redirector_start = 1;        /* parse() assigns this to cs before scanning */
+static const int redirector_first_final = 13; /* NOTE(review): presumably the lowest accepting state id -- confirm against ragel docs */
+static const int redirector_error = 0;        /* same value st0 stores in cs when no transition matches */
+
+static const int redirector_en_main = 1;      /* NOTE(review): presumably the entry state of "main" -- confirm */
+
+
+/* #line 82 "parser.rl" */
+
+/*
+%%{
+	machine redirector;
+
+	action yay {
+		printf( "I saw: %s", p+1 );
+	}
+
+	# http://, ftp://, https://, etc
+	proto = alpha{3,5} . '://';
+
+	# http://mahlon:password@example.com or http://mahlon@example.com
+    #       username              optional password
+	creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
+
+	main := ( proto . creds ) | proto @yay '\n';
+}%%
+%% write data;
+*/
+
+
+/*
+%%{
+	machine foo;
+
+	OPEN = 0;
+	CLOSE = 1;
+
+	main :=
+		start:
+		door_closed: (
+			OPEN -> door_open -> final
+		),
+		door_open: (
+			CLOSE -> door_closed
+		);
+}%%
+*/
+
+/*
+ * Run one input line through the ragel-generated "redirector" machine.
+ *
+ * p is the NUL-terminated line; the terminator is part of the scan range.
+ * Returns a pointer to one static request that every call reuses.
+ */
+struct request *
+parse( char *p )
+{
+	/* initial machine state */
+	short int cs = 0;
+
+	/* the client request object: static, since its address is returned */
+	static request c_request;
+	request *cp_request = &c_request;
+
+	/* initialize state machine with current line */
+	char *pe = p + strlen(p) + 1;
+
+	/* enter state machine */
+	
+/* #line 137 "parser.c" */
+	{
+	cs = redirector_start;
+	}
+
+/* #line 143 "parser.rl" */
+	
+/* #line 144 "parser.c" */
+	{
+	if ( p == pe )
+		goto _test_eof;
+	switch ( cs )
+	{
+case 1:
+	if ( (*p) > 90 ) {
+		if ( 97 <= (*p) && (*p) <= 122 )
+			goto st2;
+	} else if ( (*p) >= 65 )
+		goto st2;
+	goto st0;
+st0:
+cs = 0;
+	goto _out;
+st2:
+	if ( ++p == pe )
+		goto _test_eof2;
+case 2:
+	if ( (*p) > 90 ) {
+		if ( 97 <= (*p) && (*p) <= 122 )
+			goto st3;
+	} else if ( (*p) >= 65 )
+		goto st3;
+	goto st0;
+st3:
+	if ( ++p == pe )
+		goto _test_eof3;
+case 3:
+	if ( (*p) > 90 ) {
+		if ( 97 <= (*p) && (*p) <= 122 )
+			goto st4;
+	} else if ( (*p) >= 65 )
+		goto st4;
+	goto st0;
+st4:
+	if ( ++p == pe )
+		goto _test_eof4;
+case 4:
+	if ( (*p) == 58 )
+		goto st5;
+	if ( (*p) > 90 ) {
+		if ( 97 <= (*p) && (*p) <= 122 )
+			goto st11;
+	} else if ( (*p) >= 65 )
+		goto st11;
+	goto st0;
+st5:
+	if ( ++p == pe )
+		goto _test_eof5;
+case 5:
+	if ( (*p) == 47 )
+		goto st6;
+	goto st0;
+st6:
+	if ( ++p == pe )
+		goto _test_eof6;
+case 6:
+	if ( (*p) == 47 )
+		goto tr7;
+	goto st0;
+tr7:
+/* #line 68 "parser.rl" */
+	{
+		printf( "I saw: %s", p+1 );
+	}
+	goto st7;
+st7:
+	if ( ++p == pe )
+		goto _test_eof7;
+case 7:
+/* #line 216 "parser.c" */
+	switch( (*p) ) {
+		case 10: goto st13;
+		case 43: goto st8;
+		case 95: goto st8;
+	}
+	if ( (*p) < 48 ) {
+		if ( 45 <= (*p) && (*p) <= 46 )
+			goto st8;
+	} else if ( (*p) > 57 ) {
+		if ( (*p) > 90 ) {
+			if ( 97 <= (*p) && (*p) <= 122 )
+				goto st8;
+		} else if ( (*p) >= 65 )
+			goto st8;
+	} else
+		goto st8;
+	goto st0;
+st13:
+	if ( ++p == pe )
+		goto _test_eof13;
+case 13:
+	goto st0;
+st8:
+	if ( ++p == pe )
+		goto _test_eof8;
+case 8:
+	switch( (*p) ) {
+		case 43: goto st8;
+		case 58: goto st9;
+		case 64: goto st13;
+		case 95: goto st8;
+	}
+	if ( (*p) < 48 ) {
+		if ( 45 <= (*p) && (*p) <= 46 )
+			goto st8;
+	} else if ( (*p) > 57 ) {
+		if ( (*p) > 90 ) {
+			if ( 97 <= (*p) && (*p) <= 122 )
+				goto st8;
+		} else if ( (*p) >= 65 )
+			goto st8;
+	} else
+		goto st8;
+	goto st0;
+st9:
+	if ( ++p == pe )
+		goto _test_eof9;
+case 9:
+	goto st10;
+st10:
+	if ( ++p == pe )
+		goto _test_eof10;
+case 10:
+	if ( (*p) == 64 )
+		goto st14;
+	goto st10;
+st14:
+	if ( ++p == pe )
+		goto _test_eof14;
+case 14:
+	if ( (*p) == 64 )
+		goto st14;
+	goto st10;
+st11:
+	if ( ++p == pe )
+		goto _test_eof11;
+case 11:
+	if ( (*p) == 58 )
+		goto st5;
+	if ( (*p) > 90 ) {
+		if ( 97 <= (*p) && (*p) <= 122 )
+			goto st12;
+	} else if ( (*p) >= 65 )
+		goto st12;
+	goto st0;
+st12:
+	if ( ++p == pe )
+		goto _test_eof12;
+case 12:
+	if ( (*p) == 58 )
+		goto st5;
+	goto st0;
+	}
+	_test_eof2: cs = 2; goto _test_eof; 
+	_test_eof3: cs = 3; goto _test_eof; 
+	_test_eof4: cs = 4; goto _test_eof; 
+	_test_eof5: cs = 5; goto _test_eof; 
+	_test_eof6: cs = 6; goto _test_eof; 
+	_test_eof7: cs = 7; goto _test_eof; 
+	_test_eof13: cs = 13; goto _test_eof; 
+	_test_eof8: cs = 8; goto _test_eof; 
+	_test_eof9: cs = 9; goto _test_eof; 
+	_test_eof10: cs = 10; goto _test_eof; 
+	_test_eof14: cs = 14; goto _test_eof; 
+	_test_eof11: cs = 11; goto _test_eof; 
+	_test_eof12: cs = 12; goto _test_eof; 
+
+	_test_eof: {}
+	_out: {}
+	}
+
+/* #line 144 "parser.rl" */
+
+	/* reset the request */
+	/* c_request = reset_request; */
+	return( cp_request );
+}
+
Binary file parser.o has changed
--- a/parser.rl	Sat Sep 03 14:12:06 2011 -0700
+++ b/parser.rl	Tue Sep 13 22:13:02 2011 -0700
@@ -1,12 +1,72 @@
 /* vim: set noet nosta sw=4 ts=4 ft=ragel : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Squid docs:
+---------------------------------------------------------------------------
+TAG: url_rewrite_program
+Specify the location of the executable for the URL rewriter.
+Since they can perform almost any function there isn't one included.
+
+For each requested URL rewriter will receive on line with the format
+
+URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+
+In the future, the rewriter interface will be extended with
+key=value pairs ("kvpairs" shown above).  Rewriter programs
+should be prepared to receive and possibly ignore additional
+whitespace-separated tokens on each input line.
+
+And the rewriter may return a rewritten URL. The other components of
+the request line does not need to be returned (ignored if they are).
+
+The rewriter can also indicate that a client-side redirect should
+be performed to the new URL. This is done by prefixing the returned
+URL with "301:" (moved permanently) or 302: (moved temporarily).
+
+By default, a URL rewriter is not used.
+---------------------------------------------------------------------------
+*/
 
 #include "volta.h"
 
 %%{
 	machine redirector;
 
+	action parse_error {
+		debug( 2, LOC, "parse error\n" );
+		return( NULL );
+	}
+
 	action yay {
-		printf( "YAH\n" );
+		printf( "I saw: %s", p+1 );
 	}
 
 	# http://, ftp://, https://, etc
@@ -16,17 +76,16 @@
     #       username              optional password
 	creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
 
-	main := ' ' @yay;
+	main  := ( proto . creds ) | proto @yay '\n';
 }%%
 %% write data;
 
-/* state machine data */
 /*
 %%{
 	machine redirector;
 
 	action yay {
-		printf( "YAH\n" );
+		printf( "I saw: %s", p+1 );
 	}
 
 	# http://, ftp://, https://, etc
@@ -41,15 +100,34 @@
 %% write data;
 */
 
-int
+
+/*
+%%{
+	machine foo;
+
+	OPEN = 0;
+	CLOSE = 1;
+
+	main :=
+		start:
+		door_closed: (
+			OPEN -> door_open -> final
+		),
+		door_open: (
+			CLOSE -> door_closed
+		);
+}%%
+*/
+
+struct request *
 parse( char *p )
 {
    	/* initial machine state */
 	short int cs = 0;
 
 	/* the client request object */
-	/* request c_request; */
-	/* request *cp_request = &c_request; */
+	request c_request;
+	request *cp_request = &c_request;
 
 	/*
 	char ip[ INET_ADDRSTRLEN ];
@@ -66,6 +144,6 @@
 
 	/* reset the request */
 	/* c_request = reset_request; */
-	return( 0 );
+	return( cp_request );
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util.c	Tue Sep 13 22:13:02 2011 -0700
@@ -0,0 +1,122 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "volta.h"
+
+/*
+ * Print the command line synopsis and option summary to stdout.
+ */
+void
+usage( char *prg )
+{
+	printf(
+		"%s [-vh] [-d <level>] [-a <init>]\n"
+		"    -v Display version\n"
+		"    -d <level> Show debug information on stderr\n"
+		"    -h Usage (you're lookin' at it)\n"
+		"    -a Perform an action, being one of:\n"
+		"         init: Initialize a new, empty database\n"
+		"         dump: DUMP IT\n"
+		"\n", prg );
+}
+
+
+/*
+ * Write a timestamped diagnostic to stderr, provided the global
+ * debugmode is at least 'level'.  Quieter settings print nothing.
+ *
+ * 	level: verbosity the message belongs to
+ * 	file:  source file emitting the message
+ * 	line:  line number within that file
+ * 	fmt:   printf style format string
+ * 	... :  values consumed by fmt
+ */
+void
+debug( int level, char *file, int line, const char *fmt, ... )
+{
+	char stamp[20];   /* "YYYY-MM-DD HH:MM:SS" and its terminator */
+	time_t epoch;
+	va_list ap;
+
+	if ( level > debugmode ) return;
+
+	/* message prefix: program, local time, pid, and call site */
+	epoch = time( NULL );
+	strftime( stamp, sizeof stamp, "%F %T", localtime( &epoch ) );
+	fprintf( stderr, "%s [%s] #%d (%s:%04d): ", PROG, stamp, getpid(), file, line );
+
+	/* the caller's own message */
+	va_start( ap, fmt );
+	vfprintf( stderr, fmt, ap );
+	va_end( ap );
+}
+
+
+/*
+ * Append 'buf' to the end of 'line', a la strcat, except dynamically
+ * grow memory for the target string.
+ *
+ * 	line: heap string to extend, or NULL to start a new one
+ * 	buf:  null terminated text to append
+ *
+ * Returns the null terminated line, which the caller must free(), or
+ * NULL (with the old line already freed) if memory runs out.
+ */
+char *
+extend_line( char *line, const char *buf )
+{
+	char *line_realloc;
+
+	/* size_t, not short: a short offset would wrap on very long lines */
+	size_t offset;
+	size_t buf_len = strlen( buf );
+	size_t new_len;
+
+	/* first assignment starts at the beginning, otherwise append
+	 * after the existing text (not including its '\0') */
+	offset = ( line == NULL ) ? 0 : strlen( line );
+
+	/* room for both pieces plus the terminating '\0' */
+	new_len = offset + buf_len + 1;
+
+	debug( 3, LOC, "Extending line to %zu bytes at offset %zu...\n", new_len, offset );
+	if ( (line_realloc = realloc( line, new_len )) == NULL ) {
+		/* cleanup on allocation errors; realloc left 'line' intact */
+		debug( 3, LOC, "Ignoring line, error while allocating memory: %s\n", strerror(errno) );
+		free( line );
+		return( NULL );
+	}
+
+	/* copy buf's '\0' as well, so the result is always terminated */
+	memcpy( line_realloc + offset, buf, buf_len + 1 );
+	return( line_realloc );
+}
+
Binary file util.o has changed
--- a/volta.c	Sat Sep 03 14:12:06 2011 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,170 +0,0 @@
-/* vim: set noet nosta sw=4 ts=4 ft=c : */
-/* Squid docs:
-   ---------------------------------------------------------------------------
-TAG: url_rewrite_program
-Specify the location of the executable for the URL rewriter.
-Since they can perform almost any function there isn't one included.
-
-For each requested URL rewriter will receive on line with the format
-
-URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
-
-In the future, the rewriter interface will be extended with
-key=value pairs ("kvpairs" shown above).  Rewriter programs
-should be prepared to receive and possibly ignore additional
-whitespace-separated tokens on each input line.
-
-And the rewriter may return a rewritten URL. The other components of
-the request line does not need to be returned (ignored if they are).
-
-The rewriter can also indicate that a client-side redirect should
-be performed to the new URL. This is done by prefixing the returned
-URL with "301:" (moved permanently) or 302: (moved temporarily).
-
-By default, a URL rewriter is not used.
---------------------------------------------------------------------------- */
-
-/*
- * TODO
- *
- * flush stdout on writes
- * empty struct not necessary?
- * inet_pton( AF_INET, *char src, dest )
- * an option to run the DB out of memory?
- * PRAGMA user_version = 1;
- *
- */
-
-#include "volta.h"
-#include <time.h>
-unsigned short int debugmode;
-
-
-int
-main( int argc, char *argv[] ) {
-
-	/* opt action flags */
-	struct {
-		unsigned short int init;
-	} actions = {0};
-
-#ifdef DEBUG
-	/* debugmode set at compile time, default to display everything */
-	debugmode = 99;
-#else
-	debugmode = 0;
-#endif
-
-	/* get_opt vars */
-	int opt = 0;
-	opterr  = 0;
-
-	/* parse options */
-	while ( (opt = getopt( argc, argv, "a:d:hv" )) != -1 ) {
-		switch ( opt ) {
-
-			/* action */
-			case 'a':
-				if ( strcmp( optarg, "init" ) == 0 ) actions.init++;
-				break;
-
-			/* debug */
-			case 'd':
-				if ( optarg[0] == '-' ) {
-					argc++; argv -= 1;
-					debugmode = 1;
-				}
-				sscanf( optarg, "%hu", &debugmode );
-				break;
-
-			/* help */
-			case 'h':
-				usage( argv[0] );
-				return( 0 );
-
-			/* version */
-			case 'v':
-				printf( "%s %s\n", PROG, VERSION );
-				return( 0 );
-
-			/* unknown option or option argument missing */
-			case '?':
-				switch( optopt ) {
-					case 'd': /* no debug argument, default to level 1 */
-						debugmode = 1;
-						break;
-					default:
-						usage( argv[0] );
-						return( 1 );
-				}
-
-			default:
-				break;
-		}
-	}
-	argc -= optind;
-	argv += optind;
-
-	/* perform actions */
-	if ( actions.init ) {
-		debug( 1, LOC, "init! init! init!\n" );
-		return( 0 );
-	}
-
-	/* start stdin parsing loop */
-	char line[ LINE_BUFSIZE ];
-	debug( 1, LOC, "Waiting for input...\n" );
-	while( fgets( line, LINE_BUFSIZE, stdin ) != NULL ) parse( line );
-
-	/* stdin closed */
-	debug( 1, LOC, "End of stream, shutting down.\n" );
-	return( 0 );
-}
-
-
-/*
- * Basic usage
- */
-void
-usage( char *prg )
-{
-	printf( "%s [-vh] [-d <level>] [-a <init>]\n", prg );
-	printf( "    -v Display version\n" );
-	printf( "    -d <level> Show debug information on stderr\n" );
-	printf( "    -h Usage (you're lookin' at it)\n" );
-	printf( "    -a Perform an action, being one of:\n" );
-	printf( "         init: Initialize a new, empty database\n" );
-	printf( "\n" );
-	return;
-}
-
-
-/*
- * Debug function, only output to stderr if the debug level is
- * equal or greated to the output level.
- *
- * 	level: The minimum debug level that must be set for the 
- * 	       line to be logged.
- * 	file:  The current code file that is emitting the log
- * 	line:  The line number of the code file that is emitting the log
- * 	... :  any printf style strings and formats that constitute the log message
- */
-void
-debug( int level, char *file, int line, const char *fmt, ... )
-{
-	if ( debugmode < level ) return;
-
-	char timestamp[20];
-	time_t t = time( NULL );
-	struct tm *now = localtime( &t );
-	strftime( timestamp, 20, "%F %T", now );
-
-	va_list args;
-	va_start( args, fmt );
-	fprintf( stderr, "%s [%s] #%d (%s:%04d): ", PROG, timestamp, getpid(), file, line );
-	vfprintf( stderr, fmt, args );
-	va_end( args );
-
-	return;
-}
-
--- a/volta.h	Sat Sep 03 14:12:06 2011 -0700
+++ b/volta.h	Tue Sep 13 22:13:02 2011 -0700
@@ -1,3 +1,32 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 
 #ifndef _VOLTA_H
 #define _VOLTA_H
@@ -10,6 +39,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <errno.h>
 #include <string.h>
 #include <unistd.h>
 #include <time.h>
@@ -21,8 +51,11 @@
 
 #include <sqlite3.h>
 
+#ifdef DEBUG
+#include <google/profiler.h>
+#endif
 
-/* Maximum length per line from Squid */
+/* Default line size we accept from squid, longer lines (huge URLs?) malloc. */
 #define LINE_BUFSIZE 2048
 
 /* Aid debugging */
@@ -42,13 +75,23 @@
 	char   *kvpairs;
 } request;
 
-/* An "empty" request struct used for re-assignment */
+/* FIXME: An "empty" request struct used for re-assignment */
 static const struct request reset_request;
 
-/* Function prototypes */
+/*
+ *
+ * Function prototypes
+ *
+ */
+
 void usage( char *prg );
 void debug( int level, char *file, int line, const char *fmt, ... );
-int  parse( char *p );
+char *extend_line( char *line, const char *buf );
+
+int db_initialize( void );
+
+int accept_loop( void );
+struct request *parse( char *p );
 
 #endif