Get the ragel line parser properly tokenizing the input lines. Add a
"lines per second" timer. General cleanup and memory management.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,28 @@
+
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Mahlon E. Smith nor the names of his
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
--- a/Makefile Wed Sep 28 09:04:16 2011 -0700
+++ b/Makefile Mon Oct 17 09:12:00 2011 -0700
@@ -1,7 +1,7 @@
UNAME := $(shell uname)
DEPS = sqlite3
DEPS_DEBUG = sqlite3 libprofiler
-CFLAGS = $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS)) -O2
+CFLAGS = -O2 -ansi $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS))
LIBS = $(shell pkg-config --libs-only-l $(DEPS))
#OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) parser.o
OBJS = accept_loop.o db.o main.o parser.o process.o util.o
@@ -31,10 +31,10 @@
# proftools doesn't currently register a .pc file on Ubuntu, hence these
# Makefile gymnastics
ifeq ($(UNAME), Linux)
-debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"'
+debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"'
debug: LIBS = -lsqlite3 -lprofiler
else
-debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"'\
+debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"'\
$(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG))
debug: LIBS = $(shell pkg-config --libs-only-l $(DEPS_DEBUG))
endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,8 @@
+
+Volta
+=====
+
+Why "volta"?
+Configuring squid
+Using volta
+
--- a/accept_loop.c Wed Sep 28 09:04:16 2011 -0700
+++ b/accept_loop.c Mon Oct 17 09:12:00 2011 -0700
@@ -31,7 +31,7 @@
#include "volta.h"
/*
- * Accept lines from squid and pass to the parser.
+ * Accept and process lines from squid.
*/
int
accept_loop( void )
@@ -47,7 +47,6 @@
debug( 1, LOC, "Waiting for input...\n" );
while ( fgets( buf, LINE_BUFSIZE, stdin ) != NULL ) {
-
bufsize = strlen( buf );
/* Common case, or last iteration of loop:
@@ -55,18 +54,17 @@
*/
if ( bufsize + 1 < LINE_BUFSIZE ) {
/* line wasn't concatenated onto in previous loops,
- * just pass it directly to parse() */
+ * just pass it directly to process() */
if ( line == NULL ) {
- parse( buf );
+ process( buf );
}
/* memory was previously allocated to contain the line,
- * append the final chunk, pass to parse(), and cleanup. */
+ * append the final chunk, pass to process(), and cleanup. */
else {
if ( (line = extend_line( line, buf )) == NULL ) continue;
- parse( line );
- free( line );
- line = NULL;
+ process( line );
+ free( line ), line = NULL;
}
}
@@ -87,15 +85,15 @@
* (within the current line) that still need to be appended.
*/
if ( buf[ bufsize - 1 ] == '\n' ) {
- parse( line );
- free( line );
- line = NULL;
+ process( line );
+ free( line ), line = NULL;
}
}
}
/* stdin closed */
debug( 1, LOC, "End of stream\n" );
+ report_speed();
return( 0 );
}
--- a/db.c Wed Sep 28 09:04:16 2011 -0700
+++ b/db.c Mon Oct 17 09:12:00 2011 -0700
@@ -106,8 +106,7 @@
(i == 1 ? "initalizing" : "upgrading"), sqlite3_errmsg(v.db) );
return( sqlite3_errcode(v.db) );
}
- free( upgrade_sql );
- upgrade_sql = NULL;
+ free( upgrade_sql ), upgrade_sql = NULL;
/* update version metadata in DB if update was successful */
current_version = i;
--- a/main.c Wed Sep 28 09:04:16 2011 -0700
+++ b/main.c Mon Oct 17 09:12:00 2011 -0700
@@ -104,10 +104,16 @@
argc -= optind;
argv += optind;
+ /* set timer vars for lines/sec counter */
+ if ( v.debugmode > 2 ) {
+ v.timer.start = time( NULL );
+ v.timer.lines = 0;
+ }
+
/* get the initial database handle or bomb immediately. */
if ( db_attach() != SQLITE_OK ) exit( 1 );
/* enter stdin parsing loop */
- return accept_loop();
+ return( accept_loop() );
}
--- a/parser.rl Wed Sep 28 09:04:16 2011 -0700
+++ b/parser.rl Mon Oct 17 09:12:00 2011 -0700
@@ -28,122 +28,258 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/*
-Squid docs:
----------------------------------------------------------------------------
-TAG: url_rewrite_program
-Specify the location of the executable for the URL rewriter.
-Since they can perform almost any function there isn't one included.
-
-For each requested URL rewriter will receive on line with the format
-
-URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
-
-In the future, the rewriter interface will be extended with
-key=value pairs ("kvpairs" shown above). Rewriter programs
-should be prepared to receive and possibly ignore additional
-whitespace-separated tokens on each input line.
-
-And the rewriter may return a rewritten URL. The other components of
-the request line does not need to be returned (ignored if they are).
-
-The rewriter can also indicate that a client-side redirect should
-be performed to the new URL. This is done by prefixing the returned
-URL with "301:" (moved permanently) or 302: (moved temporarily).
-
-By default, a URL rewriter is not used.
----------------------------------------------------------------------------
-*/
-
#include "volta.h"
%%{
machine redirector;
- action parse_error {
- debug( 2, LOC, "parse error\n" );
- return( NULL );
- }
+ action success { valid = 1; }
+ action error { valid = 0; }
- action yay {
- printf( "I saw: %s", p+1 );
+ action channel_id_found {
+ debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" );
+ fbreak;
}
- # http://, ftp://, https://, etc
- proto = alpha{3,5} . '://';
+ action scheme_start { p_request->tokens.scheme_start = fpc; }
+ action scheme_finish { p_request->tokens.scheme_length = fpc - ( *pe + p_request->tokens.scheme_start ); }
+ action scheme_error { debug( 3, LOC, "Unable to parse scheme.\n" ); }
+
+ action host_start { p_request->tokens.host_start = fpc; }
+ action host_finish { p_request->tokens.host_length = fpc - ( *pe + p_request->tokens.host_start ); }
+ action host_error { debug( 3, LOC, "Unable to parse hostname.\n" ); }
+
+ action port_start { p_request->tokens.port_start = fpc; }
+ action port_finish { p_request->tokens.port_length = fpc - ( *pe + p_request->tokens.port_start ); }
+
+ action path_start { p_request->tokens.path_start = fpc; }
+ action path_finish { p_request->tokens.path_length = fpc - ( *pe + p_request->tokens.path_start ); }
+
+ action meth_start { p_request->tokens.meth_start = fpc; }
+ action meth_finish { p_request->tokens.meth_length = fpc - ( *pe + p_request->tokens.meth_start ); }
+ action meth_error { debug( 3, LOC, "Unable to parse method.\n" ); }
+
+ action c_ip_start { p_request->tokens.c_ip_start = fpc; }
+ action c_ip_finish { p_request->tokens.c_ip_length = fpc - ( *pe + p_request->tokens.c_ip_start ); }
+ action c_ip_error { debug( 3, LOC, "Unable to parse the client IP address.\n" ); }
- # http://mahlon:password@example.com or http://mahlon@example.com
- # username optional password
- creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
+ #
+ # Squid line: URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+ #
+ # URI Syntax (RFC 3986) misc notes:
+ #
+ # - Scheme isn't passed to redirectors on CONNECT method requests
+ #
+ # - Hostname segments aren't supposed to be individually greater than 63 chars,
+ # and the hostname in total shouldn't exceed 255. They also shouldn't be entirely
+ # made up of digits, or contain underscores. In practice, these rules appear to
+ # be violated constantly by some pretty big sites. I'm looking at you, facebook.
+ # (( alnum ) | ( alnum . [a-zA-Z0-9\-]{0,63} . alnum )) & !( digit+ );
+ #
+ # - ipv6 has some utterly insane rules (RFC 5952) in the name of "shortcuts", which
+ # only seem like shortcuts to someone writing IP addresses by hand. Anyone that
+ # has to parse (or even just read) them has a bunch of seemingly arbitrary work
+ # dumped in their lap. Heck, it's impossible to even search for an ipv6 address
+ # that contains zeros in a text editor, because you have no idea how it might
+ # be represented. Rad!
+ #
+ # The parser just trusts any ipv6 squid hands us as being valid, without
+ # any real parsing/validation, other than it consists of hex digits and colons.
+ #
+ # - This parser originally validated path/query/fragment as well, but there were
+ # enough inconsistencies with unescaped chars and other real-life RFC deviations
+ # that I opted to just accept what we get from squid.
+ #
+ # - Redirectors aren't handed any userinfo (http://mahlon:password@example.com),
+ # so no need to check for that.
+ #
+
+ host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
+ pchar = ( alnum | [\-._~!$%&'()*+,;=] );
+ path_segment = '/' ( any - space )*;
- main := ( proto . creds ) | proto @yay '\n';
+ hostname = host_component ( '.' host_component )* '.'?;
+ ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+ ipv6 = ( xdigit | ':' )+;
+
+ channel_id = ( digit+ space ) %channel_id_found;
+ scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish @!scheme_error;
+ host = ( hostname | ipv4 ) >host_start %host_finish @!host_error;
+ port = ( ':' digit{1,5} ) >port_start %port_finish;
+ path = path_segment* >path_start %path_finish;
+ client_ip = ipv4 >c_ip_start %c_ip_finish @!c_ip_error;
+ method = upper+ >meth_start %meth_finish @!meth_error;
+
+ Line = (
+ start: (
+ channel_id? -> Url
+ ),
+
+ Url: (
+ scheme? host port? path? space -> Client
+ ),
+
+ Client: (
+ client_ip '/' ( hostname | '-' ) space -> User
+ ),
+
+ User: (
+ pchar+ space -> Method
+ ),
+
+ Method: (
+ method -> KVPairs
+ ),
+
+ KVPairs: (
+ ( space any+ )? -> final
+ )
+ ) %success @!error;
+
+
+ main := Line '\n';
}%%
%% write data;
-/*
-%%{
- machine redirector;
-
- action yay {
- printf( "I saw: %s", p+1 );
- }
-
- # http://, ftp://, https://, etc
- proto = alpha{3,5} . '://';
-
- # http://mahlon:password@example.com or http://mahlon@example.com
- # username optional password
- creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@';
-
- main := ( proto . creds ) | proto @yay '\n';
-}%%
-%% write data;
+/*
+ * Tokenize an incoming line from squid, returning a parsed and populated
+ * structure to make redirection decisions against. This pointer should
+ * be freed using cleanup_request() after use.
+ *
+ * Squid documentation about redirectors:
+ * ---------------------------------------------------------------------------
+ * TAG: url_rewrite_program
+ * Specify the location of the executable for the URL rewriter.
+ * Since they can perform almost any function there isn't one included.
+ *
+ * For each requested URL rewriter will receive on line with the format
+ *
+ * URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
+ *
+ * In the future, the rewriter interface will be extended with
+ * key=value pairs ("kvpairs" shown above). Rewriter programs
+ * should be prepared to receive and possibly ignore additional
+ * whitespace-separated tokens on each input line.
+ *
+ * And the rewriter may return a rewritten URL. The other components of
+ * the request line does not need to be returned (ignored if they are).
+ *
+ * The rewriter can also indicate that a client-side redirect should
+ * be performed to the new URL. This is done by prefixing the returned
+ * URL with "301:" (moved permanently) or 302: (moved temporarily).
+ *
+ * By default, a URL rewriter is not used.
+ * ---------------------------------------------------------------------------
*/
-
-
-/*
-%%{
- machine foo;
-
- OPEN = 0;
- CLOSE = 1;
+request *
+parse( char *line )
+{
+ /* machine required vars */
+ int cs = 0;
+ char *p = line;
+ char *pe = p + strlen(p);
+ char *eof = NULL;
- main :=
- start:
- door_closed: (
- OPEN -> door_open -> final
- ),
- door_open: (
- CLOSE -> door_closed
- );
-}%%
-*/
-
-struct request *
-parse( char *p )
-{
- /* initial machine state */
- short int cs = 0;
-
- /* the client request object */
- request c_request;
- request *cp_request = &c_request;
-
- /*
- char ip[ INET_ADDRSTRLEN ];
- inet_pton( AF_INET, "127.0.0.1", &cp_request->ip );
- inet_ntop( AF_INET, &cp_request->ip, ip, INET_ADDRSTRLEN );
- */
-
- /* initalize state machine with current line */
- char *pe = p + strlen(p) + 1;
+ /* the client request pointer */
+ unsigned char valid = 0;
+ request *p_request = init_request();
/* enter state machine */
%% write init;
%% write exec;
- /* reset the request */
- /* c_request = reset_request; */
- return( cp_request );
+ /* If we were given an invalid line, bail early */
+ if ( valid == 0 ) {
+ free( p_request ), p_request = NULL;
+ debug( 3, LOC, "Invalid line (%d), skipped\n", v.timer.lines + 1 );
+ debug( 4, LOC, "%s", line );
+ return( NULL );
+ }
+
+ (void)populate_request( p_request );
+ return( p_request );
}
+
+/*
+ * Initialize and return a pointer to a new request object.
+ *
+ */
+request *
+init_request( void )
+{
+ request *p_request = NULL;
+ if ( (p_request = malloc( sizeof(request) )) == NULL ) {
+ debug( 1, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) );
+ return( NULL );
+ }
+ p_request->scheme = NULL;
+ p_request->host = NULL;
+ p_request->port = NULL;
+ p_request->path = NULL;
+ p_request->user = NULL;
+ p_request->method = NULL;
+ p_request->client_ip = NULL;
+
+ p_request->tokens.scheme_start = NULL;
+ p_request->tokens.host_start = NULL;
+ p_request->tokens.port_start = NULL;
+ p_request->tokens.path_start = NULL;
+ p_request->tokens.meth_start = NULL;
+ p_request->tokens.c_ip_start = NULL;
+ p_request->tokens.scheme_length = 0;
+ p_request->tokens.host_length = 0;
+ p_request->tokens.port_length = 0;
+ p_request->tokens.path_length = 0;
+ p_request->tokens.meth_length = 0;
+ p_request->tokens.c_ip_length = 0;
+
+ return p_request;
+}
+
+
+/*
+ * Take the previously parsed token locations and copy them into the request struct.
+ *
+ */
+void
+populate_request( struct request *p_request )
+{
+ p_request->scheme =
+ copy_string_token( p_request->tokens.scheme_start, p_request->tokens.scheme_length );
+ p_request->host =
+ copy_string_token( p_request->tokens.host_start, p_request->tokens.host_length );
+ p_request->port =
+ copy_string_token( p_request->tokens.port_start, p_request->tokens.port_length );
+ p_request->path =
+ copy_string_token( p_request->tokens.path_start, p_request->tokens.path_length );
+ p_request->method =
+ copy_string_token( p_request->tokens.meth_start, p_request->tokens.meth_length );
+ p_request->client_ip =
+ copy_ipv4_token( p_request->tokens.c_ip_start, p_request->tokens.c_ip_length );
+
+ return;
+}
+
+
+/*
+ * Release memory used by request struct.
+ *
+ */
+void
+cleanup_request( struct request *p_request )
+{
+ if ( p_request == NULL ) return;
+
+ free( p_request->scheme );
+ free( p_request->host );
+ free( p_request->port );
+ free( p_request->path );
+ free( p_request->method );
+ free( p_request->client_ip );
+
+ free( p_request ), p_request = NULL;
+
+ return;
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/process.c Mon Oct 17 09:12:00 2011 -0700
@@ -0,0 +1,56 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of Mahlon E. Smith nor the names of his
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "volta.h"
+#include "db.h"
+
+void
+process( char *line )
+{
+ request *p_request = parse( line );
+
+ /* count lines in debugmode */
+ if ( v.debugmode > 2 ) v.timer.lines++;
+
+ /* If parsing failed for some reason, return a blank line to squid. */
+ if ( p_request == NULL ) {
+ printf( "\n" );
+ return;
+ }
+
+ printf( "* %s", line );
+ printf( "%s%s%s%s\n\n", p_request->scheme, p_request->host, p_request->port, p_request->path );
+
+ /* TODO: everything */
+
+ cleanup_request( p_request );
+ return;
+}
+
--- a/sql/1.sql Wed Sep 28 09:04:16 2011 -0700
+++ b/sql/1.sql Mon Oct 17 09:12:00 2011 -0700
@@ -1,3 +1,5 @@
+--- vim: set noet nosta sw=4 ts=4 ft=sql:
+
BEGIN;
DROP TABLE IF EXISTS rules;
--- a/util.c Wed Sep 28 09:04:16 2011 -0700
+++ b/util.c Mon Oct 17 09:12:00 2011 -0700
@@ -31,6 +31,7 @@
#include "volta.h"
+
/*
* Output basic usage information.
*/
@@ -69,8 +70,8 @@
va_list args;
va_start( args, fmt );
- fprintf( stderr, "%s [%s] #%d (%s:%04d): ",
- PROG, timestamp, getpid(), file, line );
+ fprintf( stderr, "%s [%s] #%d %d (%s:%d): ",
+ PROG, timestamp, getpid(), level, file, line );
vfprintf( stderr, fmt, args );
va_end( args );
@@ -88,7 +89,6 @@
extend_line( char *line, const char *buf )
{
char *line_realloc;
-
unsigned short int offset;
size_t new_len;
@@ -103,12 +103,12 @@
new_len = offset + LINE_BUFSIZE;
}
- debug( 3, LOC, "Extending line to %d bytes at offset %d...\n", new_len, offset );
- if ( (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) {
- /* cleanup on allocation errors */
- debug( 3, LOC, "Ignoring line, error while allocating memory: %s\n", strerror(errno) );
- if ( line != NULL ) free( line );
- line = NULL;
+ debug( 4, LOC, "Extending line %d to %d bytes at offset %d\n", v.timer.lines+1, new_len, offset );
+ if ( new_len > LINE_MAX || (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) {
+ debug( 1, LOC, "Ignoring line, error while allocating memory: %s\n",
+ (line_realloc == NULL ? strerror(errno) : "Line too large") );
+ if ( line != NULL ) free( line ), line = NULL;
+ printf( "\n" );
}
else {
line = line_realloc;
@@ -120,7 +120,7 @@
/*
- * Read an entire file into memory, returning a pointer the contents.
+ * Read an entire file into memory, returning a pointer to the contents.
* Returns NULL on error.
*
*/
@@ -149,7 +149,7 @@
return( NULL );
}
- if ( fread( contents, sizeof(char), sb.st_size, fh ) != sb.st_size ) {
+ if ( fread( contents, sizeof(char), sb.st_size, fh ) != (unsigned int)sb.st_size ) {
debug( 1, LOC, "Short read for file '%s'?: %s\n", file );
fclose( fh );
return( NULL );
@@ -160,4 +160,79 @@
}
+/*
+ * Allocate +length+ + 1 bytes, copy +length+ bytes from the given +string+
+ * into the new null-terminated string, and return a pointer to it.
+ *
+ */
+char *
+copy_string_token( char *string, unsigned short int length )
+{
+ char *alloc_ptr = NULL;
+ if ( string == NULL || length == 0 ) return ( NULL );
+ if ( (alloc_ptr = calloc( length + 1, sizeof(char) )) == NULL ) {
+ debug( 1, LOC, "Unable to allocate memory for token: %s\n", strerror(errno) );
+ return( NULL );
+ }
+
+ (void)memcpy( alloc_ptr, string, length );
+
+ return( alloc_ptr );
+}
+
+
+/*
+ * Convert the first +length+ bytes of the given dotted quad style +ip_string+
+ * into a newly allocated in_addr struct, returning a pointer to it.
+ *
+ */
+struct in_addr *
+copy_ipv4_token( char *ip_string, unsigned short int length )
+{
+ struct in_addr *alloc_ptr = NULL;
+ char c_ip[ INET_ADDRSTRLEN ];
+
+ if ( ip_string == NULL ) return ( NULL );
+
+ (void)strncpy( c_ip, ip_string, length );
+ c_ip[ length ] = '\0';
+
+ if ( (alloc_ptr = calloc( length, sizeof(struct in_addr) )) == NULL ) {
+ debug( 1, LOC, "Unable to allocate memory for ip '%s': %s\n",
+ c_ip, strerror(errno) );
+ }
+
+ if ( inet_pton( AF_INET, c_ip, alloc_ptr ) < 1 ) {
+ debug( 1, LOC, "Unable to create in_addr struct for client ip '%s': %s\n",
+ c_ip, strerror(errno) );
+ free( alloc_ptr ), alloc_ptr = NULL;
+ }
+
+ return( alloc_ptr );
+}
+
+
+/*
+ * Report how many lines were processed per second.
+ *
+ */
+void
+report_speed( void )
+{
+ if ( v.debugmode < 3 ) return;
+
+ time_t end_time = time( NULL );
+ double elapsed = difftime( end_time, v.timer.start );
+
+ if ( elapsed > 0 ) {
+ debug( 3, LOC, "Processed %lu lines in %0.1f seconds. (%0.1f lines/sec)\n",
+ v.timer.lines, elapsed, v.timer.lines/elapsed );
+ }
+ else {
+ debug( 3, LOC, "Processed %lu lines in under a second.\n", v.timer.lines );
+ }
+
+ return;
+}
+
--- a/volta.h Wed Sep 28 09:04:16 2011 -0700
+++ b/volta.h Mon Oct 17 09:12:00 2011 -0700
@@ -44,6 +44,7 @@
#include <unistd.h>
#include <time.h>
#include <sys/stat.h>
+#include <signal.h>
#include <sys/types.h>
#include <sys/socket.h>
@@ -56,28 +57,60 @@
/* Default line size we accept from squid, longer lines (huge URLs?) malloc. */
#define LINE_BUFSIZE 2048
+/* Ceiling for how many bytes can be allocated at once for a single line. */
+#define LINE_MAX 256000 /* 250k */
+
+/* Redirect types */
+#define REDIR_TEMPORARY 0
+#define REDIR_PERMANENT 1
+#define REDIR_TRANSPARENT 2
/* Aid debugging */
#define LOC __FILE__, __LINE__
-/* a global struct for easy access to common vars */
+/*
+ * a global struct for easy access to common vars
+ *
+ */
struct v_globals {
unsigned short int debugmode; /* debug level */
char dbname[128]; /* path to database file */
struct sqlite3 *db; /* database handle */
+
+ struct {
+ time_t start; /* start time */
+ unsigned long int lines; /* line count for determining speed */
+ } timer;
};
extern struct v_globals v; /* defined in main.c */
-/* The parsed attributes from the request line, as given to us by squid.
- * URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL> */
+/*
+ * The parsed attributes from the request line, as given to us by squid.
+ *
+ */
typedef struct request {
- char *url;
+ char *scheme;
char *host;
- struct sockaddr_in ip;
- char *ip_fqdn;
+ char *path;
+ char *port;
+ struct in_addr *client_ip;
char *user;
char *method;
- char *kvpairs;
+
+ struct {
+ char *scheme_start;
+ char *host_start;
+ char *port_start;
+ char *path_start;
+ char *meth_start;
+ char *c_ip_start;
+ unsigned short int scheme_length;
+ unsigned short int host_length;
+ unsigned short int port_length;
+ unsigned short int path_length;
+ unsigned short int meth_length;
+ unsigned short int c_ip_length;
+ } tokens;
} request;
/*
@@ -87,13 +120,20 @@
*/
int getopt( int, char * const [], const char *);
-void usage( char *prg );
-void debug( int level, char *file, int line, const char *fmt, ... );
-char *slurp_file( char *file );
-char *extend_line( char *line, const char *buf );
+void usage( char * );
+void debug( int, char *, int, const char *, ... );
+void report_speed( void );
+char *slurp_file( char * );
+char *extend_line( char *, const char * );
+char *copy_string_token( char *, unsigned short int );
+struct in_addr *copy_ipv4_token( char *, unsigned short int );
int accept_loop( void );
struct request *parse( char *p );
+void process( char * );
+struct request *parse( char * );
+void populate_request( struct request * );
+void cleanup_request( struct request * );
#endif