# HG changeset patch # User Mahlon E. Smith # Date 1318867920 25200 # Node ID d0730945028573c1dea41a442b7b1f5cc52ae7f8 # Parent bdf20e6eefd778bd1728f568881be4c5c15b242f Get the ragel line parser properly tokenizing the input lines. Add a "lines per second" timer. General cleanup and memory management. diff -r bdf20e6eefd7 -r d07309450285 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Mon Oct 17 09:12:00 2011 -0700 @@ -0,0 +1,28 @@ + +Copyright (c) 2011, Mahlon E. Smith +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Mahlon E. Smith nor the names of his + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff -r bdf20e6eefd7 -r d07309450285 Makefile --- a/Makefile Wed Sep 28 09:04:16 2011 -0700 +++ b/Makefile Mon Oct 17 09:12:00 2011 -0700 @@ -1,7 +1,7 @@ UNAME := $(shell uname) DEPS = sqlite3 DEPS_DEBUG = sqlite3 libprofiler -CFLAGS = $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS)) -O2 +CFLAGS = -O2 -ansi $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS)) LIBS = $(shell pkg-config --libs-only-l $(DEPS)) #OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) parser.o OBJS = accept_loop.o db.o main.o parser.o process.o util.o @@ -31,10 +31,10 @@ # proftools doesn't currently register a .pc file on Ubuntu, hence these # Makefile gymnastics ifeq ($(UNAME), Linux) -debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"' +debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"' debug: LIBS = -lsqlite3 -lprofiler else -debug: CFLAGS = -ggdb -Wall -DDEBUG -DPROG='"volta (debugmode)"'\ +debug: CFLAGS = -ggdb -ansi -Wall -DDEBUG -DPROG='"volta (debugmode)"'\ $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG)) debug: LIBS = $(shell pkg-config --libs-only-l $(DEPS_DEBUG)) endif diff -r bdf20e6eefd7 -r d07309450285 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Oct 17 09:12:00 2011 -0700 @@ -0,0 +1,8 @@ + +Volta +===== + +Why "volta"? +Configuring squid +Using volta + diff -r bdf20e6eefd7 -r d07309450285 accept_loop.c --- a/accept_loop.c Wed Sep 28 09:04:16 2011 -0700 +++ b/accept_loop.c Mon Oct 17 09:12:00 2011 -0700 @@ -31,7 +31,7 @@ #include "volta.h" /* - * Accept lines from squid and pass to the parser. + * Accept and process lines from squid. 
*/ int accept_loop( void ) @@ -47,7 +47,6 @@ debug( 1, LOC, "Waiting for input...\n" ); while ( fgets( buf, LINE_BUFSIZE, stdin ) != NULL ) { - bufsize = strlen( buf ); /* Common case, or last iteration of loop: @@ -55,18 +54,17 @@ */ if ( bufsize + 1 < LINE_BUFSIZE ) { /* line wasn't concatenated onto in previous loops, - * just pass it directly to parse() */ + * just pass it directly to process() */ if ( line == NULL ) { - parse( buf ); + process( buf ); } /* memory was previously allocated to contain the line, - * append the final chunk, pass to parse(), and cleanup. */ + * append the final chunk, pass to process(), and cleanup. */ else { if ( (line = extend_line( line, buf )) == NULL ) continue; - parse( line ); - free( line ); - line = NULL; + process( line ); + free( line ), line = NULL; } } @@ -87,15 +85,15 @@ * (within the current line) that still need to be appended. */ if ( buf[ bufsize - 1 ] == '\n' ) { - parse( line ); - free( line ); - line = NULL; + process( line ); + free( line ), line = NULL; } } } /* stdin closed */ debug( 1, LOC, "End of stream\n" ); + report_speed(); return( 0 ); } diff -r bdf20e6eefd7 -r d07309450285 db.c --- a/db.c Wed Sep 28 09:04:16 2011 -0700 +++ b/db.c Mon Oct 17 09:12:00 2011 -0700 @@ -106,8 +106,7 @@ (i == 1 ? "initalizing" : "upgrading"), sqlite3_errmsg(v.db) ); return( sqlite3_errcode(v.db) ); } - free( upgrade_sql ); - upgrade_sql = NULL; + free( upgrade_sql ), upgrade_sql = NULL; /* update version metadata in DB if update was successful */ current_version = i; diff -r bdf20e6eefd7 -r d07309450285 main.c --- a/main.c Wed Sep 28 09:04:16 2011 -0700 +++ b/main.c Mon Oct 17 09:12:00 2011 -0700 @@ -104,10 +104,16 @@ argc -= optind; argv += optind; + /* set timer vars for lines/sec counter */ + if ( v.debugmode > 2 ) { + v.timer.start = time( NULL ); + v.timer.lines = 0; + } + /* get the initial database handle or bomb immediately. 
*/ if ( db_attach() != SQLITE_OK ) exit( 1 ); /* enter stdin parsing loop */ - return accept_loop(); + return( accept_loop() ); } diff -r bdf20e6eefd7 -r d07309450285 parser.rl --- a/parser.rl Wed Sep 28 09:04:16 2011 -0700 +++ b/parser.rl Mon Oct 17 09:12:00 2011 -0700 @@ -28,122 +28,258 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* -Squid docs: ---------------------------------------------------------------------------- -TAG: url_rewrite_program -Specify the location of the executable for the URL rewriter. -Since they can perform almost any function there isn't one included. - -For each requested URL rewriter will receive on line with the format - -URL client_ip "/" fqdn user method [ kvpairs] - -In the future, the rewriter interface will be extended with -key=value pairs ("kvpairs" shown above). Rewriter programs -should be prepared to receive and possibly ignore additional -whitespace-separated tokens on each input line. - -And the rewriter may return a rewritten URL. The other components of -the request line does not need to be returned (ignored if they are). - -The rewriter can also indicate that a client-side redirect should -be performed to the new URL. This is done by prefixing the returned -URL with "301:" (moved permanently) or 302: (moved temporarily). - -By default, a URL rewriter is not used. ---------------------------------------------------------------------------- -*/ - #include "volta.h" %%{ machine redirector; - action parse_error { - debug( 2, LOC, "parse error\n" ); - return( NULL ); - } + action success { valid = 1; } + action error { valid = 0; } - action yay { - printf( "I saw: %s", p+1 ); + action channel_id_found { + debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" ); + fbreak; } - # http://, ftp://, https://, etc - proto = alpha{3,5} . 
'://'; + action scheme_start { p_request->tokens.scheme_start = fpc; } + action scheme_finish { p_request->tokens.scheme_length = fpc - p_request->tokens.scheme_start; } + action scheme_error { debug( 3, LOC, "Unable to parse scheme.\n" ); } + # Token lengths are the distance from the recorded token start to the current position (fpc). + action host_start { p_request->tokens.host_start = fpc; } + action host_finish { p_request->tokens.host_length = fpc - p_request->tokens.host_start; } + action host_error { debug( 3, LOC, "Unable to parse hostname.\n" ); } + + action port_start { p_request->tokens.port_start = fpc; } + action port_finish { p_request->tokens.port_length = fpc - p_request->tokens.port_start; } + + action path_start { p_request->tokens.path_start = fpc; } + action path_finish { p_request->tokens.path_length = fpc - p_request->tokens.path_start; } + + action meth_start { p_request->tokens.meth_start = fpc; } + action meth_finish { p_request->tokens.meth_length = fpc - p_request->tokens.meth_start; } + action meth_error { debug( 3, LOC, "Unable to parse method.\n" ); } + + action c_ip_start { p_request->tokens.c_ip_start = fpc; } + action c_ip_finish { p_request->tokens.c_ip_length = fpc - p_request->tokens.c_ip_start; } + action c_ip_error { debug( 3, LOC, "Unable to parse the client IP address.\n" ); } - # http://mahlon:password@example.com or http://mahlon@example.com - # username optional password - creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@'; + # + # Squid line: URL client_ip "/" fqdn user method [ kvpairs] + # + # URI Syntax (RFC 3986) misc notes: + # + # - Scheme isn't passed to redirectors on CONNECT method requests + # + # - Hostname segments aren't supposed to be individually greater than 63 chars, + # and the hostname in total shouldn't exceed 255. They also shouldn't be entirely + # made up of digits, or contain underscores. In practice, these rules appear to + # be violated constantly by some pretty big sites. I'm looking at you, facebook.
+ # (( alnum ) | ( alnum . [a-zA-Z0-9\-]{0,63} . alnum )) & !( digit+ ); + # + # - ipv6 has some utterly insane rules (RFC 5952) in the name of "shortcuts", which + # only seem like shortcuts to someone writing IP addresses by hand. Anyone that + # has to parse (or even just read) them has a bunch of seemingly arbitrary work + # dumped in their lap. Heck, it's impossible to even search for an ipv6 address + # that contains zeros in a text editor, because you have no idea how it might + # be represented. Rad! + # + # The parser just trusts any ipv6 squid hands us as being valid, without + # any real parsing/validation, other than it consists of hex digits and colons. + # + # - This parser originally validated path/query/fragment as well, but there were + # enough inconsistencies with unescaped chars and other real-life RFC deviations + # that I opted to just accept what we get from squid. + # + # - Redirectors aren't handed any userinfo (http://mahlon:password@example.com), + # so no need to check for that. + # + + host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum ); + pchar = ( alnum | [\-._~!$%&'()*+,;=] ); + path_segment = '/' ( any - space )*; - main := ( proto . creds ) | proto @yay '\n'; + hostname = host_component ( '.' host_component )* '.'?; + ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3}; + ipv6 = ( xdigit | ':' )+; + + channel_id = ( digit+ space ) %channel_id_found; + scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish @!scheme_error; + host = ( hostname | ipv4 ) >host_start %host_finish @!host_error; + port = ( ':' digit{1,5} ) >port_start %port_finish; + path = path_segment* >path_start %path_finish; + client_ip = ipv4 >c_ip_start %c_ip_finish @!c_ip_error; + method = upper+ >meth_start %meth_finish @!meth_error; + + Line = ( + start: ( + channel_id? -> Url + ), + + Url: ( + scheme? host port? path?
space -> Client + ), + + Client: ( + client_ip '/' ( hostname | '-' ) space -> User + ), + + User: ( + pchar+ space -> Method + ), + + Method: ( + method -> KVPairs + ), + + KVPairs: ( + ( space any+ )? -> final + ) + ) %success @!error; + + + main := Line '\n'; }%% %% write data; -/* -%%{ - machine redirector; - - action yay { - printf( "I saw: %s", p+1 ); - } - - # http://, ftp://, https://, etc - proto = alpha{3,5} . '://'; - - # http://mahlon:password@example.com or http://mahlon@example.com - # username optional password - creds = ( alnum | [+._\-] )+ . ( ':' . any+ )? . '@'; - - main := ( proto . creds ) | proto @yay '\n'; -}%% -%% write data; +/* + * Tokenize an incoming line from squid, returning a parsed and populated + * structure to make redirection decisions against. This pointer should + * be freed using cleanup_request() after use. + * + * Squid documentation about redirectors: + * --------------------------------------------------------------------------- + * TAG: url_rewrite_program + * Specify the location of the executable for the URL rewriter. + * Since they can perform almost any function there isn't one included. + * + * For each requested URL rewriter will receive on line with the format + * + * URL client_ip "/" fqdn user method [ kvpairs] + * + * In the future, the rewriter interface will be extended with + * key=value pairs ("kvpairs" shown above). Rewriter programs + * should be prepared to receive and possibly ignore additional + * whitespace-separated tokens on each input line. + * + * And the rewriter may return a rewritten URL. The other components of + * the request line does not need to be returned (ignored if they are). + * + * The rewriter can also indicate that a client-side redirect should + * be performed to the new URL. This is done by prefixing the returned + * URL with "301:" (moved permanently) or 302: (moved temporarily). + * + * By default, a URL rewriter is not used. 
+ * --------------------------------------------------------------------------- */ - - -/* -%%{ - machine foo; - - OPEN = 0; - CLOSE = 1; +request * +parse( char *line ) +{ + /* machine required vars */ + int cs = 0; + char *p = line; + char *pe = p + strlen(p); + char *eof = NULL; - main := - start: - door_closed: ( - OPEN -> door_open -> final - ), - door_open: ( - CLOSE -> door_closed - ); -}%% -*/ - -struct request * -parse( char *p ) -{ - /* initial machine state */ - short int cs = 0; - - /* the client request object */ - request c_request; - request *cp_request = &c_request; - - /* - char ip[ INET_ADDRSTRLEN ]; - inet_pton( AF_INET, "127.0.0.1", &cp_request->ip ); - inet_ntop( AF_INET, &cp_request->ip, ip, INET_ADDRSTRLEN ); - */ - - /* initalize state machine with current line */ - char *pe = p + strlen(p) + 1; + unsigned char valid = 0; /* set by the machine's success/error actions */ + request *p_request = init_request(); /* the client request pointer, NULL on allocation failure */ + if ( p_request == NULL ) return( NULL ); /* enter state machine */ %% write init; %% write exec; - /* reset the request */ - /* c_request = reset_request; */ - return( cp_request ); + /* If we were given an invalid line, bail early */ + if ( valid == 0 ) { + free( p_request ), p_request = NULL; + debug( 3, LOC, "Invalid line (%lu), skipped\n", v.timer.lines + 1 ); + debug( 4, LOC, "%s", line ); + return( NULL ); + } + + (void)populate_request( p_request ); + return( p_request ); } + +/* + * Initialize and return a pointer to a new request object.
+ * + */ +request * +init_request( void ) +{ + request *p_request = NULL; + if ( (p_request = malloc( sizeof(request) )) == NULL ) { + debug( 1, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) ); + return( NULL ); + } + p_request->scheme = NULL; + p_request->host = NULL; + p_request->port = NULL; + p_request->path = NULL; + p_request->user = NULL; + p_request->method = NULL; + p_request->client_ip = NULL; + + p_request->tokens.scheme_start = NULL; + p_request->tokens.host_start = NULL; + p_request->tokens.port_start = NULL; + p_request->tokens.path_start = NULL; + p_request->tokens.meth_start = NULL; + p_request->tokens.c_ip_start = NULL; + p_request->tokens.scheme_length = 0; + p_request->tokens.host_length = 0; + p_request->tokens.port_length = 0; + p_request->tokens.path_length = 0; + p_request->tokens.meth_length = 0; + p_request->tokens.c_ip_length = 0; + + return p_request; +} + + +/* + * Take the previously parsed token locations and copy them into the request struct. + * + */ +void +populate_request( struct request *p_request ) +{ + p_request->scheme = + copy_string_token( p_request->tokens.scheme_start, p_request->tokens.scheme_length ); + p_request->host = + copy_string_token( p_request->tokens.host_start, p_request->tokens.host_length ); + p_request->port = + copy_string_token( p_request->tokens.port_start, p_request->tokens.port_length ); + p_request->path = + copy_string_token( p_request->tokens.path_start, p_request->tokens.path_length ); + p_request->method = + copy_string_token( p_request->tokens.meth_start, p_request->tokens.meth_length ); + p_request->client_ip = + copy_ipv4_token( p_request->tokens.c_ip_start, p_request->tokens.c_ip_length ); + + return; +} + + +/* + * Release memory used by request struct. 
+ * + */ +void +cleanup_request( struct request *p_request ) +{ + if ( p_request == NULL ) return; + + free( p_request->scheme ); + free( p_request->host ); + free( p_request->port ); + free( p_request->path ); + free( p_request->method ); + free( p_request->client_ip ); + + free( p_request ), p_request = NULL; + + return; +} + diff -r bdf20e6eefd7 -r d07309450285 process.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/process.c Mon Oct 17 09:12:00 2011 -0700 @@ -0,0 +1,56 @@ +/* vim: set noet nosta sw=4 ts=4 ft=c : */ +/* +Copyright (c) 2011, Mahlon E. Smith +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Mahlon E. Smith nor the names of his + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "volta.h" +#include "db.h" + +void +process( char *line ) +{ + request *p_request = parse( line ); + + /* count lines in debugmode */ + if ( v.debugmode > 2 ) v.timer.lines++; + + /* If parsing failed for some reason, return a blank line to squid. */ + if ( p_request == NULL ) { + printf( "\n" ); + return; + } + + printf( "* %s", line ); + /* any URL component may be NULL (absent token or failed copy); never hand NULL to %s */ + printf( "%s%s%s%s\n\n", p_request->scheme ? p_request->scheme : "", p_request->host ? p_request->host : "", p_request->port ? p_request->port : "", p_request->path ? p_request->path : "" ); + /* TODO: everything */ + + cleanup_request( p_request ); + return; +} + diff -r bdf20e6eefd7 -r d07309450285 sql/1.sql --- a/sql/1.sql Wed Sep 28 09:04:16 2011 -0700 +++ b/sql/1.sql Mon Oct 17 09:12:00 2011 -0700 @@ -1,3 +1,5 @@ +--- vim: set noet nosta sw=4 ts=4 ft=sql: + BEGIN; DROP TABLE IF EXISTS rules; diff -r bdf20e6eefd7 -r d07309450285 util.c --- a/util.c Wed Sep 28 09:04:16 2011 -0700 +++ b/util.c Mon Oct 17 09:12:00 2011 -0700 @@ -31,6 +31,7 @@ #include "volta.h" + /* * Output basic usage information.
*/ @@ -69,8 +70,8 @@ va_list args; va_start( args, fmt ); - fprintf( stderr, "%s [%s] #%d (%s:%04d): ", - PROG, timestamp, getpid(), file, line ); + fprintf( stderr, "%s [%s] #%d %d (%s:%d): ", + PROG, timestamp, getpid(), level, file, line ); vfprintf( stderr, fmt, args ); va_end( args ); @@ -88,7 +89,6 @@ extend_line( char *line, const char *buf ) { char *line_realloc; - unsigned short int offset; size_t new_len; @@ -103,12 +103,12 @@ new_len = offset + LINE_BUFSIZE; } - debug( 3, LOC, "Extending line to %d bytes at offset %d...\n", new_len, offset ); - if ( (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) { - /* cleanup on allocation errors */ - debug( 3, LOC, "Ignoring line, error while allocating memory: %s\n", strerror(errno) ); - if ( line != NULL ) free( line ); - line = NULL; + debug( 4, LOC, "Extending line %d to %d bytes at offset %d\n", v.timer.lines+1, new_len, offset ); + if ( new_len > LINE_MAX || (line_realloc = realloc(line, sizeof(char) * new_len)) == NULL ) { + debug( 1, LOC, "Ignoring line, error while allocating memory: %s\n", + (line_realloc == NULL ? strerror(errno) : "Line too large") ); + if ( line != NULL ) free( line ), line = NULL; + printf( "\n" ); } else { line = line_realloc; @@ -120,7 +120,7 @@ /* - * Read an entire file into memory, returning a pointer the contents. + * Read an entire file into memory, returning a pointer to the contents. * Returns NULL on error. * */ @@ -149,7 +149,7 @@ return( NULL ); } - if ( fread( contents, sizeof(char), sb.st_size, fh ) != sb.st_size ) { + if ( fread( contents, sizeof(char), sb.st_size, fh ) != (unsigned int)sb.st_size ) { debug( 1, LOC, "Short read for file '%s'?: %s\n", file ); fclose( fh ); return( NULL ); @@ -160,4 +160,79 @@ } +/* + * Allocate memory and copy +length+ bytes (plus 1 for null) from the given + * +string+ into a new string, returning a pointer to it. 
+ * + */ +char * +copy_string_token( char *string, unsigned short int length ) +{ + char *alloc_ptr = NULL; + if ( string == NULL || length == 0 ) return ( NULL ); + if ( (alloc_ptr = calloc( length + 1, sizeof(char) )) == NULL ) { + debug( 1, LOC, "Unable to allocate memory for token: %s\n", strerror(errno) ); + return( NULL ); + } + + (void)memcpy( alloc_ptr, string, length ); + + return( alloc_ptr ); +} + + +/* + * Allocate memory and copy +length+ bytes from the given dotted quad style + * +ip_string+ into an in_addr struct, returning a pointer to it. + * Returns NULL if the token is missing, too long for a dotted quad, or not convertible. + */ +struct in_addr * +copy_ipv4_token( char *ip_string, unsigned short int length ) +{ + struct in_addr *alloc_ptr = NULL; + char c_ip[ INET_ADDRSTRLEN ]; + + if ( ip_string == NULL || length == 0 || length >= INET_ADDRSTRLEN ) return ( NULL ); + + (void)memcpy( c_ip, ip_string, length ); + c_ip[ length ] = '\0'; + + if ( (alloc_ptr = calloc( 1, sizeof(struct in_addr) )) == NULL ) { + debug( 1, LOC, "Unable to allocate memory for ip '%s': %s\n", + c_ip, strerror(errno) ); + return( NULL ); + } + if ( inet_pton( AF_INET, c_ip, alloc_ptr ) < 1 ) { + debug( 1, LOC, "Unable to create in_addr struct for client ip '%s': %s\n", + c_ip, strerror(errno) ); + free( alloc_ptr ), alloc_ptr = NULL; + } + + return( alloc_ptr ); +} + + +/* + * Report how many lines were processed per second. + * + */ +void +report_speed( void ) +{ + if ( v.debugmode < 3 ) return; + + time_t end_time = time( NULL ); + double elapsed = difftime( end_time, v.timer.start ); + + if ( elapsed > 0 ) { + debug( 3, LOC, "Processed %lu lines in %0.1f seconds.
(%0.1f lines/sec)\n", + v.timer.lines, elapsed, v.timer.lines/elapsed ); + } + else { + debug( 3, LOC, "Processed %lu lines in under a second.\n", v.timer.lines ); + } + + return; +} + diff -r bdf20e6eefd7 -r d07309450285 volta.h --- a/volta.h Wed Sep 28 09:04:16 2011 -0700 +++ b/volta.h Mon Oct 17 09:12:00 2011 -0700 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -56,28 +57,60 @@ /* Default line size we accept from squid, longer lines (huge URLs?) malloc. */ #define LINE_BUFSIZE 2048 +/* Ceiling for how many bytes can be allocated at once for a single line. */ +#define LINE_MAX 256000 /* 250k */ + +/* Redirect types */ +#define REDIR_TEMPORARY 0 +#define REDIR_PERMANENT 1 +#define REDIR_TRANSPARENT 2 /* Aid debugging */ #define LOC __FILE__, __LINE__ -/* a global struct for easy access to common vars */ +/* + * a global struct for easy access to common vars + * + */ struct v_globals { unsigned short int debugmode; /* debug level */ char dbname[128]; /* path to database file */ struct sqlite3 *db; /* database handle */ + + struct { + time_t start; /* start time */ + unsigned long int lines; /* line count for determining speed */ + } timer; }; extern struct v_globals v; /* defined in main.c */ -/* The parsed attributes from the request line, as given to us by squid. - * URL client_ip "/" fqdn user method [ kvpairs] */ +/* + * The parsed attributes from the request line, as given to us by squid.
+ * + */ typedef struct request { - char *url; + char *scheme; char *host; - struct sockaddr_in ip; - char *ip_fqdn; + char *path; + char *port; + struct in_addr *client_ip; char *user; char *method; - char *kvpairs; + + struct { + char *scheme_start; + char *host_start; + char *port_start; + char *path_start; + char *meth_start; + char *c_ip_start; + unsigned short int scheme_length; + unsigned short int host_length; + unsigned short int port_length; + unsigned short int path_length; + unsigned short int meth_length; + unsigned short int c_ip_length; + } tokens; } request; /* @@ -87,13 +120,21 @@ */ int getopt( int, char * const [], const char *); -void usage( char *prg ); -void debug( int level, char *file, int line, const char *fmt, ... ); -char *slurp_file( char *file ); -char *extend_line( char *line, const char *buf ); +void usage( char * ); +void debug( int, char *, int, const char *, ... ); +void report_speed( void ); +char *slurp_file( char * ); +char *extend_line( char *, const char * ); +char *copy_string_token( char *, unsigned short int ); +struct in_addr *copy_ipv4_token( char *, unsigned short int ); int accept_loop( void ); struct request *parse( char *p ); +void process( char * ); +struct request *parse( char * ); +struct request *init_request( void ); /* called by parse() before its definition in parser.rl */ +void populate_request( struct request * ); +void cleanup_request( struct request * ); #endif