# HG changeset patch # User Mahlon E. Smith # Date 1320522749 25200 # Node ID 2706fc514dea0438888e1adb62c3eab7263b6573 # Parent 51eb85ae4de4cc433843329f9a36af2c30475515 Add whitelisting rules to negate other matches if they come first in the ruleset. Remove the results set array completely; it's faster and requires less memory to do the comparisons inline. diff -r 51eb85ae4de4 -r 2706fc514dea README --- a/README Fri Nov 04 20:34:28 2011 -0700 +++ b/README Sat Nov 05 12:52:29 2011 -0700 @@ -3,7 +3,7 @@ ===== What is volta? - - high performance redirector + - high performance / low resource redirector Why "volta"? - latin term, turn diff -r 51eb85ae4de4 -r 2706fc514dea db.c --- a/db.c Fri Nov 04 20:34:28 2011 -0700 +++ b/db.c Sat Nov 05 12:52:29 2011 -0700 @@ -140,63 +140,30 @@ } -/* Fast single record lookup. - * Returns a pointer to the found value or NULL if there is no match. - * - * The returned pointer should be freed after use. +/* + * Search the CDB for all occurrences of the given +key+, checking + * each one against the +p_request+. First match wins and is + * returned. NULL on no match. * */ -char * -find_record( char *key ) +parsed * +find_rule( char *key, parsed *p_request ) { if ( key == NULL ) return( NULL ); - char *val = NULL; - cdbi_t vlen; - - if ( cdb_seek( v.db_fd, key, (int)strlen(key), &vlen) > 0 ) { - - if ( (val = malloc( vlen + 1 )) == NULL ) { - debug( 5, LOC, "Unable to allocate memory for value storage: %s\n", strerror(errno) ); - return( NULL ); - } - - cdb_bread( v.db_fd, val, vlen ); - val[vlen] = '\0'; - debug( 4, LOC, "Match for key '%s': %s\n", key, val ); - } - - return val; -} - - -/* - * Search the CDB for all occurences of the given +key+, - * populating the +results+ array with pointers to parsed rule structs. - * - * Returns the number of successful matches. reset_results() - * should be called after the result set is examined. 
- * - */ -unsigned int -find_records( char *key, parsed **results ) -{ - if ( key == NULL ) return( 0 ); - struct cdb cdb; struct cdb_find cdbf; /* structure to hold current find position */ - unsigned int match = 0; - parsed *result = NULL; - char *val = NULL; + parsed *rule = NULL; + char *val = NULL; unsigned int vlen, vpos; /* initialize search structs */ - if ( db_attach() == -1 ) return( 0 ); + if ( db_attach() == -1 ) return( NULL ); cdb_init( &cdb, v.db_fd ); cdb_findinit( &cdbf, &cdb, key, (int)strlen(key) ); - while ( cdb_findnext( &cdbf ) > 0 && match < DB_RESULTS_MAX ) { + while ( cdb_findnext( &cdbf ) > 0 ) { vpos = cdb_datapos( &cdb ); vlen = cdb_datalen( &cdb ); @@ -204,22 +171,26 @@ if ( (val = calloc( vlen, sizeof(char) )) == NULL ) { debug( 5, LOC, "Unable to allocate memory for DB value storage: %s\n", strerror(errno) ); - return( 0 ); + cdb_free( &cdb ); + return( NULL ); } cdb_read( &cdb, val, vlen, vpos ); - /* if it parses properly, add it to the result set. */ - result = parse_rule( val ); - if ( result != NULL ) { - results[match] = result; - debug( 4, LOC, "DB match %d for key '%s': %s\n", match+1, key, val ); + /* check it against the request */ + debug( 4, LOC, "DB match for key '%s': %s\n", key, val ); + rule = parse_rule( val ); + free( val ); + if ( rule != NULL ) { + if ( check_rule( rule, p_request ) == 0 ) { + finish_parsed( rule ); + } + else { + break; + } } - - match++; - free( val ); } cdb_free( &cdb ); - return match; + return( rule ); } diff -r 51eb85ae4de4 -r 2706fc514dea db.h --- a/db.h Fri Nov 04 20:34:28 2011 -0700 +++ b/db.h Sat Nov 05 12:52:29 2011 -0700 @@ -38,8 +38,7 @@ short int db_attach( void ); unsigned short int db_create_new( char * ); struct db_input *parse_dbinput( char * ); -char *find_record( char * ); -unsigned int find_records( char *, parsed ** ); +parsed *find_rule( char *, parsed * ); #endif diff -r 51eb85ae4de4 -r 2706fc514dea parser.rl --- a/parser.rl Fri Nov 04 20:34:28 2011 -0700 +++ b/parser.rl Sat 
Nov 05 12:52:29 2011 -0700 @@ -199,10 +199,11 @@ %%{ machine rule_parser; - action match_start { MARK_S(path_re) } - action match_finish { MARK_E(path_re) } - action redir_start { MARK_S(redir) } - action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon + action match_start { MARK_S(path_re) } + action match_finish { MARK_E(path_re) } + action redir_start { MARK_S(redir) } + action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon + action wl_finish { p_parsed->wl = 1; } action scheme_start { MARK_S(scheme) } action scheme_finish { MARK_E(scheme) } @@ -219,20 +220,23 @@ host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum ); path_segment = '/' ( any - space )*; - sep = space+; - hostname = host_component ( '.' host_component )* '.'?; - ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3}; - ipv6 = ( xdigit | ':' )+; + sep = space+; + hostname = host_component ( '.' host_component )* '.'?; + ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3}; + ipv6 = ( xdigit | ':' )+; + + whitelist = ( '-' sep ) %wl_finish; + path_re = ( any - space )+ >match_start %match_finish @!match_error; - path_re = ( any - space )+ >match_start %match_finish @!match_error; - redir = ( digit{3} ':' ) >redir_start %redir_finish; + redir = ( digit{3} ':' ) >redir_start %redir_finish; + scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish; + host = ( hostname | ipv4 ) >host_start %host_finish @!host_error; + port = ( ':' digit{1,5} ) >port_start %port_finish; + path = path_segment* >path_start %path_finish; - scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish; - host = ( hostname | ipv4 ) >host_start %host_finish @!host_error; - port = ( ':' digit{1,5} ) >port_start %port_finish; - path = path_segment* >path_start %path_finish; + rewrite = ( sep redir? scheme? host port? path? ); - main := path_re sep ( redir? scheme? host port? path? ); + main := whitelist? 
path_re rewrite?; }%% /* state machine */ @@ -297,8 +301,9 @@ redir = ( digit{3} ':' ); host = ( hostname | ipv4 ); - key = ( host | '*' ) >key_start %key_finish @!key_error; - val = ( token sep token ) >val_start %val_finish @!val_error; + key = ( host | '*' ) >key_start %key_finish @!key_error; + val = ( (token sep)? token (sep token)? ) >val_start %val_finish @!val_error; + # wl regex rewrite main:= key sep val '\n'; }%% @@ -346,6 +351,7 @@ } p_parsed->type = 0; + p_parsed->wl = 0; p_parsed->path_re = NULL; p_parsed->redir = NULL; p_parsed->scheme = NULL; @@ -450,9 +456,9 @@ parse_tld( parsed *p_request ) { unsigned short int cs = 5, mark = 0; - char *p = p_request->host; - char *pe = p + p_request->tokens.host_length; - char *ts = 0, *te = 0, *eof = pe; + char *p = p_request->host; + char *pe = p + p_request->tokens.host_length; + char *ts = 0, *te = 0, *eof = pe; %%{ machine tld_parser; diff -r 51eb85ae4de4 -r 2706fc514dea process.c --- a/process.c Fri Nov 04 20:34:28 2011 -0700 +++ b/process.c Sat Nov 05 12:52:29 2011 -0700 @@ -41,8 +41,6 @@ process( char *line ) { parsed *p_request = parse_request( line ), *rule = NULL; - parsed *results[ DB_RESULTS_MAX ] = { NULL }; /* array of response matches */ - unsigned int rcount = 0; /* count lines in debugmode */ if ( v.debugmode > 2 ) v.timer.lines++; @@ -76,31 +74,20 @@ * path intact, or redir to https, for example.) 
* */ - rcount = find_records( p_request->host, results ); - rule = find_matching_rule( results, rcount, p_request ); - - if ( rule == NULL ) { - reset_results( results, rcount ); - rcount = find_records( p_request->tld, results ); - rule = find_matching_rule( results, rcount, p_request ); - } + rule = find_rule( p_request->host, p_request ); + if ( rule == NULL ) rule = find_rule( p_request->tld, p_request ); + if ( rule == NULL ) rule = find_rule( "*", p_request ); - if ( rule == NULL ) { - reset_results( results, rcount ); - rcount = find_records( "*", results ); - rule = find_matching_rule( results, rcount, p_request ); - } - - /* no matching rule still? no need to rewrite anything. */ - if ( rule == NULL ) { + /* no matching rule still or whitelist rule? no need to rewrite anything. */ + if ( rule == NULL || rule->wl ) { out( "\n" ); } - /* otherwise, perform the rewrite */ + /* otherwise, perform the rewrite. */ else { rewrite( p_request, rule ); } - reset_results( results, rcount ); + finish_parsed( rule ); finish_parsed( p_request ); return; } @@ -126,63 +113,43 @@ /* - * Search through a result set, and return the first - * matching path (or NULL). + * Compare a parsed +rule+ against the +request+. + * Returns 1 on a match, 0 otherwise. 
* */ -parsed * -find_matching_rule( parsed **results, unsigned int resultcount, parsed *p_request ) +unsigned short int +check_rule( parsed *rule, parsed *p_request ) { - unsigned int i = 0; int re_rv; regex_t re; char re_err[128]; - parsed *rule = NULL; - if ( resultcount == 0 || p_request->path == NULL ) return( NULL ); - - for ( i = 0; i < resultcount; i++ ) { - /* quick comparison */ - if ( (strcasecmp( results[i]->path_re, p_request->path ) == 0) || - (strcmp( results[i]->path_re, "*" ) == 0) ) { - debug( 4, LOC, "Rule %d match (non regexp)\n", i+1 ); - rule = results[i]; - break; - } + if ( rule == NULL || p_request->path == NULL ) return( 0 ); - /* compile the regexp */ - if ( (re_rv = regcomp( &re, results[i]->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) { - regerror( re_rv, &re, re_err, 128 ); - debug( 4, LOC, "Invalid regex: \"%s\": %s\n", results[i]->path_re, re_err ); - regfree( &re ); - continue; - } - - /* compare! */ - if ( (regexec( &re, p_request->path, 0, NULL, 0 )) == 0 ) { - debug( 4, LOC, "Rule %d match (regexp)\n", i+1 ); - rule = results[i]; - regfree( &re ); - break; - } + /* quick comparison */ + if ( (strcasecmp( rule->path_re, p_request->path ) == 0) || + (strcmp( rule->path_re, "*" ) == 0) ) { + debug( 4, LOC, "Rule match \"%s\" (non regexp)\n", rule->path_re ); + return( 1 ); } - return( rule ); + /* compile the regexp */ + if ( (re_rv = regcomp( &re, rule->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) { + regerror( re_rv, &re, re_err, 128 ); + debug( 4, LOC, "Invalid regex: \"%s\": %s\n", rule->path_re, re_err ); + regfree( &re ); + return( 0 ); + } + + /* compare! */ + if ( (regexec( &re, p_request->path, 0, NULL, 0 )) == 0 ) { + debug( 4, LOC, "Rule match \"%s\" (regexp)\n", rule->path_re ); + regfree( &re ); + return( 1 ); + } + else { + regfree( &re ); + return( 0 ); + } } - -/* - * Clear the results array and free memory. 
- * - */ -void -reset_results( parsed **results, unsigned int count ) -{ - unsigned int i = 0; - - for ( ; i < count && i < DB_RESULTS_MAX; i++ ) finish_parsed( results[i] ); - memset( results, 0, sizeof(results) ); - - return; -} - diff -r 51eb85ae4de4 -r 2706fc514dea volta.h --- a/volta.h Fri Nov 04 20:34:28 2011 -0700 +++ b/volta.h Sat Nov 05 12:52:29 2011 -0700 @@ -67,8 +67,6 @@ #define LINE_BUFSIZE 2048 /* Ceiling for how many bytes can be allocated at once for a single line. */ #define LINE_MAX 256000 /* 250k */ -/* Maximum DB results for a single query */ -#define DB_RESULTS_MAX 1000 /* Parsed line types */ #define REQUEST 1 @@ -118,6 +116,7 @@ */ typedef struct parsed { unsigned short int type; + unsigned short int wl; char *path_re; char *redir; char *scheme; @@ -180,7 +179,7 @@ void parse_tld( parsed * ); void finish_parsed( parsed * ); void reset_results( parsed **, unsigned int ); -parsed *find_matching_rule( parsed **, unsigned int, parsed * ); +unsigned short int check_rule( parsed *, parsed * ); void rewrite( parsed *, parsed * ); #endif