Add whitelisting rules, to negate other matches if they come first in the ruleset.
author Mahlon E. Smith <mahlon@martini.nu>
Sat, 05 Nov 2011 12:52:29 -0700
changeset 15 2706fc514dea
parent 14 51eb85ae4de4
child 16 e6a640ad2cc2
Add whitelisting rules, to negate other matches if they come first in the ruleset. Remove the results set array completely; it's faster and requires less memory to do the comparisons inline.
README
db.c
db.h
parser.rl
process.c
volta.h
--- a/README	Fri Nov 04 20:34:28 2011 -0700
+++ b/README	Sat Nov 05 12:52:29 2011 -0700
@@ -3,7 +3,7 @@
 =====
 
 What is volta?
-	- high performance redirector
+	- high performance / low resource redirector
 
 Why "volta"?
 	- latin term, turn
--- a/db.c	Fri Nov 04 20:34:28 2011 -0700
+++ b/db.c	Sat Nov 05 12:52:29 2011 -0700
@@ -140,63 +140,30 @@
 }
 
 
-/* Fast single record lookup.
- * Returns a pointer to the found value or NULL if there is no match.
- *
- * The returned pointer should be freed after use.
+/* 
+ * Search the CDB for all occurrences of the given +key+, checking
+ * each one against the +p_request+.  First match wins and is
+ * returned.  NULL on no match.
  *
  */
-char *
-find_record( char *key )
+parsed *
+find_rule( char *key, parsed *p_request )
 {
 	if ( key == NULL ) return( NULL );
 
-	char *val = NULL;
-	cdbi_t vlen;
-
-	if ( cdb_seek( v.db_fd, key, (int)strlen(key), &vlen) > 0 ) {
-
-		if ( (val = malloc( vlen + 1 )) == NULL ) {
-			debug( 5, LOC, "Unable to allocate memory for value storage: %s\n", strerror(errno) );
-			return( NULL );
-		}
-
-		cdb_bread( v.db_fd, val, vlen );
-		val[vlen] = '\0';
-		debug( 4, LOC, "Match for key '%s': %s\n", key, val );
-	}
-
-	return val;
-}
-
-
-/* 
- * Search the CDB for all occurences of the given +key+,
- * populating the +results+ array with pointers to parsed rule structs.
- *
- * Returns the number of successful matches.  reset_results()
- * should be called after the result set is examined.
- *
- */
-unsigned int
-find_records( char *key, parsed **results )
-{
-	if ( key == NULL ) return( 0 );
-
 	struct cdb cdb;
 	struct cdb_find cdbf; /* structure to hold current find position */
 
-	unsigned int match = 0;
-	parsed *result     = NULL;
-	char *val          = NULL;
+	parsed *rule = NULL;
+	char *val    = NULL;
 	unsigned int vlen, vpos;
 
 	/* initialize search structs */
-	if ( db_attach() == -1 ) return( 0 );
+	if ( db_attach() == -1 ) return( NULL );
 	cdb_init( &cdb, v.db_fd );
 	cdb_findinit( &cdbf, &cdb, key, (int)strlen(key) );
 
-	while ( cdb_findnext( &cdbf ) > 0 && match < DB_RESULTS_MAX ) {
+	while ( cdb_findnext( &cdbf ) > 0 ) {
 		vpos = cdb_datapos( &cdb );
 		vlen = cdb_datalen( &cdb );
 
@@ -204,22 +171,26 @@
 		if ( (val = calloc( vlen, sizeof(char) )) == NULL ) {
 			debug( 5, LOC, "Unable to allocate memory for DB value storage: %s\n",
 					strerror(errno) );
-			return( 0 );
+			cdb_free( &cdb );
+			return( NULL );
 		}
 		cdb_read( &cdb, val, vlen, vpos );
 
-		/* if it parses properly, add it to the result set. */
-		result = parse_rule( val );
-		if ( result != NULL ) {
-			results[match] = result;
-			debug( 4, LOC, "DB match %d for key '%s': %s\n", match+1, key, val );
+		/* check it against the request */
+		debug( 4, LOC, "DB match for key '%s': %s\n", key, val );
+		rule = parse_rule( val );
+		free( val );
+		if ( rule != NULL ) {
+			if ( check_rule( rule, p_request ) == 0 ) {
+				finish_parsed( rule );
+			}
+			else {
+				break;
+			}
 		}
-
-		match++;
-		free( val );
 	}
 
 	cdb_free( &cdb );
-	return match;
+	return( rule );
 }
 
--- a/db.h	Fri Nov 04 20:34:28 2011 -0700
+++ b/db.h	Sat Nov 05 12:52:29 2011 -0700
@@ -38,8 +38,7 @@
 short int db_attach( void );
 unsigned short int db_create_new( char * );
 struct db_input *parse_dbinput( char * );
-char *find_record( char * );
-unsigned int find_records( char *, parsed ** );
+parsed *find_rule( char *, parsed * );
 
 #endif
 
--- a/parser.rl	Fri Nov 04 20:34:28 2011 -0700
+++ b/parser.rl	Sat Nov 05 12:52:29 2011 -0700
@@ -199,10 +199,11 @@
 %%{
 	machine rule_parser;
 
-	action match_start    { MARK_S(path_re) }
-	action match_finish   { MARK_E(path_re) }
-	action redir_start    { MARK_S(redir) }
-	action redir_finish   { p_parsed->tokens.redir_length = 3; } # strip trailing colon
+	action match_start   { MARK_S(path_re) }
+	action match_finish  { MARK_E(path_re) }
+	action redir_start   { MARK_S(redir) }
+	action redir_finish  { p_parsed->tokens.redir_length = 3; } # strip trailing colon
+	action wl_finish     { p_parsed->wl = 1; }
 
 	action scheme_start  { MARK_S(scheme) }
 	action scheme_finish { MARK_E(scheme) }
@@ -219,20 +220,23 @@
 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
 	path_segment    = '/' ( any - space )*;
 
-	sep            = space+;
-	hostname       = host_component ( '.' host_component )* '.'?;
-	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
-	ipv6           = ( xdigit | ':' )+;
+	sep       = space+;
+	hostname  = host_component ( '.' host_component )* '.'?;
+	ipv4      = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+	ipv6      = ( xdigit | ':' )+;
+
+	whitelist = ( '-' sep )          %wl_finish;
+	path_re   = ( any - space )+     >match_start  %match_finish @!match_error;
 
-	path_re        = ( any - space )+      >match_start  %match_finish @!match_error;
-	redir          = ( digit{3} ':' )      >redir_start  %redir_finish;
+	redir     = ( digit{3} ':' )     >redir_start  %redir_finish;
+	scheme    = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
+	host      = ( hostname | ipv4 )  >host_start   %host_finish   @!host_error;
+	port      = ( ':' digit{1,5} )   >port_start   %port_finish;
+	path      = path_segment*        >path_start   %path_finish;
 
-	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish;
-	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
-	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
-	path           = path_segment*         >path_start   %path_finish;
+	rewrite   = ( sep redir? scheme? host port? path? );
 
-	main := path_re sep ( redir? scheme? host port? path? );
+	main := whitelist? path_re rewrite?;
 }%%
 
 	/* state machine */
@@ -297,8 +301,9 @@
 	redir          = ( digit{3} ':' );
 	host           = ( hostname | ipv4 );
 
-	key = ( host | '*' )      >key_start %key_finish @!key_error;
-	val = ( token sep token ) >val_start %val_finish @!val_error;
+	key = ( host | '*' )                      >key_start %key_finish @!key_error;
+	val = ( (token sep)? token (sep token)? ) >val_start %val_finish @!val_error;
+	#           wl       regex   rewrite
 	
 	main:= key sep val '\n';
 }%%
@@ -346,6 +351,7 @@
 	}
 
 	p_parsed->type      = 0;
+	p_parsed->wl        = 0;
 	p_parsed->path_re   = NULL;
 	p_parsed->redir     = NULL;
 	p_parsed->scheme    = NULL;
@@ -450,9 +456,9 @@
 parse_tld( parsed *p_request )
 {
 	unsigned short int cs = 5, mark = 0;
-	char *p   = p_request->host;
-	char *pe  = p + p_request->tokens.host_length;
-	char *ts  = 0, *te = 0, *eof = pe;
+	char *p  = p_request->host;
+	char *pe = p + p_request->tokens.host_length;
+	char *ts = 0, *te = 0, *eof = pe;
 
 %%{
     machine tld_parser;
--- a/process.c	Fri Nov 04 20:34:28 2011 -0700
+++ b/process.c	Sat Nov 05 12:52:29 2011 -0700
@@ -41,8 +41,6 @@
 process( char *line )
 {
 	parsed *p_request = parse_request( line ), *rule = NULL;
-	parsed *results[ DB_RESULTS_MAX ] = { NULL }; /* array of response matches */
-	unsigned int rcount = 0;
 
 	/* count lines in debugmode */
 	if ( v.debugmode > 2 ) v.timer.lines++;
@@ -76,31 +74,20 @@
 	 * path intact, or redir to https, for example.)
 	 *
 	 */
-	rcount = find_records( p_request->host, results );
-	rule = find_matching_rule( results, rcount, p_request );
-
-	if ( rule == NULL ) {
-		reset_results( results, rcount );
-		rcount = find_records( p_request->tld, results );
-		rule = find_matching_rule( results, rcount, p_request );
-	}
+	rule = find_rule( p_request->host, p_request );
+	if ( rule == NULL ) rule = find_rule( p_request->tld, p_request );
+	if ( rule == NULL ) rule = find_rule( "*", p_request );
 
-	if ( rule == NULL ) {
-		reset_results( results, rcount );
-		rcount = find_records( "*", results );
-		rule = find_matching_rule( results, rcount, p_request );
-	}
-
-	/* no matching rule still?  no need to rewrite anything. */
-	if ( rule == NULL ) {
+	/* no matching rule still or whitelist rule?  no need to rewrite anything. */
+	if ( rule == NULL || rule->wl ) {
 		out( "\n" );
 	}
-	/* otherwise, perform the rewrite */
+	/* otherwise, perform the rewrite. */
 	else {
 		rewrite( p_request, rule );
 	}
 
-	reset_results( results, rcount );
+	finish_parsed( rule );
 	finish_parsed( p_request );
 	return;
 }
@@ -126,63 +113,43 @@
 
 
 /*
- * Search through a result set, and return the first
- * matching path (or NULL).
+ * Compare a parsed +rule+ against the +request+.
+ * Returns 1 on a match, 0 otherwise.
  *
  */
-parsed *
-find_matching_rule( parsed **results, unsigned int resultcount, parsed *p_request )
+unsigned short int
+check_rule( parsed *rule, parsed *p_request )
 {
-	unsigned int i = 0;
 	int re_rv;
 	regex_t re;
 	char re_err[128];
-	parsed *rule = NULL;
 
-	if ( resultcount == 0 || p_request->path == NULL ) return( NULL );
-
-	for ( i = 0; i < resultcount; i++ ) {
-		/* quick comparison */
-		if ( (strcasecmp( results[i]->path_re, p_request->path ) == 0) ||
-			 (strcmp( results[i]->path_re, "*" ) == 0) ) {
-			debug( 4, LOC, "Rule %d match (non regexp)\n", i+1 );
-			rule = results[i];
-			break;
-		}
+	if ( rule == NULL || p_request->path == NULL ) return( 0 );
 
-		/* compile the regexp */
-		if ( (re_rv = regcomp( &re, results[i]->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) {
-			regerror( re_rv, &re, re_err, 128 );
-			debug( 4, LOC, "Invalid regex: \"%s\": %s\n", results[i]->path_re, re_err );
-			regfree( &re );
-			continue;
-		}
-
-		/* compare! */
-		if ( (regexec( &re, p_request->path, 0, NULL, 0 )) == 0 ) {
-			debug( 4, LOC, "Rule %d match (regexp)\n", i+1 );
-			rule = results[i];
-			regfree( &re );
-			break;
-		}
+	/* quick comparison */
+	if ( (strcasecmp( rule->path_re, p_request->path ) == 0) ||
+			(strcmp( rule->path_re, "*" ) == 0) ) {
+		debug( 4, LOC, "Rule match \"%s\" (non regexp)\n", rule->path_re );
+		return( 1 );
 	}
 
-	return( rule );
+	/* compile the regexp */
+	if ( (re_rv = regcomp( &re, rule->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) {
+		regerror( re_rv, &re, re_err, 128 );
+		debug( 4, LOC, "Invalid regex: \"%s\": %s\n", rule->path_re, re_err );
+		regfree( &re );
+		return( 0 );
+	}
+
+	/* compare! */
+	if ( (regexec( &re, p_request->path, 0, NULL, 0 )) == 0 ) {
+		debug( 4, LOC, "Rule match \"%s\" (regexp)\n", rule->path_re );
+		regfree( &re );
+		return( 1 );
+	}
+	else {
+		regfree( &re );
+		return( 0 );
+	}
 }
 
-
-/*
- * Clear the results array and free memory.
- *
- */
-void
-reset_results( parsed **results, unsigned int count )
-{
-	unsigned int i = 0;
-
-	for ( ; i < count && i < DB_RESULTS_MAX; i++ ) finish_parsed( results[i] );
-	memset( results, 0, sizeof(results) );
-
-	return;
-}
-
--- a/volta.h	Fri Nov 04 20:34:28 2011 -0700
+++ b/volta.h	Sat Nov 05 12:52:29 2011 -0700
@@ -67,8 +67,6 @@
 #define LINE_BUFSIZE 2048
 /* Ceiling for how many bytes can be allocated at once for a single line. */
 #define LINE_MAX 256000 /* 250k */
-/* Maximum DB results for a single query */
-#define DB_RESULTS_MAX 1000
 
 /* Parsed line types */
 #define REQUEST 1
@@ -118,6 +116,7 @@
  */
 typedef struct parsed {
 	unsigned short int type;
+	unsigned short int wl;
 	char   *path_re;
 	char   *redir;
 	char   *scheme;
@@ -180,7 +179,7 @@
 void parse_tld( parsed * );
 void finish_parsed( parsed * );
 void reset_results( parsed **, unsigned int );
-parsed *find_matching_rule( parsed **, unsigned int, parsed * );
+unsigned short int check_rule( parsed *, parsed * );
 void rewrite( parsed *, parsed * );
 
 #endif