process.c
changeset 14 51eb85ae4de4
parent 13 23a242d7b7fa
child 15 2706fc514dea
--- a/process.c	Mon Oct 31 17:17:07 2011 -0700
+++ b/process.c	Fri Nov 04 20:34:28 2011 -0700
@@ -31,64 +31,158 @@
 #include "volta.h"
 #include "db.h"
 
+
+/*
+ * Given a redirect +line+ from squid, send it to the parser,
+ * perform database lookups, and conditionally perform the rewrite.
+ * A bare newline is passed to out() when parsing fails or no rule matches.
+ */
 void
 process( char *line )
 {
-	request *p_request = parse( line );
-	rewrite *p_rewrite = prepare_rewrite( p_request );
+	parsed *p_request = parse_request( line ), *rule = NULL;
+	parsed *results[ DB_RESULTS_MAX ] = { NULL }; /* array of response matches */
+	unsigned int rcount = 0; /* number of entries find_records() reported for the current pass */
 
 	/* count lines in debugmode */
 	if ( v.debugmode > 2 ) v.timer.lines++;
 
-	/* If parsing failed or there wasn't a successful rewrite match,
-	 * return a blank line to squid to allow the request to pass
-	 * through unmolested. */
-	if ( p_request == NULL || p_rewrite == NULL ) {
+	/* If request parsing failed, return a blank line to squid
+	   to allow the request to pass through unmolested. */
+	if ( p_request == NULL ) {
 		out( "\n" );
-		finish_request( p_request );
-		finish_rewrite( p_rewrite );
+		finish_parsed( p_request ); /* NOTE(review): p_request is always NULL here -- confirm finish_parsed() accepts NULL */
 		return;
 	}
 
-	if ( v.debugmode < 4 ) {
-		if ( p_rewrite->redir == REDIR_TEMPORARY ) printf( "302:" );
-		if ( p_rewrite->redir == REDIR_PERMANENT ) printf( "301:" );
+	/*
+	 * Main rewrite logic.
+	 *
+	 * First, try and match the host exactly.
+	 *
+	 * Second, match the TLD of the host, so separate rules aren't needed for
+	 * every possible subdomain of one particular domain.
+	 *
+	 * Finally, look for '*', if for some reason the rules provided don't care to
+	 * match specific hosts, and instead just match on any path.
+	 *
+	 * If DB matches are found at any step above, the rules are tried in order
+	 * to attempt a match against the path.  Exact string match attempted
+	 * first, then fallback to regexp.
+	 *
+	 * First rule match wins, and elements of the URL are rewritten based on
+	 * what is present in the rule -- any missing parts just use the original
+	 * URL element.  (this way, you can rewrite just the host and leave the
+	 * path intact, or redir to https, for example.)
+	 *
+	 */
+	rcount = find_records( p_request->host, results ); /* pass 1: records keyed on the request host */
+	rule = find_matching_rule( results, rcount, p_request );
 
-		if ( p_request->scheme || p_rewrite->scheme )
-			printf( "%s", p_rewrite->scheme ? p_rewrite->scheme : p_request->scheme );
-		printf( "%s", p_rewrite->host   ? p_rewrite->host   : p_request->host );
-		printf( "%s", p_rewrite->path   ? p_rewrite->path   : p_request->path );
-		if ( p_request->port != 0 || p_rewrite->port != 0 )
-			printf( ":%d", p_rewrite->port ? p_rewrite->port : p_request->port );
-		printf("\n");
-	}
-	else {
-		debug( 5, LOC, "Rewrite match on %s/%s\n", p_request->host, p_request->path );
-		debug( 5, LOC, "    --> %s/%s\n", p_rewrite->host, p_rewrite->path );
+	if ( rule == NULL ) {
+		reset_results( results, rcount ); /* release pass 1 records before the array is reused */
+		rcount = find_records( p_request->tld, results ); /* pass 2: records keyed on the request tld */
+		rule = find_matching_rule( results, rcount, p_request );
 	}
 
-
-	/* unsigned long hst, net; */
-	/* hst = inet_lnaof( *(p_request->client_ip) ); */
-	/* net = inet_netof( *(p_request->client_ip) ); */
-	/* printf("%14s : net=0x%08lX host=0x%08lX\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
-	/* printf("%14s : net=%lu host=%lu\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
+	if ( rule == NULL ) {
+		reset_results( results, rcount ); /* release pass 2 records before the array is reused */
+		rcount = find_records( "*", results ); /* pass 3: records keyed on the literal "*" */
+		rule = find_matching_rule( results, rcount, p_request );
+	}
 
-	/*
-	 * create function bigint_to_inet(bigint) returns inet as $$
-	 * select
-	 * (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet
-	 * $$ language sql;
-	 * */
+	/* no matching rule still?  no need to rewrite anything. */
+	if ( rule == NULL ) {
+		out( "\n" );
+	}
+	/* otherwise, perform the rewrite */
+	else {
+		rewrite( p_request, rule );
+	}
 
-	/*
-	char ip[ INET_ADDRSTRLEN ];
-	inet_ntop( AF_INET, p_request->client_ip, ip, INET_ADDRSTRLEN );
-	printf( "%s\n", ip );
-	*/
-
-	finish_request( p_request );
-	finish_rewrite( p_rewrite );
+	reset_results( results, rcount ); /* rule points at an entry of results, so free only after rewrite() */
+	finish_parsed( p_request );
 	return;
 }
 
+
+/*
+ * Print the rewritten URL for squid on stdout: optional "redir:", scheme, host,
+ * optional ":port", path, newline.  A scheme, host or path missing from +rule+
+ * is taken from +request+.  Prints nothing if rule is NULL or v.debugmode >= 5.
+ */
+void
+rewrite( parsed *request, parsed *rule )
+{
+	if ( rule == NULL || v.debugmode >= 5 ) return;
+
+	if ( rule->redir ) printf( "%s:", rule->redir );
+	if ( rule->scheme || request->scheme ) printf( "%s", rule->scheme ? rule->scheme : request->scheme );
+	printf( "%s", rule->host ? rule->host : request->host ); /* a rule without a host keeps the original one */
+	if ( rule->port ) printf( ":%s", rule->port );
+	printf( "%s\n", rule->path ? rule->path : request->path );
+	return;
+}
+
+
+/*
+ * Return the first of the +resultcount+ rules in +results+ whose path_re matches
+ * p_request->path: a case-insensitive string match or a literal "*" is tried
+ * first, then path_re as a POSIX extended regexp.  Returns NULL if none match.
+ */
+parsed *
+find_matching_rule( parsed **results, unsigned int resultcount, parsed *p_request )
+{
+	unsigned int i = 0;
+	int re_rv;
+	regex_t re;
+	char re_err[128];
+	parsed *rule = NULL;
+
+	if ( resultcount == 0 || p_request->path == NULL ) return( NULL );
+
+	for ( i = 0; i < resultcount; i++ ) {
+		/* quick comparison, avoids compiling a regexp for plain paths and "*" */
+		if ( (strcasecmp( results[i]->path_re, p_request->path ) == 0) ||
+			 (strcmp( results[i]->path_re, "*" ) == 0) ) {
+			debug( 4, LOC, "Rule %d match (non regexp)\n", i+1 );
+			rule = results[i];
+			break;
+		}
+
+		/* compile the regexp; after a failure +re+ is undefined and must not be freed */
+		if ( (re_rv = regcomp( &re, results[i]->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) {
+			regerror( re_rv, &re, re_err, sizeof(re_err) );
+			debug( 4, LOC, "Invalid regex: \"%s\": %s\n", results[i]->path_re, re_err );
+			continue;
+		}
+
+		/* compare, then always release the compiled pattern so a miss doesn't leak it */
+		re_rv = regexec( &re, p_request->path, 0, NULL, 0 );
+		regfree( &re );
+		if ( re_rv == 0 ) {
+			debug( 4, LOC, "Rule %d match (regexp)\n", i+1 );
+			rule = results[i];
+			break;
+		}
+	}
+
+	return( rule );
+}
+
+
+/*
+ * Release the first +count+ entries of +results+ (never more than
+ * DB_RESULTS_MAX) with finish_parsed() and set each released slot to NULL.
+ */
+void
+reset_results( parsed **results, unsigned int count )
+{
+	unsigned int i = 0;
+
+	for ( ; i < count && i < DB_RESULTS_MAX; i++ ) {
+		finish_parsed( results[i] );
+		results[i] = NULL; /* clear per slot: sizeof(results) here is one pointer, not the array */
+	}
+}
+