--- a/process.c Mon Oct 31 17:17:07 2011 -0700
+++ b/process.c Fri Nov 04 20:34:28 2011 -0700
@@ -31,64 +31,158 @@
#include "volta.h"
#include "db.h"
+
+/*
+ * Given a redirect +line+ from squid, send it to the parser,
+ * perform database lookups, and conditonally perform the rewrite.
+ *
+ */
void
process( char *line )
{
- request *p_request = parse( line );
- rewrite *p_rewrite = prepare_rewrite( p_request );
+ parsed *p_request = parse_request( line ), *rule = NULL;
+ parsed *results[ DB_RESULTS_MAX ] = { NULL }; /* array of response matches */
+ unsigned int rcount = 0;
/* count lines in debugmode */
if ( v.debugmode > 2 ) v.timer.lines++;
- /* If parsing failed or there wasn't a successful rewrite match,
- * return a blank line to squid to allow the request to pass
- * through unmolested. */
- if ( p_request == NULL || p_rewrite == NULL ) {
+ /* If request parsing failed, return a blank line to squid
+ to allow the request to pass through unmolested. */
+ if ( p_request == NULL ) {
out( "\n" );
- finish_request( p_request );
- finish_rewrite( p_rewrite );
+ finish_parsed( p_request );
return;
}
- if ( v.debugmode < 4 ) {
- if ( p_rewrite->redir == REDIR_TEMPORARY ) printf( "302:" );
- if ( p_rewrite->redir == REDIR_PERMANENT ) printf( "301:" );
+ /*
+ * Main rewrite logic.
+ *
+ * First, try and match the host exactly.
+ *
+ * Second, match the TLD of the host, so separate rules aren't needed for
+ * every possible subdomain of one particular domain.
+ *
+ * Finally, look for '*', if for some reason the rules provided don't care to
+ * match specific hosts, and instead just match on any path.
+ *
+ * If DB matches are found at any step above, the rules are tried in order
+ * to attempt a match against the path. Exact string match attempted
+ * first, then fallback to regexp.
+ *
+ * First rule match wins, and elements of the URL are rewritten based on
+ * what is present in the rule -- any missing parts just use the original
+ * URL element. (this way, you can rewrite just the host and leave the
+ * path intact, or redir to https, for example.)
+ *
+ */
+ rcount = find_records( p_request->host, results );
+ rule = find_matching_rule( results, rcount, p_request );
- if ( p_request->scheme || p_rewrite->scheme )
- printf( "%s", p_rewrite->scheme ? p_rewrite->scheme : p_request->scheme );
- printf( "%s", p_rewrite->host ? p_rewrite->host : p_request->host );
- printf( "%s", p_rewrite->path ? p_rewrite->path : p_request->path );
- if ( p_request->port != 0 || p_rewrite->port != 0 )
- printf( ":%d", p_rewrite->port ? p_rewrite->port : p_request->port );
- printf("\n");
- }
- else {
- debug( 5, LOC, "Rewrite match on %s/%s\n", p_request->host, p_request->path );
- debug( 5, LOC, " --> %s/%s\n", p_rewrite->host, p_rewrite->path );
+ if ( rule == NULL ) {
+ reset_results( results, rcount );
+ rcount = find_records( p_request->tld, results );
+ rule = find_matching_rule( results, rcount, p_request );
}
-
- /* unsigned long hst, net; */
- /* hst = inet_lnaof( *(p_request->client_ip) ); */
- /* net = inet_netof( *(p_request->client_ip) ); */
- /* printf("%14s : net=0x%08lX host=0x%08lX\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
- /* printf("%14s : net=%lu host=%lu\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
+ if ( rule == NULL ) {
+ reset_results( results, rcount );
+ rcount = find_records( "*", results );
+ rule = find_matching_rule( results, rcount, p_request );
+ }
- /*
- * create function bigint_to_inet(bigint) returns inet as $$
- * select
- * (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet
- * $$ language sql;
- * */
+ /* no matching rule still? no need to rewrite anything. */
+ if ( rule == NULL ) {
+ out( "\n" );
+ }
+ /* otherwise, perform the rewrite */
+ else {
+ rewrite( p_request, rule );
+ }
- /*
- char ip[ INET_ADDRSTRLEN ];
- inet_ntop( AF_INET, p_request->client_ip, ip, INET_ADDRSTRLEN );
- printf( "%s\n", ip );
- */
-
- finish_request( p_request );
- finish_rewrite( p_rewrite );
+ reset_results( results, rcount );
+ finish_parsed( p_request );
return;
}
+
+/*
+ * Output a rewritten URL for squid.
+ *
+ */
+void
+rewrite( parsed *request, parsed *rule )
+{
+ if ( rule == NULL || v.debugmode >= 5 ) return;
+
+ if ( rule->redir ) printf( "%s:", rule->redir );
+ printf( "%s%s", (rule->scheme ? rule->scheme : request->scheme), rule->host );
+ if ( rule->port ) printf( ":%s", rule->port );
+ printf( "%s", rule->path ? rule->path : request->path );
+
+ printf("\n");
+ return;
+}
+
+
+/*
+ * Search through a result set, and return the first
+ * matching path (or NULL).
+ *
+ */
+parsed *
+find_matching_rule( parsed **results, unsigned int resultcount, parsed *p_request )
+{
+ unsigned int i = 0;
+ int re_rv;
+ regex_t re;
+ char re_err[128];
+ parsed *rule = NULL;
+
+ if ( resultcount == 0 || p_request->path == NULL ) return( NULL );
+
+ for ( i = 0; i < resultcount; i++ ) {
+ /* quick comparison */
+ if ( (strcasecmp( results[i]->path_re, p_request->path ) == 0) ||
+ (strcmp( results[i]->path_re, "*" ) == 0) ) {
+ debug( 4, LOC, "Rule %d match (non regexp)\n", i+1 );
+ rule = results[i];
+ break;
+ }
+
+ /* compile the regexp */
+ if ( (re_rv = regcomp( &re, results[i]->path_re, REG_EXTENDED | REG_NOSUB )) != 0 ) {
+ regerror( re_rv, &re, re_err, 128 );
+ debug( 4, LOC, "Invalid regex: \"%s\": %s\n", results[i]->path_re, re_err );
+ regfree( &re );
+ continue;
+ }
+
+ /* compare! */
+ if ( (regexec( &re, p_request->path, 0, NULL, 0 )) == 0 ) {
+ debug( 4, LOC, "Rule %d match (regexp)\n", i+1 );
+ rule = results[i];
+ regfree( &re );
+ break;
+ }
+ }
+
+ return( rule );
+}
+
+
+/*
+ * Clear the results array and free memory.
+ *
+ */
+void
+reset_results( parsed **results, unsigned int count )
+{
+ unsigned int i = 0;
+
+ for ( ; i < count && i < DB_RESULTS_MAX; i++ ) finish_parsed( results[i] );
+ memset( results, 0, sizeof(results) );
+
+ return;
+}
+