Add support for Squid's 'url_rewrite_concurrency' pipelining.
author Mahlon E. Smith <mahlon@laika.com>
Tue, 24 Jul 2012 12:12:07 -0700
changeset 29 c5d00a24af56
parent 25 6ceafe5ffe7f
child 30 5cc836e06759
Add support for Squid's 'url_rewrite_concurrency' pipelining.
Makefile
README
examples/squid_output.txt
parser.rl
process.c
volta.h
--- a/Makefile	Fri May 04 10:25:31 2012 -0700
+++ b/Makefile	Tue Jul 24 12:12:07 2012 -0700
@@ -38,7 +38,6 @@
 volta: CFLAGS += -L/usr/lib -I/usr/include
 volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua5.1)
 volta: LIBS   += $(shell pkg-config --libs-only-l lua5.1)
-debug: CFLAGS += $(CFLAGS_DEBUG)
 debug: CFLAGS += $(CFLAGS_DEBUG)\
 	$(shell pkg-config --cflags-only-I --libs-only-L lua5.1)
 debug: LIBS   += $(shell pkg-config --libs-only-l lua5.1) -lprofiler
--- a/README	Fri May 04 10:25:31 2012 -0700
+++ b/README	Tue Jul 24 12:12:07 2012 -0700
@@ -46,13 +46,13 @@
 
 Busy servers:
 
-Make sure rewrite_concurrency is disabled, volta is single threaded.
-Instead, just add more volta children.  They are lightweight, so load em
-up.  A proxy at my $DAYJOB is in use by around 450 people, and we get by
-nicely with 10 volta children.
+While Volta is lightweight enough that you can simply increase the
+number of rewriter children, it also supports Squid's
+url_rewrite_concurrency format if you find that to be more efficient
+for your environment.  Adjust to taste.
 
-	url_rewrite_concurrency 0
-	url_rewrite_children 10
+	url_rewrite_concurrency 60
+	url_rewrite_children 5
 
 
 Using volta
@@ -235,4 +235,3 @@
 directory, distributed with volta.
 
 
-
--- a/examples/squid_output.txt	Fri May 04 10:25:31 2012 -0700
+++ b/examples/squid_output.txt	Tue Jul 24 12:12:07 2012 -0700
@@ -8,3 +8,7 @@
 http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
 http://www.martini.nu/blog/2011/some-post.html 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
 http://www.martini.nu/blog/2009/some-old-post.html 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+4 http://www.martini.nu/projects/volta/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+5 http://i.imgur.com/whatever.jpg 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+6 http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+7 http://www.example.com/yup 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
--- a/parser.rl	Fri May 04 10:25:31 2012 -0700
+++ b/parser.rl	Tue Jul 24 12:12:07 2012 -0700
@@ -33,6 +33,9 @@
 #define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p;
 #define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start );
 
+#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
+/* #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
+
 /* 
  * Tokenize an incoming line from squid, returning a parsed and populated
  * structure to make redirection decisions against.  This pointer should
@@ -79,11 +82,8 @@
 %%{
 	machine request_parser;
 
-	action channel_id_found  {
-		debug( 1, LOC, "Channel ID found in redirector input.  Set 'url_rewrite_concurrency' to '0' in squid.\n" );
-		fbreak;
-	}
-
+	action chid_start    { MARK_S(chid) }
+	action chid_finish   { MARK_E(chid) }
 	action scheme_start  { MARK_S(scheme) }
 	action scheme_finish { MARK_E(scheme) }
 	action host_start    { MARK_S(host) }
@@ -141,7 +141,7 @@
 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
 	ipv6           = ( xdigit | ':' )+;
 
-	channel_id     = ( digit+ space )      %channel_id_found;
+	channel_id     = ( digit+ space )      >chid_start   %chid_finish;
 	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish @!scheme_error;
 	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
 	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
@@ -168,12 +168,16 @@
 	/* state machine */
 	%% write exec;
 
-	/* If we were given an invalid line, bail early */
+	/*
+	 * If we were given an invalid line, bail early after remembering
+	 * the channel ID.
+	 *
+	 */
 	if ( cs < %%{ write first_final; }%% ) {
-		free( p_parsed ), p_parsed = NULL;
 		debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 );
 		debug( 4, LOC, "%s", line );
-		return( NULL );
+		p_parsed->chid = COPY_STR( chid );
+		return( p_parsed );
 	}
 
 	debug( 6, LOC, "%s", line );
@@ -357,9 +361,11 @@
 		return( NULL );
 	}
 
+	p_parsed->valid     = 0;
 	p_parsed->type      = 0;
 	p_parsed->negate    = 0;
 	p_parsed->lua       = 0;
+	p_parsed->chid      = NULL;
 	p_parsed->path_re   = NULL;
 	p_parsed->redir     = NULL;
 	p_parsed->scheme    = NULL;
@@ -372,6 +378,7 @@
 	p_parsed->client_ip = NULL;
 	p_parsed->luapath   = NULL;
 
+	p_parsed->tokens.chid_start     = NULL;
 	p_parsed->tokens.path_re_start  = NULL;
 	p_parsed->tokens.redir_start    = NULL;
 	p_parsed->tokens.scheme_start   = NULL;
@@ -381,6 +388,7 @@
 	p_parsed->tokens.meth_start     = NULL;
 	p_parsed->tokens.c_ip_start     = NULL;
 	p_parsed->tokens.luapath_start  = NULL;
+	p_parsed->tokens.chid_length    = 0;
 	p_parsed->tokens.path_re_length = 0;
 	p_parsed->tokens.redir_length   = 0;
 	p_parsed->tokens.scheme_length  = 0;
@@ -410,6 +418,7 @@
 	free( p_parsed->port );
 
 	if ( p_parsed->type == REQUEST ) {
+		free( p_parsed->chid );
 		free( p_parsed->tld );
 		free( p_parsed->method );
 		free( p_parsed->client_ip );
@@ -427,9 +436,6 @@
 }
 
 
-#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
-/* #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
-
 /*
  * Take the previously parsed token locations and copy them into the request struct.
  *
@@ -443,6 +449,8 @@
 	p_parsed->port   = COPY_STR( port );
 
 	if ( p_parsed->type == REQUEST ) {
+		p_parsed->valid     = 1;
+		p_parsed->chid      = COPY_STR( chid );
 		p_parsed->method    = COPY_STR( meth );
 		p_parsed->client_ip = COPY_STR( c_ip );
 		/* p_request->client_ip = COPY_IP4( c_ip ); */
--- a/process.c	Fri May 04 10:25:31 2012 -0700
+++ b/process.c	Tue Jul 24 12:12:07 2012 -0700
@@ -48,7 +48,7 @@
 
 	/* If request parsing failed, return a blank line to squid
 	   to allow the request to pass through unmolested. */
-	if ( p_request == NULL )
+	if ( p_request == NULL || p_request->valid == 0 )
 		return pass( p_request, rule );
 
 	/*
@@ -102,6 +102,7 @@
 
 		/* send squid the lua return value. */
 		if ( v.debugmode < 5 ) {
+			if ( p_request->chid ) printf( "%s", p_request->chid );
 			puts( rewrite_string );
 			fflush( stdout );
 		}
@@ -125,14 +126,18 @@
 void
 pass( parsed *request, parsed *rule )
 {
-	finish_parsed( rule );
-	finish_parsed( request );
+	if ( v.debugmode >= 5 ) {
+		finish_parsed( rule );
+		finish_parsed( request );
+		return;
+	}
 
-	if ( v.debugmode >= 5 ) return;
-
+	if ( request && request->chid ) printf( "%s", request->chid );
 	printf( "\n" );
 	fflush( stdout );
 
+	finish_parsed( rule );
+	finish_parsed( request );
 	return;
 }
 
@@ -146,6 +151,7 @@
 {
 	if ( rule == NULL || v.debugmode >= 5 ) return;
 
+	if ( request->chid ) printf( "%s", request->chid );
 	if ( rule->redir ) printf( "%s:", rule->redir );
 	printf( "%s%s", (rule->scheme ? rule->scheme : request->scheme), rule->host );
 	if ( rule->port ) printf( ":%s", rule->port );
--- a/volta.h	Fri May 04 10:25:31 2012 -0700
+++ b/volta.h	Tue Jul 24 12:12:07 2012 -0700
@@ -120,9 +120,11 @@
  *
  */
 typedef struct parsed {
+	unsigned short int valid;
 	unsigned short int type;
 	unsigned short int negate;
 	unsigned short int lua;
+	char   *chid;
 	char   *path_re;
 	char   *redir;
 	char   *scheme;
@@ -137,6 +139,7 @@
 	char   *luapath;
 
 	struct {
+		char *chid_start;
 		char *path_re_start;
 		char *redir_start;
 		char *scheme_start;
@@ -146,6 +149,7 @@
 		char *meth_start;
 		char *c_ip_start;
 		char *luapath_start;
+		unsigned short int chid_length;
 		unsigned short int path_re_length;
 		unsigned short int redir_length;
 		unsigned short int scheme_length;