branch merge 0.2.0
authorMahlon E. Smith <mahlon@laika.com>
Wed, 25 Jul 2012 11:47:12 -0700
changeset 30 5cc836e06759
parent 29 c5d00a24af56 (diff)
parent 28 46e23ce07981 (current diff)
child 31 9c66c159a3a6
branch merge
README
--- a/.hgignore	Wed Nov 09 16:40:38 2011 -0800
+++ b/.hgignore	Wed Jul 25 11:47:12 2012 -0700
@@ -2,7 +2,6 @@
 ^volta.db$
 ^.*_graph.*
 ^tags$
-.*debug
-.*.o
+.o$
 ^misc
 ^parser.c
--- a/.hgtags	Wed Nov 09 16:40:38 2011 -0800
+++ b/.hgtags	Wed Jul 25 11:47:12 2012 -0700
@@ -2,3 +2,5 @@
 d4ce82194b640f863a817fdd085f06b19b232005 0.1
 0000000000000000000000000000000000000000 0.1
 c5cbe1eb96f4d641875d0557022f08feefe20004 0.0.1
+822094314703a5033c644a9c6cf5627ffaeaa0a9 0.1.0
+d3b6f9ccbe20b7b7c89751be98be375a24b68b88 0.1.1
--- a/INSTALL	Wed Nov 09 16:40:38 2011 -0800
+++ b/INSTALL	Wed Jul 25 11:47:12 2012 -0700
@@ -11,6 +11,7 @@
 
  - TinyCDB  (http://www.corpit.ru/mjt/tinycdb.html)
  - GNU make (http://www.gnu.org/software/make/)
+ - Lua      (http://www.lua.org/)
 
 If available, install these dependencies from your OS packaging system
 of choice.
--- a/LICENSE	Wed Nov 09 16:40:38 2011 -0800
+++ b/LICENSE	Wed Jul 25 11:47:12 2012 -0700
@@ -1,5 +1,5 @@
 
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- a/Makefile	Wed Nov 09 16:40:38 2011 -0800
+++ b/Makefile	Wed Jul 25 11:47:12 2012 -0700
@@ -12,17 +12,57 @@
 
 .PHONY : parsegraph profile clean clobber release
 
-# Ubuntu: perftools doesn't currently register a .pc file
+
+########################################################################
+### P L A T F O R M  T A R G E T S
+########################################################################
+
+# I hate this.  Tools like pkg-config are supposed to make finding
+# libraries easy across platforms.  They only work when everyone names
+# the libraries the same thing, unfortunately.  (Why name the libs with
+# the version number, when pkg-config supports built in versioning??)
+# And no, I'm not going to use autoconf, which just takes your build
+# problems and makes a whole bunch of new baby problems for you to
+# deal with.  Gaaarrgghghh.
+#
+# If you have problems building volta, manually pass the correct CFLAGS
+# and LIBS to the 'make' command line for your platform.
+#
+# The following works for OSX with macports or homebrew (10.6/10.7),
+# FreeBSD 8.x and 9.x, and Ubuntu 11.10 and 12.04.
+
+# Ubuntu
+#  - perftools doesn't currently register a .pc file at all
+#  - lua is called 'lua5.1'
 ifeq ($(UNAME), Linux)
-debug: CFLAGS += $(CFLAGS_DEBUG)
-debug: LIBS += -lprofiler
+volta: CFLAGS += -L/usr/lib -I/usr/include
+volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua5.1)
+volta: LIBS   += $(shell pkg-config --libs-only-l lua5.1)
+debug: CFLAGS += $(CFLAGS_DEBUG)\
+	$(shell pkg-config --cflags-only-I --libs-only-L lua5.1)
+debug: LIBS   += $(shell pkg-config --libs-only-l lua5.1) -lprofiler
+
+# FreeBSD
+# - lua is called 'lua-5.1'
+else ifeq ($(UNAME), FreeBSD)
+volta: CFLAGS += -L/usr/local/lib -I/usr/local/include
+volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua-5.1)
+volta: LIBS   += $(shell pkg-config --libs-only-l lua-5.1)
+debug: CFLAGS += $(CFLAGS_DEBUG)\
+    $(shell pkg-config --cflags-only-I --libs-only-L lua-5.1 $(DEPS_DEBUG))
+debug: LIBS += $(shell pkg-config --libs-only-l lua-5.1 $(DEPS_DEBUG))
+
+# Darwin, everyone else (best guess?)
+# - lua is called 'lua', hopefully!
 else
-volta: CFLAGS += -L/opt/local/lib -I/opt/local/include -L/usr/local/lib -I/usr/local/include
+volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua)
+volta: LIBS   += $(shell pkg-config --libs-only-l lua) 
 debug: CFLAGS += $(CFLAGS_DEBUG)\
-	$(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG))
-debug: LIBS += $(shell pkg-config --libs-only-l $(DEPS_DEBUG))
+    $(shell pkg-config --cflags-only-I --libs-only-L lua $(DEPS_DEBUG))
+debug: LIBS += $(shell pkg-config --libs-only-l lua $(DEPS_DEBUG))
 endif
 
+
 # Fix parser line number display in debug mode
 ifeq (,$(findstring debug,$(MAKECMDGOALS)))
 	RAGEL_FLAGS = -LCe -G2
--- a/README	Wed Nov 09 16:40:38 2011 -0800
+++ b/README	Wed Jul 25 11:47:12 2012 -0700
@@ -5,13 +5,14 @@
 What is volta?
 --------------
 
-Volta is a high-performance, low-resource URI rewriter for use with the
+Volta is a high performance, low resource URI rewriter for use with the
 Squid caching proxy server (http://www.squid-cache.org/.)  With it, you
 can dynamically alter URI requests that pass through Squid based on
 various criteria.
 
 It uses a state machine to parse URIs and rules, and a constant database
-to store and access those rules.
+to store and access those rules.  It can then either perform conditional
+rewrites internally, or by evaluating Lua scripts.
 
 
 Why is it called "volta"?
@@ -36,22 +37,22 @@
 
 You must enable url rewriting from within the squid.conf file.
 
-    url_rewrite_program /usr/local/bin/volta
+	url_rewrite_program /usr/local/bin/volta
 
 ... and that's it.  You may need some additional customization, like where
 the volta database is stored on disk:
 
-    url_rewrite_program /usr/local/bin/volta -f /var/db/squid/volta.db
+	url_rewrite_program /usr/local/bin/volta -f /var/db/squid/volta.db
 
 Busy servers:
 
-Make sure rewrite_concurrency is disabled, volta is single threaded.
-Instead, just add more volta children.  They are lightweight, so load em
-up.  A proxy at my $DAYJOB is in use by around 450 people, and we get by
-nicely with 10 volta children.
+While Volta is lightweight enough to simply increase the number of
+rewriter children, it also supports Squid's rewrite_concurrency format
+if you find that to be more efficient for your environment.  Adjust to
+taste.
 
-    url_rewrite_concurrency 0
-    url_rewrite_children 10
+	url_rewrite_concurrency 60
+	url_rewrite_children 5
 
 
 Using volta
@@ -62,7 +63,7 @@
 Volta reads its rewrite rules from a local database.  You can create the
 rules in a text editor, then convert it to the database like so:
 
-    % volta -c rules.txt
+	% volta -c rules.txt
 
 You'll be left with a "volta.db" file in the current directory.  Put it
 wherever you please, and use the -f flag to point to it.
@@ -74,18 +75,20 @@
 Volta's rule syntax is designed to be easy to parse by humans and
 machines.  Blank lines are skipped, as is any line that starts with the
 '#' character, so you can keep the ascii version of your rules well
-documented and in version control.
+documented and in version control.  There is no practical limit on the
+number of rules in this database.
 
 When compiling the ruleset into the database format, volta detects
 malformed rules and stops if there are any problems, leaving your
-original database intact. You can change the ruleset and recompile the
-database at any time while volta is running, and the new rules will take
-affect within about 10 seconds. No need to restart squid!
+original database intact.  You can change the ruleset at any time while
+volta is running, and the new rules will take effect within about 10
+seconds.  No need to restart squid!
 
 There are two types of rules -- positive matches, and negative matches.
-Positive matches cause the rewrite, negative matches allow the original
-request to pass.  Rule order is consistent, top-down, first match wins.
-Fields are separated by any amount of whitespace (spaces or tabs.)
+Positive matches cause the rewrite, negative matches intentionally allow
+the original request to pass.  Rule order is consistent, top-down, first
+match wins.  Fields are separated by any amount of whitespace (spaces or
+tabs.)
 
 
 ### Positive matches:
@@ -102,39 +105,43 @@
 
     Second field: the path to match.
 
-      This can be an exact match ('/path/to/something.html'), a regular
-      expression ('\.(jpg|gif|png)$'), or a single '*' to match for any
-      path. Regular expressions are matches without case sensitivity. There
-      is currently no support for capturing, though this may be added in a
-      future release.
+	  This can be an exact match ('/path/to/something.html'), a regular
+	  expression ('\.(jpg|gif|png)$'), or a single '*' to match for any
+	  path. Regular expressions are matched without case sensitivity.  There
+	  is currently no internal support for captures, though you can use
+	  a Lua rule (see below) for more complex processing.
 
 
     Third field: The redirect code and url to rewrite to.
 
-      Any pieces of a url that are omitted are automatically replaced with
-      the original request's element -- the exception is a hostname, which
-      is required. If you omit a redirect code, the URL rewrite is
-      transparent to the client. You can attach a 301: or 302: prefix to
-      cause a permanent or temporary (respectively) redirect response to be
-      sent, instead.
+      Any pieces of a url that are omitted are automatically replaced
+      with the original request's element -- the exception is a hostname,
+      which is required.  If you omit a redirect code, the URL rewrite is
+      transparent to the client.  You can attach a 301: or 302: prefix to
+      send a permanent or temporary redirect (respectively), instead.
+
+      If you require more complex processing than what volta provides
+      internally, you can also specify a path to a Lua script (prefixed
+      with 'lua:'.)  See the 'Lua rules' section of this README for more
+	  information.
 
 
 ### Negative matches:
 
     First field: the hostname to match.
 
-      See above -- all the same rules apply.
+	  See above -- all the same rules apply.
 
 
     Second field: the path to match.
 
-      See above -- all the same rules apply.
+	  See above -- all the same rules apply.
 
 
-    Third field: the 'negative' marker.
+	Third field: the 'negative' marker.
 
-      This is simply the '-' character, that signals to volta that this is
-      a negative matching rule.
+	  This is simply the '-' character, that signals to volta that this is
+	  a negative matching rule.
 
 
 You can easily test your rules by running volta on the command line, and
@@ -148,24 +155,83 @@
 
     google.com * 302:https://www.google.com
 
-    This will redirect the request "http://www.google.com/search?q=test" to
-    "https://www.google.com/search?q=test".
+	This will redirect the request "http://www.google.com/search?q=test" to
+	"https://www.google.com/search?q=test".
 
 
 Transparently alter all uploaded images on imgur to be my face:  :)
 
-    i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg
+	i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg
 
 
 Expand a local, non qualified hostname to a FQDN (useful alongside the
 'dns_defnames' squid setting to enforce browser proxy behaviors):
 
-    local-example * local-example.company.com
+	local-example * local-example.company.com
 
 
 Cause all blog content except for 2011 posts to permanently redirect to
 an archival page:
 
-    martini.nu /blog/2011 -
-    martini.nu /blog 301:martini.nu/content-archived.html
+	martini.nu /blog/2011 -
+	martini.nu /blog 301:martini.nu/content-archived.html
+
+
+Send all requests to reddit/r/WTF/* through a lua script for further processing.
+
+	reddit.com /r/wtf lua:/path/to/a/lua-script.lua
+
+
+Turn off rewriting for specific network segment or IP address:
+
+	Squid has this ability built in -- see the 'url_rewrite_access' setting.
+	Alternatively, do the checks in lua.
+
+
+
+Lua Rules
+---------
+
+Volta has an embedded Lua interpreter that you can use to perform all
+kinds of conditional rewrites.  Read more about the syntax of the Lua
+language here: http://www.lua.org/manual/5.1/
+
+### Loading a script
+
+To use a Lua script, prefix the rewrite target of a volta rule with
+'lua:'.  The rest of the target is then treated as a path to the script.
+(You can find an example in the Examples section of this README.)
 
+You can specify a path to either an ascii file, or Lua bytecode. (If
+speed is an absolute premium, I'm seeing around a 25% performance
+increase by using Lua bytecode files.)
+
+You can use different scripts for different rules, or use the same
+script across any number of separate rules.
+
+There is no need to restart squid when modifying Lua rules.  Changes are
+seen immediately.
+
+
+### Environment
+
+* Global variable declarations are disabled, so scripts can't accidentally stomp on each other.  All variables must be declared with the 'local' keyword.
+* There is a global table called 'shared' you may use if you want to share data between separate scripts, or remember things in-between rule evaluations.
+* The details of the request can be found in a table, appropriately named 'request'.  HTTP scheme, host, path, port, method, client_ip, and domain are all available by default from the request table.
+* Calling Lua's print() function emits debug information to stderr.  Use a debug level of 2 or higher to see it.
+
+
+### Return value
+
+The return value of the script is sent unmodified to squid, which should
+be a URL the request is rewritten to, with an optional redirect code
+prefix (301 or 302.)
+
+Omitting a return value, or returning 'nil' has the same effect as a negative
+rule match -- the original request is allowed through without any rewrite.
+
+
+An extremely simple Lua rule script can be found in the 'examples'
+directory, distributed with volta.
+
+
--- a/accept_loop.c	Wed Nov 09 16:40:38 2011 -0800
+++ b/accept_loop.c	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- a/db.c	Wed Nov 09 16:40:38 2011 -0800
+++ b/db.c	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -122,7 +122,9 @@
 		rule = parse_rule( dbline->val );
 		if ( rule == NULL ||
 			( rule->negate == 1 && rule->host != NULL ) ||
-			( rule->negate == 0 && rule->host == NULL )) {
+			( rule->negate == 0 && rule->host == NULL ) ||
+			( rule->lua    == 1 && rule->luapath == NULL )
+		   ) {
 
 			debug( 0, LOC, "Invalid rule (line %d), stopping: %s", linenum, buf );
 			error = 1;
--- a/db.h	Wed Nov 09 16:40:38 2011 -0800
+++ b/db.h	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/lua-example.lua	Wed Jul 25 11:47:12 2012 -0700
@@ -0,0 +1,18 @@
+
+-- examine the request.
+-- volta replaces print(), so this emits to stderr (debug level 2 or higher.)
+--
+for k,v in pairs( request ) do print( string.format("request.%-6s --> %s", k, v) ) end
+
+-- all variables need to be declared using local scoping!
+-- (creating a new global raises an error.)
+local redir = 302
+
+-- temporary redirect to a different site, every other second.
+-- why would you want to do this?  you probably wouldn't.  just illustrating
+-- how easy it is to do custom stuff.  on odd seconds nothing is returned,
+-- which lets the original request through without a rewrite.
+if os.time() % 2 == 0 then
+	return string.format( "%d:http://example.com%s", redir, request.path )
+end
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/rules.txt	Wed Jul 25 11:47:12 2012 -0700
@@ -0,0 +1,27 @@
+
+#------------------------------------------------------------------------------
+# This is an example volta rules file.  In order for volta to use it,
+# it needs to be converted to a database for fast lookup.  See the
+# README file, distributed with volta, for instructions.
+#------------------------------------------------------------------------------
+
+# Force all requests to Google to use SSL
+# http://www.google.com/search?q=test --> https://www.google.com/search?q=test
+google.com * 302:https://www.google.com
+
+# Transparently alter all uploaded images on imgur to be my face:  :)
+i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg
+
+# Expand a local, non qualified hostname to a FQDN (useful alongside the
+# 'dns_defnames' squid setting to enforce browser proxy behaviors):
+local-example * local-example.company.com
+
+# Cause all blog content except for 2011 posts to permanently redirect to
+# an archival page.
+martini.nu /blog/2011 -
+martini.nu /blog 301:martini.nu/content-archived.html
+
+# Send all requests to reddit/r/WTF/* through a lua script for
+# further processing.
+reddit.com /r/wtf lua:examples/lua-example.lua
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/squid_output.txt	Wed Jul 25 11:47:12 2012 -0700
@@ -0,0 +1,14 @@
+http://www.google.com/search?q=test 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+https://www.google.com/search?q=test 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://www.imgur.com/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://i.imgur.com/whatever.jpg 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+googleusercontent.com:443 10.1.1.40/- - CONNECT myip=10.1.1.1 myport=3128
+http://local-example/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://www.martini.nu/projects/volta/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://www.martini.nu/blog/2011/some-post.html 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+http://www.martini.nu/blog/2009/some-old-post.html 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+4 http://www.martini.nu/projects/volta/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+5 http://i.imgur.com/whatever.jpg 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+6 http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
+7 http://www.example.com/yup 10.1.1.30/- - GET myip=10.1.1.1 myport=3128
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lua.c	Wed Jul 25 11:47:12 2012 -0700
@@ -0,0 +1,173 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "volta.h"
+#include "lua.h"
+
+/*
+ * __newindex handler for the globals table: raise a lua error naming the
+ * variable a script tried to create without 'local'.  One interpreter is
+ * shared by all scripts, so the global namespace has to stay clean.
+ */
+int
+luaV_globalindex( lua_State *lua )
+{
+	const char *name = lua_tostring( lua, -2 ); /* the key being assigned */
+	return luaL_error( lua, "attempt to set global var '%s' (use local!)", name );
+}
+
+
+/*
+ * Replacement for lua's print(): send each argument to debug() at level 2,
+ * tagged with the calling script's file and line.  Output goes to stderr
+ * instead of stdout, which would just confuse squid.  Returns no values.
+ */
+int
+luaV_print( lua_State *lua )
+{
+	lua_Debug info;
+	int i = 0;
+
+	/* find the caller of print(); use a placeholder when there is none. */
+	if ( !lua_getstack( lua, 1, &info ) || !lua_getinfo( lua, "Sl", &info ) ) {
+		info.short_src[0] = '?', info.short_src[1] = '\0', info.currentline = 0;
+	}
+	/* lua_tostring() gives NULL for non-string/number values: name the type. */
+	for( i = 1; i <= lua_gettop( lua ); i++ ) {
+		const char *str = lua_tostring( lua, i );
+		debug( 2, info.short_src, info.currentline, "%s\n", str ? str : luaL_typename(lua, i) );
+	}
+	return( 0 );
+}
+
+
+/*
+ * Create the one lua interpreter shared by every lua rule, and set up the
+ * global environment scripts run in.
+ *
+ */
+lua_State *
+luaV_setup( void )
+{
+	lua_State *lua = luaL_newstate();
+	luaL_openlibs( lua ); /* include lua standard libraries */
+
+	/* 'request': empty for now, presized for the seven request fields. */
+	lua_createtable( lua, 0, 7 );
+	lua_setfield( lua, LUA_GLOBALSINDEX, "request" );
+
+	/* 'shared': a table for data shared between scripts. */
+	lua_newtable( lua );
+	lua_setfield( lua, LUA_GLOBALSINDEX, "shared" );
+
+	/* replace the lua print() function with one that calls debug() instead */
+	lua_register( lua, "print", luaV_print );
+
+	/* Restrict additional globals.  Done last, so the tables above can
+	 * still be created: new globals now end up in luaV_globalindex(). */
+	lua_createtable( lua, 0, 1 );
+	lua_pushcfunction( lua, luaV_globalindex );
+	lua_setfield( lua, -2, "__newindex" );
+	lua_pushboolean( lua, 0 );
+	lua_setfield( lua, -2, "__metatable" );
+	lua_setmetatable( lua, LUA_GLOBALSINDEX );
+
+	lua_settop( lua, 0 );  /* wipe the stack */
+	return( lua );
+}
+
+
+/*
+ * Copy the fields of the parsed request into lua's global 'request' table.
+ * The table is addressed relative to the stack top and popped when done.
+ */
+void
+luaV_setup_request( parsed *request )
+{
+	lua_getfield( v.lua, LUA_GLOBALSINDEX, "request" );
+	lua_pushstring( v.lua, request->scheme );
+	lua_setfield( v.lua, -2, "scheme" );
+	lua_pushstring( v.lua, request->host );
+	lua_setfield( v.lua, -2, "host" );
+	lua_pushstring( v.lua, request->path );
+	lua_setfield( v.lua, -2, "path" );
+	lua_pushstring( v.lua, request->port );
+	lua_setfield( v.lua, -2, "port" );
+	lua_pushstring( v.lua, request->method );
+	lua_setfield( v.lua, -2, "method" );
+	lua_pushstring( v.lua, request->client_ip );
+	lua_setfield( v.lua, -2, "client_ip" );
+	lua_pushstring( v.lua, request->tld );
+	lua_setfield( v.lua, -2, "domain" );
+
+	lua_pop( v.lua, 1 ); /* leave the stack the way it was found */
+}
+
+
+/*
+ * Given a request struct and a path to a lua script (or bytecode),
+ * execute the script within the global lua interpreter, and return
+ * a pointer to the string it generated (or NULL).  The string belongs
+ * to lua, and is only valid until the next call to luaV_run().
+ */
+char *
+luaV_run( parsed *request, char *path )
+{
+	int lua_err = 0;
+
+	/* start from an empty stack, releasing the previous run's return value. */
+	lua_settop( v.lua, 0 );
+	luaV_setup_request( request );
+
+	/* suck in the lua chunk(s) */
+	debug( 4, LOC, "Loading Lua code from '%s'\n", path );
+	lua_err = luaL_loadfile( v.lua, path );
+	if ( lua_err ) {
+		debug( 2, LOC, "Unable to run lua rule: %s\n", lua_tostring(v.lua, -1) );
+		lua_settop( v.lua, 0 );
+		return( NULL );
+	}
+
+	/* execute the lua, expecting one value to be returned. */
+	lua_err = lua_pcall( v.lua, 0, 1, 0 );
+	if ( lua_err ) {
+		debug( 2, LOC, "Unable to run lua rule: %s\n", lua_tostring(v.lua, -1) );
+		lua_settop( v.lua, 0 );
+		return( NULL );
+	}
+
+	/* the return value stays on the stack: once popped, lua is free to
+	 * collect the string, leaving the caller with a dangling pointer. */
+	char *rewrite = (char *)lua_tostring( v.lua, -1 );
+
+	debug( 5, LOC, "Lua is currently consuming %dKB of memory\n", lua_gc(v.lua, LUA_GCCOUNT, 0) );
+	return( rewrite );
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lua.h	Wed Jul 25 11:47:12 2012 -0700
@@ -0,0 +1,45 @@
+/* vim: set noet nosta sw=4 ts=4 ft=c : */
+/*
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of Mahlon E. Smith nor the names of his
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _LUA_H
+#define _LUA_H
+
+/*
+ * Function prototypes
+ *
+ */
+int luaV_globalindex( lua_State * );   /* __newindex handler: rejects new globals */
+int luaV_print( lua_State * );         /* print() replacement that calls debug() */
+lua_State *luaV_setup( void );         /* create and configure the interpreter */
+void luaV_setup_request( parsed * );   /* fill lua's global 'request' table */
+char *luaV_run( parsed *, char * );    /* run a script, return its string or NULL */
+
+#endif
+
--- a/main.c	Wed Nov 09 16:40:38 2011 -0800
+++ b/main.c	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,7 @@
 
 #include "volta.h"
 #include "db.h"
+#include "lua.h"
 
 struct v_globals v;
 
@@ -53,6 +54,7 @@
 	v.timer.db_lastcheck = 0;
 	v.timer.start        = time( NULL );
 	v.timer.lines        = 0;
+	v.lua                = luaV_setup();
 
 	/* get_opt vars */
 	int opt = 0;
@@ -131,6 +133,7 @@
 void
 shutdown_actions( void )
 {
+	lua_close( v.lua );
 	cdb_free( &v.db );
 	close( v.db_fd );
 	report_speed();
--- a/parser.rl	Wed Nov 09 16:40:38 2011 -0800
+++ b/parser.rl	Wed Jul 25 11:47:12 2012 -0700
@@ -33,6 +33,9 @@
 #define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p;
 #define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start );
 
+#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
+/* #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
+
 /* 
  * Tokenize an incoming line from squid, returning a parsed and populated
  * structure to make redirection decisions against.  This pointer should
@@ -79,11 +82,8 @@
 %%{
 	machine request_parser;
 
-	action channel_id_found  {
-		debug( 1, LOC, "Channel ID found in redirector input.  Set 'url_rewrite_concurrency' to '0' in squid.\n" );
-		fbreak;
-	}
-
+	action chid_start    { MARK_S(chid) }
+	action chid_finish   { MARK_E(chid) }
 	action scheme_start  { MARK_S(scheme) }
 	action scheme_finish { MARK_E(scheme) }
 	action host_start    { MARK_S(host) }
@@ -141,7 +141,7 @@
 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
 	ipv6           = ( xdigit | ':' )+;
 
-	channel_id     = ( digit+ space )      %channel_id_found;
+	channel_id     = ( digit+ space )      >chid_start   %chid_finish;
 	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish @!scheme_error;
 	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
 	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
@@ -168,12 +168,16 @@
 	/* state machine */
 	%% write exec;
 
-	/* If we were given an invalid line, bail early */
+	/*
+	 * If we were given an invalid line, bail early after remembering
+	 * the channel ID.
+	 *
+	 */
 	if ( cs < %%{ write first_final; }%% ) {
-		free( p_parsed ), p_parsed = NULL;
 		debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 );
 		debug( 4, LOC, "%s", line );
-		return( NULL );
+		p_parsed->chid = COPY_STR( chid );
+		return( p_parsed );
 	}
 
 	debug( 6, LOC, "%s", line );
@@ -208,18 +212,21 @@
 	action redir_start   { MARK_S(redir) }
 	action redir_finish  { p_parsed->tokens.redir_length = 3; } # strip trailing colon
 	action negate_finish { p_parsed->negate = 1; }
+	action luapath_start { p_parsed->lua = 1; MARK_S(luapath) }
 
-	action scheme_start  { MARK_S(scheme) }
-	action scheme_finish { MARK_E(scheme) }
-	action host_start    { MARK_S(host) }
-	action host_finish   { MARK_E(host) }
-	action port_start    { p_parsed->tokens.port_start = p+1; } # strip leading colon
-	action port_finish   { MARK_E(port) }
-	action path_start    { MARK_S(path) }
-	action path_finish   { MARK_E(path) }
+	action scheme_start   { MARK_S(scheme) }
+	action scheme_finish  { MARK_E(scheme) }
+	action host_start     { MARK_S(host) }
+	action host_finish    { MARK_E(host) }
+	action port_start     { p_parsed->tokens.port_start = p+1; } # strip leading colon
+	action port_finish    { MARK_E(port) }
+	action path_start     { MARK_S(path) }
+	action path_finish    { MARK_E(path) }
+	action luapath_finish { MARK_E(luapath) }
 
-	action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); }
-	action host_error  { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
+	action match_error   { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); }
+	action host_error    { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
+	action luapath_error { debug( 3, LOC, "Unable to parse the lua path.\n" ); }
 
 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
 	path_segment    = '/' ( any - space )*;
@@ -230,7 +237,8 @@
 	ipv6      = ( xdigit | ':' )+;
 
 	negate    = ( '-' )                 %negate_finish;
-	path_re   = ( any - space )+        >match_start  %match_finish @!match_error;
+	path_re   = ( any - space )+        >match_start    %match_finish   @!match_error;
+	luapath   = ( any - space )+        >luapath_start  %luapath_finish @!luapath_error;
 
 	redir     = ( ('301' | '302') ':' ) >redir_start  %redir_finish;
 	scheme    = ( alpha{3,5} '://' )    >scheme_start %scheme_finish;
@@ -239,8 +247,9 @@
 	path      = path_segment*           >path_start   %path_finish;
 
 	rewrite   = ( redir? scheme? host port? path? );
+	luarule   = ( 'lua:' luapath );
 
-	main := path_re sep ( rewrite | negate );
+	main := path_re sep ( rewrite | negate | luarule );
 }%%
 
 	/* state machine */
@@ -302,12 +311,10 @@
 	hostname       = host_component ( '.' host_component )* '.'?;
 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
 	token          = ( any - space )+;
-	redir          = ( digit{3} ':' );
 	host           = ( hostname | ipv4 );
 
 	key = ( host | '*' )      >key_start %key_finish @!key_error;
 	val = ( token sep token ) >val_start %val_finish @!val_error;
-	#       regex     rewrite or negate
 	
 	main:= key sep val '\n';
 }%%
@@ -354,8 +361,11 @@
 		return( NULL );
 	}
 
+	p_parsed->valid     = 0;
 	p_parsed->type      = 0;
 	p_parsed->negate    = 0;
+	p_parsed->lua       = 0;
+	p_parsed->chid      = NULL;
 	p_parsed->path_re   = NULL;
 	p_parsed->redir     = NULL;
 	p_parsed->scheme    = NULL;
@@ -366,7 +376,9 @@
 	p_parsed->user      = NULL;
 	p_parsed->method    = NULL;
 	p_parsed->client_ip = NULL;
+	p_parsed->luapath   = NULL;
 
+	p_parsed->tokens.chid_start     = NULL;
 	p_parsed->tokens.path_re_start  = NULL;
 	p_parsed->tokens.redir_start    = NULL;
 	p_parsed->tokens.scheme_start   = NULL;
@@ -375,6 +387,8 @@
 	p_parsed->tokens.path_start     = NULL;
 	p_parsed->tokens.meth_start     = NULL;
 	p_parsed->tokens.c_ip_start     = NULL;
+	p_parsed->tokens.luapath_start  = NULL;
+	p_parsed->tokens.chid_length    = 0;
 	p_parsed->tokens.path_re_length = 0;
 	p_parsed->tokens.redir_length   = 0;
 	p_parsed->tokens.scheme_length  = 0;
@@ -383,6 +397,7 @@
 	p_parsed->tokens.path_length    = 0;
 	p_parsed->tokens.meth_length    = 0;
 	p_parsed->tokens.c_ip_length    = 0;
+	p_parsed->tokens.luapath_length = 0;
 
 	return p_parsed;
 }
@@ -403,6 +418,7 @@
 	free( p_parsed->port );
 
 	if ( p_parsed->type == REQUEST ) {
+		free( p_parsed->chid );
 		free( p_parsed->tld );
 		free( p_parsed->method );
 		free( p_parsed->client_ip );
@@ -411,6 +427,7 @@
 	if ( p_parsed->type == RULE ) {
 		free( p_parsed->path_re );
 		free( p_parsed->redir );
+		free( p_parsed->luapath );
 	}
 
 	free( p_parsed ), p_parsed = NULL;
@@ -419,9 +436,6 @@
 }
 
 
-#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
-/* #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
-
 /*
  * Take the previously parsed token locations and copy them into the request struct.
  *
@@ -435,6 +449,8 @@
 	p_parsed->port   = COPY_STR( port );
 
 	if ( p_parsed->type == REQUEST ) {
+		p_parsed->valid     = 1;
+		p_parsed->chid      = COPY_STR( chid );
 		p_parsed->method    = COPY_STR( meth );
 		p_parsed->client_ip = COPY_STR( c_ip );
 		/* p_request->client_ip = COPY_IP4( c_ip ); */
@@ -446,6 +462,7 @@
 	if ( p_parsed->type == RULE ) {
 		p_parsed->path_re = COPY_STR( path_re );
 		p_parsed->redir   = COPY_STR( redir );
+		p_parsed->luapath = COPY_STR( luapath );
 	}
 
 	return;
--- a/process.c	Wed Nov 09 16:40:38 2011 -0800
+++ b/process.c	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,7 @@
 
 #include "volta.h"
 #include "db.h"
+#include "lua.h"
 
 
 /*
@@ -47,7 +48,7 @@
 
 	/* If request parsing failed, return a blank line to squid
 	   to allow the request to pass through unmolested. */
-	if ( p_request == NULL )
+	if ( p_request == NULL || p_request->valid == 0 )
 		return pass( p_request, rule );
 
 	/*
@@ -82,16 +83,35 @@
 	/* avoid trivial redirect loops */
 	if (
 		( rule->redir ) &&
-		( rule->scheme == NULL || ( strcmp(p_request->scheme, rule->scheme) == 0) ) &&
-		( rule->path   == NULL || ( strcmp(p_request->path,     rule->path) == 0) ) &&
-		( strcmp( p_request->host, rule->host) == 0 )
+		( rule->scheme == NULL || ( p_request->scheme && ( strcmp(p_request->scheme, rule->scheme) == 0) )) &&
+		( rule->path   == NULL || ( p_request->path && ( strcmp(p_request->path, rule->path) == 0) )) && /* guarded like scheme: never strcmp() a NULL request path */
+		( strcmp( p_request->host, rule->host ) == 0 )
 	   ) {
 		debug( 2, LOC, "Potential rewrite loop, skipping rewrite.\n" );
 		return pass( p_request, rule );
 	}
 
-	/* otherwise, perform the rewrite. */
-	rewrite( p_request, rule );
+	/* At this point we know we'll be doing a rewrite. */
+
+	/* Pass the request to lua for processing if we saw a 'lua:' tag. */
+	if ( rule->lua == 1 ) {
+		char *rewrite_string = luaV_run( p_request, rule->luapath );
+
+		/* the script returned nil, or otherwise had an error. */
+		if ( rewrite_string == NULL ) return pass( p_request, rule );
+
+		/* send squid the lua return value. */
+		if ( v.debugmode < 5 ) {
+			if ( p_request->chid ) printf( "%s", p_request->chid );
+			puts( rewrite_string );
+			fflush( stdout );
+		}
+	}
+
+	/* otherwise, perform the rewrite internally. */
+	else {
+		rewrite( p_request, rule );
+	}
 
 	finish_parsed( rule );
 	finish_parsed( p_request );
@@ -106,14 +126,18 @@
 void
 pass( parsed *request, parsed *rule )
 {
-	finish_parsed( rule );
-	finish_parsed( request );
+	if ( v.debugmode >= 5 ) { /* at debug level 5 and up: finish both structs and return without printing */
+		finish_parsed( rule );
+		finish_parsed( request );
+		return;
+	}
 
-	if ( v.debugmode >= 5 ) return;
-
+	if ( request && request->chid ) printf( "%s", request->chid ); /* prefix the reply with the request's chid when one is set */
 	printf( "\n" );
 	fflush( stdout );
 
+	finish_parsed( rule ); /* finish the structs only after request->chid has been printed */
+	finish_parsed( request );
 	return;
 }
 
@@ -127,6 +151,7 @@
 {
 	if ( rule == NULL || v.debugmode >= 5 ) return;
 
+	if ( request->chid ) printf( "%s", request->chid );
 	if ( rule->redir ) printf( "%s:", rule->redir );
 	printf( "%s%s", (rule->scheme ? rule->scheme : request->scheme), rule->host );
 	if ( rule->port ) printf( ":%s", rule->port );
--- a/util.c	Wed Nov 09 16:40:38 2011 -0800
+++ b/util.c	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
--- a/volta.h	Wed Nov 09 16:40:38 2011 -0800
+++ b/volta.h	Wed Jul 25 11:47:12 2012 -0700
@@ -1,6 +1,6 @@
 /* vim: set noet nosta sw=4 ts=4 ft=c : */
 /*
-Copyright (c) 2011, Mahlon E. Smith <mahlon@martini.nu>
+Copyright (c) 2011-2012, Mahlon E. Smith <mahlon@martini.nu>
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -57,8 +57,6 @@
 #include <arpa/inet.h>
 */
 
-#include <cdb.h>
-
 #ifdef DEBUG
 #include <google/profiler.h>
 #endif
@@ -75,6 +73,11 @@
 /* Aid debugging */
 #define LOC __FILE__, __LINE__
 
+#include <cdb.h>
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
 
 /*
  * a global struct for easy access to common vars 
@@ -85,6 +88,7 @@
 	char dbname[128];             /* path to database file */
 	short int db_fd;              /* opened db file descriptor */
 	struct cdb db;                /* the cdb struct */
+	lua_State *lua;               /* the lua interpreter */
 
 	struct {
 		time_t start;             /* start time */
@@ -116,8 +120,11 @@
  *
  */
 typedef struct parsed {
+	unsigned short int valid;
 	unsigned short int type;
 	unsigned short int negate;
+	unsigned short int lua;
+	char   *chid;
 	char   *path_re;
 	char   *redir;
 	char   *scheme;
@@ -129,8 +136,10 @@
 	char   *client_ip;
 	char   *user;
 	char   *method;
+	char   *luapath;
 
 	struct {
+		char *chid_start;
 		char *path_re_start;
 		char *redir_start;
 		char *scheme_start;
@@ -139,6 +148,8 @@
 		char *path_start;
 		char *meth_start;
 		char *c_ip_start;
+		char *luapath_start;
+		unsigned short int chid_length;
 		unsigned short int path_re_length;
 		unsigned short int redir_length;
 		unsigned short int scheme_length;
@@ -147,6 +158,7 @@
 		unsigned int       path_length;
 		unsigned short int meth_length;
 		unsigned short int c_ip_length;
+		unsigned short int luapath_length;
 	} tokens;
 } parsed;