# HG changeset patch # User Mahlon E. Smith # Date 1343242032 25200 # Node ID 5cc836e06759e9354006534e7ddd692eb1d750ef # Parent c5d00a24af565edc0a10a71e0d1868b32b35d214# Parent 46e23ce0798121b3f4b550f43bfea6234c7ceceb branch merge diff -r 46e23ce07981 -r 5cc836e06759 .hgignore --- a/.hgignore Wed Nov 09 16:40:38 2011 -0800 +++ b/.hgignore Wed Jul 25 11:47:12 2012 -0700 @@ -2,7 +2,6 @@ ^volta.db$ ^.*_graph.* ^tags$ -.*debug -.*.o +.o$ ^misc ^parser.c diff -r 46e23ce07981 -r 5cc836e06759 .hgtags --- a/.hgtags Wed Nov 09 16:40:38 2011 -0800 +++ b/.hgtags Wed Jul 25 11:47:12 2012 -0700 @@ -2,3 +2,5 @@ d4ce82194b640f863a817fdd085f06b19b232005 0.1 0000000000000000000000000000000000000000 0.1 c5cbe1eb96f4d641875d0557022f08feefe20004 0.0.1 +822094314703a5033c644a9c6cf5627ffaeaa0a9 0.1.0 +d3b6f9ccbe20b7b7c89751be98be375a24b68b88 0.1.1 diff -r 46e23ce07981 -r 5cc836e06759 INSTALL --- a/INSTALL Wed Nov 09 16:40:38 2011 -0800 +++ b/INSTALL Wed Jul 25 11:47:12 2012 -0700 @@ -11,6 +11,7 @@ - TinyCDB (http://www.corpit.ru/mjt/tinycdb.html) - GNU make (http://www.gnu.org/software/make/) + - Lua (http://www.lua.org/) If available, install these dependencies from your OS packaging system of choice. diff -r 46e23ce07981 -r 5cc836e06759 LICENSE --- a/LICENSE Wed Nov 09 16:40:38 2011 -0800 +++ b/LICENSE Wed Jul 25 11:47:12 2012 -0700 @@ -1,5 +1,5 @@ -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff -r 46e23ce07981 -r 5cc836e06759 Makefile --- a/Makefile Wed Nov 09 16:40:38 2011 -0800 +++ b/Makefile Wed Jul 25 11:47:12 2012 -0700 @@ -12,17 +12,57 @@ .PHONY : parsegraph profile clean clobber release -# Ubuntu: perftools doesn't currently register a .pc file + +######################################################################## +### P L A T F O R M T A R G E T S +######################################################################## + +# I hate this. Tools like pkg-config are supposed to make finding +# libraries easy across platforms. They only work when everyone names +# the libraries the same thing, unfortunately. (Why name the libs with +# the version number, when pkg-config supports built in versioning??) +# And no, I'm not going to use autoconf, which just takes your build +# problems and makes a whole bunch of new baby problems for you to +# deal with. Gaaarrgghghh. +# +# If you have problems building volta, manually pass the correct CFLAGS +# and LIBS to the 'make' command line for your platform. +# +# The following works for OSX with macports or homebrew (10.6/10.7), +# FreeBSD 8.x and 9.x, and Ubuntu 11.10 and 12.04. 
+ +# Ubuntu +# - perftools doesn't currently register a .pc file at all +# - lua is called 'lua5.1' ifeq ($(UNAME), Linux) -debug: CFLAGS += $(CFLAGS_DEBUG) -debug: LIBS += -lprofiler +volta: CFLAGS += -L/usr/lib -I/usr/include +volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua5.1) +volta: LIBS += $(shell pkg-config --libs-only-l lua5.1) +debug: CFLAGS += $(CFLAGS_DEBUG)\ + $(shell pkg-config --cflags-only-I --libs-only-L lua5.1) +debug: LIBS += $(shell pkg-config --libs-only-l lua5.1) -lprofiler + +# FreeBSD +# - lua is called 'lua-5.1' +else ifeq ($(UNAME), FreeBSD) +volta: CFLAGS += -L/usr/local/lib -I/usr/local/include +volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua-5.1) +volta: LIBS += $(shell pkg-config --libs-only-l lua-5.1) +debug: CFLAGS += $(CFLAGS_DEBUG)\ + $(shell pkg-config --cflags-only-I --libs-only-L lua-5.1 $(DEPS_DEBUG)) +debug: LIBS += $(shell pkg-config --libs-only-l lua-5.1 $(DEPS_DEBUG)) + +# Darwin, everyone else (best guess?) +# - lua is called 'lua', hopefully! else -volta: CFLAGS += -L/opt/local/lib -I/opt/local/include -L/usr/local/lib -I/usr/local/include +volta: CFLAGS += $(shell pkg-config --cflags-only-I --libs-only-L lua) +volta: LIBS += $(shell pkg-config --libs-only-l lua) debug: CFLAGS += $(CFLAGS_DEBUG)\ - $(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG)) -debug: LIBS += $(shell pkg-config --libs-only-l $(DEPS_DEBUG)) + $(shell pkg-config --cflags-only-I --libs-only-L lua $(DEPS_DEBUG)) +debug: LIBS += $(shell pkg-config --libs-only-l lua $(DEPS_DEBUG)) endif + # Fix parser line number display in debug mode ifeq (,$(findstring debug,$(MAKECMDGOALS))) RAGEL_FLAGS = -LCe -G2 diff -r 46e23ce07981 -r 5cc836e06759 README --- a/README Wed Nov 09 16:40:38 2011 -0800 +++ b/README Wed Jul 25 11:47:12 2012 -0700 @@ -5,13 +5,14 @@ What is volta? 
-------------- -Volta is a high-performance, low-resource URI rewriter for use with the +Volta is a high performance, low resource URI rewriter for use with the Squid caching proxy server (http://www.squid-cache.org/.) With it, you can dynamically alter URI requests that pass through Squid based on various criteria. It uses a state machine to parse URIs and rules, and a constant database -to store and access those rules. +to store and access those rules. It can then either perform conditional +rewrites internally, or by evaluating Lua scripts. Why is it called "volta"? @@ -36,22 +37,22 @@ You must enable url rewriting from within the squid.conf file. - url_rewrite_program /usr/local/bin/volta + url_rewrite_program /usr/local/bin/volta ... and that's it. You may need some additional customization, like where the volta database is stored on disk: - url_rewrite_program /usr/local/bin/volta -f /var/db/squid/volta.db + url_rewrite_program /usr/local/bin/volta -f /var/db/squid/volta.db Busy servers: -Make sure rewrite_concurrency is disabled, volta is single threaded. -Instead, just add more volta children. They are lightweight, so load em -up. A proxy at my $DAYJOB is in use by around 450 people, and we get by -nicely with 10 volta children. +While Volta is lightweight enough to simply increase the amount of +rewriter children, it also supports Squid's rewrite_concurrency format +if you find that to be more efficient for your environment. Adjust to +taste. - url_rewrite_concurrency 0 - url_rewrite_children 10 + url_rewrite_concurrency 60 + url_rewrite_children 5 Using volta @@ -62,7 +63,7 @@ Volta reads its rewrite rules from a local database. You can create the rules in a text editor, then convert it to the database like so: - % volta -c rules.txt + % volta -c rules.txt You'll be left with a "volta.db" file in the current directory. Put it wherever you please, and use the -f flag to point to it. 
@@ -74,18 +75,20 @@ Volta's rule syntax is designed to be easy to parse by humans and machines. Blank lines are skipped, as is any line that starts with the '#' character, so you can keep the ascii version of your rules well -documented and in version control. +documented and in version control. There is no practical limit on the +number of rules in this database. When compiling the ruleset into the database format, volta detects malformed rules and stops if there are any problems, leaving your -original database intact. You can change the ruleset and recompile the -database at any time while volta is running, and the new rules will take -affect within about 10 seconds. No need to restart squid! +original database intact. You can change the ruleset at any time while +volta is running, and the new rules will take effect within about 10 +seconds. No need to restart squid! There are two types of rules -- positive matches, and negative matches. -Positive matches cause the rewrite, negative matches allow the original -request to pass. Rule order is consistent, top-down, first match wins. -Fields are separated by any amount of whitespace (spaces or tabs.) +Positive matches cause the rewrite, negative matches intentionally allow +the original request to pass. Rule order is consistent, top-down, first +match wins. Fields are separated by any amount of whitespace (spaces or +tabs.) ### Positive matches: @@ -102,39 +105,43 @@ Second field: the path to match. - This can be an exact match ('/path/to/something.html'), a regular - expression ('\.(jpg|gif|png)$'), or a single '*' to match for any - path. Regular expressions are matches without case sensitivity. There - is currently no support for capturing, though this may be added in a - future release. + This can be an exact match ('/path/to/something.html'), a regular + expression ('\.(jpg|gif|png)$'), or a single '*' to match for any + path. Regular expressions are matched without case sensitivity.
There + is currently no internal support for captures, though you can use + a Lua rule (see below) for more complex processing. Third field: The redirect code and url to rewrite to. - Any pieces of a url that are omitted are automatically replaced with - the original request's element -- the exception is a hostname, which - is required. If you omit a redirect code, the URL rewrite is - transparent to the client. You can attach a 301: or 302: prefix to - cause a permanent or temporary (respectively) redirect response to be - sent, instead. + Any pieces of a url that are omitted are automatically replaced + with the original request's element -- the exception is a hostname, + which is required. If you omit a redirect code, the URL rewrite is + transparent to the client. You can attach a 301: or 302: prefix to + cause a permanent or temporary code to be respectively sent, instead. + + If you require more complex processing than what volta provides + internally, you can also specify a path to a Lua script (prefixed + with 'lua:'.) See the 'Lua rules' section of this README for more + information. ### Negative matches: First field: the hostname to match. - See above -- all the same rules apply. + See above -- all the same rules apply. Second field: the path to match. - See above -- all the same rules apply. + See above -- all the same rules apply. - Third field: the 'negative' marker. + Third field: the 'negative' marker. - This is simply the '-' character, that signals to volta that this is - a negative matching rule. + This is simply the '-' character, that signals to volta that this is + a negative matching rule. You can easily test your rules by running volta on the command line, and @@ -148,24 +155,83 @@ google.com * 302:https://www.google.com - This will redirect the request "http://www.google.com/search?q=test" to - "https://www.google.com/search?q=test". 
+ This will redirect the request "http://www.google.com/search?q=test" to + "https://www.google.com/search?q=test". Transparently alter all uploaded images on imgur to be my face: :) - i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg + i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg Expand a local, non qualified hostname to a FQDN (useful alongside the 'dns_defnames' squid setting to enforce browser proxy behaviors): - local-example * local-example.company.com + local-example * local-example.company.com Cause all blog content except for 2011 posts to permanently redirect to an archival page: - martini.nu /blog/2011 - - martini.nu /blog 301:martini.nu/content-archived.html + martini.nu /blog/2011 - + martini.nu /blog 301:martini.nu/content-archived.html + + +Send all requests to reddit/r/WTF/* through a lua script for further processing. + + reddit.com /r/wtf lua:/path/to/a/lua-script.lua + + +Turn off rewriting for specific network segment or IP address: + + Squid has this ability built in -- see the 'url_rewrite_access' setting. + Alternatively, do the checks in lua. + + + +Lua Rules +--------- + +Volta has an embedded Lua interpreter that you can use to perform all +kinds of conditional rewrites. Read more about the syntax of the Lua +language here: http://www.lua.org/manual/5.1/ + +### Loading a script + +To use a Lua script, prefix the rewrite target of a volta rule with +'lua:'. The rest of the target is then treated as a path to the script. +(You can find an example in the Examples section of this README.) +You can specify a path to either an ascii file, or Lua bytecode. (If +speed is an absolute premium, I'm seeing around a 25% performance +increase by using Lua bytecode files.) + +You can use different scripts for different rules, or use the same +script across any number of separate rules. + +There is no need to restart squid when modifying Lua rules. Changes are +seen immediately. 
+ + +### Environment + +* Global variable declarations are disabled, so scripts can't accidentally stomp on each other. All variables must be declared with the 'local' keyword. +* There is a global table called 'shared' you may use if you want to share data between separate scripts, or remember things in-between rule evaluations. +* The details of the request can be found in a table, appropriately named 'request'. HTTP scheme, host, path, port, method, client_ip, and domain are all available by default from the request table. +* Calling Lua's print() function emits debug information to stderr. Use a debug level of 2 or higher to see it. + + +### Return value + +The return value of the script is sent unmodified to squid, which should +be a URL the request is rewritten to, with an optional redirect code +prefix (301 or 302.) + +Omitting a return value, or returning 'nil' has the same effect as a negative +rule match -- the original request is allowed through without any rewrite. + + +An extremely simple Lua rule script can be found in the 'examples' +directory, distributed with volta. + + diff -r 46e23ce07981 -r 5cc836e06759 accept_loop.c --- a/accept_loop.c Wed Nov 09 16:40:38 2011 -0800 +++ b/accept_loop.c Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff -r 46e23ce07981 -r 5cc836e06759 db.c --- a/db.c Wed Nov 09 16:40:38 2011 -0800 +++ b/db.c Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -122,7 +122,9 @@ rule = parse_rule( dbline->val ); if ( rule == NULL || ( rule->negate == 1 && rule->host != NULL ) || - ( rule->negate == 0 && rule->host == NULL )) { + ( rule->negate == 0 && rule->host == NULL ) || + ( rule->lua == 1 && rule->luapath == NULL ) + ) { debug( 0, LOC, "Invalid rule (line %d), stopping: %s", linenum, buf ); error = 1; diff -r 46e23ce07981 -r 5cc836e06759 db.h --- a/db.h Wed Nov 09 16:40:38 2011 -0800 +++ b/db.h Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff -r 46e23ce07981 -r 5cc836e06759 examples/lua-example.lua --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/lua-example.lua Wed Jul 25 11:47:12 2012 -0700 @@ -0,0 +1,18 @@ + +-- examine the request. +-- this emits to stderr! +-- +for k,v in pairs( request ) do print( string.format("request.%-6s --> %s", k, v) ) end + +-- all variables need to be declared using local scoping! +-- +local redir = 302 + +-- temporary redirect to a different site, every other second. +-- why would you want to do this? you probably wouldn't. just illustrating +-- how easy it is to do custom stuff. +-- +if os.time() % 2 == 0 then + return string.format( "%d:http://example.com%s", redir, request.path ) +end + diff -r 46e23ce07981 -r 5cc836e06759 examples/rules.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/rules.txt Wed Jul 25 11:47:12 2012 -0700 @@ -0,0 +1,27 @@ + +#------------------------------------------------------------------------------ +# This is an example volta rules file. 
In order for volta to use it, +# it needs to be converted to a database for fast lookup. See the +# README file, distributed with volta, for instructions. +#------------------------------------------------------------------------------ + +# Force all requests to Google to use SSL +# http://www.google.com/search?q=test --> https://www.google.com/search?q=test +google.com * 302:https://www.google.com + +# Transparently alter all uploaded images on imgur to be my face: :) +i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg + +# Expand a local, non qualified hostname to a FQDN (useful alongside the +# 'dns_defnames' squid setting to enforce browser proxy behaviors): +local-example * local-example.company.com + +# Cause all blog content except for 2011 posts to permanently redirect to +# an archival page. +martini.nu /blog/2011 - +martini.nu /blog 301:martini.nu/content-archived.html + +# Send all requests to reddit/r/WTF/* through a lua script for +# further processing. +reddit.com /r/wtf lua:examples/lua-example.lua + diff -r 46e23ce07981 -r 5cc836e06759 examples/squid_output.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/squid_output.txt Wed Jul 25 11:47:12 2012 -0700 @@ -0,0 +1,14 @@ +http://www.google.com/search?q=test 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +https://www.google.com/search?q=test 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://www.imgur.com/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://i.imgur.com/whatever.jpg 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +googleusercontent.com:443 10.1.1.40/- - CONNECT myip=10.1.1.1 myport=3128 +http://local-example/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://www.martini.nu/projects/volta/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://www.martini.nu/blog/2011/some-post.html 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +http://www.martini.nu/blog/2009/some-old-post.html 
10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +4 http://www.martini.nu/projects/volta/ 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +5 http://i.imgur.com/whatever.jpg 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +6 http://www.reddit.com/r/WTF/?wooyup=1 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 +7 http://www.example.com/yup 10.1.1.30/- - GET myip=10.1.1.1 myport=3128 diff -r 46e23ce07981 -r 5cc836e06759 lua.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lua.c Wed Jul 25 11:47:12 2012 -0700 @@ -0,0 +1,173 @@ +/* vim: set noet nosta sw=4 ts=4 ft=c : */ +/* +Copyright (c) 2011-2012, Mahlon E. Smith +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Mahlon E. Smith nor the names of his + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "volta.h" +#include "lua.h" + +/* + * Emit a lua error if a variable is declared without the use of 'local'. + * Since we only start up one lua interpreter, we want to ensure the + * global namespace isn't polluted by random scripts. + * + */ +int +luaV_globalindex( lua_State *lua ) +{ + return luaL_error( lua, "attempt to set global var '%s' (use local!)", lua_tostring(lua, -2) ); +} + + +/* + * Override the regular lua print() function with one that formats the string + * in the same fashion as the rest of volta output -- and goes to stderr instead + * of stdout, which would just confuse squid. + * + */ +int +luaV_print( lua_State *lua ) +{ + lua_Debug info; + int i = 0; + + /* get the file and line number print() was called from. */ + lua_getstack( lua, 1, &info ); + lua_getinfo( lua, "Sl", &info ); + + /* output each argument */ + for( i = 1; i <= lua_gettop( lua ); i++ ) + debug( 2, info.short_src, info.currentline, "%s\n", lua_tostring(lua, i) ); + + return( 1 ); +} + + +/* + * Create the initial lua interpreter, and configure the default environment. + * + */ +lua_State * +luaV_setup( void ) +{ + lua_State *lua = luaL_newstate(); + luaL_openlibs( lua ); /* include lua standard libraries */ + + /* Predeclare the request table. 
*/ + lua_pushstring( lua, "request" ); + lua_createtable( lua, 0, 7 ); + lua_settable( lua, LUA_GLOBALSINDEX ); + + /* Predeclare a table for shared data */ + lua_pushstring( lua, "shared" ); + lua_newtable( lua ); + lua_settable( lua, LUA_GLOBALSINDEX ); + + /* replace the lua print() function with one that calls debug() instead */ + lua_register( lua, "print", luaV_print ); + + /* Restrict additional globals. */ + lua_createtable( lua, 0, 1); + lua_pushcfunction( lua, luaV_globalindex ); + lua_setfield( lua, -2, "__newindex"); + lua_pushboolean( lua, 0 ); + lua_setfield( lua, -2, "__metatable"); + lua_setmetatable( lua, LUA_GLOBALSINDEX ); + + lua_settop( lua, 0 ); /* wipe the stack */ + return( lua ); +} + + +/* + * Convert the request struct into a lua table, and inject it into the interpreter. + * + */ +void +luaV_setup_request( parsed *request ) +{ + lua_getfield( v.lua, LUA_GLOBALSINDEX, "request" ); + lua_pushstring( v.lua, request->scheme ); + lua_setfield( v.lua, 1, "scheme" ); + lua_pushstring( v.lua, request->host ); + lua_setfield( v.lua, 1, "host" ); + lua_pushstring( v.lua, request->path ); + lua_setfield( v.lua, 1, "path" ); + lua_pushstring( v.lua, request->port ); + lua_setfield( v.lua, 1, "port" ); + lua_pushstring( v.lua, request->method ); + lua_setfield( v.lua, 1, "method" ); + lua_pushstring( v.lua, request->client_ip ); + lua_setfield( v.lua, 1, "client_ip" ); + lua_pushstring( v.lua, request->tld ); + lua_setfield( v.lua, 1, "domain" ); + + return; +} + + +/* + * Given a request struct and a path to a lua script (or bytecode), + * execute the script within the global lua interpreter, and return + * a pointer to the string it generated (or NULL). 
+ * + */ +char * +luaV_run( parsed *request, char *path ) +{ + int lua_err = 0; + + /* provide access to the request struct */ + luaV_setup_request( request ); + + /* suck in the lua chunk(s) */ + debug( 4, LOC, "Loading Lua code from '%s'\n", path ); + lua_err = luaL_loadfile( v.lua, path ); + if ( lua_err ) { + debug( 2, LOC, "Unable to run lua rule: %s\n", lua_tostring(v.lua, -1) ); + lua_settop( v.lua, 0 ); + return( NULL ); + } + + /* execute the lua, expecting one value to be returned. */ + lua_err = lua_pcall( v.lua, 0, 1, 0 ); + if ( lua_err ) { + debug( 2, LOC, "Unable to run lua rule: %s\n", lua_tostring(v.lua, -1) ); + lua_settop( v.lua, 0 ); + return( NULL ); + } + + /* get the last element in the stack, which should be the script's return value. */ + char *rewrite = (char *)lua_tostring( v.lua, -1 ); + + debug( 5, LOC, "Lua is currently consuming %dKB of memory\n", lua_gc(v.lua, LUA_GCCOUNT, 0) ); + lua_settop( v.lua, 0 ); /* reset the stack. */ + + return( rewrite ); +} + diff -r 46e23ce07981 -r 5cc836e06759 lua.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lua.h Wed Jul 25 11:47:12 2012 -0700 @@ -0,0 +1,45 @@ +/* vim: set noet nosta sw=4 ts=4 ft=c : */ +/* +Copyright (c) 2011-2012, Mahlon E. Smith +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Mahlon E. Smith nor the names of his + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _LUA_H +#define _LUA_H + +/* + * Function prototypes + * + */ +int luaV_globalindex( lua_State * ); +int luaV_print( lua_State * ); +lua_State *luaV_setup( void ); +void luaV_setup_request( parsed * ); +char *luaV_run( parsed *, char * ); + +#endif + diff -r 46e23ce07981 -r 5cc836e06759 main.c --- a/main.c Wed Nov 09 16:40:38 2011 -0800 +++ b/main.c Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "volta.h" #include "db.h" +#include "lua.h" struct v_globals v; @@ -53,6 +54,7 @@ v.timer.db_lastcheck = 0; v.timer.start = time( NULL ); v.timer.lines = 0; + v.lua = luaV_setup(); /* get_opt vars */ int opt = 0; @@ -131,6 +133,7 @@ void shutdown_actions( void ) { + lua_close( v.lua ); cdb_free( &v.db ); close( v.db_fd ); report_speed(); diff -r 46e23ce07981 -r 5cc836e06759 parser.rl --- a/parser.rl Wed Nov 09 16:40:38 2011 -0800 +++ b/parser.rl Wed Jul 25 11:47:12 2012 -0700 @@ -33,6 +33,9 @@ #define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p; #define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start ); +#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length ) +/* #define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */ + /* * Tokenize an incoming line from squid, returning a parsed and populated * structure to make redirection decisions against. This pointer should @@ -79,11 +82,8 @@ %%{ machine request_parser; - action channel_id_found { - debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" ); - fbreak; - } - + action chid_start { MARK_S(chid) } + action chid_finish { MARK_E(chid) } action scheme_start { MARK_S(scheme) } action scheme_finish { MARK_E(scheme) } action host_start { MARK_S(host) } @@ -141,7 +141,7 @@ ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' 
digit{1,3}; ipv6 = ( xdigit | ':' )+; - channel_id = ( digit+ space ) %channel_id_found; + channel_id = ( digit+ space ) >chid_start %chid_finish; scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish @!scheme_error; host = ( hostname | ipv4 ) >host_start %host_finish @!host_error; port = ( ':' digit{1,5} ) >port_start %port_finish; @@ -168,12 +168,16 @@ /* state machine */ %% write exec; - /* If we were given an invalid line, bail early */ + /* + * If we were given an invalid line, bail early after remembering + * the channel ID. + * + */ if ( cs < %%{ write first_final; }%% ) { - free( p_parsed ), p_parsed = NULL; debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 ); debug( 4, LOC, "%s", line ); - return( NULL ); + p_parsed->chid = COPY_STR( chid ); + return( p_parsed ); } debug( 6, LOC, "%s", line ); @@ -208,18 +212,21 @@ action redir_start { MARK_S(redir) } action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon action negate_finish { p_parsed->negate = 1; } + action luapath_start { p_parsed->lua = 1; MARK_S(luapath) } - action scheme_start { MARK_S(scheme) } - action scheme_finish { MARK_E(scheme) } - action host_start { MARK_S(host) } - action host_finish { MARK_E(host) } - action port_start { p_parsed->tokens.port_start = p+1; } # strip leading colon - action port_finish { MARK_E(port) } - action path_start { MARK_S(path) } - action path_finish { MARK_E(path) } + action scheme_start { MARK_S(scheme) } + action scheme_finish { MARK_E(scheme) } + action host_start { MARK_S(host) } + action host_finish { MARK_E(host) } + action port_start { p_parsed->tokens.port_start = p+1; } # strip leading colon + action port_finish { MARK_E(port) } + action path_start { MARK_S(path) } + action path_finish { MARK_E(path) } + action luapath_finish { MARK_E(luapath) } - action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); } - action host_error { debug( 3, LOC, "Unable to parse the rule 
hostname.\n" ); } + action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); } + action host_error { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); } + action luapath_error { debug( 3, LOC, "Unable to parse the lua path.\n" ); } host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum ); path_segment = '/' ( any - space )*; @@ -230,7 +237,8 @@ ipv6 = ( xdigit | ':' )+; negate = ( '-' ) %negate_finish; - path_re = ( any - space )+ >match_start %match_finish @!match_error; + path_re = ( any - space )+ >match_start %match_finish @!match_error; + luapath = ( any - space )+ >luapath_start %luapath_finish @!luapath_error; redir = ( ('301' | '302') ':' ) >redir_start %redir_finish; scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish; @@ -239,8 +247,9 @@ path = path_segment* >path_start %path_finish; rewrite = ( redir? scheme? host port? path? ); + luarule = ( 'lua:' luapath ); - main := path_re sep ( rewrite | negate ); + main := path_re sep ( rewrite | negate | luarule ); }%% /* state machine */ @@ -302,12 +311,10 @@ hostname = host_component ( '.' host_component )* '.'?; ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' 
digit{1,3}; token = ( any - space )+; - redir = ( digit{3} ':' ); host = ( hostname | ipv4 ); key = ( host | '*' ) >key_start %key_finish @!key_error; val = ( token sep token ) >val_start %val_finish @!val_error; - # regex rewrite or negate main:= key sep val '\n'; }%% @@ -354,8 +361,11 @@ return( NULL ); } + p_parsed->valid = 0; p_parsed->type = 0; p_parsed->negate = 0; + p_parsed->lua = 0; + p_parsed->chid = NULL; p_parsed->path_re = NULL; p_parsed->redir = NULL; p_parsed->scheme = NULL; @@ -366,7 +376,9 @@ p_parsed->user = NULL; p_parsed->method = NULL; p_parsed->client_ip = NULL; + p_parsed->luapath = NULL; + p_parsed->tokens.chid_start = NULL; p_parsed->tokens.path_re_start = NULL; p_parsed->tokens.redir_start = NULL; p_parsed->tokens.scheme_start = NULL; @@ -375,6 +387,8 @@ p_parsed->tokens.path_start = NULL; p_parsed->tokens.meth_start = NULL; p_parsed->tokens.c_ip_start = NULL; + p_parsed->tokens.luapath_start = NULL; + p_parsed->tokens.chid_length = 0; p_parsed->tokens.path_re_length = 0; p_parsed->tokens.redir_length = 0; p_parsed->tokens.scheme_length = 0; @@ -383,6 +397,7 @@ p_parsed->tokens.path_length = 0; p_parsed->tokens.meth_length = 0; p_parsed->tokens.c_ip_length = 0; + p_parsed->tokens.luapath_length = 0; return p_parsed; } @@ -403,6 +418,7 @@ free( p_parsed->port ); if ( p_parsed->type == REQUEST ) { + free( p_parsed->chid ); free( p_parsed->tld ); free( p_parsed->method ); free( p_parsed->client_ip ); @@ -411,6 +427,7 @@ if ( p_parsed->type == RULE ) { free( p_parsed->path_re ); free( p_parsed->redir ); + free( p_parsed->luapath ); } free( p_parsed ), p_parsed = NULL; @@ -419,9 +436,6 @@ } -#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length ) -/* #define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */ - /* * Take the previously parsed token locations and copy them into the request struct. 
* @@ -435,6 +449,8 @@ p_parsed->port = COPY_STR( port ); if ( p_parsed->type == REQUEST ) { + p_parsed->valid = 1; + p_parsed->chid = COPY_STR( chid ); p_parsed->method = COPY_STR( meth ); p_parsed->client_ip = COPY_STR( c_ip ); /* p_request->client_ip = COPY_IP4( c_ip ); */ @@ -446,6 +462,7 @@ if ( p_parsed->type == RULE ) { p_parsed->path_re = COPY_STR( path_re ); p_parsed->redir = COPY_STR( redir ); + p_parsed->luapath = COPY_STR( luapath ); } return; diff -r 46e23ce07981 -r 5cc836e06759 process.c --- a/process.c Wed Nov 09 16:40:38 2011 -0800 +++ b/process.c Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "volta.h" #include "db.h" +#include "lua.h" /* @@ -47,7 +48,7 @@ /* If request parsing failed, return a blank line to squid to allow the request to pass through unmolested. */ - if ( p_request == NULL ) + if ( p_request == NULL || p_request->valid == 0 ) return pass( p_request, rule ); /* @@ -82,16 +83,35 @@ /* avoid trivial redirect loops */ if ( ( rule->redir ) && - ( rule->scheme == NULL || ( strcmp(p_request->scheme, rule->scheme) == 0) ) && - ( rule->path == NULL || ( strcmp(p_request->path, rule->path) == 0) ) && - ( strcmp( p_request->host, rule->host) == 0 ) + ( rule->scheme == NULL || ( p_request->scheme && ( strcmp(p_request->scheme, rule->scheme) == 0) )) && + ( rule->path == NULL || ( strcmp(p_request->path, rule->path) == 0) ) && + ( strcmp( p_request->host, rule->host ) == 0 ) ) { debug( 2, LOC, "Potential rewrite loop, skipping rewrite.\n" ); return pass( p_request, rule ); } - /* otherwise, perform the rewrite. */ - rewrite( p_request, rule ); + /* At this point we know we'll be doing a rewrite. 
*/ + + /* Pass the request to lua for processing if we saw a 'lua:' tag. */ + if ( rule->lua == 1 ) { + char *rewrite_string = luaV_run( p_request, rule->luapath ); + + /* the script returned nil, or otherwise had an error. */ + if ( rewrite_string == NULL ) return pass( p_request, rule ); + + /* send squid the lua return value. */ + if ( v.debugmode < 5 ) { + if ( p_request->chid ) printf( "%s", p_request->chid ); + puts( rewrite_string ); + fflush( stdout ); + } + } + + /* otherwise, perform the rewrite internally. */ + else { + rewrite( p_request, rule ); + } finish_parsed( rule ); finish_parsed( p_request ); @@ -106,14 +126,18 @@ void pass( parsed *request, parsed *rule ) { - finish_parsed( rule ); - finish_parsed( request ); + if ( v.debugmode >= 5 ) { + finish_parsed( rule ); + finish_parsed( request ); + return; + } - if ( v.debugmode >= 5 ) return; - + if ( request && request->chid ) printf( "%s", request->chid ); printf( "\n" ); fflush( stdout ); + finish_parsed( rule ); + finish_parsed( request ); return; } @@ -127,6 +151,7 @@ { if ( rule == NULL || v.debugmode >= 5 ) return; + if ( request->chid ) printf( "%s", request->chid ); if ( rule->redir ) printf( "%s:", rule->redir ); printf( "%s%s", (rule->scheme ? rule->scheme : request->scheme), rule->host ); if ( rule->port ) printf( ":%s", rule->port ); diff -r 46e23ce07981 -r 5cc836e06759 util.c --- a/util.c Wed Nov 09 16:40:38 2011 -0800 +++ b/util.c Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff -r 46e23ce07981 -r 5cc836e06759 volta.h --- a/volta.h Wed Nov 09 16:40:38 2011 -0800 +++ b/volta.h Wed Jul 25 11:47:12 2012 -0700 @@ -1,6 +1,6 @@ /* vim: set noet nosta sw=4 ts=4 ft=c : */ /* -Copyright (c) 2011, Mahlon E. 
Smith +Copyright (c) 2011-2012, Mahlon E. Smith All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -57,8 +57,6 @@ #include */ -#include - #ifdef DEBUG #include #endif @@ -75,6 +73,11 @@ /* Aid debugging */ #define LOC __FILE__, __LINE__ +#include +#include +#include +#include + /* * a global struct for easy access to common vars @@ -85,6 +88,7 @@ char dbname[128]; /* path to database file */ short int db_fd; /* opened db file descriptor */ struct cdb db; /* the cdb struct */ + lua_State *lua; /* the lua interpreter */ struct { time_t start; /* start time */ @@ -116,8 +120,11 @@ * */ typedef struct parsed { + unsigned short int valid; unsigned short int type; unsigned short int negate; + unsigned short int lua; + char *chid; char *path_re; char *redir; char *scheme; @@ -129,8 +136,10 @@ char *client_ip; char *user; char *method; + char *luapath; struct { + char *chid_start; char *path_re_start; char *redir_start; char *scheme_start; @@ -139,6 +148,8 @@ char *path_start; char *meth_start; char *c_ip_start; + char *luapath_start; + unsigned short int chid_length; unsigned short int path_re_length; unsigned short int redir_length; unsigned short int scheme_length; @@ -147,6 +158,7 @@ unsigned int path_length; unsigned short int meth_length; unsigned short int c_ip_length; + unsigned short int luapath_length; } tokens; } parsed;