- 1st pass at documentation with the README.
- Attempt to detect trivial redirect loops.
- Loosen the request parser enough to just accept URLs, so you can
easily test rewrite behavior by just running volta and pasting URLs.
- Alter the whitelist "negative" rule syntax to more closely match
normal rules. Rename references to negative rules from "whitelist"
rules, to avoid confusion.
--- a/Makefile Mon Nov 07 10:43:09 2011 -0800
+++ b/Makefile Wed Nov 09 15:54:37 2011 -0800
@@ -17,7 +17,7 @@
debug: CFLAGS += $(CFLAGS_DEBUG)
debug: LIBS += -lprofiler
else
-volta: CFLAGS += -L/opt/local/lib -I/opt/local/include
+volta: CFLAGS += -L/opt/local/lib -I/opt/local/include -L/usr/local/lib -I/usr/local/include
debug: CFLAGS += $(CFLAGS_DEBUG)\
$(shell pkg-config --cflags-only-I --libs-only-L $(DEPS_DEBUG))
debug: LIBS += $(shell pkg-config --libs-only-l $(DEPS_DEBUG))
--- a/README Mon Nov 07 10:43:09 2011 -0800
+++ b/README Wed Nov 09 15:54:37 2011 -0800
@@ -3,14 +3,173 @@
=====
What is volta?
- - high performance / low resource redirector
+--------------
+
+Volta is a high performance, low resource URI rewriter for use with the
+Squid caching proxy server (http://www.squid-cache.org/). With it, you
+can dynamically alter URI requests that pass through Squid based on
+various criteria.
+
+It uses a state machine to parse URIs and rules, and a constant database
+to store and access those rules.
+
+
+Why is it called "volta"?
+-------------------------
-Why "volta"?
- - latin term, turn
+It's a type of old Italian music written in triple-time. Quick!
+
+
+How fast is it?
+---------------
+
+On a 2GHz Xeon 5130, it can process a million squid requests against
+10000 rules in less than 8 seconds, using about 800k of ram. On a
+1.8GHz Intel E4300, it can do it in 3 seconds.
+
+Your mileage may vary, but for almost all intents and purposes the answer
+is "definitely fast enough."
+
Configuring squid
+-----------------
+
+You must enable url rewriting from within the squid.conf file.
+
+ url_rewrite_program /usr/local/bin/volta
+
+... and that's it. You may need some additional customization, like where
+the volta database is stored on disk:
+
+ url_rewrite_program /usr/local/bin/volta -f /var/db/squid/volta.db
+
+Busy servers:
+
+Make sure url_rewrite_concurrency is disabled; volta is single-threaded.
+Instead, just add more volta children. They are lightweight, so load em
+up. A proxy at my $DAYJOB is in use by around 450 people, and we get by
+nicely with 10 volta children.
+
+ url_rewrite_concurrency 0
+ url_rewrite_children 10
+
Using volta
+-----------
-How to
+See the INSTALL file for instructions on how to compile volta.
+
+Volta reads its rewrite rules from a local database. You can create the
+rules in a text editor, then convert it to the database like so:
+
+ % volta -c rules.txt
+
+You'll be left with a "volta.db" file in the current directory. Put it
+wherever you please, and use the -f flag to point to it.
+
+
+Rule file syntax
+----------------
+
+Volta's rule syntax is designed to be easy to parse by humans and
+machines. Blank lines are skipped, as is any line that starts with the
+'#' character, so you can keep the ascii version of your rules well
+documented and in version control.
+
+When compiling the ruleset into the database format, volta detects
+malformed rules and stops if there are any problems, leaving your
+original database intact. You can change the ruleset at any time while
+volta is running, and the new rules will take effect within about 10
+seconds. No need to restart squid!
+
+There are two types of rules -- positive matches, and negative matches.
+Positive matches cause the rewrite, negative matches allow the original
+request to pass. Rule order is consistent, top-down, first match wins.
+Fields are separated by any amount of whitespace (spaces or tabs.)
+
+
+### Positive matches:
+
+ First field: the hostname to match.
+
+ You can use an exact hostname (www.example.com), or the top level
+ domain (tld) if you want to match everything under a specific host
+ (example.com.) You can also use a single '*' to match every request,
+ though this essentially bypasses a lot of what makes volta quick, it
+ is included for completeness. You may have an unlimited number of
+ rules per hostname. Hostnames are compared without case sensitivity.
+
+
+ Second field: the path to match.
+
+ This can be an exact match ('/path/to/something.html'), a regular
+ expression ('\.(jpg|gif|png)$'), or a single '*' to match for any
+ path. Regular expressions are matched without case sensitivity. There
+ is currently no support for capturing, though this may be added in
+ a future release.
+
+
+ Third field: The redirect code and url to rewrite to.
+ Any pieces of a url that are omitted are automatically replaced
+ with the original request's element -- the exception is a hostname,
+ which is required. If you omit a redirect code, the URL rewrite is
+ transparent to the client. You can attach a 301: or 302: prefix to
+ send a permanent or temporary redirect code, respectively, instead.
+
+
+### Negative matches:
+
+ First field: the hostname to match.
+
+ See above -- all the same rules apply.
+
+
+ Second field: the path to match.
+
+ See above -- all the same rules apply.
+
+
+ Third field: the 'negative' marker.
+
+ This is simply the '-' character, that signals to volta that this is
+ a negative matching rule.
+
+
+You can easily test your rules by running volta on the command line, and
+pasting URLs into it. Boost the debug level (-d4) if you're having any issues.
+
+
+Examples
+--------
+
+Rewrite all requests to Google to the SSL version:
+
+ google.com * 302:https://www.google.com
+
+ This will redirect the request "http://www.google.com/search?q=test" to
+ "https://www.google.com/search?q=test".
+
+
+Transparently alter all uploaded images on imgur to be my face: :)
+
+ i.imgur.com \.(gif|png|jpg)$ http://www.martini.nu/images/mahlon.jpg
+
+
+Expand a local, non qualified hostname to a FQDN (useful alongside the
+'dns_defnames' squid setting to enforce browser proxy behaviors):
+
+ local-example * local-example.company.com
+
+
+Cause all blog content except for 2011 posts to permanently redirect to
+an archival page:
+
+ martini.nu /blog/2011 -
+ martini.nu /blog 301:martini.nu/content-archived.html
+
+
+Turn off rewriting for specific network segment or IP address:
+
+ Squid has this ability built in -- see the 'url_rewrite_access' setting.
+
--- a/db.c Mon Nov 07 10:43:09 2011 -0800
+++ b/db.c Wed Nov 09 15:54:37 2011 -0800
@@ -80,13 +80,11 @@
unsigned short int
db_create_new( char *txt )
{
+ FILE *txt_f = NULL;
+ parsed *rule = NULL;
+ char buf[ LINE_BUFSIZE*10 ], tmpfile[25];
+ int tmp_fd, linenum = 0, parsed = 0, error = 0;
struct cdb_make cdbm;
-
- char buf[ LINE_BUFSIZE*10 ];
- char tmpfile[25];
- int tmp_fd;
- FILE *txt_f = NULL;
- int linenum = 0, parsed = 0;
struct db_input *dbline;
/* open temporary file */
@@ -112,19 +110,33 @@
/* skip blank lines and comments */
if ( strlen(buf) == 1 || buf[0] == '#' ) continue;
- /* validate and add! */
+ /* validate line */
dbline = parse_dbinput( buf );
if ( dbline == NULL ) {
- debug( 0, LOC, "Invalid rule (line %d), skipping: %s", linenum, buf );
- continue;
+ debug( 0, LOC, "Invalid rule (line %d), stopping: %s", linenum, buf );
+ error = 1;
+ break;
}
+ /* validate rule */
+ rule = parse_rule( dbline->val );
+ if ( rule == NULL ||
+ ( rule->negate == 1 && rule->host != NULL ) ||
+ ( rule->negate == 0 && rule->host == NULL )) {
+
+ debug( 0, LOC, "Invalid rule (line %d), stopping: %s", linenum, buf );
+ error = 1;
+ break;
+ }
+
+ /* looking good, add rule */
cdb_make_add( &cdbm, dbline->key, dbline->klen, dbline->val, dbline->vlen );
parsed++;
free( dbline->key );
free( dbline->val );
- free( dbline );
+ free( dbline ), dbline = NULL;
+ finish_parsed( rule ), rule = NULL;
}
/* write indexes */
@@ -132,13 +144,22 @@
cdb_make_finish( &cdbm );
close( tmp_fd );
- /* move cdb into place */
- if ( (rename( tmpfile, v.dbname )) == -1 ) {
- debug( 0, LOC, "Unable to move temp cdb into place: %s", strerror(errno) );
- return( 1 );
+ if ( error == 1 ) {
+ /* delete the tmp db on errors */
+ if ( (unlink( tmpfile )) != 0 ) {
+ debug( 0, LOC, "Unable to remove temp cdb: %s", strerror(errno) );
+ return( 1 );
+ }
+ }
+ else {
+ /* move cdb into place */
+ if ( (rename( tmpfile, v.dbname )) == -1 ) {
+ debug( 0, LOC, "Unable to move temp cdb into place: %s", strerror(errno) );
+ return( 1 );
+ }
+ debug( 0, LOC, "Added %d rules to %s.\n", parsed, v.dbname );
}
- debug( 0, LOC, "Added %d rules to %s.\n", parsed, v.dbname );
return( 0 );
}
@@ -169,7 +190,7 @@
vlen = cdb_datalen( &v.db );
/* pull the value from the db */
- if ( (val = calloc( vlen, sizeof(char) )) == NULL ) {
+ if ( (val = calloc( vlen+1, sizeof(char) )) == NULL ) {
debug( 5, LOC, "Unable to allocate memory for DB value storage: %s\n",
strerror(errno) );
return( NULL );
@@ -179,6 +200,7 @@
/* check it against the request */
debug( 4, LOC, "DB match for key '%s': %s\n", key, val );
rule = parse_rule( val );
+
free( val );
if ( rule != NULL ) {
if ( check_rule( rule, p_request ) == 0 ) {
--- a/parser.rl Mon Nov 07 10:43:09 2011 -0800
+++ b/parser.rl Wed Nov 09 15:54:37 2011 -0800
@@ -66,7 +66,7 @@
parsed *
parse_request( char *line )
{
- /* machine required vars */
+ /* machine required vars */
unsigned short int cs = 1;
char *p = line;
char *pe = p + strlen(p);
@@ -149,16 +149,20 @@
client_ip = ipv4 >c_ip_start %c_ip_finish @!c_ip_error;
method = upper+ >meth_start %meth_finish @!meth_error;
- SquidLine = (
- start: channel_id? -> Url,
- Url: scheme? host port? path? -> Client,
+ URL = (
+ start: channel_id? -> Url,
+ Url: scheme? host port? path? -> final
+ );
+
+ Squidvars = (
+ start:
Client: space client_ip '/' ( hostname | '-' ) -> User,
User: space pchar+ -> Method,
Method: space method -> KVPairs,
KVPairs: ( space any+ )? -> final
);
- main := SquidLine '\n';
+ main := URL Squidvars? '\n';
}%%
/* state machine */
@@ -186,7 +190,7 @@
parsed *
parse_rule( char *rewrite )
{
- /* machine required vars */
+ /* machine required vars */
unsigned short int cs = 1;
char *p = rewrite;
char *pe = p + strlen(p);
@@ -203,7 +207,7 @@
action match_finish { MARK_E(path_re) }
action redir_start { MARK_S(redir) }
action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon
- action wl_finish { p_parsed->wl = 1; }
+ action negate_finish { p_parsed->negate = 1; }
action scheme_start { MARK_S(scheme) }
action scheme_finish { MARK_E(scheme) }
@@ -225,18 +229,18 @@
ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
ipv6 = ( xdigit | ':' )+;
- whitelist = ( '-' sep ) %wl_finish;
- path_re = ( any - space )+ >match_start %match_finish @!match_error;
+ negate = ( '-' ) %negate_finish;
+ path_re = ( any - space )+ >match_start %match_finish @!match_error;
- redir = ( digit{3} ':' ) >redir_start %redir_finish;
- scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
- host = ( hostname | ipv4 ) >host_start %host_finish @!host_error;
- port = ( ':' digit{1,5} ) >port_start %port_finish;
- path = path_segment* >path_start %path_finish;
+ redir = ( ('301' | '302') ':' ) >redir_start %redir_finish;
+ scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
+ host = ( hostname | ipv4 ) >host_start %host_finish @!host_error;
+ port = ( ':' digit{1,5} ) >port_start %port_finish;
+ path = path_segment* >path_start %path_finish;
- rewrite = ( sep redir? scheme? host port? path? );
+ rewrite = ( redir? scheme? host port? path? );
- main := whitelist? path_re rewrite?;
+ main := path_re sep ( rewrite | negate );
}%%
/* state machine */
@@ -264,7 +268,7 @@
struct db_input *
parse_dbinput( char *line )
{
- /* machine required vars */
+ /* machine required vars */
unsigned short int cs = 1;
char *p = line;
char *pe = p + strlen(p);
@@ -301,9 +305,9 @@
redir = ( digit{3} ':' );
host = ( hostname | ipv4 );
- key = ( host | '*' ) >key_start %key_finish @!key_error;
- val = ( (token sep)? token (sep token)? ) >val_start %val_finish @!val_error;
- # wl regex rewrite
+ key = ( host | '*' ) >key_start %key_finish @!key_error;
+ val = ( token sep token ) >val_start %val_finish @!val_error;
+ # regex rewrite or negate
main:= key sep val '\n';
}%%
@@ -351,7 +355,7 @@
}
p_parsed->type = 0;
- p_parsed->wl = 0;
+ p_parsed->negate = 0;
p_parsed->path_re = NULL;
p_parsed->redir = NULL;
p_parsed->scheme = NULL;
--- a/process.c Mon Nov 07 10:43:09 2011 -0800
+++ b/process.c Wed Nov 09 15:54:37 2011 -0800
@@ -47,11 +47,8 @@
/* If request parsing failed, return a blank line to squid
to allow the request to pass through unmolested. */
- if ( p_request == NULL ) {
- out( "\n" );
- finish_parsed( p_request );
- return;
- }
+ if ( p_request == NULL )
+ return pass( p_request, rule );
/*
* Main rewrite logic.
@@ -78,14 +75,23 @@
if ( rule == NULL ) rule = find_rule( p_request->tld, p_request );
if ( rule == NULL ) rule = find_rule( "*", p_request );
- /* no matching rule still or whitelist rule? no need to rewrite anything. */
- if ( rule == NULL || rule->wl ) {
- out( "\n" );
+ /* no matching rule still or negated rule? no need to rewrite anything. */
+ if ( rule == NULL || rule->negate )
+ return pass( p_request, rule );
+
+ /* avoid trivial redirect loops */
+ if (
+ ( rule->redir ) &&
+ ( rule->scheme == NULL || ( strcmp(p_request->scheme, rule->scheme) == 0) ) &&
+ ( rule->path == NULL || ( strcmp(p_request->path, rule->path) == 0) ) &&
+ ( strcmp( p_request->host, rule->host) == 0 )
+ ) {
+ debug( 2, LOC, "Potential rewrite loop, skipping rewrite.\n" );
+ return pass( p_request, rule );
}
+
/* otherwise, perform the rewrite. */
- else {
- rewrite( p_request, rule );
- }
+ rewrite( p_request, rule );
finish_parsed( rule );
finish_parsed( p_request );
@@ -94,6 +100,25 @@
/*
+ * Allow the request to pass through without being rewritten.
+ * Hands both structs to finish_parsed(), then prints "\n" unless debugmode >= 5.
+ */
+void
+pass( parsed *request, parsed *rule )
+{
+ finish_parsed( rule );
+ finish_parsed( request );
+
+ if ( v.debugmode >= 5 ) return;
+
+ printf( "\n" );
+ fflush( stdout );
+
+ return;
+}
+
+
+/*
* Output a rewritten URL for squid.
*
*/
@@ -109,6 +134,7 @@
printf("\n");
fflush( stdout );
+
return;
}
--- a/util.c Mon Nov 07 10:43:09 2011 -0800
+++ b/util.c Wed Nov 09 15:54:37 2011 -0800
@@ -82,19 +82,6 @@
/*
- * Output to stdout for squid, unless the debug level is at or above 5.
- */
-void
-out( const char *str )
-{
- if ( v.debugmode >= 5 ) return;
- fprintf( stdout, "%s", str );
- fflush( stdout );
- return;
-}
-
-
-/*
* Given a string, reverse it in place.
*/
void
@@ -177,47 +164,6 @@
}
-/*
- * Read an entire file into memory, returning a pointer to the contents.
- * Returns NULL on error.
- *
- */
-char *
-slurp_file( char *file )
-{
- FILE *fh = NULL;
- char *contents = NULL;
- struct stat sb;
-
- if ( stat( file, &sb ) != 0 ) {
- debug( 1, LOC, "Unable to stat() file '%s': %s\n",
- file, strerror(errno) );
- return( NULL );
- }
-
- if ( (contents = malloc( sb.st_size + 1 )) == NULL ) {
- debug( 5, LOC, "Unable to allocate memory for file '%s': %s\n",
- file, strerror(errno) );
- return( NULL );
- }
-
- if ( (fh = fopen( file, "r" )) == NULL ) {
- debug( 1, LOC, "Could not open file for reading '%s': %s\n",
- file, strerror(errno) );
- return( NULL );
- }
-
- if ( fread( contents, sizeof(char), sb.st_size, fh ) != (unsigned int)sb.st_size ) {
- debug( 5, LOC, "Short read for file '%s'?: %s\n", file );
- fclose( fh );
- return( NULL );
- }
-
- fclose( fh );
- return( contents );
-}
-
-
/*
* Allocate memory and copy +length+ bytes (plus 1 for null) from the given
* +string+ into a new string, returning a pointer to it.
--- a/volta.h Mon Nov 07 10:43:09 2011 -0800
+++ b/volta.h Wed Nov 09 15:54:37 2011 -0800
@@ -117,7 +117,7 @@
*/
typedef struct parsed {
unsigned short int type;
- unsigned short int wl;
+ unsigned short int negate;
char *path_re;
char *redir;
char *scheme;
@@ -162,11 +162,9 @@
void usage( char * );
void debug( int, char *, int, const char *, ... );
-void out( const char * );
void reverse_str( char * );
void lowercase_str( char *, unsigned short int );
void report_speed( void );
-char *slurp_file( char * );
char *extend_line( char *, const char * );
char *copy_string_token( char *, unsigned short int );
/* struct in_addr *copy_ipv4_token( char *, unsigned short int ); */
@@ -181,6 +179,7 @@
void finish_parsed( parsed * );
void reset_results( parsed **, unsigned int );
unsigned short int check_rule( parsed *, parsed * );
+void pass( parsed *, parsed * );
void rewrite( parsed *, parsed * );
#endif