First iteration of volta actually doing something: process the request,
find the best matching rule, and rewrite the request.
Without the DB queries, volta was parsing over 750k requests a second.
With the queries and 1161 rules in place, it's currently down to 129.5.
Yikes. I may need to re-evaluate some design choices here.
--- a/.hgignore Sun Oct 23 22:59:59 2011 -0700
+++ b/.hgignore Mon Oct 31 17:17:07 2011 -0700
@@ -1,6 +1,6 @@
^volta$
^volta.db$
-^parser_graph.*
+^.*_graph.*
^tags$
.*debug
.*.o
--- a/Makefile Sun Oct 23 22:59:59 2011 -0700
+++ b/Makefile Mon Oct 31 17:17:07 2011 -0700
@@ -33,7 +33,7 @@
ifeq (,$(findstring debug,$(MAKECMDGOALS)))
RAGEL_FLAGS = -LCe -G2
else
- RAGEL_FLAGS = -Ce -G2
+ RAGEL_FLAGS = -Ces -G2
endif
# Ensure the parser is included in the objs list
@@ -52,10 +52,11 @@
strip $@
$(OBJS): volta.h
+db.o: db.h
# don't actually depend on parser.rl, so distributions don't require ragel
parser.c:
- ragel $(RAGEL_FLAGS) -s parser.rl -o $@
+ ragel $(RAGEL_FLAGS) parser.rl -o $@
debug: $(OBJS)
$(CC) $(CFLAGS) -o volta $(OBJS) $(LIBS)
@@ -65,11 +66,14 @@
### U T I L
########################################################################
-parsegraph: parser_graph.xml parser_graph.pdf parser_graph.dot
-parser_graph.xml parser_graph.pdf parser_graph.dot: parser.rl
- ragel -Vp parser.rl > parser_graph.dot
- ragel $(RAGEL_FLAGS) -x parser.rl -o parser_graph.xml
- dot -Tpdf parser_graph.dot > parser_graph.pdf
+parsegraph: squidline_graph.xml squidline_graph.pdf squidline_graph.dot tld_graph.xml tld_graph.pdf tld_graph.dot
+squidline_graph.xml squidline_graph.pdf squidline_graph.dot tld_graph.xml tld_graph.pdf tld_graph.dot: parser.rl
+ ragel -Vp -S squidline_parser parser.rl > squidline_graph.dot
+ ragel -Vp -S tld_parser parser.rl > tld_graph.dot
+ ragel $(RAGEL_FLAGS) -S squidline_parser -x parser.rl -o squidline_graph.xml
+ ragel $(RAGEL_FLAGS) -S tld_parser -x parser.rl -o tld_graph.xml
+ dot -Tpdf squidline_graph.dot > squidline_graph.pdf
+ dot -Tpdf tld_graph.dot > tld_graph.pdf
# export CPUPROFILE="cpu.prof" before running volta for cpu profiling
# export CPUPROFILE_FREQUENCY=100 (default)
@@ -84,12 +88,12 @@
rm -f parser.c volta.db ChangeLog tags
clean:
- -rm -f volta parser_graph.* *.o *.prof*
+ -rm -f volta *_graph.* *.o *.prof*
# requires BSD tar
release: VERSION = $(shell hg id -t | awk '{ print $$1 }')
-release: cleanall parser.c
+release: clobber parser.c
hg log --style changelog > ChangeLog
- tar -C .. --exclude misc --exclude .\* -s '/^volta/volta-$(VERSION)/' -czvf /tmp/volta-$(VERSION).tgz volta
+ tar -C .. --exclude misc --exclude .\* --exclude \*.rl -s '/^volta/volta-$(VERSION)/' -czvf /tmp/volta-$(VERSION).tgz volta
mv /tmp/volta-$(VERSION).tgz .
--- a/README Sun Oct 23 22:59:59 2011 -0700
+++ b/README Mon Oct 31 17:17:07 2011 -0700
@@ -2,7 +2,15 @@
Volta
=====
+What is volta?
+ - a high-performance redirector
+
Why "volta"?
+ - Latin term, turn
+
Configuring squid
+
Using volta
+How to
+
--- a/accept_loop.c Sun Oct 23 22:59:59 2011 -0700
+++ b/accept_loop.c Mon Oct 31 17:17:07 2011 -0700
@@ -93,7 +93,6 @@
/* stdin closed */
debug( 1, LOC, "End of stream\n" );
- report_speed();
return( 0 );
}
--- a/db.c Sun Oct 23 22:59:59 2011 -0700
+++ b/db.c Mon Oct 31 17:17:07 2011 -0700
@@ -71,6 +71,9 @@
}
}
+ /* initialize prepared statements */
+ if ( prepare_statements() != 0 ) return SQLITE_ERROR;
+
return( SQLITE_OK );
}
@@ -144,3 +147,143 @@
return( version );
}
+
+/*
+ * Prepare the DB statements used for rule lookups. Returns 0 when both
+ * compile, or 1 if either fails; each failure is checked and logged alone.
+ */
+unsigned short int
+prepare_statements( void )
+{
+	unsigned short int rv = 0;
+
+	if ( sqlite3_prepare_v2( v.db, DBSQL_GET_REWRITE_RULE, -1, &v.db_stmt.get_rewrite_rule, NULL ) != SQLITE_OK ) {
+		debug( 2, LOC, "Error preparing DB statement \"%s\": %s\n", DBSQL_GET_REWRITE_RULE, sqlite3_errmsg(v.db) );
+		rv = 1;
+	}
+
+	if ( sqlite3_prepare_v2( v.db, DBSQL_MATCH_REQUEST, -1, &v.db_stmt.match_request, NULL ) != SQLITE_OK ) {
+		debug( 2, LOC, "Error preparing DB statement \"%s\": %s\n", DBSQL_MATCH_REQUEST, sqlite3_errmsg(v.db) );
+		rv = 1;
+	}
+
+	return( rv );
+}
+
+
+/*
+ * Allocate a rewrite struct with every field cleared. Returns the new
+ * struct, or NULL (after logging) if the allocation fails.
+ */
+rewrite *
+init_rewrite( void )
+{
+	rewrite *rw = malloc( sizeof *rw );
+
+	if ( rw == NULL ) {
+		debug( 5, LOC, "Unable to allocate memory for rewrite struct: %s\n", strerror(errno) );
+		return( NULL );
+	}
+
+	rw->redir  = 0;
+	rw->port   = 0;
+	rw->path   = NULL;
+	rw->host   = NULL;
+	rw->scheme = NULL;
+	return( rw );
+}
+
+
+#define COPY_REWRITE_ROW( INDEX ) copy_string_token( \
+	(char *)sqlite3_column_text( v.db_stmt.get_rewrite_rule, INDEX ),\
+	sqlite3_column_bytes( v.db_stmt.get_rewrite_rule, INDEX ))
+/*
+ * Find the best matching rewrite rule for a request. Returns a rewrite
+ * struct, or NULL if nothing matched or a query/allocation failed. The
+ * caller passes the result (even NULL) to finish_rewrite() afterwards.
+ */
+rewrite *
+prepare_rewrite( request *p_request )
+{
+	if ( p_request == NULL ) return( NULL );
+
+	unsigned short int rewrite_id = 0;
+	rewrite *p_rewrite = NULL;
+
+	sqlite3_bind_text( v.db_stmt.match_request, 1, p_request->scheme, -1, SQLITE_STATIC );
+	sqlite3_bind_text( v.db_stmt.match_request, 2, p_request->host,   -1, SQLITE_STATIC );
+	sqlite3_bind_text( v.db_stmt.match_request, 3, p_request->tld,    -1, SQLITE_STATIC );
+	sqlite3_bind_text( v.db_stmt.match_request, 4, p_request->path,   -1, SQLITE_STATIC );
+	sqlite3_bind_int(  v.db_stmt.match_request, 5, p_request->port );
+	/* ?6 (client ip) is left unbound for now; an unbound parameter is NULL,
+	 * so only rules whose ip column is NULL can match. */
+	sqlite3_bind_text( v.db_stmt.match_request, 7, p_request->user,   -1, SQLITE_STATIC );
+	sqlite3_bind_text( v.db_stmt.match_request, 8, p_request->method, -1, SQLITE_STATIC );
+
+	switch ( sqlite3_step( v.db_stmt.match_request )) {
+		case SQLITE_ROW:
+			rewrite_id = sqlite3_column_int( v.db_stmt.match_request, 0 );
+			break;
+
+		case SQLITE_DONE:
+			break;
+
+		default:
+			return( NULL );
+	}
+
+	/* FIXME: CHECK for rewrite_rule being NULL on successful match, emit warning, continue */
+	/* return early if we didn't get a matching request */
+	if ( rewrite_id == 0 ) return( NULL );
+
+	/* allocate only once a rule matched, so the early returns above cannot leak */
+	if ( (p_rewrite = init_rewrite()) == NULL ) return( NULL );
+
+	/* pull the rewrite data, populate the struct. only one
+	 * row should ever be returned for this. */
+	sqlite3_bind_int( v.db_stmt.get_rewrite_rule, 1, rewrite_id );
+	switch ( sqlite3_step( v.db_stmt.get_rewrite_rule )) {
+		case SQLITE_ROW:
+			p_rewrite->scheme = COPY_REWRITE_ROW( 1 );
+			p_rewrite->host   = COPY_REWRITE_ROW( 2 );
+			p_rewrite->path   = COPY_REWRITE_ROW( 3 );
+			p_rewrite->port   = sqlite3_column_int( v.db_stmt.get_rewrite_rule, 4 );
+			p_rewrite->redir  = sqlite3_column_int( v.db_stmt.get_rewrite_rule, 5 );
+			break;
+
+		case SQLITE_DONE:
+			break;
+
+		default:
+			free( p_rewrite ); /* fields are still NULL here, only the struct is owned */
+			return( NULL );
+	}
+
+	return( p_rewrite );
+}
+
+
+/*
+ * Reset both prepared statements and clear their bindings, then free
+ * the rewrite struct along with the strings it owns. Passing NULL
+ * still resets the statements; there is just nothing to free.
+ */
+void
+finish_rewrite( rewrite *p_rewrite )
+{
+	/* statement cleanup happens unconditionally */
+	sqlite3_reset( v.db_stmt.get_rewrite_rule );
+	sqlite3_clear_bindings( v.db_stmt.get_rewrite_rule );
+	sqlite3_reset( v.db_stmt.match_request );
+	sqlite3_clear_bindings( v.db_stmt.match_request );
+
+	if ( p_rewrite != NULL ) {
+		/* strings first, then the struct that points at them */
+		free( p_rewrite->path );
+		free( p_rewrite->host );
+		free( p_rewrite->scheme );
+		free( p_rewrite );
+	}
+	return;
+}
+
--- a/db.h Sun Oct 23 22:59:59 2011 -0700
+++ b/db.h Mon Oct 31 17:17:07 2011 -0700
@@ -33,6 +33,39 @@
#include "sqlite3.h"
+/* Pick the most specific rule for a request. A NULL column matches any
+ * value; a rule path matches when the request path (?4) begins with it.
+ * NOTE(review): prefix direction inferred from the length(path) ordering. */
+#define DBSQL_MATCH_REQUEST " \
+	SELECT rewrite_rule, ( \
+	CASE WHEN scheme IS null THEN 1 ELSE 0 END + \
+	CASE WHEN host IS null THEN 1 ELSE 0 END + \
+	CASE WHEN tld IS null THEN 1 ELSE 0 END + \
+	CASE WHEN path IS null THEN 1 ELSE 0 END + \
+	CASE WHEN port IS null THEN 1 ELSE 0 END + \
+	CASE WHEN ip IS null THEN 1 ELSE 0 END + \
+	CASE WHEN user IS null THEN 1 ELSE 0 END + \
+	CASE WHEN method IS null THEN 1 ELSE 0 END + \
+	CASE WHEN rewrite_rule IS null THEN 1 ELSE 0 END ) as nullc \
+	FROM requests WHERE \
+	( scheme IS NULL OR scheme = lower(?1) ) AND \
+	( host IS NULL OR lower( host ) = lower(?2) ) AND \
+	( tld IS NULL OR lower( tld ) = lower(?3) ) AND \
+	( path IS NULL OR lower(?4) LIKE lower( path ) || '%' ) AND \
+	( port IS NULL OR port = ?5 ) AND \
+	( ip IS NULL OR ip = ?6 ) AND \
+	( user IS NULL OR lower( user ) = lower(?7) ) AND \
+	( method IS NULL OR lower( method ) = lower(?8) ) AND \
+	rewrite_rule IS NOT null \
+	ORDER BY length(path) DESC, nullc ASC \
+	LIMIT 1"
+
+/* Pull one rewrite rule; columns are named so positional reads stay stable. */
+#define DBSQL_GET_REWRITE_RULE "\
+	SELECT id, scheme, host, path, port, redir \
+	FROM rewrite_rules \
+	WHERE id = ?1"
+
extern const unsigned short int DB_VERSION;
/*
@@ -40,8 +73,12 @@
*
*/
int db_attach( void );
-int db_upgrade( unsigned short int current_version );
+int db_upgrade( unsigned short int );
+unsigned short int prepare_statements( void );
short int db_version( void );
+rewrite *init_rewrite( void );
+rewrite *prepare_rewrite( request * );
+void finish_rewrite( rewrite * );
#endif
--- a/main.c Sun Oct 23 22:59:59 2011 -0700
+++ b/main.c Mon Oct 31 17:17:07 2011 -0700
@@ -48,6 +48,8 @@
v.debugmode = 0;
#endif
+ (void)signal( SIGINT, shutdown_handler );
+
/* default database file name */
v.db = NULL;
strcpy( v.dbname, "volta.db" );
@@ -114,6 +116,35 @@
if ( db_attach() != SQLITE_OK ) exit( 1 );
/* enter stdin parsing loop */
- return( accept_loop() );
+ unsigned char exitval = accept_loop();
+ shutdown_actions();
+ return( exitval );
}
+
+/* Graceful shutdown: release DB resources, then report speed. Handles are
+ * cleared so a repeat call (main plus the signal handler) frees nothing twice. */
+void
+shutdown_actions( void )
+{
+	sqlite3_finalize( v.db_stmt.match_request );
+	sqlite3_finalize( v.db_stmt.get_rewrite_rule );
+	sqlite3_close( v.db );
+	v.db_stmt.match_request = v.db_stmt.get_rewrite_rule = NULL;
+	v.db = NULL;
+	report_speed();
+}
+
+
+/*
+ * Signal handler: logs the signal, runs shutdown_actions(), then exit(0).
+ * NOTE(review): exit() and the sqlite3 calls are not async-signal-safe.
+ */
+void
+shutdown_handler( int sig )
+{
+ debug( 1, LOC, "Exiting via signal %d.\n", sig );
+ shutdown_actions();
+ exit( 0 );
+}
+
--- a/parser.rl Sun Oct 23 22:59:59 2011 -0700
+++ b/parser.rl Mon Oct 31 17:17:07 2011 -0700
@@ -70,13 +70,13 @@
unsigned short int cs = 1;
char *p = line;
char *pe = p + strlen(p);
- char *eof = NULL;
+ char *eof = pe;
/* the client request pointer */
request *p_request = init_request();
%%{
- machine input_parser;
+ machine squidline_parser;
action channel_id_found {
debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" );
@@ -87,7 +87,7 @@
action scheme_finish { MARK_E(scheme) }
action host_start { MARK_S(host) }
action host_finish { MARK_E(host) }
- action port_start { MARK_S(port) }
+ action port_start { p_request->tokens.port_start = p+1; } # strip leading colon
action port_finish { MARK_E(port) }
action path_start { MARK_S(path) }
action path_finish { MARK_E(path) }
@@ -192,7 +192,7 @@
p_request->scheme = NULL;
p_request->host = NULL;
p_request->tld = NULL;
- p_request->port = NULL;
+ p_request->port = 0;
p_request->path = NULL;
p_request->user = NULL;
p_request->method = NULL;
@@ -227,12 +227,32 @@
{
p_request->scheme = COPY_STR( scheme );
p_request->host = COPY_STR( host );
- p_request->port = COPY_STR( port );
p_request->path = COPY_STR( path );
p_request->method = COPY_STR( meth );
p_request->client_ip = COPY_IP4( c_ip );
- parse_tld( p_request );
+ (void)parse_port( p_request );
+ (void)parse_tld( p_request );
+
+ return;
+}
+
+
+/*
+ * Convert the port token to an integer and store it in the request
+ * struct. A missing, over-long (more than 5 characters), non-numeric or
+ * out-of-range (> 65535) token leaves the port untouched.
+ */
+void
+parse_port( request *p_request )
+{
+	char port[6]; /* "65535" plus the terminating NUL */
+	unsigned int value = 0;
+	size_t len = p_request->tokens.port_length;
+
+	if ( p_request->tokens.port_start == NULL || len == 0 || len >= sizeof(port) ) return;
+	(void)memcpy( port, p_request->tokens.port_start, len );
+	port[ len ] = '\0';
+	if ( sscanf( port, "%u", &value ) == 1 && value <= 65535 ) p_request->port = (unsigned short int)value;
+}
@@ -248,7 +268,7 @@
unsigned short int cs = 5, mark = 0;
char *p = p_request->host;
char *pe = p + p_request->tokens.host_length;
- char *ts = 0, *te = 0, *eof = NULL;
+ char *ts = 0, *te = 0, *eof = pe;
%%{
machine tld_parser;
@@ -291,14 +311,13 @@
*
*/
void
-cleanup_request( struct request *p_request )
+finish_request( request *p_request )
{
if ( p_request == NULL ) return;
free( p_request->scheme );
free( p_request->host );
free( p_request->tld );
- free( p_request->port );
free( p_request->path );
free( p_request->method );
free( p_request->client_ip );
--- a/process.c Sun Oct 23 22:59:59 2011 -0700
+++ b/process.c Mon Oct 31 17:17:07 2011 -0700
@@ -35,22 +35,60 @@
process( char *line )
{
request *p_request = parse( line );
+ rewrite *p_rewrite = prepare_rewrite( p_request );
/* count lines in debugmode */
if ( v.debugmode > 2 ) v.timer.lines++;
- /* If parsing failed for some reason, return a blank line to squid. */
- if ( p_request == NULL ) {
- printf( "\n" );
+ /* If parsing failed or there wasn't a successful rewrite match,
+ * return a blank line to squid to allow the request to pass
+ * through unmolested. */
+ if ( p_request == NULL || p_rewrite == NULL ) {
+ out( "\n" );
+ finish_request( p_request );
+ finish_rewrite( p_rewrite );
return;
}
- printf( "* %s", line );
- printf( "%s%s%s%s\n\n", p_request->scheme, p_request->host, p_request->port, p_request->path );
+ if ( v.debugmode < 4 ) {
+ if ( p_rewrite->redir == REDIR_TEMPORARY ) printf( "302:" );
+ if ( p_rewrite->redir == REDIR_PERMANENT ) printf( "301:" );
+
+ if ( p_request->scheme || p_rewrite->scheme )
+ printf( "%s", p_rewrite->scheme ? p_rewrite->scheme : p_request->scheme );
+ printf( "%s", p_rewrite->host ? p_rewrite->host : p_request->host );
+ printf( "%s", p_rewrite->path ? p_rewrite->path : p_request->path );
+ if ( p_request->port != 0 || p_rewrite->port != 0 )
+ printf( ":%d", p_rewrite->port ? p_rewrite->port : p_request->port );
+ printf("\n");
+ }
+ else {
+ debug( 5, LOC, "Rewrite match on %s/%s\n", p_request->host, p_request->path );
+ debug( 5, LOC, " --> %s/%s\n", p_rewrite->host, p_rewrite->path );
+ }
+
- /* TODO: everything */
+ /* unsigned long hst, net; */
+ /* hst = inet_lnaof( *(p_request->client_ip) ); */
+ /* net = inet_netof( *(p_request->client_ip) ); */
+ /* printf("%14s : net=0x%08lX host=0x%08lX\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
+ /* printf("%14s : net=%lu host=%lu\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */
- cleanup_request( p_request );
+ /*
+ * create function bigint_to_inet(bigint) returns inet as $$
+ * select
+ * (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet
+ * $$ language sql;
+ * */
+
+ /*
+ char ip[ INET_ADDRSTRLEN ];
+ inet_ntop( AF_INET, p_request->client_ip, ip, INET_ADDRSTRLEN );
+ printf( "%s\n", ip );
+ */
+
+ finish_request( p_request );
+ finish_rewrite( p_rewrite );
return;
}
--- a/sql/1.sql Sun Oct 23 22:59:59 2011 -0700
+++ b/sql/1.sql Mon Oct 31 17:17:07 2011 -0700
@@ -1,16 +1,28 @@
--- vim: set noet nosta sw=4 ts=4 ft=sql:
BEGIN;
-DROP TABLE IF EXISTS requests;
-CREATE TABLE requests (
- hi INT,
+CREATE TABLE IF NOT EXISTS requests (
+ scheme VARCHAR(5) DEFAULT NULL,
+ host VARCHAR(255) DEFAULT NULL,
+ tld VARCHAR(255) DEFAULT NULL,
+ path TEXT DEFAULT NULL,
+ port INTEGER DEFAULT NULL,
+ ip VARCHAR(72) DEFAULT NULL,
+ user VARCHAR(40) DEFAULT NULL,
+ method VARCHAR(10) DEFAULT NULL,
rewrite_rule INTEGER REFERENCES rewrite_rules( id ) ON DELETE SET NULL ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED
);
+CREATE INDEX IF NOT EXISTS host_idx ON requests ( host );
+CREATE INDEX IF NOT EXISTS tld_idx ON requests ( tld );
+CREATE INDEX IF NOT EXISTS path_idx ON requests ( path );
-DROP TABLE IF EXISTS rewrite_rules;
-CREATE TABLE rewrite_rules (
- id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
- redir TINYINT NOT NULL DEFAULT 0 CHECK( redir IN (0,1,2) )
+CREATE TABLE IF NOT EXISTS rewrite_rules (
+ id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+ scheme VARCHAR(5) DEFAULT NULL,
+ host VARCHAR(255) DEFAULT NULL,
+ path TEXT DEFAULT NULL,
+ port INTEGER DEFAULT NULL,
+ redir TINYINT NOT NULL DEFAULT 0 CHECK( redir IN (0,1,2) )
);
COMMIT;
--- a/volta.h Sun Oct 23 22:59:59 2011 -0700
+++ b/volta.h Mon Oct 31 17:17:07 2011 -0700
@@ -77,6 +77,12 @@
char dbname[128]; /* path to database file */
struct sqlite3 *db; /* database handle */
+ /* prepared statements */
+ struct {
+ struct sqlite3_stmt *match_request;
+ struct sqlite3_stmt *get_rewrite_rule;
+ } db_stmt;
+
struct {
time_t start; /* start time */
unsigned long int lines; /* line count for determining speed */
@@ -93,7 +99,7 @@
char *host;
char *tld;
char *path;
- char *port;
+ unsigned short int port;
struct in_addr *client_ip;
char *user;
char *method;
@@ -115,6 +121,19 @@
} request;
/*
+ * The URL elements to rewrite a user's request into. A NULL string or a
+ * zero port means the request's own value is used when the URL is printed.
+ */
+typedef struct rewrite {
+ char *scheme; /* replacement scheme, or NULL; freed by finish_rewrite() */
+ char *host; /* replacement host, or NULL; freed by finish_rewrite() */
+ char *path; /* replacement path, or NULL; freed by finish_rewrite() */
+ unsigned short int port; /* replacement port, 0 when the rule sets none */
+ unsigned short int redir; /* compared against REDIR_TEMPORARY / REDIR_PERMANENT */
+} rewrite;
+
+
+/*
*
* Function prototypes
*
@@ -131,13 +150,16 @@
char *copy_string_token( char *, unsigned short int );
struct in_addr *copy_ipv4_token( char *, unsigned short int );
+void shutdown_handler( int );
+void shutdown_actions( void );
int accept_loop( void );
void process( char * );
request *parse( char * );
request *init_request( void );
void populate_request( request * );
void parse_tld( request * );
-void cleanup_request( request * );
+void parse_port( request * );
+void finish_request( request * );
#endif