# HG changeset patch # User Mahlon E. Smith # Date 1320106627 25200 # Node ID 23a242d7b7fae511c42de2dd1d88462cdd259e3c # Parent 191b3c25974aaf116b7f4239a1b5a5f3772dff06 1st iteration of volta actually doing something. Process the request, find the best matching rule, and rewrite the request. Without the DB queries, volta was parsing over 750k requests a second. Currently, it's down to 129.5 with 1161 rules in place. Yikes. I may need to re-evaluate some design choices here. diff -r 191b3c25974a -r 23a242d7b7fa .hgignore --- a/.hgignore Sun Oct 23 22:59:59 2011 -0700 +++ b/.hgignore Mon Oct 31 17:17:07 2011 -0700 @@ -1,6 +1,6 @@ ^volta$ ^volta.db$ -^parser_graph.* +^.*_graph.* ^tags$ .*debug .*.o diff -r 191b3c25974a -r 23a242d7b7fa Makefile --- a/Makefile Sun Oct 23 22:59:59 2011 -0700 +++ b/Makefile Mon Oct 31 17:17:07 2011 -0700 @@ -33,7 +33,7 @@ ifeq (,$(findstring debug,$(MAKECMDGOALS))) RAGEL_FLAGS = -LCe -G2 else - RAGEL_FLAGS = -Ce -G2 + RAGEL_FLAGS = -Ces -G2 endif # Ensure the parser is included in the objs list @@ -52,10 +52,11 @@ strip $@ $(OBJS): volta.h +db.o: db.h # don't actually depend on parser.rl, so distributions don't require ragel parser.c: - ragel $(RAGEL_FLAGS) -s parser.rl -o $@ + ragel $(RAGEL_FLAGS) parser.rl -o $@ debug: $(OBJS) $(CC) $(CFLAGS) -o volta $(OBJS) $(LIBS) @@ -65,11 +66,14 @@ ### U T I L ######################################################################## -parsegraph: parser_graph.xml parser_graph.pdf parser_graph.dot -parser_graph.xml parser_graph.pdf parser_graph.dot: parser.rl - ragel -Vp parser.rl > parser_graph.dot - ragel $(RAGEL_FLAGS) -x parser.rl -o parser_graph.xml - dot -Tpdf parser_graph.dot > parser_graph.pdf +parsegraph: squidline_graph.xml squidline_graph.pdf squidline_graph.dot tld_graph.xml tld_graph.pdf tld_graph.dot +squidline_graph.xml squidline_graph.pdf squidline_graph.dot tld_graph.xml tld_graph.pdf tld_graph.dot: parser.rl + ragel -Vp -S squidline_parser parser.rl > squidline_graph.dot + ragel -Vp -S tld_parser parser.rl > tld_graph.dot + ragel $(RAGEL_FLAGS) -S squidline_parser -x parser.rl -o squidline_graph.xml + ragel $(RAGEL_FLAGS) -S tld_parser -x parser.rl -o tld_graph.xml + dot -Tpdf squidline_graph.dot > squidline_graph.pdf + dot -Tpdf tld_graph.dot > tld_graph.pdf # export CPUPROFILE="cpu.prof" before running volta for cpu profiling # export CPUPROFILE_FREQUENCY=100 (default) @@ -84,12 +88,12 @@ rm -f parser.c volta.db ChangeLog tags clean: - -rm -f volta parser_graph.* *.o *.prof* + -rm -f volta *_graph.* *.o *.prof* # requires BSD tar release: VERSION = $(shell hg id -t | awk '{ print $$1 }') -release: cleanall parser.c +release: clobber parser.c hg log --style changelog > ChangeLog - tar -C .. --exclude misc --exclude .\* -s '/^volta/volta-$(VERSION)/' -czvf /tmp/volta-$(VERSION).tgz volta + tar -C .. --exclude misc --exclude .\* --exclude \*.rl -s '/^volta/volta-$(VERSION)/' -czvf /tmp/volta-$(VERSION).tgz volta mv /tmp/volta-$(VERSION).tgz . diff -r 191b3c25974a -r 23a242d7b7fa README --- a/README Sun Oct 23 22:59:59 2011 -0700 +++ b/README Mon Oct 31 17:17:07 2011 -0700 @@ -2,7 +2,15 @@ Volta ===== +What is volta? + - high performance redirector + Why "volta"? + - latin term, turn + Configuring squid + Using volta +How to + diff -r 191b3c25974a -r 23a242d7b7fa accept_loop.c --- a/accept_loop.c Sun Oct 23 22:59:59 2011 -0700 +++ b/accept_loop.c Mon Oct 31 17:17:07 2011 -0700 @@ -93,7 +93,6 @@ /* stdin closed */ debug( 1, LOC, "End of stream\n" ); - report_speed(); return( 0 ); } diff -r 191b3c25974a -r 23a242d7b7fa db.c --- a/db.c Sun Oct 23 22:59:59 2011 -0700 +++ b/db.c Mon Oct 31 17:17:07 2011 -0700 @@ -71,6 +71,9 @@ } } + /* initialize prepared statements */ + if ( prepare_statements() != 0 ) return SQLITE_ERROR; + return( SQLITE_OK ); } @@ -144,3 +147,143 @@ return( version ); } + +/* + * Initialize the DB statements, returning 0 on success. + * + */ +unsigned short int +prepare_statements( void ) +{ + unsigned short int rv = 0; + + rv = rv + sqlite3_prepare_v2( v.db, DBSQL_GET_REWRITE_RULE, -1, &v.db_stmt.get_rewrite_rule, NULL ); + if ( rv != 0 ) + debug( 2, LOC, "Error preparing DB statement \"%s\": %s\n", + DBSQL_GET_REWRITE_RULE, sqlite3_errmsg(v.db) ); + + rv = rv + sqlite3_prepare_v2( v.db, DBSQL_MATCH_REQUEST, -1, &v.db_stmt.match_request, NULL ); + if ( rv != 0 ) + debug( 2, LOC, "Error preparing DB statement \"%s\": %s\n", + DBSQL_MATCH_REQUEST, sqlite3_errmsg(v.db) ); + + return( rv ); +} + + +/* + * Initialize and return a pointer to a new rewrite object. + * + */ +rewrite * +init_rewrite( void ) +{ + rewrite *p_rewrite = NULL; + if ( (p_rewrite = malloc( sizeof(rewrite) )) == NULL ) { + debug( 5, LOC, "Unable to allocate memory for rewrite struct: %s\n", strerror(errno) ); + return( NULL ); + } + + p_rewrite->scheme = NULL; + p_rewrite->host = NULL; + p_rewrite->path = NULL; + p_rewrite->port = 0; + p_rewrite->redir = 0; + + return( p_rewrite ); +} + + +#define COPY_REWRITE_ROW( INDEX ) copy_string_token( \ + (char *)sqlite3_column_text( v.db_stmt.get_rewrite_rule, INDEX ),\ + sqlite3_column_bytes( v.db_stmt.get_rewrite_rule, INDEX )) +/* + * Given a request struct pointer, try and find the best matching + * rewrite rule, returning a pointer to a rewrite struct. + * + */ +rewrite * +prepare_rewrite( request *p_request ) +{ + if ( p_request == NULL ) return( NULL ); + + unsigned short int rewrite_id = 0; + rewrite *p_rewrite = init_rewrite(); + + sqlite3_bind_text( v.db_stmt.match_request, 3, p_request->tld, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 1, p_request->scheme, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 2, p_request->host, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 3, p_request->tld, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 4, p_request->path, -1, SQLITE_STATIC ); + sqlite3_bind_int( v.db_stmt.match_request, 5, p_request->port ); + /* + sqlite3_bind_text( v.db_stmt.match_request, 6, NULL, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 6, p_request->client_ip, -1, SQLITE_STATIC ); + */ + sqlite3_bind_text( v.db_stmt.match_request, 7, p_request->user, -1, SQLITE_STATIC ); + sqlite3_bind_text( v.db_stmt.match_request, 8, p_request->method, -1, SQLITE_STATIC ); + + switch ( sqlite3_step( v.db_stmt.match_request )) { + case SQLITE_ROW: + rewrite_id = sqlite3_column_int( v.db_stmt.match_request, 0 ); + break; + + case SQLITE_DONE: + break; + + default: + return( NULL ); + } + + /* FIXME: CHECK for rewrite_rule being NULL on successful match, emit warning, continue */ + + /* return early if we didn't get a matching request */ + if ( rewrite_id == 0 ) return( NULL ); + + /* pull the rewrite data, populate the struct. only one + * row should ever be returned for this. */ + sqlite3_bind_int( v.db_stmt.get_rewrite_rule, 1, rewrite_id ); + switch ( sqlite3_step( v.db_stmt.get_rewrite_rule )) { + case SQLITE_ROW: + p_rewrite->scheme = COPY_REWRITE_ROW( 1 ); + p_rewrite->host = COPY_REWRITE_ROW( 2 ); + p_rewrite->path = COPY_REWRITE_ROW( 3 ); + p_rewrite->port = sqlite3_column_int( v.db_stmt.get_rewrite_rule, 4 ); + p_rewrite->redir = sqlite3_column_int( v.db_stmt.get_rewrite_rule, 5 ); + break; + + case SQLITE_DONE: + break; + + default: + return( NULL ); + } + + return( p_rewrite ); +} + + +/* + * Release memory used by the rewrite struct and + * reset prepared statements. + * + */ +void +finish_rewrite( rewrite *p_rewrite ) +{ + sqlite3_reset( v.db_stmt.get_rewrite_rule ); + sqlite3_reset( v.db_stmt.match_request ); + sqlite3_clear_bindings( v.db_stmt.get_rewrite_rule ); + sqlite3_clear_bindings( v.db_stmt.match_request ); + + if ( p_rewrite == NULL ) return; + + free( p_rewrite->scheme ); + free( p_rewrite->host ); + free( p_rewrite->path ); + + free( p_rewrite ), p_rewrite = NULL; + + return; +} + diff -r 191b3c25974a -r 23a242d7b7fa db.h --- a/db.h Sun Oct 23 22:59:59 2011 -0700 +++ b/db.h Mon Oct 31 17:17:07 2011 -0700 @@ -33,6 +33,39 @@ #include "sqlite3.h" +#define DBSQL_MATCH_REQUEST " \ + SELECT rewrite_rule, ( \ + CASE WHEN scheme IS null THEN 1 ELSE 0 END + \ + CASE WHEN host IS null THEN 1 ELSE 0 END + \ + CASE WHEN tld IS null THEN 1 ELSE 0 END + \ + CASE WHEN path IS null THEN 1 ELSE 0 END + \ + CASE WHEN port IS null THEN 1 ELSE 0 END + \ + CASE WHEN ip IS null THEN 1 ELSE 0 END + \ + CASE WHEN user IS null THEN 1 ELSE 0 END + \ + CASE WHEN method IS null THEN 1 ELSE 0 END + \ + CASE WHEN rewrite_rule IS null THEN 1 ELSE 0 END ) as nullc \ + FROM requests \ + WHERE \ + ( scheme IS NULL OR scheme = lower(?1) ) AND \ + ( host IS NULL OR lower( host ) = lower(?2) ) AND \ + ( tld IS NULL OR lower( tld ) = lower(?3) ) AND \ + ( path IS NULL OR lower( path ) LIKE '?4%' ) AND \ + ( port IS NULL OR port = ?5 ) AND \ + ( ip IS NULL OR ip = ?6 ) AND \ + ( user IS NULL OR lower( user ) = lower(?7) ) AND \ + ( method IS NULL OR lower( method ) = lower(?8) ) AND \ + rewrite_rule IS NOT null \ + ORDER BY \ + length(path) DESC, \ + nullc ASC \ + LIMIT 1" + +/* Pull the entire rewrite rule row. */ +#define DBSQL_GET_REWRITE_RULE "\ + SELECT * \ + FROM rewrite_rules \ + WHERE id = ?1" + extern const unsigned short int DB_VERSION; /* @@ -40,8 +73,12 @@ * */ int db_attach( void ); -int db_upgrade( unsigned short int current_version ); +int db_upgrade( unsigned short int ); +unsigned short int prepare_statements( void ); short int db_version( void ); +rewrite *init_rewrite( void ); +rewrite *prepare_rewrite( request * ); +void finish_rewrite( rewrite * ); #endif diff -r 191b3c25974a -r 23a242d7b7fa main.c --- a/main.c Sun Oct 23 22:59:59 2011 -0700 +++ b/main.c Mon Oct 31 17:17:07 2011 -0700 @@ -48,6 +48,8 @@ v.debugmode = 0; #endif + (void)signal( SIGINT, shutdown_handler ); + /* default database file name */ v.db = NULL; strcpy( v.dbname, "volta.db" ); @@ -114,6 +116,35 @@ if ( db_attach() != SQLITE_OK ) exit( 1 ); /* enter stdin parsing loop */ - return( accept_loop() ); + unsigned char exitval = accept_loop(); + shutdown_actions(); + return( exitval ); } + +/* + * Perform actions in preparation for a graceful shutdown. + * + */ +void +shutdown_actions( void ) +{ + sqlite3_finalize( v.db_stmt.match_request ); + sqlite3_finalize( v.db_stmt.get_rewrite_rule ); + sqlite3_close( v.db ); + report_speed(); +} + + +/* + * Signal handler for shutting things down. + * + */ +void +shutdown_handler( int sig ) +{ + debug( 1, LOC, "Exiting via signal %d.\n", sig ); + shutdown_actions(); + exit( 0 ); +} + diff -r 191b3c25974a -r 23a242d7b7fa parser.rl --- a/parser.rl Sun Oct 23 22:59:59 2011 -0700 +++ b/parser.rl Mon Oct 31 17:17:07 2011 -0700 @@ -70,13 +70,13 @@ unsigned short int cs = 1; char *p = line; char *pe = p + strlen(p); - char *eof = NULL; + char *eof = pe; /* the client request pointer */ request *p_request = init_request(); %%{ - machine input_parser; + machine squidline_parser; action channel_id_found { debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" ); @@ -87,7 +87,7 @@ action scheme_finish { MARK_E(scheme) } action host_start { MARK_S(host) } action host_finish { MARK_E(host) } - action port_start { MARK_S(port) } + action port_start { p_request->tokens.port_start = p+1; } # strip leading colon action port_finish { MARK_E(port) } action path_start { MARK_S(path) } action path_finish { MARK_E(path) } @@ -192,7 +192,7 @@ p_request->scheme = NULL; p_request->host = NULL; p_request->tld = NULL; - p_request->port = NULL; + p_request->port = 0; p_request->path = NULL; p_request->user = NULL; p_request->method = NULL; @@ -227,12 +227,32 @@ { p_request->scheme = COPY_STR( scheme ); p_request->host = COPY_STR( host ); - p_request->port = COPY_STR( port ); p_request->path = COPY_STR( path ); p_request->method = COPY_STR( meth ); p_request->client_ip = COPY_IP4( c_ip ); - parse_tld( p_request ); + (void)parse_port( p_request ); + (void)parse_tld( p_request ); + + return; +} + + +/* + * Pull out the port number and convert it to an integer before + * storing in the request struct. + * + */ +void +parse_port( request *p_request ) +{ + if ( p_request->tokens.port_start == NULL || p_request->tokens.port_length == 0 ) return; + + char port[5]; + + (void)strncpy( port, p_request->tokens.port_start, p_request->tokens.port_length ); + port[ p_request->tokens.port_length ] = '\0'; + (void)sscanf( port, "%hu", &p_request->port ); return; } @@ -248,7 +268,7 @@ unsigned short int cs = 5, mark = 0; char *p = p_request->host; char *pe = p + p_request->tokens.host_length; - char *ts = 0, *te = 0, *eof = NULL; + char *ts = 0, *te = 0, *eof = pe; %%{ machine tld_parser; @@ -291,14 +311,13 @@ * */ void -cleanup_request( struct request *p_request ) +finish_request( request *p_request ) { if ( p_request == NULL ) return; free( p_request->scheme ); free( p_request->host ); free( p_request->tld ); - free( p_request->port ); free( p_request->path ); free( p_request->method ); free( p_request->client_ip ); diff -r 191b3c25974a -r 23a242d7b7fa process.c --- a/process.c Sun Oct 23 22:59:59 2011 -0700 +++ b/process.c Mon Oct 31 17:17:07 2011 -0700 @@ -35,22 +35,60 @@ process( char *line ) { request *p_request = parse( line ); + rewrite *p_rewrite = prepare_rewrite( p_request ); /* count lines in debugmode */ if ( v.debugmode > 2 ) v.timer.lines++; - /* If parsing failed for some reason, return a blank line to squid. */ - if ( p_request == NULL ) { - printf( "\n" ); + /* If parsing failed or there wasn't a successful rewrite match, + * return a blank line to squid to allow the request to pass + * through unmolested. */ + if ( p_request == NULL || p_rewrite == NULL ) { + out( "\n" ); + finish_request( p_request ); + finish_rewrite( p_rewrite ); return; } - printf( "* %s", line ); - printf( "%s%s%s%s\n\n", p_request->scheme, p_request->host, p_request->port, p_request->path ); + if ( v.debugmode < 4 ) { + if ( p_rewrite->redir == REDIR_TEMPORARY ) printf( "302:" ); + if ( p_rewrite->redir == REDIR_PERMANENT ) printf( "301:" ); + + if ( p_request->scheme || p_rewrite->scheme ) + printf( "%s", p_rewrite->scheme ? p_rewrite->scheme : p_request->scheme ); + printf( "%s", p_rewrite->host ? p_rewrite->host : p_request->host ); + printf( "%s", p_rewrite->path ? p_rewrite->path : p_request->path ); + if ( p_request->port != 0 || p_rewrite->port != 0 ) + printf( ":%d", p_rewrite->port ? p_rewrite->port : p_request->port ); + printf("\n"); + } + else { + debug( 5, LOC, "Rewrite match on %s/%s\n", p_request->host, p_request->path ); + debug( 5, LOC, " --> %s/%s\n", p_rewrite->host, p_rewrite->path ); + } + - /* TODO: everything */ + /* unsigned long hst, net; */ + /* hst = inet_lnaof( *(p_request->client_ip) ); */ + /* net = inet_netof( *(p_request->client_ip) ); */ + /* printf("%14s : net=0x%08lX host=0x%08lX\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */ + /* printf("%14s : net=%lu host=%lu\n", inet_ntoa( *(p_request->client_ip) ), net, hst); */ - cleanup_request( p_request ); + /* + * create function bigint_to_inet(bigint) returns inet as $$ + * select + * (($1>>24&255)||'.'||($1>>16&255)||'.'||($1>>8&255)||'.'||($1>>0&255))::inet + * $$ language sql; + * */ + + /* + char ip[ INET_ADDRSTRLEN ]; + inet_ntop( AF_INET, p_request->client_ip, ip, INET_ADDRSTRLEN ); + printf( "%s\n", ip ); + */ + + finish_request( p_request ); + finish_rewrite( p_rewrite ); return; } diff -r 191b3c25974a -r 23a242d7b7fa sql/1.sql --- a/sql/1.sql Sun Oct 23 22:59:59 2011 -0700 +++ b/sql/1.sql Mon Oct 31 17:17:07 2011 -0700 @@ -1,16 +1,28 @@ --- vim: set noet nosta sw=4 ts=4 ft=sql: BEGIN; -DROP TABLE IF EXISTS requests; -CREATE TABLE requests ( - hi INT, +CREATE TABLE IF NOT EXISTS requests ( + scheme VARCHAR(5) DEFAULT NULL, + host VARCHAR(255) DEFAULT NULL, + tld VARCHAR(255) DEFAULT NULL, + path TEXT DEFAULT NULL, + port INTEGER DEFAULT NULL, + ip VARCHAR(72) DEFAULT NULL, + user VARCHAR(40) DEFAULT NULL, + method VARCHAR(10) DEFAULT NULL, rewrite_rule INTEGER REFERENCES rewrite_rules( id ) ON DELETE SET NULL ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED ); +CREATE INDEX IF NOT EXISTS host_idx ON requests ( host ); +CREATE INDEX IF NOT EXISTS tld_idx ON requests ( tld ); +CREATE INDEX IF NOT EXISTS path_idx ON requests ( path ); -DROP TABLE IF EXISTS rewrite_rules; -CREATE TABLE rewrite_rules ( - id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, - redir TINYINT NOT NULL DEFAULT 0 CHECK( redir IN (0,1,2) ) +CREATE TABLE IF NOT EXISTS rewrite_rules ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + scheme VARCHAR(5) DEFAULT NULL, + host VARCHAR(255) DEFAULT NULL, + path TEXT DEFAULT NULL, + port INTEGER DEFAULT NULL, + redir TINYINT NOT NULL DEFAULT 0 CHECK( redir IN (0,1,2) ) ); COMMIT; diff -r 191b3c25974a -r 23a242d7b7fa volta.h --- a/volta.h Sun Oct 23 22:59:59 2011 -0700 +++ b/volta.h Mon Oct 31 17:17:07 2011 -0700 @@ -77,6 +77,12 @@ char dbname[128]; /* path to database file */ struct sqlite3 *db; /* database handle */ + /* prepared statements */ + struct { + struct sqlite3_stmt *match_request; + struct sqlite3_stmt *get_rewrite_rule; + } db_stmt; + struct { time_t start; /* start time */ unsigned long int lines; /* line count for determining speed */ @@ -93,7 +99,7 @@ char *host; char *tld; char *path; - char *port; + unsigned short int port; struct in_addr *client_ip; char *user; char *method; @@ -115,6 +121,19 @@ } request; /* + * The URL elements to rewrite a user's request into. + * + */ +typedef struct rewrite { + char *scheme; + char *host; + char *path; + unsigned short int port; + unsigned short int redir; +} rewrite; + + +/* * * Function prototypes * @@ -131,13 +150,16 @@ char *copy_string_token( char *, unsigned short int ); struct in_addr *copy_ipv4_token( char *, unsigned short int ); +void shutdown_handler( int ); +void shutdown_actions( void ); int accept_loop( void ); void process( char * ); request *parse( char * ); request *init_request( void ); void populate_request( request * ); void parse_tld( request * ); -void cleanup_request( request * ); +void parse_port( request * ); +void finish_request( request * ); #endif