--- a/parser.rl Mon Oct 31 17:17:07 2011 -0700
+++ b/parser.rl Fri Nov 04 20:34:28 2011 -0700
@@ -30,13 +30,13 @@
#include "volta.h"
-#define MARK_S( LBL ) p_request->tokens.LBL ## _start = p;
-#define MARK_E( LBL ) p_request->tokens.LBL ## _length = p - ( *pe + p_request->tokens.LBL ## _start );
+/*
+ * Token bookkeeping helpers for use inside Ragel actions, where 'p' is the
+ * machine's current position and 'p_parsed' is the struct being filled in.
+ * MARK_S( LBL ) stores p as the start of token LBL; MARK_E( LBL ) stores the
+ * distance from that start to p as the token's length.
+ *
+ * NOTE(review): MARK_E adds '*pe' (the byte at the end pointer) to the start
+ * pointer.  Where the setup is visible, pe is p + strlen(p), so that byte is
+ * the terminating NUL and the term is presumably 0 -- confirm before using
+ * these macros on a buffer that is not NUL terminated.
+ */
+#define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p;
+#define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start );
/*
* Tokenize an incoming line from squid, returning a parsed and populated
* structure to make redirection decisions against. This pointer should
- * be freed using cleanup_request() after use.
+ * be freed using finish_parsed() after use.
*
* Squid documentation about redirectors:
* ---------------------------------------------------------------------------
@@ -63,8 +63,8 @@
* By default, a URL rewriter is not used.
* ---------------------------------------------------------------------------
*/
-request *
-parse( char *line )
+parsed *
+parse_request( char *line )
{
/* machine required vars */
unsigned short int cs = 1;
@@ -73,10 +73,11 @@
char *eof = pe;
/* the client request pointer */
- request *p_request = init_request();
+ parsed *p_parsed = init_parsed();
+ p_parsed->type = REQUEST;
%%{
- machine squidline_parser;
+ machine request_parser;
action channel_id_found {
debug( 1, LOC, "Channel ID found in redirector input. Set 'url_rewrite_concurrency' to '0' in squid.\n" );
@@ -87,7 +88,7 @@
action scheme_finish { MARK_E(scheme) }
action host_start { MARK_S(host) }
action host_finish { MARK_E(host) }
- action port_start { p_request->tokens.port_start = p+1; } # strip leading colon
+ action port_start { p_parsed->tokens.port_start = p+1; } # strip leading colon
action port_finish { MARK_E(port) }
action path_start { MARK_S(path) }
action path_finish { MARK_E(path) }
@@ -96,9 +97,9 @@
action c_ip_start { MARK_S(c_ip) }
action c_ip_finish { MARK_E(c_ip) }
- action host_error { debug( 3, LOC, "Unable to parse hostname.\n" ); }
- action scheme_error { debug( 3, LOC, "Unable to parse scheme.\n" ); }
- action meth_error { debug( 3, LOC, "Unable to parse method.\n" ); }
+ action host_error { debug( 3, LOC, "Unable to parse the request hostname.\n" ); }
+ action scheme_error { debug( 3, LOC, "Unable to parse the request scheme.\n" ); }
+ action meth_error { debug( 3, LOC, "Unable to parse the request method.\n" ); }
action c_ip_error { debug( 3, LOC, "Unable to parse the client IP address.\n" ); }
#
@@ -150,10 +151,10 @@
SquidLine = (
start: channel_id? -> Url,
- Url: scheme? host port? path? space -> Client,
- Client: client_ip '/' ( hostname | '-' ) space -> User,
- User: pchar+ space -> Method,
- Method: method -> KVPairs,
+ Url: scheme? host port? path? -> Client,
+ Client: space client_ip '/' ( hostname | '-' ) -> User,
+ User: space pchar+ -> Method,
+ Method: space method -> KVPairs,
KVPairs: ( space any+ )? -> final
);
@@ -165,94 +166,277 @@
/* If we were given an invalid line, bail early */
if ( cs < %%{ write first_final; }%% ) {
- free( p_request ), p_request = NULL;
- debug( 3, LOC, "Invalid line (%d), skipped\n", v.timer.lines + 1 );
+ free( p_parsed ), p_parsed = NULL;
+ debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 );
debug( 4, LOC, "%s", line );
return( NULL );
}
- (void)populate_request( p_request );
- return( p_request );
+ debug( 6, LOC, "%s", line );
+ (void)populate_parsed( p_parsed );
+ return( p_parsed );
+}
+
+
+/*
+ * Tokenize a value string from a successful database lookup, returning a parsed
+ * and populated structure.
+ *
+ * rewrite: NUL terminated rule text.  The grammar below expects a path
+ *          matcher, whitespace, then [NNN:][scheme://]host[:port][/path].
+ *
+ * Returns a parsed struct of type RULE, or NULL when the input is NULL, the
+ * struct could not be allocated, or the text does not match the grammar.
+ * The returned pointer should be freed using finish_parsed() after use.
+ *
+ */
+parsed *
+parse_rule( char *rewrite )
+{
+    /* strlen() below needs a real string */
+    if ( rewrite == NULL ) return( NULL );
+
+    /* machine required vars */
+    unsigned short int cs = 1;
+    char *p   = rewrite;
+    char *pe  = p + strlen(p);
+    char *eof = pe;
+
+    /* the client rule pointer */
+    parsed *p_parsed = init_parsed();
+    /* init_parsed() logs and returns NULL when malloc() fails */
+    if ( p_parsed == NULL ) return( NULL );
+    p_parsed->type = RULE;
+
+%%{
+    machine rule_parser;
+
+    action match_start  { MARK_S(path_re) }
+    action match_finish { MARK_E(path_re) }
+    action redir_start  { MARK_S(redir) }
+    action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon
+
+    action scheme_start  { MARK_S(scheme) }
+    action scheme_finish { MARK_E(scheme) }
+    action host_start    { MARK_S(host) }
+    action host_finish   { MARK_E(host) }
+    action port_start    { p_parsed->tokens.port_start = p+1; } # strip leading colon
+    action port_finish   { MARK_E(port) }
+    action path_start    { MARK_S(path) }
+    action path_finish   { MARK_E(path) }
+
+    action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); }
+    action host_error  { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
+
+    host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
+    path_segment   = '/' ( any - space )*;
+
+    sep      = space+;
+    hostname = host_component ( '.' host_component )* '.'?;
+    ipv4     = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+    ipv6     = ( xdigit | ':' )+;
+
+    path_re = ( any - space )+ >match_start %match_finish @!match_error;
+    redir   = ( digit{3} ':' ) >redir_start %redir_finish;
+
+    scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
+    host   = ( hostname | ipv4 ) >host_start %host_finish @!host_error;
+    port   = ( ':' digit{1,5} ) >port_start %port_finish;
+    path   = path_segment* >path_start %path_finish;
+
+    main := path_re sep ( redir? scheme? host port? path? );
+}%%
+
+    /* state machine */
+    %% write exec;
+
+    /* If we were given an invalid rule, bail early */
+    if ( cs < %%{ write first_final; }%% ) {
+        /* nothing has been copied into the struct yet, so a plain free() is enough */
+        free( p_parsed ), p_parsed = NULL;
+        debug( 3, LOC, "Invalid rule\n" );
+        debug( 4, LOC, "%s\n", rewrite );
+        return( NULL );
+    }
+
+    (void)populate_parsed( p_parsed );
+    return( p_parsed );
}
/*
- * Initialize and return a pointer to a new request object.
+ * Tokenize a line from an ascii representation of the database, returning
+ * a pointer to a db_input struct that holds copies of the key and value.
+ * Used for creation of a new cdb file, validating data prior to use.
+ *
+ * Returns NULL when the line is NULL, memory could not be allocated, the
+ * line does not match the 'key sep val \n' grammar, or the value is not a
+ * valid rewrite rule.  Nothing is freed on success, so releasing the struct
+ * and its key and val strings is left to the caller.
*
*/
-request *
-init_request( void )
+struct db_input *
+parse_dbinput( char *line )
{
- request *p_request = NULL;
- if ( (p_request = malloc( sizeof(request) )) == NULL ) {
- debug( 5, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) );
+    /* strlen() below needs a real string */
+    if ( line == NULL ) return( NULL );
+
+    /* machine required vars */
+    unsigned short int cs = 1;
+    char *p   = line;
+    char *pe  = p + strlen(p);
+    char *eof = pe;
+
+    /* the db line input pointer */
+    struct db_input *dbline = NULL;
+    if ( (dbline = malloc( sizeof(struct db_input) )) == NULL ) {
+        debug( 5, LOC, "Unable to allocate memory for db input: %s\n", strerror(errno) );
+        return( NULL );
+    }
+    dbline->klen   = 0;
+    dbline->vlen   = 0;
+    dbline->kstart = NULL;
+    dbline->key    = NULL;
+    dbline->vstart = NULL;
+    dbline->val    = NULL;
+
+%%{
+    machine dbinput_parser;
+
+    action key_start  { dbline->kstart = p; }
+    action key_finish { dbline->klen = p - ( *pe + dbline->kstart ); }
+    action key_error  { debug( 0, LOC, "Invalid key format\n" ); }
+    action val_start  { dbline->vstart = p; }
+    action val_finish { dbline->vlen = p - ( *pe + dbline->vstart ); }
+    action val_error  { debug( 0, LOC, "Invalid rewrite value\n" ); }
+
+    sep            = space+;
+    host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
+    hostname       = host_component ( '.' host_component )* '.'?;
+    ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+    token          = ( any - space )+;
+    redir          = ( digit{3} ':' );
+    host           = ( hostname | ipv4 );
+
+    key = ( host | '*' ) >key_start %key_finish @!key_error;
+    val = ( token sep token ) >val_start %val_finish @!val_error;
+
+    main:= key sep val '\n';
+}%%
+
+    /* state machine */
+    %% write exec;
+
+    /* If the input line was invalid, bail early */
+    if ( cs < %%{ write first_final; }%% ) {
+        free( dbline ), dbline = NULL;
return( NULL );
}
- p_request->scheme = NULL;
- p_request->host = NULL;
- p_request->tld = NULL;
- p_request->port = 0;
- p_request->path = NULL;
- p_request->user = NULL;
- p_request->method = NULL;
- p_request->client_ip = NULL;
+    /* populate struct */
+    dbline->key = copy_string_token( dbline->kstart, dbline->klen );
+    dbline->val = copy_string_token( dbline->vstart, dbline->vlen );
+
+    /*
+     * Both tokens are non-empty once the machine has accepted the line, so a
+     * NULL copy is treated as a failed copy (presumably an allocation failure
+     * inside copy_string_token() -- confirm against its definition).  Bail
+     * out rather than hand a NULL string to parse_rule().
+     */
+    if ( dbline->key == NULL || dbline->val == NULL ) {
+        free( dbline->key );
+        free( dbline->val );
+        free( dbline );
+        return( NULL );
+    }
- p_request->tokens.scheme_start = NULL;
- p_request->tokens.scheme_length = 0;
- p_request->tokens.host_start = NULL;
- p_request->tokens.host_length = 0;
- p_request->tokens.port_start = NULL;
- p_request->tokens.port_length = 0;
- p_request->tokens.path_start = NULL;
- p_request->tokens.path_length = 0;
- p_request->tokens.meth_start = NULL;
- p_request->tokens.meth_length = 0;
- p_request->tokens.c_ip_start = NULL;
- p_request->tokens.c_ip_length = 0;
+    /* check the val to make sure it is a valid rewrite rule */
+    parsed *valstr = parse_rule( dbline->val );
+    if ( valstr == NULL ) {
+        free( dbline->key );
+        free( dbline->val );
+        free( dbline );
+        return( NULL );
+    }
+    /* the parsed rule was only needed for validation */
+    finish_parsed( valstr );
- return p_request;
+    return( dbline );
}
-#define COPY_STR( LBL ) copy_string_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length )
-#define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length )
+
+/*
+ * Allocate a parsed struct and reset every member to an empty state:
+ * pointers to NULL, the type and all token lengths to 0.
+ * Returns the new struct, or NULL (after logging) if malloc() fails.
+ *
+ */
+parsed *
+init_parsed( void )
+{
+    parsed *fresh = malloc( sizeof *fresh );
+
+    if ( fresh == NULL ) {
+        debug( 5, LOC, "Unable to allocate memory for parsed struct: %s\n", strerror(errno) );
+        return( NULL );
+    }
+
+    /* no type yet */
+    fresh->type = 0;
+
+    /* string members */
+    fresh->path_re   = NULL;
+    fresh->redir     = NULL;
+    fresh->scheme    = NULL;
+    fresh->host      = NULL;
+    fresh->tld       = NULL;
+    fresh->port      = NULL;
+    fresh->path      = NULL;
+    fresh->user      = NULL;
+    fresh->method    = NULL;
+    fresh->client_ip = NULL;
+
+    /* token markers, one start/length pair per token */
+    fresh->tokens.path_re_start  = NULL;
+    fresh->tokens.path_re_length = 0;
+    fresh->tokens.redir_start    = NULL;
+    fresh->tokens.redir_length   = 0;
+    fresh->tokens.scheme_start   = NULL;
+    fresh->tokens.scheme_length  = 0;
+    fresh->tokens.host_start     = NULL;
+    fresh->tokens.host_length    = 0;
+    fresh->tokens.port_start     = NULL;
+    fresh->tokens.port_length    = 0;
+    fresh->tokens.path_start     = NULL;
+    fresh->tokens.path_length    = 0;
+    fresh->tokens.meth_start     = NULL;
+    fresh->tokens.meth_length    = 0;
+    fresh->tokens.c_ip_start     = NULL;
+    fresh->tokens.c_ip_length    = 0;
+
+    return fresh;
+}
+
+
+/*
+ * Free a parsed struct along with the strings it holds.  Which strings are
+ * released depends on the struct's type.  Passing NULL does nothing.
+ *
+ */
+void
+finish_parsed( parsed *p_parsed )
+{
+    if ( p_parsed != NULL ) {
+        /* released for every type */
+        free( p_parsed->port );
+        free( p_parsed->path );
+        free( p_parsed->host );
+        free( p_parsed->scheme );
+
+        /* released for requests only */
+        if ( p_parsed->type == REQUEST ) {
+            free( p_parsed->client_ip );
+            free( p_parsed->method );
+            free( p_parsed->tld );
+        }
+
+        /* released for rules only */
+        if ( p_parsed->type == RULE ) {
+            free( p_parsed->redir );
+            free( p_parsed->path_re );
+        }
+
+        free( p_parsed );
+    }
+}
+
+
+/* Copy token LBL out of the input via copy_string_token(), using the start and length recorded in p_parsed->tokens. */
+#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
+/* #define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
/*
* Take the previously parsed token locations and copy them into the request struct.
+ * The struct is a 'parsed' object and may be a REQUEST or a RULE: scheme,
+ * host, path and port are copied for both, the remaining members by type.
*
*/
void
-populate_request( request *p_request )
+populate_parsed( parsed *p_parsed )
{
- p_request->scheme = COPY_STR( scheme );
- p_request->host = COPY_STR( host );
- p_request->path = COPY_STR( path );
- p_request->method = COPY_STR( meth );
- p_request->client_ip = COPY_IP4( c_ip );
-
- (void)parse_port( p_request );
- (void)parse_tld( p_request );
-
- return;
-}
-
+ /* members shared by requests and rules; port is kept as a string copy */
+ p_parsed->scheme = COPY_STR( scheme );
+ p_parsed->host = COPY_STR( host );
+ p_parsed->path = COPY_STR( path );
+ p_parsed->port = COPY_STR( port );
-/*
- * Pull out the port number and convert it to an integer before
- * storing in the request struct.
- *
- */
-void
-parse_port( request *p_request )
-{
- if ( p_request->tokens.port_start == NULL || p_request->tokens.port_length == 0 ) return;
+ /* request-only members: method, client IP, lowercased host and its tld */
+ if ( p_parsed->type == REQUEST ) {
+ p_parsed->method = COPY_STR( meth );
+ p_parsed->client_ip = COPY_STR( c_ip );
+ /* p_request->client_ip = COPY_IP4( c_ip ); */
- char port[5];
+ (void)lowercase_str( p_parsed->host, p_parsed->tokens.host_length );
+ (void)parse_tld( p_parsed );
+ }
- (void)strncpy( port, p_request->tokens.port_start, p_request->tokens.port_length );
- port[ p_request->tokens.port_length ] = '\0';
- (void)sscanf( port, "%hu", &p_request->port );
+ /* rule-only members: the path matcher and the redirect code token */
+ if ( p_parsed->type == RULE ) {
+ p_parsed->path_re = COPY_STR( path_re );
+ p_parsed->redir = COPY_STR( redir );
+ }
return;
}
@@ -263,7 +447,7 @@
*
*/
void
-parse_tld( request *p_request )
+parse_tld( parsed *p_request )
{
unsigned short int cs = 5, mark = 0;
char *p = p_request->host;
@@ -305,25 +489,3 @@
return;
}
-
-/*
- * Release memory used by the request struct.
- *
- */
-void
-finish_request( request *p_request )
-{
- if ( p_request == NULL ) return;
-
- free( p_request->scheme );
- free( p_request->host );
- free( p_request->tld );
- free( p_request->path );
- free( p_request->method );
- free( p_request->client_ip );
-
- free( p_request ), p_request = NULL;
-
- return;
-}
-