diff -r 23a242d7b7fa -r 51eb85ae4de4 parser.rl --- a/parser.rl Mon Oct 31 17:17:07 2011 -0700 +++ b/parser.rl Fri Nov 04 20:34:28 2011 -0700 @@ -30,13 +30,13 @@ #include "volta.h" -#define MARK_S( LBL ) p_request->tokens.LBL ## _start = p; -#define MARK_E( LBL ) p_request->tokens.LBL ## _length = p - ( *pe + p_request->tokens.LBL ## _start ); +#define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p; +#define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start ); /* * Tokenize an incoming line from squid, returning a parsed and populated * structure to make redirection decisions against. This pointer should - * be freed using cleanup_request() after use. + * be freed using finish_parsed() after use. * * Squid documentation about redirectors: * --------------------------------------------------------------------------- @@ -63,8 +63,8 @@ * By default, a URL rewriter is not used. * --------------------------------------------------------------------------- */ -request * -parse( char *line ) +parsed * +parse_request( char *line ) { /* machine required vars */ unsigned short int cs = 1; @@ -73,10 +73,12 @@ char *eof = pe; /* the client request pointer */ - request *p_request = init_request(); + parsed *p_parsed = init_parsed(); + if ( p_parsed == NULL ) return( NULL ); /* init_parsed() already logged the failure */ + p_parsed->type = REQUEST; %%{ - machine squidline_parser; + machine request_parser; action channel_id_found { debug( 1, LOC, "Channel ID found in redirector input. 
Set 'url_rewrite_concurrency' to '0' in squid.\n" ); @@ -87,7 +89,7 @@ action scheme_finish { MARK_E(scheme) } action host_start { MARK_S(host) } action host_finish { MARK_E(host) } - action port_start { p_request->tokens.port_start = p+1; } # strip leading colon + action port_start { p_parsed->tokens.port_start = p+1; } # strip leading colon action port_finish { MARK_E(port) } action path_start { MARK_S(path) } action path_finish { MARK_E(path) } @@ -96,9 +98,9 @@ action c_ip_start { MARK_S(c_ip) } action c_ip_finish { MARK_E(c_ip) } - action host_error { debug( 3, LOC, "Unable to parse hostname.\n" ); } - action scheme_error { debug( 3, LOC, "Unable to parse scheme.\n" ); } - action meth_error { debug( 3, LOC, "Unable to parse method.\n" ); } + action host_error { debug( 3, LOC, "Unable to parse the request hostname.\n" ); } + action scheme_error { debug( 3, LOC, "Unable to parse the request scheme.\n" ); } + action meth_error { debug( 3, LOC, "Unable to parse the request method.\n" ); } action c_ip_error { debug( 3, LOC, "Unable to parse the client IP address.\n" ); } # @@ -150,10 +152,10 @@ SquidLine = ( start: channel_id? -> Url, - Url: scheme? host port? path? space -> Client, - Client: client_ip '/' ( hostname | '-' ) space -> User, - User: pchar+ space -> Method, - Method: method -> KVPairs, + Url: scheme? host port? path? -> Client, + Client: space client_ip '/' ( hostname | '-' ) -> User, + User: space pchar+ -> Method, + Method: space method -> KVPairs, KVPairs: ( space any+ )? 
-> final ); @@ -165,94 +167,278 @@ /* If we were given an invalid line, bail early */ if ( cs < %%{ write first_final; }%% ) { - free( p_request ), p_request = NULL; - debug( 3, LOC, "Invalid line (%d), skipped\n", v.timer.lines + 1 ); + free( p_parsed ), p_parsed = NULL; + debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 ); debug( 4, LOC, "%s", line ); return( NULL ); } - (void)populate_request( p_request ); - return( p_request ); + debug( 6, LOC, "%s", line ); + (void)populate_parsed( p_parsed ); + return( p_parsed ); +} + + +/* + * Tokenize a value string from a successful database lookup, returning a parsed + * and populated structure. This pointer should be freed using finish_parsed() after use. + * Returns NULL if the parsed struct cannot be allocated or the rule is invalid. + */ +parsed * +parse_rule( char *rewrite ) +{ + /* machine required vars */ + unsigned short int cs = 1; + char *p = rewrite; + char *pe = p + strlen(p); + char *eof = pe; + + /* the parsed rule pointer */ + parsed *p_parsed = init_parsed(); + if ( p_parsed == NULL ) return( NULL ); /* init_parsed() already logged the failure */ + p_parsed->type = RULE; + +%%{ + machine rule_parser; + + action match_start { MARK_S(path_re) } + action match_finish { MARK_E(path_re) } + action redir_start { MARK_S(redir) } + action redir_finish { p_parsed->tokens.redir_length = 3; } # strip trailing colon + + action scheme_start { MARK_S(scheme) } + action scheme_finish { MARK_E(scheme) } + action host_start { MARK_S(host) } + action host_finish { MARK_E(host) } + action port_start { p_parsed->tokens.port_start = p+1; } # strip leading colon + action port_finish { MARK_E(port) } + action path_start { MARK_S(path) } + action path_finish { MARK_E(path) } + + action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); } + action host_error { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); } + + host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum ); + path_segment = '/' ( any - space )*; + + sep = space+; + hostname = host_component ( '.' host_component )* '.'?; + ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' 
digit{1,3}; + ipv6 = ( xdigit | ':' )+; + + path_re = ( any - space )+ >match_start %match_finish @!match_error; + redir = ( digit{3} ':' ) >redir_start %redir_finish; + + scheme = ( alpha{3,5} '://' ) >scheme_start %scheme_finish; + host = ( hostname | ipv4 ) >host_start %host_finish @!host_error; + port = ( ':' digit{1,5} ) >port_start %port_finish; + path = path_segment* >path_start %path_finish; + + main := path_re sep ( redir? scheme? host port? path? ); +}%% + + /* state machine */ + %% write exec; + + /* If we were given an invalid rule, bail early */ + if ( cs < %%{ write first_final; }%% ) { + free( p_parsed ), p_parsed = NULL; + debug( 3, LOC, "Invalid rule\n" ); + debug( 4, LOC, "%s\n", rewrite ); + return( NULL ); + } + + (void)populate_parsed( p_parsed ); + return( p_parsed ); } /* - * Initialize and return a pointer to a new request object. + * Tokenize a line from an ascii representation of the database, returning + * a pointer to a db_input struct, or NULL on failure. Used for creation of a new cdb file, + * validating data prior to use. 
* */ -request * -init_request( void ) +struct db_input * +parse_dbinput( char *line ) { - request *p_request = NULL; - if ( (p_request = malloc( sizeof(request) )) == NULL ) { - debug( 5, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) ); + /* machine required vars */ + unsigned short int cs = 1; + char *p = line; + char *pe = p + strlen(p); + char *eof = pe; + + /* the db line input pointer */ + struct db_input *dbline = NULL; + if ( (dbline = malloc( sizeof(struct db_input) )) == NULL ) { + debug( 5, LOC, "Unable to allocate memory for db input: %s\n", strerror(errno) ); + return( NULL ); + } + dbline->klen = 0; + dbline->vlen = 0; + dbline->kstart = NULL; + dbline->key = NULL; + dbline->vstart = NULL; + dbline->val = NULL; + +%%{ + machine dbinput_parser; + + action key_start { dbline->kstart = p; } + action key_finish { dbline->klen = p - ( *pe + dbline->kstart ); } + action key_error { debug( 0, LOC, "Invalid key format\n" ); } + action val_start { dbline->vstart = p; } + action val_finish { dbline->vlen = p - ( *pe + dbline->vstart ); } + action val_error { debug( 0, LOC, "Invalid rewrite value\n" ); } + + sep = space+; + host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum ); + hostname = host_component ( '.' host_component )* '.'?; + ipv4 = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' 
digit{1,3}; + token = ( any - space )+; + redir = ( digit{3} ':' ); + host = ( hostname | ipv4 ); + + key = ( host | '*' ) >key_start %key_finish @!key_error; + val = ( token sep token ) >val_start %val_finish @!val_error; + + main:= key sep val '\n'; +}%% + + /* state machine */ + %% write exec; + + /* If the input line was invalid, bail early */ + if ( cs < %%{ write first_final; }%% ) { + free( dbline ), dbline = NULL; return( NULL ); } - p_request->scheme = NULL; - p_request->host = NULL; - p_request->tld = NULL; - p_request->port = 0; - p_request->path = NULL; - p_request->user = NULL; - p_request->method = NULL; - p_request->client_ip = NULL; + /* populate struct */ + dbline->key = copy_string_token( dbline->kstart, dbline->klen ); + dbline->val = copy_string_token( dbline->vstart, dbline->vlen ); - p_request->tokens.scheme_start = NULL; - p_request->tokens.scheme_length = 0; - p_request->tokens.host_start = NULL; - p_request->tokens.host_length = 0; - p_request->tokens.port_start = NULL; - p_request->tokens.port_length = 0; - p_request->tokens.path_start = NULL; - p_request->tokens.path_length = 0; - p_request->tokens.meth_start = NULL; - p_request->tokens.meth_length = 0; - p_request->tokens.c_ip_start = NULL; - p_request->tokens.c_ip_length = 0; + /* check the val to make sure it is a valid rewrite rule; a NULL token copy is treated as invalid so parse_rule() never sees NULL */ + parsed *valstr = NULL; + if ( dbline->key != NULL && dbline->val != NULL ) valstr = parse_rule( dbline->val ); + if ( valstr == NULL ) { + free( dbline->key ); + free( dbline->val ); + free( dbline ); + return( NULL ); + } + finish_parsed( valstr ); - return p_request; + return( dbline ); } -#define COPY_STR( LBL ) copy_string_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) -#define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) + +/* + * Initialize and return a pointer to a new parsed struct. 
+ * Returns NULL if memory cannot be allocated. + */ +parsed * +init_parsed( void ) +{ + parsed *p_parsed = NULL; + if ( (p_parsed = malloc( sizeof(parsed) )) == NULL ) { + debug( 5, LOC, "Unable to allocate memory for parsed struct: %s\n", strerror(errno) ); + return( NULL ); + } + + p_parsed->type = 0; + p_parsed->path_re = NULL; + p_parsed->redir = NULL; + p_parsed->scheme = NULL; + p_parsed->host = NULL; + p_parsed->tld = NULL; + p_parsed->port = NULL; + p_parsed->path = NULL; + p_parsed->user = NULL; + p_parsed->method = NULL; + p_parsed->client_ip = NULL; + + p_parsed->tokens.path_re_start = NULL; + p_parsed->tokens.redir_start = NULL; + p_parsed->tokens.scheme_start = NULL; + p_parsed->tokens.host_start = NULL; + p_parsed->tokens.port_start = NULL; + p_parsed->tokens.path_start = NULL; + p_parsed->tokens.meth_start = NULL; + p_parsed->tokens.c_ip_start = NULL; + p_parsed->tokens.path_re_length = 0; + p_parsed->tokens.redir_length = 0; + p_parsed->tokens.scheme_length = 0; + p_parsed->tokens.host_length = 0; + p_parsed->tokens.port_length = 0; + p_parsed->tokens.path_length = 0; + p_parsed->tokens.meth_length = 0; + p_parsed->tokens.c_ip_length = 0; + + return p_parsed; +} + + +/* + * Release memory used by the parsed struct. + * Passing NULL is a no-op. + */ +void +finish_parsed( parsed *p_parsed ) +{ + if ( p_parsed == NULL ) return; + + free( p_parsed->scheme ); + free( p_parsed->host ); + free( p_parsed->path ); + free( p_parsed->port ); + + if ( p_parsed->type == REQUEST ) { + free( p_parsed->tld ); + free( p_parsed->method ); + free( p_parsed->client_ip ); + } + + if ( p_parsed->type == RULE ) { + free( p_parsed->path_re ); + free( p_parsed->redir ); + } + + free( p_parsed ), p_parsed = NULL; + + return; +} + + +#define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length ) +/* #define COPY_IP4( LBL ) copy_ipv4_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */ /* * Take the previously parsed token locations and copy them into the request struct. 
* */ void -populate_request( request *p_request ) +populate_parsed( parsed *p_parsed ) { - p_request->scheme = COPY_STR( scheme ); - p_request->host = COPY_STR( host ); - p_request->path = COPY_STR( path ); - p_request->method = COPY_STR( meth ); - p_request->client_ip = COPY_IP4( c_ip ); - - (void)parse_port( p_request ); - (void)parse_tld( p_request ); - - return; -} - + p_parsed->scheme = COPY_STR( scheme ); + p_parsed->host = COPY_STR( host ); + p_parsed->path = COPY_STR( path ); + p_parsed->port = COPY_STR( port ); -/* - * Pull out the port number and convert it to an integer before - * storing in the request struct. - * - */ -void -parse_port( request *p_request ) -{ - if ( p_request->tokens.port_start == NULL || p_request->tokens.port_length == 0 ) return; + if ( p_parsed->type == REQUEST ) { + p_parsed->method = COPY_STR( meth ); + p_parsed->client_ip = COPY_STR( c_ip ); + /* p_request->client_ip = COPY_IP4( c_ip ); */ - char port[5]; + (void)lowercase_str( p_parsed->host, p_parsed->tokens.host_length ); + (void)parse_tld( p_parsed ); + } - (void)strncpy( port, p_request->tokens.port_start, p_request->tokens.port_length ); - port[ p_request->tokens.port_length ] = '\0'; - (void)sscanf( port, "%hu", &p_request->port ); + if ( p_parsed->type == RULE ) { + p_parsed->path_re = COPY_STR( path_re ); + p_parsed->redir = COPY_STR( redir ); + } return; } @@ -263,7 +449,7 @@ * */ void -parse_tld( request *p_request ) +parse_tld( parsed *p_request ) { unsigned short int cs = 5, mark = 0; char *p = p_request->host; @@ -305,25 +491,3 @@ return; } - -/* - * Release memory used by the request struct. - * - */ -void -finish_request( request *p_request ) -{ - if ( p_request == NULL ) return; - - free( p_request->scheme ); - free( p_request->host ); - free( p_request->tld ); - free( p_request->path ); - free( p_request->method ); - free( p_request->client_ip ); - - free( p_request ), p_request = NULL; - - return; -} -