parser.rl
changeset 14 51eb85ae4de4
parent 13 23a242d7b7fa
child 15 2706fc514dea
equal deleted inserted replaced
13:23a242d7b7fa 14:51eb85ae4de4
    28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29 */
    29 */
    30 
    30 
    31 #include "volta.h"
    31 #include "volta.h"
    32 
    32 
    33 #define MARK_S( LBL ) p_request->tokens.LBL ## _start = p;
    33 #define MARK_S( LBL ) p_parsed->tokens.LBL ## _start = p;
    34 #define MARK_E( LBL ) p_request->tokens.LBL ## _length = p - ( *pe + p_request->tokens.LBL ## _start );
    34 #define MARK_E( LBL ) p_parsed->tokens.LBL ## _length = p - ( *pe + p_parsed->tokens.LBL ## _start );
    35 
    35 
    36 /* 
    36 /* 
    37  * Tokenize an incoming line from squid, returning a parsed and populated
    37  * Tokenize an incoming line from squid, returning a parsed and populated
    38  * structure to make redirection decisions against.  This pointer should
    38  * structure to make redirection decisions against.  This pointer should
    39  * be freed using cleanup_request() after use.
    39  * be freed using finish_parsed() after use.
    40  * 
    40  * 
    41  * Squid documentation about redirectors:
    41  * Squid documentation about redirectors:
    42  * ---------------------------------------------------------------------------
    42  * ---------------------------------------------------------------------------
    43  * TAG: url_rewrite_program
    43  * TAG: url_rewrite_program
    44  * Specify the location of the executable for the URL rewriter.
    44  * Specify the location of the executable for the URL rewriter.
    61  * URL with "301:" (moved permanently) or 302: (moved temporarily).
    61  * URL with "301:" (moved permanently) or 302: (moved temporarily).
    62  * 
    62  * 
    63  * By default, a URL rewriter is not used.
    63  * By default, a URL rewriter is not used.
    64  * ---------------------------------------------------------------------------
    64  * ---------------------------------------------------------------------------
    65 */
    65 */
    66 request *
    66 parsed *
    67 parse( char *line )
    67 parse_request( char *line )
    68 {
    68 {
    69    	/* machine required vars */
    69    	/* machine required vars */
    70 	unsigned short int cs = 1;
    70 	unsigned short int cs = 1;
    71 	char *p   = line;
    71 	char *p   = line;
    72 	char *pe  = p + strlen(p);
    72 	char *pe  = p + strlen(p);
    73 	char *eof = pe;
    73 	char *eof = pe;
    74 
    74 
    75 	/* the client request pointer */
    75 	/* the client request pointer */
    76 	request *p_request = init_request();
    76 	parsed *p_parsed = init_parsed();
       
    77 	p_parsed->type   = REQUEST;
    77 
    78 
    78 %%{
    79 %%{
    79 	machine squidline_parser;
    80 	machine request_parser;
    80 
    81 
    81 	action channel_id_found  {
    82 	action channel_id_found  {
    82 		debug( 1, LOC, "Channel ID found in redirector input.  Set 'url_rewrite_concurrency' to '0' in squid.\n" );
    83 		debug( 1, LOC, "Channel ID found in redirector input.  Set 'url_rewrite_concurrency' to '0' in squid.\n" );
    83 		fbreak;
    84 		fbreak;
    84 	}
    85 	}
    85 
    86 
    86 	action scheme_start  { MARK_S(scheme) }
    87 	action scheme_start  { MARK_S(scheme) }
    87 	action scheme_finish { MARK_E(scheme) }
    88 	action scheme_finish { MARK_E(scheme) }
    88 	action host_start    { MARK_S(host) }
    89 	action host_start    { MARK_S(host) }
    89 	action host_finish   { MARK_E(host) }
    90 	action host_finish   { MARK_E(host) }
    90 	action port_start    { p_request->tokens.port_start = p+1; } # strip leading colon
    91 	action port_start    { p_parsed->tokens.port_start = p+1; } # strip leading colon
    91 	action port_finish   { MARK_E(port) }
    92 	action port_finish   { MARK_E(port) }
    92 	action path_start    { MARK_S(path) }
    93 	action path_start    { MARK_S(path) }
    93 	action path_finish   { MARK_E(path) }
    94 	action path_finish   { MARK_E(path) }
    94 	action meth_start    { MARK_S(meth) }
    95 	action meth_start    { MARK_S(meth) }
    95 	action meth_finish   { MARK_E(meth) }
    96 	action meth_finish   { MARK_E(meth) }
    96 	action c_ip_start    { MARK_S(c_ip) }
    97 	action c_ip_start    { MARK_S(c_ip) }
    97 	action c_ip_finish   { MARK_E(c_ip) }
    98 	action c_ip_finish   { MARK_E(c_ip) }
    98 
    99 
    99 	action host_error   { debug( 3, LOC, "Unable to parse hostname.\n" ); }
   100 	action host_error   { debug( 3, LOC, "Unable to parse the request hostname.\n" ); }
   100 	action scheme_error { debug( 3, LOC, "Unable to parse scheme.\n" ); }
   101 	action scheme_error { debug( 3, LOC, "Unable to parse the request scheme.\n" ); }
   101 	action meth_error   { debug( 3, LOC, "Unable to parse method.\n" ); }
   102 	action meth_error   { debug( 3, LOC, "Unable to parse the request method.\n" ); }
   102 	action c_ip_error   { debug( 3, LOC, "Unable to parse the client IP address.\n" ); }
   103 	action c_ip_error   { debug( 3, LOC, "Unable to parse the client IP address.\n" ); }
   103 
   104 
   104 	# 
   105 	# 
   105 	# Squid line: URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
   106 	# Squid line: URL <SP> client_ip "/" fqdn <SP> user <SP> method [<SP> kvpairs]<NL>
   106 	# 
   107 	# 
   148 	client_ip      = ipv4                  >c_ip_start   %c_ip_finish   @!c_ip_error;
   149 	client_ip      = ipv4                  >c_ip_start   %c_ip_finish   @!c_ip_error;
   149 	method         = upper+                >meth_start   %meth_finish   @!meth_error;
   150 	method         = upper+                >meth_start   %meth_finish   @!meth_error;
   150 
   151 
   151 	SquidLine = (
   152 	SquidLine = (
   152  		start:   channel_id?                            -> Url,
   153  		start:   channel_id?                            -> Url,
   153 		Url:     scheme? host port? path? space         -> Client,
   154 		Url:     scheme? host port? path?               -> Client,
   154 		Client:  client_ip '/' ( hostname | '-' ) space -> User,
   155 		Client:  space client_ip '/' ( hostname | '-' ) -> User,
   155 		User:    pchar+ space                           -> Method,
   156 		User:    space pchar+                           -> Method,
   156 		Method:  method                                 -> KVPairs,
   157 		Method:  space method                           -> KVPairs,
   157 		KVPairs: ( space any+ )?                        -> final
   158 		KVPairs: ( space any+ )?                        -> final
   158  	);
   159  	);
   159 
   160 
   160 	main := SquidLine '\n';
   161 	main := SquidLine '\n';
   161 }%%
   162 }%%
   163 	/* state machine */
   164 	/* state machine */
   164 	%% write exec;
   165 	%% write exec;
   165 
   166 
   166 	/* If we were given an invalid line, bail early */
   167 	/* If we were given an invalid line, bail early */
   167 	if ( cs < %%{ write first_final; }%% ) {
   168 	if ( cs < %%{ write first_final; }%% ) {
   168 		free( p_request ), p_request = NULL;
   169 		free( p_parsed ), p_parsed = NULL;
   169 		debug( 3, LOC, "Invalid line (%d), skipped\n", v.timer.lines + 1 );
   170 		debug( 3, LOC, "Invalid request line (%d), skipped\n", v.timer.lines + 1 );
   170 		debug( 4, LOC, "%s", line );
   171 		debug( 4, LOC, "%s", line );
   171 		return( NULL );
   172 		return( NULL );
   172 	}
   173 	}
   173 
   174 
   174 	(void)populate_request( p_request );
   175 	debug( 6, LOC, "%s", line );
   175 	return( p_request );
   176 	(void)populate_parsed( p_parsed );
   176 }
   177 	return( p_parsed );
   177 
   178 }
   178 
   179 
   179 /*
   180 
   180  * Initialize and return a pointer to a new request object.
   181 /*
   181  *
   182  * Tokenize a value string from a successful database lookup, returning a parsed
   182  */
   183  * and populated structure. This pointer should be freed using finish_parsed() after use.
   183 request *
   184  *
   184 init_request( void )
   185  */
   185 {
   186 parsed *
   186 	request *p_request = NULL;
   187 parse_rule( char *rewrite )
   187 	if ( (p_request = malloc( sizeof(request) )) == NULL ) {
   188 {
   188 		debug( 5, LOC, "Unable to allocate memory for request struct: %s\n", strerror(errno) );
   189    	/* machine required vars */
   189 		return( NULL );
   190 	unsigned short int cs = 1;
   190 	}
   191 	char *p   = rewrite;
   191 
   192 	char *pe  = p + strlen(p);
   192 	p_request->scheme    = NULL;
   193 	char *eof = pe;
   193 	p_request->host      = NULL;
   194 
   194 	p_request->tld       = NULL;
   195 	/* the client rule pointer */
   195 	p_request->port      = 0;
   196 	parsed *p_parsed = init_parsed();
   196 	p_request->path      = NULL;
   197 	p_parsed->type   = RULE;
   197 	p_request->user      = NULL;
   198 
   198 	p_request->method    = NULL;
   199 %%{
   199 	p_request->client_ip = NULL;
   200 	machine rule_parser;
   200 
   201 
   201 	p_request->tokens.scheme_start  = NULL;
   202 	action match_start    { MARK_S(path_re) }
   202 	p_request->tokens.scheme_length = 0;
   203 	action match_finish   { MARK_E(path_re) }
   203 	p_request->tokens.host_start    = NULL;
   204 	action redir_start    { MARK_S(redir) }
   204 	p_request->tokens.host_length   = 0;
   205 	action redir_finish   { p_parsed->tokens.redir_length = 3; } # strip trailing colon
   205 	p_request->tokens.port_start    = NULL;
   206 
   206 	p_request->tokens.port_length   = 0;
   207 	action scheme_start  { MARK_S(scheme) }
   207 	p_request->tokens.path_start    = NULL;
   208 	action scheme_finish { MARK_E(scheme) }
   208 	p_request->tokens.path_length   = 0;
   209 	action host_start    { MARK_S(host) }
   209 	p_request->tokens.meth_start    = NULL;
   210 	action host_finish   { MARK_E(host) }
   210 	p_request->tokens.meth_length   = 0;
   211 	action port_start    { p_parsed->tokens.port_start = p+1; } # strip leading colon
   211 	p_request->tokens.c_ip_start    = NULL;
   212 	action port_finish   { MARK_E(port) }
   212 	p_request->tokens.c_ip_length   = 0;
   213 	action path_start    { MARK_S(path) }
   213 
   214 	action path_finish   { MARK_E(path) }
   214 	return p_request;
   215 
   215 }
   216 	action match_error { debug( 3, LOC, "Unable to parse the rule path matcher.\n" ); }
   216 
   217 	action host_error  { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
   217 
   218 
   218 #define COPY_STR( LBL ) copy_string_token( p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length )
   219 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
   219 #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length )
   220 	path_segment    = '/' ( any - space )*;
       
   221 
       
   222 	sep            = space+;
       
   223 	hostname       = host_component ( '.' host_component )* '.'?;
       
   224 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
       
   225 	ipv6           = ( xdigit | ':' )+;
       
   226 
       
   227 	path_re        = ( any - space )+      >match_start  %match_finish @!match_error;
       
   228 	redir          = ( digit{3} ':' )      >redir_start  %redir_finish;
       
   229 
       
   230 	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish;
       
   231 	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
       
   232 	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
       
   233 	path           = path_segment*         >path_start   %path_finish;
       
   234 
       
   235 	main := path_re sep ( redir? scheme? host port? path? );
       
   236 }%%
       
   237 
       
   238 	/* state machine */
       
   239 	%% write exec;
       
   240 
       
   241 	/* If we were given an invalid rule, bail early */
       
   242 	if ( cs < %%{ write first_final; }%% ) {
       
   243 		free( p_parsed ), p_parsed = NULL;
       
   244 		debug( 3, LOC, "Invalid rule\n" );
       
   245 		debug( 4, LOC, "%s\n", rewrite );
       
   246 		return( NULL );
       
   247 	}
       
   248 
       
   249 	(void)populate_parsed( p_parsed );
       
   250 	return( p_parsed );
       
   251 }
       
   252 
       
   253 
       
   254 /*
       
   255  * Tokenize a line from an ascii representation of the database, returning
       
   256  * a pointer to a parsed struct.  Used for creation of a new cdb file,
       
   257  * validating data prior to use.
       
   258  *
       
   259  */
       
   260 struct db_input *
       
   261 parse_dbinput( char *line )
       
   262 {
       
   263    	/* machine required vars */
       
   264 	unsigned short int cs = 1;
       
   265 	char *p   = line;
       
   266 	char *pe  = p + strlen(p);
       
   267 	char *eof = pe;
       
   268 
       
   269 	/* the db line input pointer */
       
   270 	struct db_input *dbline = NULL;
       
   271 	if ( (dbline = malloc( sizeof(struct db_input) )) == NULL ) {
       
   272 		debug( 5, LOC, "Unable to allocate memory for db input: %s\n", strerror(errno) );
       
   273 		return( NULL );
       
   274 	}
       
   275 	dbline->klen   = 0;
       
   276 	dbline->vlen   = 0;
       
   277 	dbline->kstart = NULL;
       
   278 	dbline->key    = NULL;
       
   279 	dbline->vstart = NULL;
       
   280 	dbline->val    = NULL;
       
   281 
       
   282 %%{
       
   283 	machine dbinput_parser;
       
   284 
       
   285 	action key_start  { dbline->kstart = p; }
       
   286 	action key_finish { dbline->klen = p - ( *pe + dbline->kstart ); }
       
   287 	action key_error  { debug( 0, LOC, "Invalid key format\n" ); }
       
   288 	action val_start  { dbline->vstart = p; }
       
   289 	action val_finish { dbline->vlen = p - ( *pe + dbline->vstart ); }
       
   290 	action val_error  { debug( 0, LOC, "Invalid rewrite value\n" ); }
       
   291 
       
   292 	sep            = space+;
       
   293 	host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
       
   294 	hostname       = host_component ( '.' host_component )* '.'?;
       
   295 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
       
   296 	token          = ( any - space )+;
       
   297 	redir          = ( digit{3} ':' );
       
   298 	host           = ( hostname | ipv4 );
       
   299 
       
   300 	key = ( host | '*' )      >key_start %key_finish @!key_error;
       
   301 	val = ( token sep token ) >val_start %val_finish @!val_error;
       
   302 	
       
   303 	main:= key sep val '\n';
       
   304 }%%
       
   305 
       
   306 	/* state machine */
       
   307 	%% write exec;
       
   308 
       
   309 	/* If the input line was invalid, bail early */
       
   310 	if ( cs < %%{ write first_final; }%% ) {
       
   311 		free( dbline ), dbline = NULL;
       
   312 		return( NULL );
       
   313 	}
       
   314 
       
   315 	/* populate struct */
       
   316 	dbline->key = copy_string_token( dbline->kstart, dbline->klen );
       
   317 	dbline->val = copy_string_token( dbline->vstart, dbline->vlen );
       
   318 
       
   319 	/* check the val to make sure it is a valid rewrite rule */
       
   320 	parsed *valstr = NULL;
       
   321 	valstr = parse_rule( dbline->val );
       
   322 	if ( valstr == NULL ) {
       
   323 		free( dbline->key );
       
   324 		free( dbline->val );
       
   325 		free( dbline );
       
   326 		return( NULL );
       
   327 	}
       
   328 	finish_parsed( valstr );
       
   329 
       
   330 	return( dbline );
       
   331 }
       
   332 
       
   333 
       
   334 
       
   335 /*
       
   336  * Initialize and return a pointer to a new parser object.
       
   337  *
       
   338  */
       
   339 parsed *
       
   340 init_parsed( void )
       
   341 {
       
   342 	parsed *p_parsed = NULL;
       
   343 	if ( (p_parsed = malloc( sizeof(parsed) )) == NULL ) {
       
   344 		debug( 5, LOC, "Unable to allocate memory for parsed struct: %s\n", strerror(errno) );
       
   345 		return( NULL );
       
   346 	}
       
   347 
       
   348 	p_parsed->type      = 0;
       
   349 	p_parsed->path_re   = NULL;
       
   350 	p_parsed->redir     = NULL;
       
   351 	p_parsed->scheme    = NULL;
       
   352 	p_parsed->host      = NULL;
       
   353 	p_parsed->tld       = NULL;
       
   354 	p_parsed->port      = NULL;
       
   355 	p_parsed->path      = NULL;
       
   356 	p_parsed->user      = NULL;
       
   357 	p_parsed->method    = NULL;
       
   358 	p_parsed->client_ip = NULL;
       
   359 
       
   360 	p_parsed->tokens.path_re_start  = NULL;
       
   361 	p_parsed->tokens.redir_start    = NULL;
       
   362 	p_parsed->tokens.scheme_start   = NULL;
       
   363 	p_parsed->tokens.host_start     = NULL;
       
   364 	p_parsed->tokens.port_start     = NULL;
       
   365 	p_parsed->tokens.path_start     = NULL;
       
   366 	p_parsed->tokens.meth_start     = NULL;
       
   367 	p_parsed->tokens.c_ip_start     = NULL;
       
   368 	p_parsed->tokens.path_re_length = 0;
       
   369 	p_parsed->tokens.redir_length   = 0;
       
   370 	p_parsed->tokens.scheme_length  = 0;
       
   371 	p_parsed->tokens.host_length    = 0;
       
   372 	p_parsed->tokens.port_length    = 0;
       
   373 	p_parsed->tokens.path_length    = 0;
       
   374 	p_parsed->tokens.meth_length    = 0;
       
   375 	p_parsed->tokens.c_ip_length    = 0;
       
   376 
       
   377 	return p_parsed;
       
   378 }
       
   379 
       
   380 
       
   381 /*
       
   382  * Release memory used by the parsed struct.
       
   383  *
       
   384  */
       
   385 void
       
   386 finish_parsed( parsed *p_parsed )
       
   387 {
       
   388 	if ( p_parsed == NULL ) return;
       
   389 
       
   390 	free( p_parsed->scheme );
       
   391 	free( p_parsed->host );
       
   392 	free( p_parsed->path );
       
   393 	free( p_parsed->port );
       
   394 
       
   395 	if ( p_parsed->type == REQUEST ) {
       
   396 		free( p_parsed->tld );
       
   397 		free( p_parsed->method );
       
   398 		free( p_parsed->client_ip );
       
   399 	}
       
   400 
       
   401 	if ( p_parsed->type == RULE ) {
       
   402 		free( p_parsed->path_re );
       
   403 		free( p_parsed->redir );
       
   404 	}
       
   405 
       
   406 	free( p_parsed ), p_parsed = NULL;
       
   407 
       
   408 	return;
       
   409 }
       
   410 
       
   411 
       
   412 #define COPY_STR( LBL ) copy_string_token( p_parsed->tokens.LBL ## _start, p_parsed->tokens.LBL ## _length )
       
   413 /* #define COPY_IP4( LBL ) copy_ipv4_token(   p_request->tokens.LBL ## _start, p_request->tokens.LBL ## _length ) */
   220 
   414 
   221 /*
   415 /*
   222  * Take the previously parsed token locations and copy them into the request struct.
   416  * Take the previously parsed token locations and copy them into the request struct.
   223  *
   417  *
   224  */
   418  */
   225 void
   419 void
   226 populate_request( request *p_request )
   420 populate_parsed( parsed *p_parsed )
   227 {
   421 {
   228 	p_request->scheme    = COPY_STR( scheme );
   422 	p_parsed->scheme = COPY_STR( scheme );
   229 	p_request->host      = COPY_STR( host );
   423 	p_parsed->host   = COPY_STR( host );
   230 	p_request->path      = COPY_STR( path );
   424 	p_parsed->path   = COPY_STR( path );
   231 	p_request->method    = COPY_STR( meth );
   425 	p_parsed->port   = COPY_STR( port );
   232 	p_request->client_ip = COPY_IP4( c_ip );
   426 
   233 
   427 	if ( p_parsed->type == REQUEST ) {
   234 	(void)parse_port( p_request );
   428 		p_parsed->method    = COPY_STR( meth );
   235 	(void)parse_tld(  p_request );
   429 		p_parsed->client_ip = COPY_STR( c_ip );
       
   430 		/* p_request->client_ip = COPY_IP4( c_ip ); */
       
   431 
       
   432 		(void)lowercase_str( p_parsed->host, p_parsed->tokens.host_length );
       
   433 		(void)parse_tld( p_parsed );
       
   434 	}
       
   435 
       
   436 	if ( p_parsed->type == RULE ) {
       
   437 		p_parsed->path_re = COPY_STR( path_re );
       
   438 		p_parsed->redir   = COPY_STR( redir );
       
   439 	}
   236 
   440 
   237 	return;
   441 	return;
   238 }
   442 }
   239 
   443 
   240 
   444 
   241 /*
   445 /*
   242  * Pull out the port number and convert it to an integer before
   446  * Pull the top level domain out of the requested hostname.
   243  * storing in the request struct.
       
   244  *
   447  *
   245  */
   448  */
   246 void
   449 void
   247 parse_port( request *p_request )
   450 parse_tld( parsed *p_request )
   248 {
       
   249 	if ( p_request->tokens.port_start == NULL || p_request->tokens.port_length == 0 ) return;
       
   250 
       
   251 	char port[5];
       
   252 
       
   253 	(void)strncpy( port, p_request->tokens.port_start, p_request->tokens.port_length );
       
   254 	port[ p_request->tokens.port_length ] = '\0';
       
   255 	(void)sscanf( port, "%hu", &p_request->port );
       
   256 
       
   257 	return;
       
   258 }
       
   259 
       
   260 
       
   261 /*
       
   262  * Pull the top level domain out of the requested hostname.
       
   263  *
       
   264  */
       
   265 void
       
   266 parse_tld( request *p_request )
       
   267 {
   451 {
   268 	unsigned short int cs = 5, mark = 0;
   452 	unsigned short int cs = 5, mark = 0;
   269 	char *p   = p_request->host;
   453 	char *p   = p_request->host;
   270 	char *pe  = p + p_request->tokens.host_length;
   454 	char *pe  = p + p_request->tokens.host_length;
   271 	char *ts  = 0, *te = 0, *eof = pe;
   455 	char *ts  = 0, *te = 0, *eof = pe;
   303 	/* restore the hostname. */
   487 	/* restore the hostname. */
   304 	reverse_str( p_request->host );
   488 	reverse_str( p_request->host );
   305 	return;
   489 	return;
   306 }
   490 }
   307 
   491 
   308 
       
   309 /*
       
   310  * Release memory used by the request struct.
       
   311  *
       
   312  */
       
   313 void
       
   314 finish_request( request *p_request )
       
   315 {
       
   316 	if ( p_request == NULL ) return;
       
   317 
       
   318 	free( p_request->scheme );
       
   319 	free( p_request->host );
       
   320 	free( p_request->tld );
       
   321 	free( p_request->path );
       
   322 	free( p_request->method );
       
   323 	free( p_request->client_ip );
       
   324 
       
   325 	free( p_request ), p_request = NULL;
       
   326 
       
   327 	return;
       
   328 }
       
   329