parser.rl
changeset 15 2706fc514dea
parent 14 51eb85ae4de4
child 18 d4ce82194b64
equal deleted inserted replaced
14:51eb85ae4de4 15:2706fc514dea
   197 	p_parsed->type   = RULE;
   197 	p_parsed->type   = RULE;
   198 
   198 
   199 %%{
   199 %%{
   200 	machine rule_parser;
   200 	machine rule_parser;
   201 
   201 
   202 	action match_start    { MARK_S(path_re) }
   202 	action match_start   { MARK_S(path_re) }
   203 	action match_finish   { MARK_E(path_re) }
   203 	action match_finish  { MARK_E(path_re) }
   204 	action redir_start    { MARK_S(redir) }
   204 	action redir_start   { MARK_S(redir) }
   205 	action redir_finish   { p_parsed->tokens.redir_length = 3; } # strip trailing colon
   205 	action redir_finish  { p_parsed->tokens.redir_length = 3; } # strip trailing colon
       
   206 	action wl_finish     { p_parsed->wl = 1; }
   206 
   207 
   207 	action scheme_start  { MARK_S(scheme) }
   208 	action scheme_start  { MARK_S(scheme) }
   208 	action scheme_finish { MARK_E(scheme) }
   209 	action scheme_finish { MARK_E(scheme) }
   209 	action host_start    { MARK_S(host) }
   210 	action host_start    { MARK_S(host) }
   210 	action host_finish   { MARK_E(host) }
   211 	action host_finish   { MARK_E(host) }
   217 	action host_error  { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
   218 	action host_error  { debug( 3, LOC, "Unable to parse the rule hostname.\n" ); }
   218 
   219 
   219 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
   220 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
   220 	path_segment    = '/' ( any - space )*;
   221 	path_segment    = '/' ( any - space )*;
   221 
   222 
   222 	sep            = space+;
   223 	sep       = space+;
   223 	hostname       = host_component ( '.' host_component )* '.'?;
   224 	hostname  = host_component ( '.' host_component )* '.'?;
   224 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
   225 	ipv4      = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
   225 	ipv6           = ( xdigit | ':' )+;
   226 	ipv6      = ( xdigit | ':' )+;
   226 
   227 
   227 	path_re        = ( any - space )+      >match_start  %match_finish @!match_error;
   228 	whitelist = ( '-' sep )          %wl_finish;
   228 	redir          = ( digit{3} ':' )      >redir_start  %redir_finish;
   229 	path_re   = ( any - space )+     >match_start  %match_finish @!match_error;
   229 
   230 
   230 	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish;
   231 	redir     = ( digit{3} ':' )     >redir_start  %redir_finish;
   231 	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
   232 	scheme    = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
   232 	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
   233 	host      = ( hostname | ipv4 )  >host_start   %host_finish   @!host_error;
   233 	path           = path_segment*         >path_start   %path_finish;
   234 	port      = ( ':' digit{1,5} )   >port_start   %port_finish;
   234 
   235 	path      = path_segment*        >path_start   %path_finish;
   235 	main := path_re sep ( redir? scheme? host port? path? );
   236 
       
   237 	rewrite   = ( sep redir? scheme? host port? path? );
       
   238 
       
   239 	main := whitelist? path_re rewrite?;
   236 }%%
   240 }%%
   237 
   241 
   238 	/* state machine */
   242 	/* state machine */
   239 	%% write exec;
   243 	%% write exec;
   240 
   244 
   295 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
   299 	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
   296 	token          = ( any - space )+;
   300 	token          = ( any - space )+;
   297 	redir          = ( digit{3} ':' );
   301 	redir          = ( digit{3} ':' );
   298 	host           = ( hostname | ipv4 );
   302 	host           = ( hostname | ipv4 );
   299 
   303 
   300 	key = ( host | '*' )      >key_start %key_finish @!key_error;
   304 	key = ( host | '*' )                      >key_start %key_finish @!key_error;
   301 	val = ( token sep token ) >val_start %val_finish @!val_error;
   305 	val = ( (token sep)? token (sep token)? ) >val_start %val_finish @!val_error;
       
   306 	#           wl       regex   rewrite
   302 	
   307 	
   303 	main:= key sep val '\n';
   308 	main:= key sep val '\n';
   304 }%%
   309 }%%
   305 
   310 
   306 	/* state machine */
   311 	/* state machine */
   344 		debug( 5, LOC, "Unable to allocate memory for parsed struct: %s\n", strerror(errno) );
   349 		debug( 5, LOC, "Unable to allocate memory for parsed struct: %s\n", strerror(errno) );
   345 		return( NULL );
   350 		return( NULL );
   346 	}
   351 	}
   347 
   352 
   348 	p_parsed->type      = 0;
   353 	p_parsed->type      = 0;
       
   354 	p_parsed->wl        = 0;
   349 	p_parsed->path_re   = NULL;
   355 	p_parsed->path_re   = NULL;
   350 	p_parsed->redir     = NULL;
   356 	p_parsed->redir     = NULL;
   351 	p_parsed->scheme    = NULL;
   357 	p_parsed->scheme    = NULL;
   352 	p_parsed->host      = NULL;
   358 	p_parsed->host      = NULL;
   353 	p_parsed->tld       = NULL;
   359 	p_parsed->tld       = NULL;
   448  */
   454  */
   449 void
   455 void
   450 parse_tld( parsed *p_request )
   456 parse_tld( parsed *p_request )
   451 {
   457 {
   452 	unsigned short int cs = 5, mark = 0;
   458 	unsigned short int cs = 5, mark = 0;
   453 	char *p   = p_request->host;
   459 	char *p  = p_request->host;
   454 	char *pe  = p + p_request->tokens.host_length;
   460 	char *pe = p + p_request->tokens.host_length;
   455 	char *ts  = 0, *te = 0, *eof = pe;
   461 	char *ts = 0, *te = 0, *eof = pe;
   456 
   462 
   457 %%{
   463 %%{
   458     machine tld_parser;
   464     machine tld_parser;
   459 
   465 
   460 	host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
   466 	host_component = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );