parser.rl
changeset 15 2706fc514dea
parent 14 51eb85ae4de4
child 18 d4ce82194b64
--- a/parser.rl	Fri Nov 04 20:34:28 2011 -0700
+++ b/parser.rl	Sat Nov 05 12:52:29 2011 -0700
@@ -199,10 +199,11 @@
 %%{
 	machine rule_parser;
 
-	action match_start    { MARK_S(path_re) }
-	action match_finish   { MARK_E(path_re) }
-	action redir_start    { MARK_S(redir) }
-	action redir_finish   { p_parsed->tokens.redir_length = 3; } # strip trailing colon
+	action match_start   { MARK_S(path_re) }
+	action match_finish  { MARK_E(path_re) }
+	action redir_start   { MARK_S(redir) }
+	action redir_finish  { p_parsed->tokens.redir_length = 3; } # strip trailing colon
+	action wl_finish     { p_parsed->wl = 1; }
 
 	action scheme_start  { MARK_S(scheme) }
 	action scheme_finish { MARK_E(scheme) }
@@ -219,20 +220,23 @@
 	host_component  = alnum | ( alnum [a-zA-Z0-9\-_]* alnum );
 	path_segment    = '/' ( any - space )*;
 
-	sep            = space+;
-	hostname       = host_component ( '.' host_component )* '.'?;
-	ipv4           = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
-	ipv6           = ( xdigit | ':' )+;
+	sep       = space+;
+	hostname  = host_component ( '.' host_component )* '.'?;
+	ipv4      = digit{1,3} '.' digit{1,3} '.' digit{1,3} '.' digit{1,3};
+	ipv6      = ( xdigit | ':' )+;
+
+	whitelist = ( '-' sep )          %wl_finish;
+	path_re   = ( any - space )+     >match_start  %match_finish @!match_error;
 
-	path_re        = ( any - space )+      >match_start  %match_finish @!match_error;
-	redir          = ( digit{3} ':' )      >redir_start  %redir_finish;
+	redir     = ( digit{3} ':' )     >redir_start  %redir_finish;
+	scheme    = ( alpha{3,5} '://' ) >scheme_start %scheme_finish;
+	host      = ( hostname | ipv4 )  >host_start   %host_finish   @!host_error;
+	port      = ( ':' digit{1,5} )   >port_start   %port_finish;
+	path      = path_segment*        >path_start   %path_finish;
 
-	scheme         = ( alpha{3,5} '://' )  >scheme_start %scheme_finish;
-	host           = ( hostname | ipv4 )   >host_start   %host_finish   @!host_error;
-	port           = ( ':' digit{1,5} )    >port_start   %port_finish;
-	path           = path_segment*         >path_start   %path_finish;
+	rewrite   = ( sep redir? scheme? host port? path? );
 
-	main := path_re sep ( redir? scheme? host port? path? );
+	main := whitelist? path_re rewrite?;
 }%%
 
 	/* state machine */
@@ -297,8 +301,9 @@
 	redir          = ( digit{3} ':' );
 	host           = ( hostname | ipv4 );
 
-	key = ( host | '*' )      >key_start %key_finish @!key_error;
-	val = ( token sep token ) >val_start %val_finish @!val_error;
+	key = ( host | '*' )                      >key_start %key_finish @!key_error;
+	val = ( (token sep)? token (sep token)? ) >val_start %val_finish @!val_error;
+	#           wl       regex   rewrite
 	
 	main:= key sep val '\n';
 }%%
@@ -346,6 +351,7 @@
 	}
 
 	p_parsed->type      = 0;
+	p_parsed->wl        = 0;
 	p_parsed->path_re   = NULL;
 	p_parsed->redir     = NULL;
 	p_parsed->scheme    = NULL;
@@ -450,9 +456,9 @@
 parse_tld( parsed *p_request )
 {
 	unsigned short int cs = 5, mark = 0;
-	char *p   = p_request->host;
-	char *pe  = p + p_request->tokens.host_length;
-	char *ts  = 0, *te = 0, *eof = pe;
+	char *p  = p_request->host;
+	char *pe = p + p_request->tokens.host_length;
+	char *ts = 0, *te = 0, *eof = pe;
 
 %%{
     machine tld_parser;