[Gd-chatter] r11582 - in trunk/libraries/regular-expressions: . tests

cgay at gwydiondylan.org cgay at gwydiondylan.org
Wed Dec 26 13:08:45 CET 2007


Author: cgay
Date: Wed Dec 26 13:08:44 2007
New Revision: 11582

Modified:
   trunk/libraries/regular-expressions/interface.dylan
   trunk/libraries/regular-expressions/od-library.dylan
   trunk/libraries/regular-expressions/parse.dylan
   trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt
   trunk/libraries/regular-expressions/tests/pcre.dylan
   trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
Log:
Bug: 7357
Renamed a few things.

Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan	(original)
+++ trunk/libraries/regular-expressions/interface.dylan	Wed Dec 26 13:08:44 2007
@@ -132,7 +132,7 @@
     (regex :: <string>, parse-info :: <parse-info>)
  => (parsed-regex :: <parsed-regex>, last-group :: <integer>);
   let key = make(<cache-key>, regex-string: regex, 
-		 character-set-type: parse-info.set-type); 
+		 character-set-type: parse-info.character-set-type); 
   let cached-value = element(*regex-cache*, key, default: #f);
   if (cached-value)
     values(cached-value.parse-tree, cached-value.last-group);

Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan	(original)
+++ trunk/libraries/regular-expressions/od-library.dylan	Wed Dec 26 13:08:44 2007
@@ -61,7 +61,6 @@
       regex-group-count,
       regex-position,
       make-regex-positioner,
-      regex-match,  // todo -- rename to regex-search-strings?
       regex-replace,
       make-regex-replacer,
     <regex-error>,

Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan	(original)
+++ trunk/libraries/regular-expressions/parse.dylan	Wed Dec 26 13:08:44 2007
@@ -178,21 +178,24 @@
 // being parsed.
 //
 define class <parse-info> (<object>)
-  // Whether or not the function includes \1, \2, etc in the regex.
-  // Name this has-backreferences, for consistency with the other slots.
-  // Add ? to all the has-* slots.  --cgay
-  // Also, not sure why anyone cares about these three things.
-  slot backreference-used :: <boolean> = #f;
-  slot has-alternatives :: <boolean> = #f;
-  slot has-quantifiers :: <boolean> = #f;
+  slot has-back-references? :: <boolean> = #f;
+  slot has-alternatives? :: <boolean> = #f;
+  slot has-quantifiers? :: <boolean> = #f;
   slot current-group-number :: <integer> = 0;
   constant slot group-number-to-name :: <table> = make(<table>);
-  constant slot set-type :: <class>,
+  constant slot character-set-type :: <class>,
     required-init-keyword: #"set-type";
+
+  // If true then . matches \n.  (?s) /s
   slot dot-matches-all? :: <boolean>,
     required-init-keyword: #"dot-matches-all";
-  slot verbose? :: <boolean>,
+
+  // Ignore whitespace and comments within a regex pattern. (?x) /x
+  slot extended? :: <boolean>,
     required-init-keyword: #"verbose";
+
+  // If multi-line? is true then ^ and $ match at \n boundaries as well as
+  // at the beginning and end of the subject string.  (?m) /m
   slot multi-line? :: <boolean>,
     required-init-keyword: #"multi-line";
 end class <parse-info>;
@@ -201,8 +204,8 @@
 // them via subpatterns like (?i).  Until then, this prevents warnings.
 begin
   dot-matches-all?-setter;
-  verbose?;
-  verbose?-setter;
+  extended?;
+  extended?-setter;
   multi-line?;
   multi-line?-setter;
 end;
@@ -253,9 +256,9 @@
   else
     values(optimized-regex,
 	   parse-info.current-group-number,
-	   parse-info.backreference-used,
-	   parse-info.has-alternatives,
-	   parse-info.has-quantifiers);
+	   parse-info.has-back-references?,
+	   parse-info.has-alternatives?,
+	   parse-info.has-quantifiers?);
   end if
 end method parse;
 
@@ -266,7 +269,7 @@
   if (~alternative)
     parse-error(str.parse-string, "");
   elseif (lookahead(str) = '|')
-    info.has-alternatives := #t;
+    info.has-alternatives? := #t;
     make(<union>, left: alternative, right: parse-regex(consume(str), info))
   else
     alternative
@@ -291,22 +294,22 @@
   let char = lookahead(str);
   select (char by \=)
     '*' =>
-      info.has-quantifiers := #t;
+      info.has-quantifiers? := #t;
       consume(str);
       make(<quantified-atom>, min: 0, atom: atom);
 
     '+' =>
-      info.has-quantifiers := #t;
+      info.has-quantifiers? := #t;
       consume(str);
       make(<quantified-atom>, min: 1, atom: atom);
 
     '?' =>
-      info.has-quantifiers := #t;
+      info.has-quantifiers? := #t;
       consume(str);
       make(<quantified-atom>, min: 0, max: 1, atom: atom);
 
     '{' =>
-      info.has-quantifiers := #t;
+      info.has-quantifiers? := #t;
       consume(str);
       parse-minmax-quantifier(atom, str);
 
@@ -564,7 +567,7 @@
       end select
     end for;
   end block;
-  make(<parsed-set>, set: make(info.set-type, description: set-string))
+  make(<parsed-set>, set: make(info.character-set-type, description: set-string))
 end function parse-character-set;
 
 // This only handles escaped characters *outside* of a character
@@ -581,10 +584,12 @@
   end;
   consume(str);
   select (next-char)
-    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
-      info.backreference-used := #t;
-      make(<parsed-backreference>, group: digit-to-integer(next-char));
-      
+    // not yet
+    //'0' =>
+    //  parse-octal-escape(str, info);
+    '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
+      parse-back-reference(str, info);
+
     // Hmm.  Why would you write \\n in your regex instead of \n?  It has the
     // same effect.  Also, what about the rest of the Dylan character escapes?
     // --cgay
@@ -611,6 +616,34 @@
   end select;
 end method parse-escaped-character;
 
+define function parse-back-reference
+    (str :: <parse-string>, info :: <parse-info>)
+ => (backref :: <parsed-backreference>)
+  // We've just parsed '\' and a digit between '1' and '9'.
+  let start-index = str.parse-index - 1;   // include the char we already read
+  let end-index = str.parse-index;
+  while (member?(lookahead(str), $digit-chars))
+    end-index := end-index + 1;
+    consume(str);
+  end;
+  let group = string-to-integer(copy-sequence(str.parse-string,
+                                              start: start-index,
+                                              end: end-index));
+  // todo -- If I understand the PCRE doc correctly it's not an error for a
+  // backref to be a forward ref if it is between 1 and 9, which implies we
+  // need to do a separate pass AFTER the entire parse is done, so we can
+  // verify that there are enough groups.  Does a forward reference ever work???
+  // Maybe only if numbering the groups the way perl 6 does it rather than
+  // strictly left to right?
+  if (group >= info.current-group-number)
+    parse-error(str.parse-string,
+                "Invalid back reference at index %d",
+                str.parse-index);
+  end if;
+  info.has-back-references? := #t;
+  make(<parsed-backreference>, group: group)
+end function parse-back-reference;
+
 define method is-anchored? (regex :: <parsed-regex>)
  => (result :: <boolean>);
   select (regex by instance?)

Modified: trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt	(original)
+++ trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt	Wed Dec 26 13:08:44 2007
@@ -4270,10 +4270,6 @@
 No match
     b
 No match
-    
-
-/a*/
-    
 
 /([abc])*d/
     abbbcd
@@ -4830,9 +4826,6 @@
 
 /abc/i
 
-/a*/i
-    
-
 /([abc])*d/i
     ABBBCD
  0: ABBBCD

Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan	Wed Dec 26 13:08:44 2007
@@ -164,9 +164,9 @@
           values(pattern, flags)
         end method read-pattern-and-flags;
   let (pattern, flags) = read-pattern-and-flags();
-  //test-output("pattern: %s (flags = %s)\n", pattern, flags);
+  //test-output("pattern: '%s' (flags = '%s')\n", pattern, flags);
   for (flag in flags)
-    check-true(sprintf("For regex %s, flag %s is recognized", pattern, flag),
+    check-true(sprintf("For regex '%s', flag '%s' is recognized", pattern, flag),
                member?(flag, "ixms"));
   end for;
   block ()
@@ -180,7 +180,7 @@
   // <invalid-character-set-description> which isn't related to <regex-error>
   // (and isn't even exported).
   exception (ex :: <error>)
-    check-true(sprintf("can compile regex %s", pattern), #f);
+    check-true(sprintf("can compile regex '%s'", pattern), #f);
     //test-output("  ERROR: %s\n", ex);
     #f
   end block
@@ -198,7 +198,7 @@
      pcre-groups :: <sequence>)
  => ()
   if (match)
-    check-equal(sprintf("Match %s against %s -- same # of groups",
+    check-equal(sprintf("Match '%s' against regex '%s' -- same # of groups",
                         test-string, pattern),
                 size(match-groups(match)),
                 pcre-groups.size);
@@ -210,13 +210,13 @@
       let our-group = /* if (group-number < size(match-groups(match))) */
                         match-group(match, group-number)
                       /* end */;
-      check-equal(sprintf("Match %s against %s -- group %d is the same",
+      check-equal(sprintf("Match '%s' against regex '%s' -- group %d is the same",
                           test-string, pattern, group-number),
                   our-group,
                   pcre-group);
     end;
   else
-    check-equal(sprintf("Pattern %s doesn't match test string %s",
+    check-equal(sprintf("Regex '%s' doesn't match test string '%s'",
                         pattern, test-string),
                 0,
                 pcre-groups.size);

Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	Wed Dec 26 13:08:44 2007
@@ -52,15 +52,18 @@
   end;
 end function check-matches;
 
-// These are to cover the basics, as I add new features to the code.
-// The PCRE tests should cover a lot of the more esoteric cases, I hope.
+// These are to cover the basics, as I add new features to the code or
+// read through the pcrepattern docs.  The PCRE tests should cover a lot
+// of the more esoteric cases, I hope.
 //
 define test ad-hoc-regex-test ()
+  //args: check-matches(regex, string, group1, group2, ..., flag1: x, flag2: y, ...)
   check-matches("", "abc", "");
   check-matches("a()b", "ab", "ab", "");
   check-matches("a(?#blah)b", "ab", "ab"); // comments shouldn't create a group
   check-matches(".", "x", "x");
   check-matches(".", "\n", "\n", dot-matches-all: #t);
+  check-matches("[a-]", "-", "-");
 end test ad-hoc-regex-test;
 
 // All these regexes should signal <invalid-regex> on compilation.
@@ -68,7 +71,8 @@
 define test invalid-regex-test ()
   let patterns = #(
     "(?P<name>x)(?P<name>y)",         // can't use same name twice
-    "(?@abc)"                         // invalid extended character '@'
+    "(?@abc)",                        // invalid extended character '@'
+    "(a)\\2"                          // invalid back reference
     );
   for (pattern in patterns)
     check-condition(sprintf("Compiling '%s' gets an error", pattern),



More information about the chatter mailing list