[Gd-chatter] r11504 - in trunk: fundev/sources/lib/string-extensions libraries/regular-expressions libraries/regular-expressions/tests

cgay at gwydiondylan.org cgay at gwydiondylan.org
Tue Nov 27 06:35:47 CET 2007


Author: cgay
Date: Tue Nov 27 06:35:45 2007
New Revision: 11504

Modified:
   trunk/fundev/sources/lib/string-extensions/string-hacking.dylan
   trunk/libraries/regular-expressions/od-library.dylan
   trunk/libraries/regular-expressions/parse.dylan
   trunk/libraries/regular-expressions/regex.dylan
   trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan
Log:
Job: 7357
Fixed [a-] and []...] character classes.
Improved parsing of PCRE test output file a bit:
  Ran 2071 checks:  1856 passed (89.7%), 161 failed, 0 not executed, 54 crashed

Modified: trunk/fundev/sources/lib/string-extensions/string-hacking.dylan
==============================================================================
--- trunk/fundev/sources/lib/string-extensions/string-hacking.dylan	(original)
+++ trunk/fundev/sources/lib/string-extensions/string-hacking.dylan	Tue Nov 27 06:35:45 2007
@@ -269,19 +269,26 @@
 define method parse-description (string :: <sequence>);
   let s = make(<parse-string>, string: string);
   let negated = (lookahead(s) == '^');
-  if (negated)   consume(s)   end;
+  if (negated)
+    consume(s)
+  end;
 
   let char-list  = #();
   let range-list = #();
-
   until (lookahead(s) = #f)         // until end of string
     let char = lookahead(s);
     consume(s);
     if (lookahead(s) = '-')
       consume(s);
       let second-char = lookahead(s);
-      consume(s);
-      range-list := add!(range-list, pair(char, second-char));
+      if (second-char)
+        consume(s);
+        range-list := add!(range-list, pair(char, second-char));
+      else
+        // e.g., [a-] is the set containing 'a' and '-'.
+        char-list := add!(char-list, char);
+        char-list := add!(char-list, '-');
+      end;
     elseif (char = '\\')
       let escaped-char = lookahead(s);
       consume(s);

Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan	(original)
+++ trunk/libraries/regular-expressions/od-library.dylan	Tue Nov 27 06:35:45 2007
@@ -56,8 +56,6 @@
       group-start,
       group-end,
       group-text,
-      groups-by-position,
-      groups-by-name,
       <invalid-match-group>;
 end module regexp;
 

Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan	(original)
+++ trunk/libraries/regular-expressions/parse.dylan	Tue Nov 27 06:35:45 2007
@@ -316,7 +316,6 @@
       // Insert more special characters here
 
     otherwise =>
-      let char = lookahead(s);
       consume(s);
       make(<parsed-character>, character: char);
   end select;
@@ -343,30 +342,39 @@
 define inline function parse-character-set
     (s :: <parse-string>, info :: <parse-info>)
  => (set :: <parsed-set>)
-  let set-string = make(<deque>);      // Need something that'll 
-                                       // preserve the right ordering
+  let set-string = make(<deque>);
   let start-index = s.parse-index;
-  for (char = lookahead(s) then lookahead(s),
-       until: char == ']')
-    consume(s);                    // eat char
-    if (~char)
-      parse-error(s.parse-string,
-                  "Unterminated character set at index %d.",start-index);
-    elseif (char ~== '\\')
-      push-last(set-string, char);
-    else
-      let char2 = lookahead(s);
-      consume(s);  // Eat escaped char
-      if (char2 == ']')
-        push-last(set-string, ']');
+  local method peek ()
+          lookahead(s)
+            | parse-error(s.parse-string,
+                          "Unterminated character set starting at at index %d.",
+                          start-index);
+        end;
+  block (done)
+    for (char = peek() then peek(),
+         charset-index from 0)
+      consume(s);
+      if (char == ']')
+        if (charset-index == 0)
+          push-last(set-string, char);  // e.g., []] is the set containing ']'.
+        else
+          done();
+        end;
+      elseif (char == '\\')
+        let char2 = peek();
+        consume(s);  // Eat escaped char
+        if (char2 == ']')
+          push-last(set-string, ']');
+        else
+          push-last(set-string, '\\');
+          push-last(set-string, char2);
+        end if;
       else
-        push-last(set-string, '\\');
-        push-last(set-string, char2);
+        push-last(set-string, char);
       end if;
-    end if;
-  end for;
-  consume(s);     // Eat ending brace
-  make(<parsed-set>, set: make(info.set-type, description: set-string));
+    end for;
+  end block;
+  make(<parsed-set>, set: make(info.set-type, description: set-string))
 end function parse-character-set;
 
 define constant any-char 

Modified: trunk/libraries/regular-expressions/regex.dylan
==============================================================================
--- trunk/libraries/regular-expressions/regex.dylan	(original)
+++ trunk/libraries/regular-expressions/regex.dylan	Tue Nov 27 06:35:45 2007
@@ -2,8 +2,11 @@
 Author: Carl Gay
 Synopsis: A new API for the regular-expressions library
 
+// todo -- Improve error message for <invalid-match-group> errors.
+//         Make %s and %= display the regex elided if it's too long.
+
+
 
-// Rename a few things...
 define constant <invalid-regexp> = <illegal-regexp>;
 
 
@@ -191,7 +194,7 @@
   else
     let ng = match.groups-by-position.size;
     signal(make(<invalid-match-group>,
-                format-string: "Group index %d is out of bounds for regex %s match.  %s",
+                format-string: "Group number %d is out of bounds for regex %s match.  %s",
                 format-arguments: list(group-number,
                                        match.regular-expression.regexp-pattern,
                                        if (ng == 1)

Modified: trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan	Tue Nov 27 06:35:45 2007
@@ -25,12 +25,12 @@
                                   lines: lines,
                                   start-line-number: line-number - lines.size));
           lines := make(<stretchy-vector>);
-        elseif (lines.size > 0) // a section never begins with a blank line
-          test-output("Line %d: lines.size = %d, matches = %s, prev = %s\n",
-                      line-number,
-                      lines.size,
-                      regexp-search(lines[lines.size - 1], $group-regexp),
-                      lines[lines.size - 1]);
+        elseif (lines.size < 3)
+          // A section must have a regex, a test string and one group result to make
+          // any sense.  Note we don't add the line to the section here.
+          test-output("Line %d: empty line in first 3 lines of a section.\n",
+                      line-number);
+        else
           add!(lines, line);
         end;
       else
@@ -56,6 +56,11 @@
   end
 end method consume-line;
 
+define method line-number
+    (section :: <section>) => (line-number :: <integer>)
+  section.start-line-number + section.%index
+end method line-number;
+
 define method peek-line
     (section :: <section>) => (line :: false-or(<string>))
   if (section.%index < section.section-lines.size)
@@ -74,44 +79,48 @@
 define function check-pcre-section
     (section :: <section>)
   let regexp = parse-pcre-regexp(section);
-  while (peek-line(section))
-    let test-string = consume-line(section);
-    //test-output("  test string: %s\n", test-string);
-    let group-strings = make(<stretchy-vector>);
-    block (done-with-this-test-string)
-      while (#t)
-        let line = peek-line(section);
-        let match = line & regexp-search(line, $group-regexp);
-        if (match)
-          consume-line(section);
-          let group-text = regexp-match-group(match, $group-index-of-what-pcre-matched);
-          //test-output("   pcre group: %s\n", group-text | "No match");
-          if (group-text)
-            add!(group-strings, group-text);
+  // If the section has fewer than 3 lines (a regex, a test string and at least
+  // one group result) then all we do is try to compile it (above).
+  if (section.section-lines.size >= 3)
+    while (peek-line(section))
+      let test-string = consume-line(section);
+      //test-output("  test string: %s\n", test-string);
+      let group-strings = make(<stretchy-vector>);
+      block (done-with-this-test-string)
+        while (#t)
+          let line = peek-line(section);
+          let match = line & regexp-search(line, $group-regexp);
+          if (match)
+            consume-line(section);
+            let group-text = regexp-match-group(match, $group-index-of-what-pcre-matched);
+            //test-output("   pcre group: %s\n", group-text | "No match");
+            if (group-text)
+              add!(group-strings, group-text);
+            else
+              assert(regexp-match-group(match, 0) = "No match",
+                     "previous line was 'No match'");
+              done-with-this-test-string();
+            end;
           else
-            assert(regexp-match-group(match, 0) = "No match",
-                   "previous line was 'No match'");
             done-with-this-test-string();
           end;
-        else
-          done-with-this-test-string();
         end;
       end;
-    end;
-    if (regexp)
-      check-no-errors(format-to-string("search for %s in %s",
-                                       test-string, regexp.regexp-pattern),
-                      regexp-search(test-string, regexp));
-      let match = block ()
-                    regexp-search(test-string, regexp)
-                  exception (ex :: <error>)
-                    #f
-                  end;
-      if (match)
-        compare-to-pcre-results(regexp.regexp-pattern, test-string, match, group-strings);
-      end;
-    end if;
-  end while;
+      if (regexp)
+        check-no-errors(format-to-string("search for %s in %s",
+                                         test-string, regexp.regexp-pattern),
+                        regexp-search(test-string, regexp));
+        let match = block ()
+                      regexp-search(test-string, regexp)
+                    exception (ex :: <error>)
+                      #f
+                    end;
+        if (match)
+          compare-to-pcre-results(regexp.regexp-pattern, test-string, match, group-strings);
+        end;
+      end if;
+    end while;
+  end if;
 end function check-pcre-section;
 
 define function parse-pcre-regexp
@@ -185,11 +194,16 @@
   if (match)
     check-equal(format-to-string("Match %s against %s -- same # of groups",
                                  test-string, pattern),
-                match.groups-by-position.size,
+                size(regexp-match-groups(match)),
                 pcre-groups.size);
     for (group-number from 0,
          pcre-group in pcre-groups)
-      let our-group = regexp-match-group(match, group-number);
+      // Adding block/exception here causes an infinite loop.
+      // Could it be related to using the Visual Studio 8 linker?
+      // The if also causes an infinite loop.  Hmmm.
+      let our-group = /* if (group-number < size(regexp-match-groups(match))) */
+                        regexp-match-group(match, group-number)
+                      /* end */;
       check-equal(format-to-string("Match %s against %s -- group %d is the same",
                                    test-string, pattern, group-number),
                   our-group,
@@ -202,7 +216,7 @@
                 pcre-groups.size);
   end if;
 end function compare-to-pcre-results;
-      
+
 define test pcre-test ()
   let source-directory = environment-variable("OPEN_DYLAN_USER_SOURCES");
   if (source-directory)



More information about the chatter mailing list