[Gd-chatter] r11686 - in trunk/libraries/regular-expressions: . tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Tue Feb 19 00:54:49 CET 2008
Author: cgay
Date: Tue Feb 19 00:54:48 2008
New Revision: 11686
Added:
trunk/libraries/regular-expressions/tests/api.dylan (contents, props changed)
Removed:
trunk/libraries/regular-expressions/regex.dylan
Modified:
trunk/libraries/regular-expressions/gd-library.dylan
trunk/libraries/regular-expressions/gd-regular-expressions.lid
trunk/libraries/regular-expressions/interface.dylan
trunk/libraries/regular-expressions/match.dylan
trunk/libraries/regular-expressions/od-library.dylan
trunk/libraries/regular-expressions/od-regular-expressions.lid
trunk/libraries/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/tests/library.dylan
trunk/libraries/regular-expressions/tests/pcre.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
Log:
job: 7357
Cleanup interface and use new common-dylan split implementation.
Removed match-groups, which has been replaced by groups-by-position.
Modified: trunk/libraries/regular-expressions/gd-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/gd-library.dylan (original)
+++ trunk/libraries/regular-expressions/gd-library.dylan Tue Feb 19 00:54:48 2008
@@ -31,59 +31,53 @@
//
//======================================================================
-
-// Added regex module with new API. --cgay, June 2007
-
define library regular-expressions
use common-dylan;
use string-extensions;
use table-extensions;
export
- regex, // new API
- regular-expressions; // old API
+ regular-expressions,
+ regex-implementation;
end library regular-expressions;
-define module regex // new API module
+define module regular-expressions
create
- compile-regex,
- regex-search,
- regex-search-strings,
<regex>,
+ <regex-match>,
+ <match-group>,
+ <regex-error>,
<invalid-regex>,
- invalid-regex-pattern,
- <regex-match>, // results of a successful search
- regex-match-group,
- regex-match-group-count,
- group-start,
- group-end,
- group-text,
- <invalid-match-group>;
-end module regex;
+ <invalid-match-group>,
-define module regular-expressions // old API module
- create
- regexp-position, make-regexp-positioner,
- regexp-match,
- regexp-replace, make-regexp-replacer,
- translate, make-translator,
- split, make-splitter,
- join,
- <illegal-regexp>,
- regexp-pattern;
+ // Compiling and accessing regex info
+ compile-regex,
+ regex-group-count,
+ regex-pattern,
- create
- split-string;
+ // Search and replace
+ regex-search,
+ regex-search-strings,
+ regex-position,
+ regex-replace,
+
+ // Accessing match groups and individual group info
+ match-group,
+ match-groups,
+ group-text,
+ group-start,
+ group-end;
end module regular-expressions;
-define module regular-expressions-impl
- use common-dylan,
- exclude: { split };
+define module regex-implementation
+ use common-dylan;
use string-conversions;
use character-type;
use string-hacking;
use %do-replacement;
use %parse-string;
use substring-search;
- use regular-expressions; // API module
- use regex;
-end module regular-expressions-impl;
+ use regular-expressions,
+ export: all;
+ export
+ <mark>;
+end module regex-implementation;
Modified: trunk/libraries/regular-expressions/gd-regular-expressions.lid
==============================================================================
--- trunk/libraries/regular-expressions/gd-regular-expressions.lid (original)
+++ trunk/libraries/regular-expressions/gd-regular-expressions.lid Tue Feb 19 00:54:48 2008
@@ -4,4 +4,4 @@
match
parse
interface
- regex
+
Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan (original)
+++ trunk/libraries/regular-expressions/interface.dylan Tue Feb 19 00:54:48 2008
@@ -1,14 +1,13 @@
-module: regular-expressions
+module: regex-implementation
author: Nick Kramer (nkramer at cs.cmu.edu)
-synopsis: This provides a useable interface for users. Functions
- defined outside this file are really too strange and quirky
- to be of use to people.
+ Carl Gay (changed everything except regex-position)
+synopsis: The regular-expressions API, insofar as it can be separated into one file.
copyright: see below
//======================================================================
//
// Copyright (c) 1994 Carnegie Mellon University
-// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
+// Copyright (c) 1998-2008 Gwydion Dylan Maintainers
// All rights reserved.
//
// Use and copying of this software and preparation of derivative
@@ -32,302 +31,447 @@
//
//======================================================================
-// There are quite a few make-fooer functions hanging around. Now
-// that regex-position does caching, these are basically useless, but
-// we've kept them around for backwards compatibility. Unfortunately,
-// internally most of the functions are implemented in terms of
-// make-regex-positioner. To minimize the amount of rewriting, I've
-// liberally applied seals and inline declarations so that
-// make-regex-positioner won't clobber all type information. The
-// downside, of course, is that everything's sealed, but hey, no one
-// ever subclassed regex-position anyway.
+//// Caching
-
-// Caching
-//
// Parsing a regex is not cheap, so we cache the parsed regexs and
// only parse a string if we haven't seen it before. Because in
// practice almost all regex strings are string literals, we're free
// to choose == or = depending on whatever's fastest. However,
-// because a string is parsed differently depending on whether the
-// search is case sensitive or not, we also have to keep track of that
-// information as well. (The case dependent parse boils down to the
-// parse creating a <character-set>, which must be either case
-// sensitive or case insensitive)
-//
-
-// This caching scheme fails if we later introduce the ability to change
-// attributes such as case-sensitivity mid-parse, the way (I believe) perl
-// does? --cgay
-
-// ### Currently, only regex-position uses this cache, because the
-// other functions are still using make-regex-positioner. With
-// caching, that make-regex-whatever stuff should probably go.
-
-// <cache-key> -- internal
-//
-// What we use for keys in the *regex-cache*.
-//
-define class <cache-key> (<object>)
- constant slot regex-string :: <string>,
- required-init-keyword: #"regex-string";
- constant slot character-set-type :: <class>,
- required-init-keyword: #"character-set-type";
-end class <cache-key>;
-
-// <cache-element> -- internal
-//
-// What we use for elements in a *regex-cache*
+// because a string is parsed differently depending on the arguments
+// passed to compile-regex, we also have to keep track of that
+// information as well.
//
-define class <cache-element> (<object>)
- constant slot parse-tree :: <parsed-regex>,
- required-init-keyword: #"parse-tree";
- constant slot last-group :: <integer>,
- required-init-keyword: #"last-group";
-end class <cache-element>;
-// <regex-cache> -- internal
-//
-// Maps <cache-key> to <cache-element>. ### Ideally, we'd be using
-// weak pointers to these strings. In practice, however, most of the
-// regex strings are literals, so this isn't usually a drawback.
-//
-// This used to compare strings with == rather than =, but this leaks
-// lots of memory
-//
define class <regex-cache> (<table>) end;
-// table-protocol{<regex-cache>} -- method on imported G.F.
-//
-define method table-protocol (table :: <regex-cache>)
- => (equal? :: <function>, hash :: <function>);
- values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
- => res :: <boolean>;
- key1.regex-string = key2.regex-string
- & key1.character-set-type == key2.character-set-type;
- end method,
- method (key :: <cache-key>, initial-state) => (id :: <integer>, state); // hash()
- let (string-id, string-state) = object-hash(key.regex-string, initial-state);
- let (set-type-id, set-type-state)
- = object-hash(key.character-set-type, string-state);
- values(merge-hash-ids(string-id, set-type-id, ordered: #t), set-type-state);
- end method);
+define method table-protocol
+ (table :: <regex-cache>)
+ => (equal? :: <function>, hash :: <function>)
+ local method hash (key :: <list>, initial-state)
+ => (id :: <integer>, state)
+ let (id, state) = string-hash(head(key), initial-state);
+ for (boolean in tail(key))
+ let (next-id, next-state) = object-hash(boolean, state);
+ id := merge-hash-ids(id, next-id, ordered: #t);
+ state := next-state;
+ end;
+ values(id, state)
+ end;
+ values(\=, hash)
end method table-protocol;
-// *regex-cache* -- internal
-//
-// The only instance of <regex-cache>. ### Not threadsafe.
-//
// Technically not thread safe, but does it matter? Worst case seems to
// be a duplicated regex parse. --cgay
//
define constant *regex-cache* = make(<regex-cache>);
-// parse-or-use-cached -- internal
+// Compile the given string into an optimized regular expression.
//
-// Tries to use the cached version of the regex, and if not possible,
-// parses it and adds it to the cache.
-//
-define inline function parse-or-use-cached
- (regex :: <string>, parse-info :: <parse-info>)
- => (parsed-regex :: <parsed-regex>, last-group :: <integer>);
- let key = make(<cache-key>, regex-string: regex,
- character-set-type: parse-info.character-set-type);
- let cached-value = element(*regex-cache*, key, default: #f);
- if (cached-value)
- values(cached-value.parse-tree, cached-value.last-group);
+// @param case-sensitive -- Whether to be case sensitive when matching character
+// sets (e.g., [a-z]). This does not affect other character/string matching yet.
+// TODO -- but it should
+//
+// @param verbose -- If true, allows you to write regular expressions that
+// are easier to read by including whitespace and comments in them that
+// will be ignored.
+//
+// @param multi-line -- If true, '^' matches at the beginning of the string and
+// at the beginning of each line (immediately following each newline); and '$'
+// matches at the end of the string and at the end of each line (immediately
+// preceding each newline). By default, "^" matches only at the beginning of
+// the string, and "$" only at the end of the string.
+//
+// @param dot-matches-all -- Normally '.' matches any character except for
+// newline. If this parameter is true '.' matches newline as well.
+//
+// @param use-cache -- If true then check for a regex in the cache matching
+// the given set of arguments. If not found in the cache, compile it and
+// then add it to the cache (and return it).
+//
+// This function signals <invalid-regex> if the regular expression is invalid.
+//
+define sealed generic compile-regex
+ (pattern :: <string>,
+ #key case-sensitive :: <boolean> = #t,
+ verbose :: <boolean> = #f,
+ multi-line :: <boolean> = #f,
+ dot-matches-all :: <boolean> = #f,
+ use-cache :: <boolean> = #t)
+ => (regex :: <regex>);
+
+define method compile-regex
+ (pattern :: <string>,
+ #key case-sensitive :: <boolean> = #t,
+ verbose :: <boolean> = #f,
+ multi-line :: <boolean> = #f,
+ dot-matches-all :: <boolean> = #f,
+ use-cache :: <boolean> = #t)
+ => (regex :: <regex>)
+ if (use-cache)
+ let cache-key = list(pattern, case-sensitive, verbose, multi-line,
+ dot-matches-all);
+ element(*regex-cache*, cache-key, default: #f)
+ | begin
+ *regex-cache*[cache-key]
+ := compile-regex(pattern,
+ case-sensitive: case-sensitive,
+ verbose: verbose,
+ dot-matches-all: dot-matches-all,
+ use-cache: #f);
+ end
else
- let (parsed-regex, last-group) = parse(regex, parse-info);
- *regex-cache*[key] := make(<cache-element>,
- parse-tree: parsed-regex,
- last-group: last-group);
- values(parsed-regex, last-group);
- end if;
-end function parse-or-use-cached;
-
-
-// regex positioner stuff
+ parse(pattern,
+ make-parse-info(case-sensitive: case-sensitive,
+ verbose: verbose,
+ multi-line: multi-line,
+ dot-matches-all: dot-matches-all))
+ end
+end method compile-regex;
// Find the position of a regular expression inside a string. If the
// regex is not found, return #f, otherwise return a variable number
-// of marks.
-//
-define function regex-position
- (regex :: <string>, big :: <string>, #key start: big-start = 0,
- end: big-end = #f, case-sensitive = #f)
- => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>));
- let substring = make(<substring>, string: big, start: big-start,
- end: big-end | big.size);
- let (parsed-regex, last-group)
- = parse-or-use-cached(regex, make-parse-info(case-sensitive: case-sensitive));
-
+// of marks. This is a low-level API, returning indices marking the
+// start and end of groups. Use regex-search if you want to get a
+// <regex-match> object back.
+//
+define generic regex-position
+ (regex :: <object>, big :: <string>,
+ #key start :: <integer>,
+ end: epos :: <integer>,
+ case-sensitive :: <boolean>)
+ => (regex-start :: false-or(<integer>), #rest marks);
+
+define method regex-position
+ (pattern :: <string>, string :: <string>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>))
+ regex-position(compile-regex(pattern), string, start: start, end: epos,
+ case-sensitive: case-sensitive)
+end method regex-position;
+
+define method regex-position
+ (regex :: <regex>, string :: <string>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>))
+ let substring = make(<substring>, string: string, start: start, end: epos);
let (matched, marks)
- = if (parsed-regex.is-anchored?)
- anchored-match-root?(parsed-regex, substring, case-sensitive,
- last-group + 1, #f);
+ = if (regex.is-anchored?)
+ let searcher = #f;
+ anchored-match-root?(regex, substring, case-sensitive,
+ regex.regex-group-count, searcher);
else
- let initial = parsed-regex.initial-substring;
+ let initial = regex.initial-substring;
let searcher = ~initial.empty?
& make-substring-positioner(initial, case-sensitive: case-sensitive);
- match-root?(parsed-regex, substring, case-sensitive, last-group + 1,
+ match-root?(regex, substring, case-sensitive, regex.regex-group-count,
searcher);
end if;
if (matched)
- apply(values, marks);
+ apply(values, marks)
else
- #f
- end if;
-end function regex-position;
-
-// Once upon a time, this was how you interfaced to the NFA stuff
-// (maximum-compile: #t). That's gone. Now it's just here for
-// backwards compatibility. All keywords except case-sensitive are
-// now ignored.
+ #f
+ end
+end method regex-position;
+
+// Deprecated. Use curry(regex-position, regex) or a local method instead.
//
define inline function make-regex-positioner
- (regex :: <string>,
- #key byte-characters-only = #f, need-marks = #t, maximum-compile = #f,
- case-sensitive = #f)
- => regex-positioner :: <function>;
- method (big :: <string>, #key start: big-start = 0,
- end: big-end = #f)
+ (regex :: type-union(<string>, <regex>),
+ #key case-sensitive :: <boolean> = #t)
+ => (regex-positioner :: <function>)
+ method (string :: <string>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = string.size)
=> (regex-start :: false-or(<integer>),
- #rest marks :: false-or(<integer>));
- regex-position(regex, big, case-sensitive: case-sensitive,
- start: big-start, end: big-end);
+ #rest marks :: false-or(<integer>))
+ regex-position(regex, string,
+ case-sensitive: case-sensitive,
+ start: start,
+ end: epos);
end method;
end function make-regex-positioner;
-
-// Functions based on regex-position
-
-define function regex-replace
- (input :: <string>, regex :: <string>, new-substring :: <string>,
- #key count = #f, case-sensitive = #f, start = 0, end: input-end = #f)
- => changed-string :: <string>;
+define generic regex-replace
+ (regex :: <object>, big :: <string>, new-substring :: <string>,
+ #key start :: <integer>,
+ end: epos :: <integer>,
+ count :: false-or(<integer>),
+ case-sensitive :: <boolean>)
+ => (new-string :: <string>);
+
+define method regex-replace
+ (regex :: <string>, big :: <string>, new-substring :: <string>,
+ #key count :: false-or(<integer>),
+ start :: <integer> = 0,
+ end: epos :: <integer> = big.size,
+ case-sensitive :: <boolean> = #t)
+ => (new-string :: <string>)
+ regex-replace(compile-regex(regex), big, new-substring,
+ start: start,
+ end: epos,
+ count: count,
+ case-sensitive: case-sensitive)
+end method regex-replace;
+
+define method regex-replace
+ (regex :: <regex>, big :: <string>, new-substring :: <string>,
+ #key count :: false-or(<integer>),
+ start :: <integer> = 0,
+ end: epos :: <integer> = big.size,
+ case-sensitive :: <boolean> = #t)
+ => (new-string :: <string>)
let positioner
= make-regex-positioner(regex, case-sensitive: case-sensitive);
- do-replacement(positioner, new-substring, input, start,
- input-end, count, #t);
-end function regex-replace;
-
-define inline function make-regex-replacer
- (regex :: <string>, #key replace-with, case-sensitive = #f)
- => replacer :: <function>;
- let positioner
- = make-regex-positioner(regex, case-sensitive: case-sensitive);
- if (replace-with)
- method (input :: <string>, #key count: count,
- start = 0, end: input-end = #f)
- => string :: <string>;
- do-replacement(positioner, replace-with, input, start,
- input-end, count, #t);
- end method;
- else
- method (input :: <string>, new-substring :: <string>,
- #key count = #f, start = 0, end: input-end = #f)
- => string :: <string>;
- do-replacement(positioner, new-substring, input,
- start, input-end, count, #t);
- end method;
- end if;
-end function make-regex-replacer;
-
-// Like Perl's split function
-//
-define function split
- (input :: <string>, pattern :: <string>,
- #key count = #f, remove-empty-items = #t, start = 0, end: input-end = #f)
- => (strings :: <sequence>);
- let positioner = make-regex-positioner(pattern);
- split-string(positioner, input, start, input-end | size(input),
- count, remove-empty-items);
-end function split;
-
-define inline function make-splitter
- (pattern :: <string>) => splitter :: <function>;
- let positioner = make-regex-positioner(pattern);
- method (string :: <string>, #key count = #f,
- remove-empty-items = #t, start = 0, end: input-end = #f)
- => (#rest whole-bunch-of-strings :: <string>);
- split-string(positioner, string, start, input-end | size(string),
- count, remove-empty-items);
- end method;
-end function make-splitter;
+ do-replacement(positioner, new-substring, big, start,
+ epos, count, #t);
+end method regex-replace;
+
+// todo -- Improve error message for <invalid-match-group> errors.
+// Make %s and %= display the regex elided if it's too long.
-// Used by split. Not exported. (Yes it is. --cgay)
-//
-define function split-string
- (positioner :: <function>, input :: <string>, start :: <integer>,
- input-end :: <integer>, count :: false-or(<integer>),
- remove-empty-items :: <object>)
- => (strings :: <sequence>);
- let strings = make(<deque>);
- block (done)
- let end-of-last-match = 0;
- let start-of-where-to-look = start;
- let string-number = 1; // Since count: starts at 1, so
- // should string-number
- while (#t)
- let (substring-start, substring-end)
- = positioner(input, start: start-of-where-to-look, end: input-end);
- if (~substring-start | (count & (count <= string-number)))
- push-last(strings, copy-sequence(input, start: end-of-last-match));
- done();
- elseif ((substring-start = start-of-where-to-look)
- & remove-empty-items)
- // delimited item is empty
- end-of-last-match := substring-end;
- start-of-where-to-look := end-of-last-match;
+
+
+// Returns a <regex-match> containing info about a successful match, or #f if
+// no match was found.
+//
+// @param big -- The string in which to search.
+// @param pattern -- The pattern to search for. If not a <regex>, it will be
+// compiled first with compile-regex (implying that <invalid-regex> may be
+// signalled), using the defaults for the keyword arguments. If you wish
+// to override them, call compile-regex directly.
+// @param anchored -- Whether or not the search should be anchored at the start
+// position. This is useful because "^..." will only match at the beginning
+// of a string, or after \n if the regex was compiled with multi-line = #t.
+// @param start -- Where to begin the search.
+// @param end -- Where to stop searching.
+// @param case-sensitive -- Whether to be case-sensitive while matching. Default
+// is #t. (I don't believe this affects character set (e.g., [a-z]) matching.
+// Check it.)
+//
+// todo -- Should $ anchor at the provided end position or at the end of the string?
+//
+define sealed generic regex-search
+ (pattern :: <object>, string :: <string>,
+ #key anchored :: <boolean>,
+ start :: <integer>,
+ end: epos :: <integer>,
+ case-sensitive :: <boolean>)
+ => (match :: false-or(<regex-match>));
+
+define method regex-search
+ (pattern :: <string>, string :: <string>,
+ #key anchored :: <boolean> = #f,
+ start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (match :: false-or(<regex-match>))
+ regex-search(compile-regex(pattern), string,
+ anchored: anchored,
+ start: start,
+ end: epos,
+ case-sensitive: case-sensitive)
+end method regex-search;
+
+define method regex-search
+ (pattern :: <regex>, string :: <string>,
+ #key anchored :: <boolean> = #f,
+ start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (match :: false-or(<regex-match>))
+ let substring = make(<substring>, string: string, start: start, end: epos);
+ let num-groups = pattern.regex-group-count;
+ let (matched?, marks)
+ = if (pattern.is-anchored?)
+ anchored-match-root?(pattern, substring, case-sensitive, num-groups, #f);
else
- let new-string = copy-sequence(input, start: end-of-last-match,
- end: substring-start);
- if (~new-string.empty? | ~remove-empty-items)
- push-last(strings, new-string);
- string-number := string-number + 1;
- end-of-last-match := substring-end;
- start-of-where-to-look := end-of-last-match;
- end if;
+ let initial = pattern.initial-substring;
+ let searcher = ~initial.empty?
+ & make-substring-positioner(initial, case-sensitive: case-sensitive);
+ match-root?(pattern, substring, case-sensitive, num-groups, searcher);
end if;
- end while;
- end block;
- if (remove-empty-items)
- remove!(strings, #f, test: method (a, b) a.empty? end);
+ if (matched?)
+ let regex-match = make(<regex-match>, regular-expression: pattern);
+ let group-number-to-name :: <table> = pattern.group-number-to-name;
+ for (index from 0 below marks.size by 2)
+ let group-number = floor/(index, 2);
+ let group-name = element(group-number-to-name, group-number, default: #f);
+ let bpos = marks[index];
+ let epos = marks[index + 1];
+ if (bpos & epos)
+ add-group(regex-match,
+ make(<match-group>,
+ text: copy-sequence(string, start: bpos, end: epos),
+ start: bpos,
+ end: epos),
+ group-name);
+ else
+ // This group wasn't matched.
+ add-group(regex-match, #f, group-name);
+ end;
+ end;
+ regex-match
else
- strings
- end if;
-end function split-string;
-
-// join--like Perl's join
-//
-// This is not really any more efficient than concatenate-as, but it's
-// more convenient.
-//
-define function join (delimiter :: <byte-string>, #rest strings)
- => big-string :: <byte-string>;
- let length = max(0, (strings.size - 1 ) * delimiter.size);
- for (string in strings)
- length := length + string.size;
- end for;
- let big-string = make(<byte-string>, size: length);
- let big-index = 0;
- for (i from 0 to strings.size - 2) // Don't iterate over the last string
- let string = strings[i];
- let new-index = big-index + string.size;
- big-string := replace-subsequence!(big-string, string,
- start: big-index, end: new-index);
- big-index := new-index;
- let new-index = big-index + delimiter.size;
- big-string := replace-subsequence!(big-string, delimiter,
- start: big-index, end: new-index);
- big-index := new-index;
- end for;
- if (strings.size > 0)
- big-string
- := replace-subsequence!(big-string, strings.last,
- start: big-index, end: big-string.size);
- end if;
- big-string;
-end function join;
-
+ #f
+ end
+end method regex-search;
+
+// Like regex-search, but returns a string or #f for each group in the regular
+// expression, instead of a <regex-match>.
+define sealed generic regex-search-strings
+ (pattern :: <object>, string :: <string>,
+ #key anchored :: <boolean>,
+ start :: <integer>,
+ end: epos :: <integer>,
+ case-sensitive :: <boolean>)
+ => (#rest strings);
+
+define method regex-search-strings
+ (pattern :: <string>, string :: <string>,
+ #key anchored :: <boolean> = #f,
+ start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (#rest strings)
+ regex-search-strings(compile-regex(pattern), string,
+ anchored: anchored,
+ start: start,
+ end: epos,
+ case-sensitive: case-sensitive)
+end method regex-search-strings;
+
+define method regex-search-strings
+ (pattern :: <regex>, string :: <string>,
+ #key anchored :: <boolean> = #f,
+ start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ case-sensitive :: <boolean> = #t)
+ => (#rest strings)
+ let match = regex-search(pattern, string,
+ anchored: anchored,
+ start: start,
+ end: epos,
+ case-sensitive: case-sensitive);
+ if (match)
+ apply(values, map(method (group) group & group.group-text end,
+ match.groups-by-position))
+ else
+ #f
+ end
+end method regex-search-strings;
+
+define sealed class <match-group> (<object>)
+ constant slot group-text :: <string>,
+ required-init-keyword: text:;
+ constant slot group-start :: <integer>,
+ required-init-keyword: start:;
+ constant slot group-end :: <integer>,
+ required-init-keyword: end:;
+end class <match-group>;
+
+define sealed class <regex-match> (<object>)
+ // Groups by position. Zero is the entire match.
+ constant slot groups-by-position :: <stretchy-vector> = make(<stretchy-vector>);
+ // Named groups, if any. Initial size 0 on the assumption that most regular
+ // expressions won't use named groups.
+ constant slot groups-by-name :: <string-table> = make(<string-table>, size: 0);
+ constant slot regular-expression :: <regex>, required-init-keyword: regular-expression:;
+end class <regex-match>;
+
+define method add-group
+ (match :: <regex-match>,
+ group :: false-or(<match-group>),
+ name :: false-or(<string>))
+ => (match :: <regex-match>)
+ add!(match.groups-by-position, group);
+ if (name)
+ match.groups-by-name[name] := group;
+ end;
+ match
+end;
+
+define sealed class <invalid-match-group> (<regex-error>)
+end class <invalid-match-group>;
+
+// This has methods for group :: <string> and group :: <integer>.
+// Group zero is always the entire match.
+//
+define sealed generic match-group
+ (match :: <regex-match>, group :: <object>)
+ => (text :: false-or(<string>),
+ start-index :: false-or(<integer>),
+ end-index :: false-or(<integer>));
+
+define method match-group
+ (match :: <regex-match>, group-number :: <integer>)
+ => (text :: false-or(<string>),
+ start-index :: false-or(<integer>),
+ end-index :: false-or(<integer>))
+ if (0 <= group-number & group-number < match.groups-by-position.size)
+ let group = match.groups-by-position[group-number];
+ if (group)
+ values(group.group-text, group.group-start, group.group-end)
+ else
+ values(#f, #f, #f)
+ end
+ else
+ let ng = match.groups-by-position.size;
+ signal(make(<invalid-match-group>,
+ format-string: "Group number %d is out of bounds for regex %s match. %s",
+ format-arguments: list(group-number,
+ match.regular-expression.regex-pattern,
+ if (ng == 1)
+ "There is only 1 group."
+ else
+ format-to-string("There are %d groups.", ng)
+ end)));
+ end;
+end method match-group;
+
+define method match-group
+ (match :: <regex-match>, group :: <string>)
+ => (text :: false-or(<string>),
+ start-index :: false-or(<integer>),
+ end-index :: false-or(<integer>))
+ let group = element(match.groups-by-name, group, default: #f);
+ if (group)
+ values(group.group-text, group.group-start, group.group-end)
+ else
+ signal(make(<invalid-match-group>,
+ format-string: "There is no group named %=.",
+ format-arguments: list(group)));
+ end
+end method match-group;
+
+
+//// Utilities
+
+// The split method is exported from the common-dylan module.
+//
+define method split
+ (string :: <string>, separator :: <regex>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = string.size,
+ count :: <integer> = epos + 1,
+ case-sensitive :: <boolean> = #t,
+ remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+ local method find-regex (string :: <string>,
+ bpos :: <integer>,
+ epos :: false-or(<integer>))
+ let match = regex-search(separator, string, start: bpos, end: epos);
+ if (match)
+ let (ignore, match-start, match-end) = match-group(match, 0);
+ values(match-start, match-end)
+ else
+ #f
+ end
+ end method find-regex;
+ split(string, find-regex, start: start, end: epos, count: count,
+ remove-if-empty: remove-if-empty)
+end method split;
Modified: trunk/libraries/regular-expressions/match.dylan
==============================================================================
--- trunk/libraries/regular-expressions/match.dylan (original)
+++ trunk/libraries/regular-expressions/match.dylan Tue Feb 19 00:54:48 2008
@@ -1,4 +1,4 @@
-module: regular-expressions
+module: regex-implementation
author: Nick Kramer (nkramer at cs.cmu.edu)
synopsis: This takes a parsed regular expression and tries to find a match
for it.
Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan (original)
+++ trunk/libraries/regular-expressions/od-library.dylan Tue Feb 19 00:54:48 2008
@@ -31,57 +31,58 @@
//
//======================================================================
-// Revamped. --cgay Dec 2007
-
define library regular-expressions
+ use dylan;
use common-dylan;
use string-extensions;
- use io,
- import: { format-out }; // for debugging only
export
- regular-expressions;
-end;
+ regular-expressions,
+ regex-implementation;
+end library regular-expressions;
define module regular-expressions
- use common-dylan,
- exclude: {
- split // todo -- just add a method to this one
- };
- use format-out; // for debugging only
+ create
+ <regex>,
+ <regex-match>,
+ <match-group>,
+ <regex-error>,
+ <invalid-regex>,
+ <invalid-match-group>,
+
+ // Compiling and accessing regex info
+ compile-regex,
+ regex-group-count,
+ regex-pattern,
+
+ // Search and replace
+ regex-search,
+ regex-search-strings,
+ regex-position,
+ regex-replace,
+
+ // Accessing match groups and individual group info
+ match-group,
+ groups-by-position,
+ groups-by-name,
+ group-text,
+ group-start,
+ group-end;
+end module regular-expressions;
+
+define module regex-implementation
+ use common-dylan;
+ use dylan-extensions,
+ import: { values-hash, string-hash, gefiltafishk };
use string-conversions;
use character-type;
use string-hacking;
use %do-replacement;
use %parse-string;
use substring-search;
+ use regular-expressions,
+ export: all;
export
- compile-regex,
- <regex>,
- regex-search,
- regex-search-strings,
- regex-group-count,
- regex-position,
- make-regex-positioner,
- regex-replace,
- make-regex-replacer,
- <regex-error>,
- <invalid-regex>,
- regex-pattern,
- <regex-match>, // results of a successful search
- <match-group>,
- groups-by-position,
- groups-by-name,
- match-group,
- match-groups,
- group-start,
- group-end,
- group-text,
- <invalid-match-group>,
-
- split,
- make-splitter,
- join;
- export
- split-string; // ???
-end module regular-expressions;
-
+ // extra exports for the test suite to use
+ <mark>,
+ *regex-cache*;
+end module regex-implementation;
Modified: trunk/libraries/regular-expressions/od-regular-expressions.lid
==============================================================================
--- trunk/libraries/regular-expressions/od-regular-expressions.lid (original)
+++ trunk/libraries/regular-expressions/od-regular-expressions.lid Tue Feb 19 00:54:48 2008
@@ -3,4 +3,4 @@
match.dylan
parse.dylan
interface.dylan
- regex.dylan
+
Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Tue Feb 19 00:54:48 2008
@@ -1,4 +1,4 @@
-module: regular-expressions
+module: regex-implementation
author: Nick Kramer (nkramer at cs.cmu.edu)
copyright: see below
@@ -119,18 +119,18 @@
define class <regex-error> (<format-string-condition>, <error>)
end class <regex-error>;
-define class <illegal-regex> (<regex-error>)
+define class <invalid-regex> (<regex-error>)
constant slot regex-pattern :: <string>,
required-init-keyword: #"pattern";
-end class <illegal-regex>;
+end class <invalid-regex>;
-define sealed domain make (singleton(<illegal-regex>));
-define sealed domain initialize (<illegal-regex>);
+define sealed domain make (singleton(<invalid-regex>));
+define sealed domain initialize (<invalid-regex>);
define function parse-error
(pattern :: <string>, format-string :: <string>, #rest format-args)
let msg = apply(format-to-string, format-string, format-args);
- signal(make(<illegal-regex>,
+ signal(make(<invalid-regex>,
format-string: "Invalid regular expression: %=. %s",
format-arguments: list(pattern, msg),
pattern: pattern));
@@ -182,8 +182,12 @@
slot has-quantifiers? :: <boolean> = #f;
slot current-group-number :: <integer> = 0;
constant slot group-number-to-name :: <table> = make(<table>);
- constant slot character-set-type :: <class>,
- required-init-keyword: #"set-type";
+
+ // Currently this is only used for character sets (e.g., [a-zA-z]).
+ // It could also be used to generate case-insensitive parsed-characters
+ // and parsed-strings, but right now you get that by passing
+ // case-insensitive: #t to the match function.
+ slot case-sensitive? :: <boolean> = #t;
// If true then . matches \n. (?s) /s
slot dot-matches-all? :: <boolean>,
@@ -202,6 +206,7 @@
// These setters will be used eventually, when we implement the ability to change
// them via subpatterns like (?i). Until then, this prevents warnings.
begin
+ case-sensitive?-setter;
dot-matches-all?-setter;
extended?;
extended?-setter;
@@ -217,18 +222,22 @@
=> (info :: <parse-info>)
verbose & not-yet-implemented("'verbose' option");
multi-line & not-yet-implemented("'multi-line' option");
- let char-set-type = if (case-sensitive)
- <case-sensitive-character-set>
- else
- <case-insensitive-character-set>
- end;
make(<parse-info>,
- set-type: char-set-type,
+ case-sensitive: case-sensitive,
verbose: verbose,
multi-line: multi-line,
dot-matches-all: dot-matches-all)
end function make-parse-info;
+define inline method character-set-type
+ (info :: <parse-info>) => (set-type :: subclass(<character-set>))
+ if (info.case-sensitive?)
+ <case-sensitive-character-set>
+ else
+ <case-insensitive-character-set>
+ end
+end method character-set-type;
+
define method has-named-group?
(info :: <parse-info>, name :: <string>)
member?(name, info.group-number-to-name, test: \=)
Added: trunk/libraries/regular-expressions/tests/api.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/tests/api.dylan Tue Feb 19 00:54:48 2008
@@ -0,0 +1,259 @@
+Module: regular-expressions-test-suite
+
+define library-spec regular-expressions-api ()
+ module regular-expressions;
+end library-spec regular-expressions-api;
+
+define module-spec regular-expressions ()
+ sealed instantiable class <regex> (<mark>);
+ sealed instantiable class <regex-error> (<format-string-condition>, <error>);
+ sealed instantiable class <invalid-regex> (<regex-error>);
+ sealed instantiable class <invalid-match-group> (<regex-error>);
+ sealed instantiable class <match-group> (<object>);
+ sealed instantiable class <regex-match> (<object>);
+
+ // Compiling and accessing regex info
+ sealed generic-function compile-regex
+ (<string>, #"key", #"case-sensitive", #"dot-matches-all", #"verbose", #"multi-line")
+ => (<regex>);
+ sealed generic-function regex-pattern (<regex>) => (<string>);
+ sealed generic-function regex-group-count
+ (<regex>) => (<integer>);
+
+ // Search and replace
+ sealed generic-function regex-position
+ (<object>, <string>, #"key", #"start", #"end", #"case-sensitive")
+ => (false-or(<string>), #"rest");
+ sealed generic-function regex-replace
+ (<object>, <string>, <string>, #"key", #"start", #"end", #"case-sensitive", #"count")
+ => (<string>);
+ sealed generic-function regex-search
+ (<object>, <string>, #"key", #"anchored", #"start", #"end")
+ => (false-or(<regex-match>));
+ sealed generic-function regex-search-strings
+ (<object>, <string>, #"key", #"anchored", #"start", #"end")
+ => (false-or(<regex-match>));
+
+ // Accessing match groups
+ sealed generic-function groups-by-position
+ (<regex-match>) => (<sequence>);
+ sealed generic-function groups-by-name
+ (<regex-match>) => (<sequence>);
+ sealed generic-function match-group
+ (<regex-match>) => (false-or(<string>), false-or(<integer>), false-or(<integer>));
+
+ // Accessing individual group data
+ sealed generic-function group-text
+ (<match-group>) => (false-or(<string>));
+ sealed generic-function group-end
+ (<match-group>) => (false-or(<integer>));
+ sealed generic-function group-start
+ (<match-group>) => (false-or(<integer>));
+end module-spec regular-expressions;
+
+define regular-expressions function-test regex-position ()
+ check-no-errors("regex-position with a string regex",
+ regex-position("pattern", "pattern"));
+ check-no-errors("regex-position with a regex regex",
+ regex-position(compile-regex("pattern"), "pattern"));
+ local method check-pos
+ (test-name :: <string>, regex :: <string>, big :: <string>,
+ positions :: <vector>, #rest args)
+ check-equal(test-name,
+ positions,
+ begin
+ let (#rest marks) = apply(regex-position, regex, big, args);
+ marks
+ end);
+ end method check-pos;
+
+ check-pos("pos test #1", "a*", "aaaaaaaaaa", #[0, 10]);
+ check-pos("pos test #2", "a*", "aaaaabaaaa", #[0, 5]);
+ check-pos("pos test #3", "ab*(cd|e)", "acd", #[0, 3, 1, 3]);
+ check-pos("pos test #4", "ab*(cd|e)", "abbbbe", #[0, 6, 5, 6]);
+ check-pos("pos test #5", "ab*(cd|e)", "ab", #[#f]);
+
+ check-pos("pos test #6", "^a$", "aaaaaaaaaaaaaa", #[#f]);
+ check-pos("pos test #7", "^a$", "a", #[0, 1]);
+ check-pos("pos test #8", "(^a$)|aba", "abba", #[#f]);
+ check-pos("pos test #9", "(^a$)|aba", "aba", #[0, 3, #f, #f]);
+
+ check-pos("pos test #a",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain in spain",
+ #[0, 17, 9, 11]);
+ check-pos("pos test #b",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain spain",
+ #[#f]);
+ check-pos("pos test #c",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain ininin spain",
+ #[0, 21, 13, 15]);
+ check-pos("pos test #d",
+ "\\bthe rain (in){1,5} spain$",
+ "bork the rain in spain",
+ #[5, 22, 14, 16]);
+ check-pos("pos test #e",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain in spainland",
+ #[#f]);
+ check-pos("pos test #f",
+ "\\bthe rain (in){1,5} spain$",
+ "blathe rain in spain",
+ #[#f]);
+ check-pos("pos test #g",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain ininininin spain",
+ #[0, 25, 17, 19]);
+ check-pos("pos test #h",
+ "\\bthe rain (in){1,5} spain$",
+ "the rain inininininin spain",
+ #[#f]);
+ check-pos("pos test #i", "a*", "aaaaa", #[0, 5]);
+ check-pos("pos test #j", "a*", "a", #[0, 1]);
+ check-pos("pos test #k", "a*", "", #[0, 0]);
+ check-pos("pos test #L", "bba*c", "bbc", #[0, 3]);
+ check-pos("pos test #m", "a", "bbbb", #[#f]);
+ check-pos("pos test #n", "a*", "aaaaa", #[3, 4], start: 3, end: 4);
+ check-pos("pos test #o", "^a*", "aaaaa", #[2, 5], start: 2);
+ check-pos("pos test #p", "^a*", "baaaaa", #[2, 6], start: 2);
+ check-pos("pos test #q", "^a+", "bbbaaaaa", #[#f], start: 2);
+ check-pos("pos test #r", "a+", "AAaAA", #[0, 5], case-sensitive: #f);
+ check-pos("pos test #s", "a+", "AAaAA", #[2, 3]);
+ check-pos("pos test #t", "[a-f]+", "SdFbIeNvI", #[1, 2]);
+ // This one is failing due to bug 7371
+ check-pos("pos test #u", "[a-f]+", "SdFbIeNvI", #[1, 4], case-sensitve: #f);
+ check-pos("pos test #v", "[\\s\\]]+", "blah[ \t]", #[5, 10]);
+
+ // test escaped characters
+ check-pos("pos test #w", "\\\"", "\\\"", #[1, 2]);
+ check-pos("pos test #x", "\\\\\"", "\\\"", #[0, 2]);
+ check-condition("pos test #y",
+ <invalid-regex>,
+ compile-regex("((a*)|(b*))*c"));
+end function-test regex-position;
+
+define regular-expressions function-test regex-replace ()
+ let big-string = "The rain in spain and some other text";
+ check-no-errors("regex-replace with regex pattern",
+ regex-replace(compile-regex("the (.*) in (\\w*\\b)"),
+ big-string,
+ "\\2 has its \\1"));
+ check-equal("regex-replace #1",
+ regex-replace("the (.*) in (\\w*\\b)", big-string, "\\2 has its \\1"),
+ "spain has its rain and some other text");
+ check-equal("regex-replace #2",
+ regex-replace("in", big-string, "out"),
+ "The raout out spaout and some other text");
+ check-equal("regex-replace #3",
+ regex-replace("in", big-string, "out", count: 2),
+ "The raout out spain and some other text");
+ check-equal("regex-replace #4",
+ regex-replace("in", big-string, "out", start: 8, end: 15),
+ "The rain out spain and some other text");
+end function-test regex-replace;
+
+define regular-expressions function-test regex-group-count ()
+ //---*** Fill this in...
+end function-test regex-group-count;
+
+define regular-expressions function-test regex-search ()
+ // Test case-sensitive parameter
+ // See bug 7371
+ check-true("regex-search(..., case-sensitive: #f) works for character sets",
+ regex-search("[a-z]", "A", case-sensitive: #f));
+ check-true("regex-search(..., case-sensitive: #t) works on character sets",
+ regex-search("[a-z]", "A", case-sensitive: #f));
+ check-false("case-sensitive: #t works for regular strings",
+ regex-search("abc", "aBc", case-sensitive: #t));
+ check-true("case-sensitive: #f works for regular strings",
+ regex-search("abc", "ABC", case-sensitive: #f));
+end function-test regex-search;
+
+define regular-expressions function-test compile-regex ()
+ // Test caching
+ check-true("use-cache: #t uses the cache",
+ compile-regex("abc") == compile-regex("abc", use-cache: #t));
+ check-true("use-cache: #f doesn't use the cache",
+ compile-regex("abc") ~== compile-regex("abc", use-cache: #f));
+
+ // Test case-sensitive parameter
+ // Test verbose parameter
+ // Test multi-line parameter
+ // Test dot-matches-all parameter
+end function-test compile-regex;
+
+define regular-expressions class-test <regex> ()
+ //---*** Fill this in...
+end class-test <regex>;
+
+define sideways method make-test-instance
+ (class == <regex>) => (regex :: <regex>)
+ compile-regex("foo")
+end;
+
+define regular-expressions class-test <regex-error> ()
+ //---*** Fill this in...
+end class-test <regex-error>;
+
+define regular-expressions class-test <invalid-regex> ()
+ //---*** Fill this in...
+end class-test <invalid-regex>;
+
+define sideways method make-test-instance
+ (class == <invalid-regex>) => (error :: <invalid-regex>)
+ make(<invalid-regex>, pattern: "[unterminated")
+end;
+
+define regular-expressions function-test regex-search-strings ()
+ //---*** Fill this in...
+end function-test regex-search-strings;
+
+define regular-expressions class-test <invalid-match-group> ()
+ //---*** Fill this in...
+end class-test <invalid-match-group>;
+
+define regular-expressions function-test group-text ()
+ //---*** Fill this in...
+end function-test group-text;
+
+define regular-expressions function-test group-end ()
+ //---*** Fill this in...
+end function-test group-end;
+
+define regular-expressions function-test group-start ()
+ //---*** Fill this in...
+end function-test group-start;
+
+define regular-expressions function-test match-groups ()
+ //---*** Fill this in...
+end function-test match-groups;
+
+define regular-expressions function-test match-group ()
+ //---*** Fill this in...
+end function-test match-group;
+
+define regular-expressions class-test <match-group> ()
+ //---*** Fill this in...
+end class-test <match-group>;
+
+define sideways method make-test-instance
+ (class == <match-group>) => (group :: <match-group>)
+ make(<match-group>, text: "foo", start: 0, end: 3)
+end;
+
+define regular-expressions class-test <regex-match> ()
+ //---*** Fill this in...
+end class-test <regex-match>;
+
+define sideways method make-test-instance
+ (class == <regex-match>) => (match :: <regex-match>)
+ make(<regex-match>, regular-expression: compile-regex("foo"))
+end;
+
+define regular-expressions function-test regex-pattern ()
+ //---*** Fill this in...
+end function-test regex-pattern;
+
+
Modified: trunk/libraries/regular-expressions/tests/library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/library.dylan (original)
+++ trunk/libraries/regular-expressions/tests/library.dylan Tue Feb 19 00:54:48 2008
@@ -3,50 +3,37 @@
define library regular-expressions-test-suite
use common-dylan;
use io,
- import: {
- format-out, // for debugging only
- streams
- };
- use regular-expressions;
+ import: { streams };
use system,
- import: {
- file-system,
- locators,
- operating-system
- };
+ import: { file-system,
+ locators,
+ operating-system };
use strings;
use testworks;
+ use testworks-specs;
+ use regular-expressions,
+ import: { regex-implementation };
+
export
regular-expressions-test-suite;
end library regular-expressions-test-suite;
define module regular-expressions-test-suite
use common-dylan,
- rename: {
- format-to-string => sprintf // to long for 80 chars per line
- },
- exclude: {
- split
- };
- use regular-expressions;
+ rename: { format-to-string => sprintf };
+ use regex-implementation;
use file-system;
use locators,
- import: {
- <directory-locator>,
- <file-locator>,
- subdirectory-locator
- };
+ import: { <directory-locator>,
+ <file-locator>,
+ subdirectory-locator };
use operating-system,
- import: {
- environment-variable
- };
+ import: { environment-variable };
use testworks;
- use format-out; // for debugging only
+ use testworks-specs;
use streams;
use strings,
- import: {
- trim
- };
+ import: { trim };
export regular-expressions-test-suite;
end module regular-expressions-test-suite;
Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan (original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan Tue Feb 19 00:54:48 2008
@@ -201,14 +201,14 @@
if (match)
check-equal(sprintf("Match '%s' against regex '%s' -- same # of groups",
test-string, pattern),
- size(match-groups(match)),
+ size(groups-by-position(match)),
pcre-groups.size);
for (group-number from 0,
pcre-group in pcre-groups)
// Adding block/exception here causes an infinite loop.
// Could it be related to using the Visual Studio 8 linker?
// The if also causes an infinite loop. Hmmm.
- let our-group = /* if (group-number < size(match-groups(match))) */
+ let our-group = /* if (group-number < size(groups-by-position(match))) */
match-group(match, group-number)
/* end */;
check-equal(sprintf("Match '%s' against regex '%s' -- group %d is the same",
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan Tue Feb 19 00:54:48 2008
@@ -1,34 +1,7 @@
Module: regular-expressions-test-suite
Author: Carl Gay
-define function re/position (pattern, string, #rest args)
- let (#rest marks) = apply(regex-position, pattern, string, args);
- marks
-end function re/position;
-
-define test atom-test ()
- check-no-errors("atom-0", re/position("", ""));
- check-equal("atom-1", re/position("a", "a"), #[0, 1]);
- check-equal("atom-2", re/position("[a]", "a"), #[0, 1]);
- check-equal("atom-3", re/position("(a)b", "ab"), #[0, 2, 0, 1]);
- check-equal("atom-4", re/position("\\w", "a"), #[0, 1]);
- check-equal("atom-5", re/position(".", "a"), #[0, 1]);
- check-equal("atom-6", re/position("a{0}", "a"), #[0, 0]);
- check-equal("atom-7", re/position("a{2}", "aa"), #[0, 2]);
- check-equal("atom-8", re/position("a{1,}", "aa"), #[0, 2]);
- check-equal("atom-9", re/position("a{1,8}", "aaa"), #[0, 3]);
- check-equal("atom-A", re/position("a{,}", ""), #[0, 0]);
- check-equal("atom-A1", re/position("a{,}", "aaaaaa"), #[0, 6]);
- check-condition("atom-B", <invalid-regex>, re/position("a{m,n}", ""));
- check-condition("atom-C", <invalid-regex>, re/position("a{m,}", ""));
- check-condition("atom-D", <invalid-regex>, re/position("a{,n}", ""));
- check-condition("atom-E", <invalid-regex>, re/position("a{m}", ""));
- check-condition("atom-F", <invalid-regex>, re/position("a{,", ""));
- check-condition("atom-G", <invalid-regex>, re/position("[a", ""));
- check-condition("atom-H", <invalid-regex>, re/position("\\", ""));
- check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
-end;
-
+// Helper function, e.g., check-matches("a(b|c)", "abc", "ab", "b")
// Note that flags must come at the end of groups-and-flags.
define function check-matches
(pattern, input-string, #rest groups-and-flags) => ()
@@ -58,6 +31,44 @@
end;
end function check-matches;
+define test split-test ()
+ let big-string = "The rain in spain and some other text";
+ check-equal("split #1",
+ split(big-string, compile-regex("\\s")),
+ #("The", "rain", "in", "spain", "and", "some", "other", "text"));
+ check-equal("split #2",
+ split(big-string, compile-regex("\\s"), count: 3),
+ #("The", "rain", "in spain and some other text"));
+ check-equal("split #3",
+ split(big-string, compile-regex("\\s"), start: 12),
+ #("spain", "and", "some", "other", "text"));
+ check-equal("split #4",
+ split(" Some text with lots of spaces ",
+ compile-regex("\\s"),
+ count: 3),
+ #("", "Some", " text with lots of spaces "));
+ check-equal("split #5",
+ split(" Some text with lots of spaces ",
+ compile-regex("\\s+")),
+ #("", "Some", "text", "with", "lots", "of", "spaces", ""));
+end test split-test;
+
+define test atom-test ()
+ check-matches("", "", "");
+ check-matches("a", "a", "a");
+ check-matches("[a]", "a", "a");
+ check-matches("(a)b", "ab", "ab", "a");
+ check-matches("\\w", "a", "a");
+ check-matches(".", "a", "a");
+ check-matches("a{0}", "a", "");
+ check-matches("a{2}", "aa", "aa");
+ check-matches("a{1,}", "aa", "aa");
+ check-matches("a{1,8}", "aaa", "aaa");
+ check-matches("a{1,2}", "aaa", "aa");
+ check-matches("a{,}", "", "");
+ check-matches("a{,}", "aaaaaa", "aaaaaa");
+end test atom-test;
+
// These are to cover the basics, as I add new features to the code or
// read through the pcrepattern docs. The PCRE tests should cover a lot
// of the more esoteric cases, I hope.
@@ -86,15 +97,25 @@
check-equal("start: and end: work?",
regex-search("a", "a b a", start: 1, end: 4),
#f);
+ check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
end test ad-hoc-regex-test;
// All these regexes should signal <invalid-regex> on compilation.
//
define test invalid-regex-test ()
let patterns = #(
- "(?P<name>x)(?P<name>y)", // can't use same name twice
+ "(?P<foo>x)(?P<foo>y)", // can't use same group name twice
"(?@abc)", // invalid extended character '@'
- "(a)\\2" // invalid back reference
+ "(a)\\2", // invalid back reference
+ "a{m,n}",
+ "a{m,}",
+ "a{,n}",
+ "a{m}",
+ "a{,",
+ "[a",
+ "(",
+ "(()",
+ "((a)b|"
);
for (pattern in patterns)
check-condition(sprintf("Compiling '%s' gets an error", pattern),
@@ -116,10 +137,18 @@
end;
define suite regular-expressions-test-suite ()
+ test split-test;
test atom-test;
test ad-hoc-regex-test;
test invalid-regex-test;
test regressions-test;
+
+ // I've changed lots of things that make the gdref documentation
+ // out-of-date, but it still might be useful to look it over for
+ // test ideas.
+ //test gdref-documentation-test;
+
+ suite regular-expressions-api-test-suite;
// It's sometimes useful to use -ignore-suite to skip this one because it's so noisy.
suite pcre-test-suite;
end;
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid Tue Feb 19 00:54:48 2008
@@ -1,4 +1,5 @@
library: regular-expressions-test-suite
files: library
+ api
pcre
regular-expressions-test-suite
More information about the chatter
mailing list