[Gd-chatter] r11684 - in trunk/fundev/sources/common-dylan: . tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Tue Feb 19 00:28:20 CET 2008
Author: cgay
Date: Tue Feb 19 00:28:19 2008
New Revision: 11684
Modified:
trunk/fundev/sources/common-dylan/common-extensions.dylan
trunk/fundev/sources/common-dylan/library.dylan
trunk/fundev/sources/common-dylan/tests/functions.dylan
Log:
job: 7372
New split implementation.
Modified: trunk/fundev/sources/common-dylan/common-extensions.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/common-extensions.dylan (original)
+++ trunk/fundev/sources/common-dylan/common-extensions.dylan Tue Feb 19 00:28:19 2008
@@ -154,52 +154,187 @@
end method position;
+// Split a sequence into parts at each occurrance of the 'separator'
+// and return a sequence containing the parts. The sequence is
+// searched from beginning to end for the given 'separator' and stops
+// when it reaches the end of 'sequence' or when the size of the
+// result reaches 'count' elements. The meaning of the 'start' and
+// 'end' parameters may differ for different methods, but the intent
+// is that it be the same as if you passed in the subsequence delimited
+// by 'start' and 'end'. See the individual methods for details.
+//
define open generic split
- (collection :: <string>, character :: <character>,
- #key start :: <integer>,
- end: _end :: <integer>,
- trim? :: <boolean>)
- => (strings :: <sequence>);
+ (sequence :: <sequence>, separator :: <object>,
+ #key start :: <integer>,
+ end: epos :: <integer>,
+ count :: <integer>,
+ remove-if-empty :: <boolean>)
+ => (parts :: <sequence>);
+
+// This is in some sense the most basic method, since others can be
+// implemented in terms of it. The 'separator' function must accept
+// three arguments: (1) the sequence in which to search for a
+// separator, (2) the start index in that sequence at which to begin
+// searching, and (3) the index at which to stop searching. The
+// 'separator' function must return #f to indicate that no separator
+// was found, or two values: the start and end indices of the
+// separator in the given sequence. The initial start and end
+// indices passed to the 'separator' function are the same as the
+// 'start' and 'end' arguments passed to this method. The
+// 'separator' function should stay within the given bounds whenever
+// possible. (In particular it may not always be possible when the
+// separator is a regex.)
+define method split
+ (seq :: <sequence>, find-separator :: <function>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = seq.size,
+ count :: <integer> = epos + 1,
+ remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+ reverse!(iterate loop (bpos :: <integer> = start,
+ parts :: <list> = #(),
+ nparts :: <integer> = 1)
+ let (sep-start, sep-end) = find-separator(seq, bpos, epos);
+ if (sep-start & sep-end & (sep-end <= epos) & (nparts < count))
+ let part = copy-sequence(seq, start: bpos, end: sep-start);
+ let remove? = remove-if-empty & empty?(part);
+ loop(sep-end,
+ if (remove?) parts else pair(part, parts) end,
+ if (remove?) nparts else nparts + 1 end)
+ else
+ pair(copy-sequence(seq, start: bpos, end: epos), parts)
+ end
+ end)
+end method split;
-define sealed method split
- (string :: <byte-string>, character :: <byte-character>,
+// Splits seq around occurrances of the separator subsequence.
+// Works for the relatively common case where seq and separator
+// are both <string>s.
+define method split
+ (seq :: <sequence>, separator :: <sequence>,
#key start :: <integer> = 0,
- end: _end :: <integer> = string.size,
- trim? :: <boolean> = #t)
- => (strings :: <stretchy-object-vector>)
- let old-position :: <integer> = start;
- let end-position :: <integer> = _end;
- let new-position :: <integer> = old-position;
- let results :: <stretchy-object-vector> = make(<stretchy-object-vector>);
- local method add-substring
- (start :: <integer>, _end :: <integer>, #key last? :: <boolean> = #f) => ()
- if (trim?)
- while (start < _end & string[start] = ' ')
- start := start + 1
- end;
- while (start < _end & string[_end - 1] = ' ')
- _end := _end - 1
- end
- end;
- // Don't ever return just a single empty string
- if (~last? | start ~== _end | ~empty?(results))
- add!(results, copy-sequence(string, start: start, end: _end))
- end
- end method add-substring;
- while (new-position < end-position)
- if (string[new-position] = character)
- add-substring(old-position, new-position);
- new-position := new-position + 1;
- old-position := new-position
- else
- new-position := new-position + 1;
- end
- end;
- add-substring(old-position, new-position, last?: #t);
- results
+ end: epos :: <integer> = seq.size,
+ count :: <integer> = epos + 1,
+ test :: <function> = \==,
+ remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+ // Is there a function that does this already?
+ local method looking-at? (pattern :: <sequence>, big :: <sequence>,
+ bpos :: <integer>)
+ block (return)
+ let len :: <integer> = big.size;
+ for (thing in pattern, pos from bpos)
+ if (pos >= len | ~test(thing, big[pos]))
+ return(#f)
+ end if;
+ end for;
+ #t
+ end
+ end method looking-at?;
+ local method find-subseq (seq :: <sequence>,
+ bpos :: <integer>,
+ epos :: false-or(<integer>))
+ // Note that this only splits on the separator sequence if it is
+ // entirely contained between the start and end positions.
+ let epos :: <integer> = epos | seq.size;
+ let max-separator-start :: <integer> = epos - separator.size;
+ block (exit-loop)
+ for (seq-index from bpos to max-separator-start)
+ if (looking-at?(separator, seq, seq-index))
+ exit-loop(seq-index, seq-index + separator.size);
+ end;
+ end;
+ #f // separator not found
+ end
+ end;
+ split(seq, find-subseq, start: start, end: epos, count: count,
+ remove-if-empty: remove-if-empty)
end method split;
+// Split on a given object.
+// This handles the common (<string>, <character>) case.
+define method split
+ (seq :: <sequence>, separator :: <object>,
+ #key start :: <integer> = 0,
+ end: epos :: <integer> = seq.size,
+ count :: <integer> = epos + 1,
+ test :: <function> = \==,
+ remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+ local method find-pos (seq :: <sequence>,
+ bpos :: <integer>,
+ epos :: false-or(<integer>))
+ // Unfortunately the position function doesn't accept
+ // start and end parameters so we have to write our own.
+ block (exit-loop)
+ for (i from bpos below epos)
+ if (test(seq[i], separator))
+ exit-loop(i, i + 1)
+ end;
+ end;
+ #f
+ end block
+ end method;
+ split(seq, find-pos, start: start, end: epos, count: count,
+ remove-if-empty: remove-if-empty)
+end method split;
+// Join several sequences together, including a separator between each sequence.
+define open generic join
+ (items :: <sequence>, separator :: <sequence>, #key key, conjunction)
+ => (joined :: <sequence>);
+
+// join(range(from: 1, to: 3), ", ",
+// key: integer-to-string,
+// conjunction: " and ");
+// => "1, 2 and 3"
+
+define method join
+ (sequences :: <sequence>, separator :: <sequence>,
+ #key key :: <function> = identity,
+ conjunction :: false-or(<sequence>))
+ => (joined :: <sequence>)
+ let length :: <integer> = sequences.size;
+ if (length == 0)
+ error("Attempt to join an empty sequence.")
+ elseif (length == 1)
+ key(sequences[0])
+ else
+ let result-size :: <integer>
+ = (reduce(method (len, seq)
+ len + seq.size
+ end,
+ 0,
+ sequences)
+ + (separator.size * (length - 1))
+ + if (conjunction)
+ // the last separator is replaced by the conjunction
+ conjunction.size - separator.size
+ else
+ 0
+ end);
+ let first = key(sequences[0]); // don't call key > once on sequences[0]
+ let result = make(object-class(first), size: result-size);
+ let result-index :: <integer> = 0;
+ local method copy-to-result (seq :: <sequence>)
+ result := replace-subsequence!(result, seq, start: result-index);
+ result-index := result-index + seq.size;
+ end;
+ copy-to-result(first);
+ let max-index :: <integer> = length - 1;
+ for (i :: <integer> from 1 to max-index)
+ let seq :: <sequence> = sequences[i];
+ copy-to-result(if(conjunction & i == max-index)
+ conjunction
+ else
+ separator
+ end);
+ copy-to-result(key(seq));
+ end;
+ result
+ end if
+end method join;
+
define open generic find-element
(collection :: <collection>, predicate :: <function>,
#key skip :: <integer>, failure)
Modified: trunk/fundev/sources/common-dylan/library.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/library.dylan (original)
+++ trunk/fundev/sources/common-dylan/library.dylan Tue Feb 19 00:28:19 2008
@@ -91,6 +91,7 @@
difference,
position,
split,
+ join,
fill-table!,
find-element,
find-value,
Modified: trunk/fundev/sources/common-dylan/tests/functions.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/tests/functions.dylan (original)
+++ trunk/fundev/sources/common-dylan/tests/functions.dylan Tue Feb 19 00:28:19 2008
@@ -275,59 +275,119 @@
end function-test position;
define common-extensions function-test split ()
- check-equal("split on empty string",
- split("", '/'),
- #[]);
- check-equal("split on single character",
- split("a", '/'),
- #["a"]);
- check-equal("split on two characters",
- split("a/b", '/'),
- #["a", "b"]);
- check-equal("split on multiple single characters",
- split("aXbXcXdXeXfXg", 'X'),
- #["a", "b", "c", "d", "e", "f", "g"]);
- check-equal("split on single word",
- split("hello", '/'),
- #["hello"]);
- check-equal("split on two words",
- split("hello/world", '/'),
- #["hello", "world"]);
- check-equal("split on three words",
- split("majorXminorXbuild", 'X'),
- #["major", "minor", "build"]);
- check-equal("split on multiple words",
- split("x=100&y=200&width=30&height=10", '&'),
- #["x=100", "y=200", "width=30", "height=10"]);
- check-equal("split on single separator character",
- split("/", '/'),
- #["", ""]);
- check-equal("split on a/",
- split("a/", '/'),
- #["a", ""]);
- check-equal("split on /b",
- split("/b", '/'),
- #["", "b"]);
- check-equal("split with double separator",
- split("majorXXbuild", 'X'),
- #["major", "", "build"]);
- check-equal("split with spaces",
- split(" major X minor X build ", 'X'),
- #["major", "minor", "build"]);
- check-equal("no trim split with spaces",
- split(" major X minor X build ", 'X', trim?: #f),
- #[" major ", " minor ", " build "]);
- check-equal("split with start",
- split("123456789x123456789", 'x', start: 1),
- #["23456789", "123456789"]);
- check-equal("split with end",
- split("0123456789", '3', end: 8),
- #["012", "4567"]);
- check-equal("split with start and end",
- split("0123456789", '3', start: 2, end: 8),
- #["2", "4567"]);
+ // a character separator should act the same as a string separator that
+ // contains only that character...
+ for (separator in #('/', "/"))
+ local method fmt (name)
+ format-to-string("%s, sep = %=", name, separator);
+ end;
+ check-equal(fmt("split empty string"),
+ split("", separator),
+ #[""]);
+ check-equal(fmt("split single character"),
+ split("a", separator),
+ #["a"]);
+ check-equal(fmt("split two characters"),
+ split("a/b", separator),
+ #["a", "b"]);
+ check-equal(fmt("split multiple single characters"),
+ split("a/b/c/d/e/f/g", separator),
+ #["a", "b", "c", "d", "e", "f", "g"]);
+ check-equal(fmt("split single word"),
+ split("hello", separator),
+ #["hello"]);
+ check-equal(fmt("split two words"),
+ split("hello/world", separator),
+ #["hello", "world"]);
+ check-equal(fmt("split three words"),
+ split("major/minor/build", separator),
+ #["major", "minor", "build"]);
+ check-equal(fmt("split multiple words"),
+ split("x=100/y=200/width=30/height=10", separator),
+ #["x=100", "y=200", "width=30", "height=10"]);
+ check-equal(fmt("split only the separator character"),
+ split("/", separator),
+ #["", ""]);
+ check-equal(fmt("split a/"),
+ split("a/", separator),
+ #["a", ""]);
+ check-equal(fmt("split /b"),
+ split("/b", separator),
+ #["", "b"]);
+ check-equal(fmt("split with double separator"),
+ split("major//build", separator),
+ #["major", "", "build"]);
+ check-equal(fmt("split with spaces"),
+ split(" major / minor / build ", separator),
+ #[" major ", " minor ", " build "]);
+ check-equal(fmt("split with start"),
+ split("123456789/123456789", separator, start: 1),
+ #["23456789", "123456789"]);
+ check-equal(fmt("split with end"),
+ split("012/456789", separator, end: 8),
+ #["012", "4567"]);
+ check-equal(fmt("split with start and end"),
+ split("012/456789", separator, start: 2, end: 8),
+ #["2", "4567"]);
+ check-equal(fmt("split with count"),
+ split("1/2/3/4", separator, count: 2),
+ #["1", "2/3/4"]);
+ check-equal(fmt("split with count and start"),
+ split("1/2/3/4", separator, count: 2, start: 2),
+ #["2", "3/4"]);
+ check-equal(fmt("split with count and end"),
+ split("1/2/3/4", separator, count: 2, end: 5),
+ #["1", "2/3"]);
+ check-equal(fmt("split with count, start, and end"),
+ split("1/2/3/4", separator, count: 2, start: 2, end: 5),
+ #["2", "3"]);
+ check-equal(fmt("split with count = 1"),
+ split("a/b/c/d", separator, count: 1),
+ #["a/b/c/d"]);
+ end for;
+ check-equal("split with separator crossing start:",
+ split("xxx one xxx two xxx", "xxx", start: 1),
+ #["xx one ", " two ", ""]);
+ check-equal("split with separator crossing end:",
+ split("xxx one xxx two xxx", "xxx", end: 17),
+ #["", " one ", " two x"]);
+ check-equal("split with separator crossing start: and end:",
+ split("xxx one xxx two xxx", "xxx", start: 1, end: 17),
+ #["xx one ", " two x"]);
end function-test split;
+define common-extensions function-test join ()
+ let abc = #("a", "b", "c");
+ check-condition("join empty sequence is an error",
+ <error>,
+ join(#(), ", "));
+ check-equal("basic join",
+ "a, b, c",
+ join(abc, ", "));
+ check-equal("join of one element",
+ "singleton",
+ join(#("singleton"), " "));
+ check-equal("join with conjunction",
+ "a, b and c",
+ join(abc, ", ",
+ conjunction: " and "));
+ check-equal("join with key",
+ "1, 2, 3",
+ join(#(1, 2, 3), ", ",
+ key: integer-to-string));
+ check-equal("join with conjunction and key",
+ "1, 2 and 3",
+ join(#(1, 2, 3), ", ",
+ conjunction: " and ",
+ key: integer-to-string));
+ check-equal("non-string join",
+ #(1, #t, 2, #t, 3),
+ join(#(1, 2, 3), #(#t)));
+ check-equal("join with empty separator",
+ #(1, 2, 3),
+ join(#(1, 2, 3), #()))
+end function-test join;
+
define common-extensions function-test remove-all-keys! ()
//---*** Do all collections by using dylan-test-suite collection code
end function-test remove-all-keys!;
@@ -535,3 +595,4 @@
define simple-profiling function-test profiling-type-result ()
//---*** Fill this in...
end function-test profiling-type-result;
+
More information about the chatter
mailing list