[Gd-chatter] r11684 - in trunk/fundev/sources/common-dylan: . tests

cgay at gwydiondylan.org cgay at gwydiondylan.org
Tue Feb 19 00:28:20 CET 2008


Author: cgay
Date: Tue Feb 19 00:28:19 2008
New Revision: 11684

Modified:
   trunk/fundev/sources/common-dylan/common-extensions.dylan
   trunk/fundev/sources/common-dylan/library.dylan
   trunk/fundev/sources/common-dylan/tests/functions.dylan
Log:
job: 7372
New split implementation.

Modified: trunk/fundev/sources/common-dylan/common-extensions.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/common-extensions.dylan	(original)
+++ trunk/fundev/sources/common-dylan/common-extensions.dylan	Tue Feb 19 00:28:19 2008
@@ -154,52 +154,187 @@
 end method position;
 
 
+// Split a sequence into parts at each occurrance of the 'separator'
+// and return a sequence containing the parts.  The sequence is
+// searched from beginning to end for the given 'separator' and stops
+// when it reaches the end of 'sequence' or when the size of the
+// result reaches 'count' elements.  The meaning of the 'start' and
+// 'end' parameters may differ for different methods, but the intent
+// is that it be the same as if you passed in the subsequence delimited
+// by 'start' and 'end'.  See the individual methods for details.
+//
 define open generic split
-    (collection :: <string>, character :: <character>,
-     #key start :: <integer>, 
-          end: _end :: <integer>,
-          trim? :: <boolean>)
- => (strings :: <sequence>);
+    (sequence :: <sequence>, separator :: <object>,
+     #key start :: <integer>,
+          end: epos :: <integer>,
+          count :: <integer>,
+          remove-if-empty :: <boolean>)
+ => (parts :: <sequence>);
+
+// This is in some sense the most basic method, since others can be
+// implemented in terms of it.  The 'separator' function must accept
+// three arguments: (1) the sequence in which to search for a
+// separator, (2) the start index in that sequence at which to begin
+// searching, and (3) the index at which to stop searching.  The
+// 'separator' function must return #f to indicate that no separator
+// was found, or two values: the start and end indices of the
+// separator in the given sequence.  The initial start and end
+// indices passed to the 'separator' function are the same as the
+// 'start' and 'end' arguments passed to this method.  The
+// 'separator' function should stay within the given bounds whenever
+// possible.  (In particular it may not always be possible when the
+// separator is a regex.)
+define method split
+    (seq :: <sequence>, find-separator :: <function>,
+     #key start :: <integer> = 0,
+          end: epos :: <integer> = seq.size,
+          count :: <integer> = epos + 1,
+          remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+  reverse!(iterate loop (bpos :: <integer> = start,
+                         parts :: <list> = #(),
+                         nparts :: <integer> = 1)
+             let (sep-start, sep-end) = find-separator(seq, bpos, epos);
+             if (sep-start & sep-end & (sep-end <= epos) & (nparts < count))
+               let part = copy-sequence(seq, start: bpos, end: sep-start);
+               let remove? = remove-if-empty & empty?(part);
+               loop(sep-end,
+                    if (remove?) parts else pair(part, parts) end,
+                    if (remove?) nparts else nparts + 1 end)
+             else
+               pair(copy-sequence(seq, start: bpos, end: epos), parts)
+             end
+           end)
+end method split;
 
-define sealed method split
-    (string :: <byte-string>, character :: <byte-character>,
+// Splits seq around occurrances of the separator subsequence.
+// Works for the relatively common case where seq and separator
+// are both <string>s.
+define method split
+    (seq :: <sequence>, separator :: <sequence>,
      #key start :: <integer> = 0,
-          end: _end :: <integer> = string.size,
-          trim? :: <boolean> = #t)
- => (strings :: <stretchy-object-vector>)
-  let old-position :: <integer> = start;
-  let end-position :: <integer> = _end;
-  let new-position :: <integer> = old-position;
-  let results :: <stretchy-object-vector> = make(<stretchy-object-vector>);
-  local method add-substring
-	    (start :: <integer>, _end :: <integer>, #key last? :: <boolean> = #f) => ()
-	  if (trim?)
-	    while (start < _end & string[start] = ' ')
-	      start := start + 1
-	    end;
-	    while (start < _end & string[_end - 1] = ' ')
-	      _end := _end - 1
-	    end
-	  end;
-	  // Don't ever return just a single empty string
-	  if (~last? | start ~== _end | ~empty?(results))
-	    add!(results, copy-sequence(string, start: start, end: _end))
-	  end
-	end method add-substring;
-  while (new-position < end-position)
-    if (string[new-position] = character)
-      add-substring(old-position, new-position);
-      new-position := new-position + 1;
-      old-position := new-position
-    else
-      new-position := new-position + 1;
-    end
-  end;
-  add-substring(old-position, new-position, last?: #t);
-  results
+          end: epos :: <integer> = seq.size,
+          count :: <integer> = epos + 1,
+          test :: <function> = \==,
+          remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+  // Is there a function that does this already?
+  local method looking-at? (pattern :: <sequence>, big :: <sequence>,
+                            bpos :: <integer>)
+          block (return)
+            let len :: <integer> = big.size;
+            for (thing in pattern, pos from bpos)
+              if (pos >= len | ~test(thing, big[pos]))
+                return(#f)
+              end if;
+            end for;
+            #t
+          end
+        end method looking-at?;
+  local method find-subseq (seq :: <sequence>,
+                            bpos :: <integer>,
+                            epos :: false-or(<integer>))
+          // Note that this only splits on the separator sequence if it is
+          // entirely contained between the start and end positions.
+          let epos :: <integer> = epos | seq.size;
+          let max-separator-start :: <integer> = epos - separator.size;
+          block (exit-loop)
+            for (seq-index from bpos to max-separator-start)
+              if (looking-at?(separator, seq, seq-index))
+                exit-loop(seq-index, seq-index + separator.size);
+              end;
+            end;
+            #f      // separator not found
+          end
+        end;
+  split(seq, find-subseq, start: start, end: epos, count: count,
+        remove-if-empty: remove-if-empty)
 end method split;
 
+// Split on a given object.
+// This handles the common (<string>, <character>) case.
+define method split
+    (seq :: <sequence>, separator :: <object>,
+     #key start :: <integer> = 0,
+          end: epos :: <integer> = seq.size,
+          count :: <integer> = epos + 1,
+          test :: <function> = \==,
+          remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+  local method find-pos (seq :: <sequence>,
+                         bpos :: <integer>,
+                         epos :: false-or(<integer>))
+          // Unfortunately the position function doesn't accept
+          // start and end parameters so we have to write our own.
+          block (exit-loop)
+            for (i from bpos below epos)
+              if (test(seq[i], separator))
+                exit-loop(i, i + 1)
+              end;
+            end;
+            #f
+          end block
+        end method;
+  split(seq, find-pos, start: start, end: epos, count: count,
+        remove-if-empty: remove-if-empty)
+end method split;
 
+// Join several sequences together, including a separator between each sequence.
+define open generic join
+    (items :: <sequence>, separator :: <sequence>, #key key, conjunction)
+ => (joined :: <sequence>);
+
+// join(range(from: 1, to: 3), ", ",
+//      key: integer-to-string,
+//      conjunction: " and ");
+// => "1, 2 and 3"
+
+define method join
+    (sequences :: <sequence>, separator :: <sequence>,
+     #key key :: <function> = identity,
+          conjunction :: false-or(<sequence>))
+ => (joined :: <sequence>)
+  let length :: <integer> = sequences.size;
+  if (length == 0)
+    error("Attempt to join an empty sequence.")
+  elseif (length == 1)
+    key(sequences[0])
+  else
+    let result-size :: <integer>
+      = (reduce(method (len, seq)
+                  len + seq.size
+                end,
+                0,
+                sequences)
+           + (separator.size * (length - 1))
+           + if (conjunction)
+               // the last separator is replaced by the conjunction
+               conjunction.size - separator.size
+             else
+               0
+             end);
+    let first = key(sequences[0]);   // don't call key > once on sequences[0]
+    let result = make(object-class(first), size: result-size);
+    let result-index :: <integer> = 0;
+    local method copy-to-result (seq :: <sequence>)
+            result := replace-subsequence!(result, seq, start: result-index);
+            result-index := result-index + seq.size;
+          end;
+    copy-to-result(first);
+    let max-index :: <integer> = length - 1;
+    for (i :: <integer> from 1 to max-index)
+      let seq :: <sequence> = sequences[i];
+      copy-to-result(if(conjunction & i == max-index)
+                       conjunction
+                     else
+                       separator
+                     end);
+      copy-to-result(key(seq));
+    end;
+    result
+  end if
+end method join;
+    
 define open generic find-element
     (collection :: <collection>, predicate :: <function>,
      #key skip :: <integer>, failure)

Modified: trunk/fundev/sources/common-dylan/library.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/library.dylan	(original)
+++ trunk/fundev/sources/common-dylan/library.dylan	Tue Feb 19 00:28:19 2008
@@ -91,6 +91,7 @@
 	 difference,
 	 position,
 	 split,
+         join,
 	 fill-table!,
 	 find-element,
 	 find-value,

Modified: trunk/fundev/sources/common-dylan/tests/functions.dylan
==============================================================================
--- trunk/fundev/sources/common-dylan/tests/functions.dylan	(original)
+++ trunk/fundev/sources/common-dylan/tests/functions.dylan	Tue Feb 19 00:28:19 2008
@@ -275,59 +275,119 @@
 end function-test position;
 
 define common-extensions function-test split ()
-  check-equal("split on empty string",
-              split("", '/'),
-              #[]);
-  check-equal("split on single character",
-	      split("a", '/'),
-	      #["a"]);
-  check-equal("split on two characters",
-	      split("a/b", '/'),
-	      #["a", "b"]);
-  check-equal("split on multiple single characters",
-	      split("aXbXcXdXeXfXg", 'X'),
-	      #["a", "b", "c", "d", "e", "f", "g"]);
-  check-equal("split on single word",
-	      split("hello", '/'),
-	      #["hello"]);
-  check-equal("split on two words",
-	      split("hello/world", '/'),
-	      #["hello", "world"]);
-  check-equal("split on three words",
-	      split("majorXminorXbuild", 'X'),
-	      #["major", "minor", "build"]);
-  check-equal("split on multiple words",
-	      split("x=100&y=200&width=30&height=10", '&'),
-	      #["x=100", "y=200", "width=30", "height=10"]);
-  check-equal("split on single separator character",
-	      split("/", '/'),
-	      #["", ""]);
-  check-equal("split on a/",
-	      split("a/", '/'),
-	      #["a", ""]);
-  check-equal("split on /b",
-	      split("/b", '/'),
-	      #["", "b"]);
-  check-equal("split with double separator",
-	      split("majorXXbuild", 'X'),
-	      #["major", "", "build"]);
-  check-equal("split with spaces",
-	      split(" major X minor X build ", 'X'),
-	      #["major", "minor", "build"]);
-  check-equal("no trim split with spaces",
-	      split(" major X minor X build ", 'X', trim?: #f),
-	      #[" major ", " minor ", " build "]);
-  check-equal("split with start",
-	      split("123456789x123456789", 'x', start: 1),
-	      #["23456789", "123456789"]);
-  check-equal("split with end",
-	      split("0123456789", '3', end: 8),
-	      #["012", "4567"]);
-  check-equal("split with start and end",
-	      split("0123456789", '3', start: 2, end: 8),
-	      #["2", "4567"]);
+  // a character separator should act the same as a string separator that
+  // contains only that character...
+  for (separator in #('/', "/"))
+    local method fmt (name)
+            format-to-string("%s, sep = %=", name, separator);
+          end;
+    check-equal(fmt("split empty string"),
+                split("", separator),
+                #[""]);
+    check-equal(fmt("split single character"),
+                split("a", separator),
+                #["a"]);
+    check-equal(fmt("split two characters"),
+                split("a/b", separator),
+                #["a", "b"]);
+    check-equal(fmt("split multiple single characters"),
+                split("a/b/c/d/e/f/g", separator),
+                #["a", "b", "c", "d", "e", "f", "g"]);
+    check-equal(fmt("split single word"),
+                split("hello", separator),
+                #["hello"]);
+    check-equal(fmt("split two words"),
+                split("hello/world", separator),
+                #["hello", "world"]);
+    check-equal(fmt("split three words"),
+                split("major/minor/build", separator),
+                #["major", "minor", "build"]);
+    check-equal(fmt("split multiple words"),
+                split("x=100/y=200/width=30/height=10", separator),
+                #["x=100", "y=200", "width=30", "height=10"]);
+    check-equal(fmt("split only the separator character"),
+                split("/", separator),
+                #["", ""]);
+    check-equal(fmt("split a/"),
+                split("a/", separator),
+                #["a", ""]);
+    check-equal(fmt("split /b"),
+                split("/b", separator),
+                #["", "b"]);
+    check-equal(fmt("split with double separator"),
+                split("major//build", separator),
+                #["major", "", "build"]);
+    check-equal(fmt("split with spaces"),
+                split(" major / minor / build ", separator),
+                #[" major ", " minor ", " build "]);
+    check-equal(fmt("split with start"),
+                split("123456789/123456789", separator, start: 1),
+                #["23456789", "123456789"]);
+    check-equal(fmt("split with end"),
+                split("012/456789", separator, end: 8),
+                #["012", "4567"]);
+    check-equal(fmt("split with start and end"),
+                split("012/456789", separator, start: 2, end: 8),
+                #["2", "4567"]);
+    check-equal(fmt("split with count"),
+                split("1/2/3/4", separator, count: 2),
+                #["1", "2/3/4"]);
+    check-equal(fmt("split with count and start"),
+                split("1/2/3/4", separator, count: 2, start: 2),
+                #["2", "3/4"]);
+    check-equal(fmt("split with count and end"),
+                split("1/2/3/4", separator, count: 2, end: 5),
+                #["1", "2/3"]);
+    check-equal(fmt("split with count, start, and end"),
+                split("1/2/3/4", separator, count: 2, start: 2, end: 5),
+                #["2", "3"]);
+    check-equal(fmt("split with count = 1"),
+                split("a/b/c/d", separator, count: 1),
+                #["a/b/c/d"]);
+  end for;
+  check-equal("split with separator crossing start:",
+              split("xxx one xxx two xxx", "xxx", start: 1),
+              #["xx one ", " two ", ""]);
+  check-equal("split with separator crossing end:",
+              split("xxx one xxx two xxx", "xxx", end: 17),
+              #["", " one ", " two x"]);
+  check-equal("split with separator crossing start: and end:",
+              split("xxx one xxx two xxx", "xxx", start: 1, end: 17),
+              #["xx one ", " two x"]);
 end function-test split;
 
+define common-extensions function-test join ()
+  let abc = #("a", "b", "c");
+  check-condition("join empty sequence is an error",
+                  <error>,
+                  join(#(), ", "));
+  check-equal("basic join",
+              "a, b, c",
+              join(abc, ", "));
+  check-equal("join of one element",
+              "singleton",
+              join(#("singleton"), " "));
+  check-equal("join with conjunction",
+              "a, b and c",
+              join(abc, ", ",
+                   conjunction: " and "));
+  check-equal("join with key",
+              "1, 2, 3",
+              join(#(1, 2, 3), ", ",
+                   key: integer-to-string));
+  check-equal("join with conjunction and key",
+              "1, 2 and 3",
+              join(#(1, 2, 3), ", ",
+                   conjunction: " and ",
+                   key: integer-to-string));
+  check-equal("non-string join",
+              #(1, #t, 2, #t, 3),
+              join(#(1, 2, 3), #(#t)));
+  check-equal("join with empty separator",
+              #(1, 2, 3),
+              join(#(1, 2, 3), #()))
+end function-test join;
+
 define common-extensions function-test remove-all-keys! ()
   //---*** Do all collections by using dylan-test-suite collection code
 end function-test remove-all-keys!;
@@ -535,3 +595,4 @@
 define simple-profiling function-test profiling-type-result ()
   //---*** Fill this in...
 end function-test profiling-type-result;
+



More information about the chatter mailing list