Re: [PATCH 1/3] add mergesort() for linked lists

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 05.04.2012 21:17, schrieb Junio C Hamano:
> After seeing "I wrote it myself due to NIH", it strikes me a bit odd that
> you still used "start from bunch of singleton sublist, elongating twice
> per round as we go" structure from the original.

It's just becasue the dumb bottom-up approach is the most simple way to
implement merge sort.

> I wonder if it would be an improvement if you structured the loop so that:
> 
>   (1) the first sublist 'p' grabs as many elements in the ascending order
>       as you find;
> 
>   (2) the second sublist 'q' begins at the end of the first sublist and
>       grabs as many elements in the ascending order;
> 
>   (3) 'p' and 'q' are merge-sorted into the result list;
> 
>   (4) if your two sublists did not cover "list" in its entirety, process
>       the remainder (i.e. where the second sublist stopped because of an
>       unordered element) by going back to step (1); and
> 
>   (5) if you did not need to jump back to step (1) from step (4), then you
>       had only two sublists (or less), so the result is sorted.  Otherwise,
>       the result now has fewer ascending sublists than the original, so go
>       back to (1) and iterate.
> 
> If the input is in a random order, this may end up doing the same number
> of iterations as the original, but if the input is mostly sorted, wouldn't
> it allow us to take advantage of the fact by starting with a longer
> sublist in the earlier rounds?

This optimization speeds up the pre-sorted case but slows down the case of
a reversed pre-sorted list because we have to determine the length of the
sublists each time, while the dumb implementation already knows it.  I
didn't measure a significant difference for Jeff's test case.  Here's my
attempt at an implementation, for reference.

---
 mergesort.c |   61 +++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/mergesort.c b/mergesort.c
index c0f1874..3a61b9b 100644
--- a/mergesort.c
+++ b/mergesort.c
@@ -8,12 +8,37 @@ struct mergesort_sublist {
 	unsigned long len;
 };
 
-static void *get_nth_next(void *list, unsigned long n,
-			  void *(*get_next_fn)(const void *))
+static unsigned long run_length(const void *list,
+				struct mergesort_sublist *next_list,
+				void *(*get_next_fn)(const void *),
+				int (*compare_fn)(const void *, const void *))
 {
-	while (n-- && list)
-		list = get_next_fn(list);
-	return list;
+	unsigned long len = 1;
+
+	if (!list)
+		return 0;
+	for (;;) {
+		void *next = get_next_fn(list);
+
+		if (!next || compare_fn(list, next) > 0) {
+			if (next_list)
+				next_list->ptr = next;
+			break;
+		}
+		list = next;
+		len++;
+	}
+	return len;
+}
+
+static void set_next_pair(struct mergesort_sublist *p,
+			  struct mergesort_sublist *q, void *list,
+			  void *(*get_next_fn)(const void *),
+			  int (*compare_fn)(const void *, const void *))
+{
+	p->ptr = list;
+	p->len = run_length(p->ptr, q, get_next_fn, compare_fn);
+	q->len = q->ptr ? run_length(q->ptr, NULL, get_next_fn, compare_fn) : 0;
 }
 
 static void *pop_item(struct mergesort_sublist *l,
@@ -30,24 +55,16 @@ void *mergesort(void *list,
 		void (*set_next_fn)(void *, void *),
 		int (*compare_fn)(const void *, const void *))
 {
-	unsigned long l;
-
 	if (!list)
 		return NULL;
-	for (l = 1; ; l *= 2) {
+	for (;;) {
 		void *curr;
 		struct mergesort_sublist p, q;
 
-		p.ptr = list;
-		q.ptr = get_nth_next(p.ptr, l, get_next_fn);
+		set_next_pair(&p, &q, list, get_next_fn, compare_fn);
 		if (!q.ptr)
 			break;
-		p.len = q.len = l;
-
-		if (compare_fn(p.ptr, q.ptr) > 0)
-			list = curr = pop_item(&q, get_next_fn);
-		else
-			list = curr = pop_item(&p, get_next_fn);
+		list = curr = pop_item(&q, get_next_fn);
 
 		while (p.ptr) {
 			while (p.len || q.len) {
@@ -63,10 +80,14 @@ void *mergesort(void *list,
 					curr = pop_item(&p, get_next_fn);
 				set_next_fn(prev, curr);
 			}
-			p.ptr = q.ptr;
-			p.len = l;
-			q.ptr = get_nth_next(p.ptr, l, get_next_fn);
-			q.len = q.ptr ? l : 0;
+
+			set_next_pair(&p, &q, q.ptr, get_next_fn, compare_fn);
+			if (q.ptr) {
+				void *prev = curr;
+
+				curr = pop_item(&q, get_next_fn);
+				set_next_fn(prev, curr);
+			}
 
 		}
 		set_next_fn(curr, NULL);
-- 
1.7.10
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]