[PATCH 2/6] dir-iterator: support iteration in sorted order

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The `struct dir_iterator` is a helper that allows us to iterate through
directory entries. This iterator returns entries in the exact same order
as readdir(3P) does -- or in other words, it guarantees no specific
order at all.

This is about to become problematic as we are introducing a new reflog
subcommand to list reflogs. As the "files" backend uses the directory
iterator to enumerate reflogs, returning reflog names and exposing them
to the user would inherit the indeterministic ordering. Naturally, it
would make for a terrible user interface to show a list with no
discernible order. While this could be handled at a higher level by the
new subcommand itself by collecting and ordering the reflogs, this would
be inefficient and introduce latency when there are many reflogs.

Instead, introduce a new option into the directory iterator that asks
for its entries to be yielded in lexicographical order. If set, the
iterator will read all directory entries greedily end sort them before
we start to iterate over them.

While this will of course also incur overhead as we cannot yield the
directory entries immediately, it should at least be more efficient than
having to sort the complete list of reflogs as we only need to sort one
directory at a time.

This functionality will be used in a follow-up commit.

Signed-off-by: Patrick Steinhardt <ps@xxxxxx>
---
 dir-iterator.c | 87 ++++++++++++++++++++++++++++++++++++++++----------
 dir-iterator.h |  3 ++
 2 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index f58a97e089..396c28178f 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -2,9 +2,12 @@
 #include "dir.h"
 #include "iterator.h"
 #include "dir-iterator.h"
+#include "string-list.h"
 
 struct dir_iterator_level {
 	DIR *dir;
+	struct string_list entries;
+	size_t entries_idx;
 
 	/*
 	 * The length of the directory part of path at this level
@@ -72,6 +75,40 @@ static int push_level(struct dir_iterator_int *iter)
 		return -1;
 	}
 
+	string_list_init_dup(&level->entries);
+	level->entries_idx = 0;
+
+	/*
+	 * When the iterator is sorted we read and sort all directory entries
+	 * directly.
+	 */
+	if (iter->flags & DIR_ITERATOR_SORTED) {
+		while (1) {
+			struct dirent *de;
+
+			errno = 0;
+			de = readdir(level->dir);
+			if (!de) {
+				if (errno && errno != ENOENT) {
+					warning_errno("error reading directory '%s'",
+						      iter->base.path.buf);
+					return -1;
+				}
+
+				break;
+			}
+
+			if (is_dot_or_dotdot(de->d_name))
+				continue;
+
+			string_list_append(&level->entries, de->d_name);
+		}
+		string_list_sort(&level->entries);
+
+		closedir(level->dir);
+		level->dir = NULL;
+	}
+
 	return 0;
 }
 
@@ -88,6 +125,7 @@ static int pop_level(struct dir_iterator_int *iter)
 		warning_errno("error closing directory '%s'",
 			      iter->base.path.buf);
 	level->dir = NULL;
+	string_list_clear(&level->entries, 0);
 
 	return --iter->levels_nr;
 }
@@ -136,30 +174,43 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 
 	/* Loop until we find an entry that we can give back to the caller. */
 	while (1) {
-		struct dirent *de;
 		struct dir_iterator_level *level =
 			&iter->levels[iter->levels_nr - 1];
+		struct dirent *de;
+		const char *name;
 
 		strbuf_setlen(&iter->base.path, level->prefix_len);
-		errno = 0;
-		de = readdir(level->dir);
-
-		if (!de) {
-			if (errno) {
-				warning_errno("error reading directory '%s'",
-					      iter->base.path.buf);
-				if (iter->flags & DIR_ITERATOR_PEDANTIC)
-					goto error_out;
-			} else if (pop_level(iter) == 0) {
-				return dir_iterator_abort(dir_iterator);
+
+		if (level->dir) {
+			errno = 0;
+			de = readdir(level->dir);
+			if (!de) {
+				if (errno) {
+					warning_errno("error reading directory '%s'",
+						      iter->base.path.buf);
+					if (iter->flags & DIR_ITERATOR_PEDANTIC)
+						goto error_out;
+				} else if (pop_level(iter) == 0) {
+					return dir_iterator_abort(dir_iterator);
+				}
+				continue;
 			}
-			continue;
-		}
 
-		if (is_dot_or_dotdot(de->d_name))
-			continue;
+			if (is_dot_or_dotdot(de->d_name))
+				continue;
 
-		if (prepare_next_entry_data(iter, de->d_name)) {
+			name = de->d_name;
+		} else {
+			if (level->entries_idx >= level->entries.nr) {
+				if (pop_level(iter) == 0)
+					return dir_iterator_abort(dir_iterator);
+				continue;
+			}
+
+			name = level->entries.items[level->entries_idx++].string;
+		}
+
+		if (prepare_next_entry_data(iter, name)) {
 			if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
 				goto error_out;
 			continue;
@@ -188,6 +239,8 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
 			warning_errno("error closing directory '%s'",
 				      iter->base.path.buf);
 		}
+
+		string_list_clear(&level->entries, 0);
 	}
 
 	free(iter->levels);
diff --git a/dir-iterator.h b/dir-iterator.h
index 479e1ec784..6d438809b6 100644
--- a/dir-iterator.h
+++ b/dir-iterator.h
@@ -54,8 +54,11 @@
  *   and ITER_ERROR is returned immediately. In both cases, a meaningful
  *   warning is emitted. Note: ENOENT errors are always ignored so that
  *   the API users may remove files during iteration.
+ *
+ * - DIR_ITERATOR_SORTED: sort directory entries alphabetically.
  */
 #define DIR_ITERATOR_PEDANTIC (1 << 0)
+#define DIR_ITERATOR_SORTED   (1 << 1)
 
 struct dir_iterator {
 	/* The current path: */
-- 
2.44.0-rc1

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux