[PATCH 4/7] Make hash_name_lookup able to do case-independent lookups

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxxxxxxxx>
Date: Fri, 21 Mar 2008 15:55:19 -0700

Right now nobody uses it, but "index_name_exists()" gets a flag so
you can enable it on a case-by-case basis.

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---

Oooh.. We actually have some (admittedly stupid) case insensitivity code 
starting to appear. So we now hash the names insensitively, and we have 
the _capability_ to do case-insensitive lookups, but nobody actually uses 
that insensitive lookup capability yet.

But things are now starting to get interesting.

 cache.h        |    4 ++--
 dir.c          |    2 +-
 name-hash.c    |   50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 unpack-trees.c |    2 +-
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/cache.h b/cache.h
index 76d95d2..a9ddaa1 100644
--- a/cache.h
+++ b/cache.h
@@ -264,7 +264,7 @@ static inline void remove_name_hash(struct cache_entry *ce)
 #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL)
 #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
 #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
-#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen))
+#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
 #endif
 
 enum object_type {
@@ -353,7 +353,7 @@ extern int write_index(const struct index_state *, int newfd);
 extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
-extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen);
+extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
diff --git a/dir.c b/dir.c
index edc458e..7362e83 100644
--- a/dir.c
+++ b/dir.c
@@ -371,7 +371,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
 
 struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 {
-	if (cache_name_exists(pathname, len))
+	if (cache_name_exists(pathname, len, 0))
 		return NULL;
 
 	ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
diff --git a/name-hash.c b/name-hash.c
index 2678148..2253870 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -8,12 +8,25 @@
 #define NO_THE_INDEX_COMPATIBILITY_MACROS
 #include "cache.h"
 
+/*
+ * This removes bit 5 if bit 6 is set.
+ *
+ * That will make US-ASCII characters hash to their upper-case
+ * equivalent. We could easily do this one whole word at a time,
+ * but that's for future worries.
+ */
+static inline unsigned char icase_hash(unsigned char c)
+{
+	return c & ~((c & 0x40) >> 1);
+}
+
 static unsigned int hash_name(const char *name, int namelen)
 {
 	unsigned int hash = 0x123;
 
 	do {
 		unsigned char c = *name++;
+		c = icase_hash(c);
 		hash = hash*101 + c;
 	} while (--namelen);
 	return hash;
@@ -54,7 +67,40 @@ void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 		hash_index_entry(istate, ce);
 }
 
-struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen)
+static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
+{
+	if (len1 != len2)
+		return 0;
+
+	while (len1) {
+		unsigned char c1 = *name1++;
+		unsigned char c2 = *name2++;
+		len1--;
+		if (c1 != c2) {
+			c1 = toupper(c1);
+			c2 = toupper(c2);
+			if (c1 != c2)
+				return 0;
+		}
+	}
+	return 1;
+}
+
+static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
+{
+	int len = ce_namelen(ce);
+
+	/*
+	 * Always do exact compare (even if we want a case-ignoring comparison
+	 * we do the quick exact one first, because it will be the common case).
+	 */
+	if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
+		return 1;
+
+	return icase && slow_same_name(name, namelen, ce->name, len);
+}
+
+struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
 {
 	unsigned int hash = hash_name(name, namelen);
 	struct cache_entry *ce;
@@ -64,7 +110,7 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
 
 	while (ce) {
 		if (!(ce->ce_flags & CE_UNHASHED)) {
-			if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags))
+			if (same_name(ce, name, namelen, icase))
 				return ce;
 		}
 		ce = ce->next;
diff --git a/unpack-trees.c b/unpack-trees.c
index ca4c845..bf7d8f6 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -582,7 +582,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
 		 * delete this path, which is in a subdirectory that
 		 * is being replaced with a blob.
 		 */
-		result = index_name_exists(&o->result, ce->name, ce_namelen(ce));
+		result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0);
 		if (result) {
 			if (result->ce_flags & CE_REMOVE)
 				return 0;
-- 
1.5.5.rc0.28.g61a0.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux