On Thu, Dec 18, 2003 at 10:17:06AM +1100, Adam Cassar wrote: > > This has to be done in userspace because a directory can be > > arbitrarily big, so we can't do it in the kernel. However, for people > > who don't want to modify their application, I do have an LD_PRELOAD > > module which you can try using that should also do the trick (see > > attached). > > Could you resend please - it didn't come through. Oops, sorry about that. I forgot to attach it before I hit "send". > So htree workloads are best suited for applications that know the name > of the file in advance? From your comments above I would assume that > even 'ls' would perform worse? Well, "ls" is fine. "ls -sF", which requires that you stat every single file that is returned by readdir, will perform much, much worse unless you sort the returned files by inode number. The same is true with find as well. If you compile the attached C source file as a shared library, and then point the LD_PRELOAD environment variable (or place the pathame in /etc/ld.so.preload --- which is preferred), it will intercept all opendir/readdir system calls, and sort the results first before returning them to the user progam. It will require a little bit more memory (enough to hold the entire contents of the directory at once), but while this might be a problem in the kernel, for a userspace program, this is generally not a problem. - Ted
/* * readdir accelerator * * (C) Copyright 2003 by Theodore Ts'o. * * %Begin-Header% * This file may be redistributed under the terms of the GNU Public * License. * %End-Header% * */ #define ALLOC_STEPSIZE 100 #define MAX_DIRSIZE 0 #define DEBUG #ifdef DEBUG #define DEBUG_DIR(x) {if (do_debug) { x; }} #else #define DEBUG_DIR(x) #endif #define _GNU_SOURCE #define __USE_LARGEFILE64 #include <stdio.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <stdlib.h> #include <string.h> #include <dirent.h> #include <errno.h> #include <dlfcn.h> struct dirent_s { unsigned long long d_ino; long long d_off; unsigned short int d_reclen; unsigned char d_type; char *d_name; }; struct dir_s { DIR *dir; int num; int max; struct dirent_s *dp; int pos; struct dirent ret_dir; struct dirent64 ret_dir64; }; static int (*real_closedir)(DIR *dir) = 0; static DIR *(*real_opendir)(const char *name) = 0; static struct dirent *(*real_readdir)(DIR *dir) = 0; static struct dirent64 *(*real_readdir64)(DIR *dir) = 0; static off_t (*real_telldir)(DIR *dir) = 0; static void (*real_seekdir)(DIR *dir, off_t offset) = 0; static unsigned long max_dirsize = MAX_DIRSIZE; #ifdef DEBUG static int do_debug = 0; #endif static void setup_ptr() { char *cp; real_opendir = dlsym(RTLD_NEXT, "opendir"); real_closedir = dlsym(RTLD_NEXT, "closedir"); real_readdir = dlsym(RTLD_NEXT, "readdir"); real_readdir64 = dlsym(RTLD_NEXT, "readdir64"); real_telldir = dlsym(RTLD_NEXT, "telldir"); real_seekdir = dlsym(RTLD_NEXT, "seekdir"); if ((cp = getenv("SPD_READDIR_MAX_SIZE")) != NULL) { max_dirsize = atol(cp); } #ifdef DEBUG if (getenv("SPD_READDIR_DEBUG")) do_debug++; #endif } static void free_cached_dir(struct dir_s *dirstruct) { int i; if (!dirstruct->dp) return; for (i=0; i < dirstruct->num; i++) { free(dirstruct->dp[i].d_name); } free(dirstruct->dp); dirstruct->dp = 0; } static int ino_cmp(const void *a, const void *b) { const struct dirent_s *ds_a = (const struct dirent_s *) a; const struct dirent_s *ds_b = (const struct dirent_s *) b; ino_t i_a, i_b; i_a = ds_a->d_ino; i_b = ds_b->d_ino; if (ds_a->d_name[0] == '.') { if (ds_a->d_name[1] == 0) i_a = 0; else if ((ds_a->d_name[1] == '.') && (ds_a->d_name[2] == 0)) i_a = 1; } if (ds_b->d_name[0] == '.') { if (ds_b->d_name[1] == 0) i_b = 0; else if ((ds_b->d_name[1] == '.') && (ds_b->d_name[2] == 0)) i_b = 1; } return (i_a - i_b); } DIR *opendir(const char *name) { DIR *dir; struct dir_s *dirstruct; struct dirent_s *ds, *dnew; struct dirent64 *d; struct stat st; if (!real_opendir) setup_ptr(); dir = (*real_opendir)(name); if (!dir) return NULL; dirstruct = malloc(sizeof(struct dir_s)); if (!dirstruct) { (*real_closedir)(dir); errno = -ENOMEM; return NULL; } dirstruct->num = 0; dirstruct->max = 0; dirstruct->dp = 0; dirstruct->pos = 0; dirstruct->dir = 0; if (max_dirsize && (stat(name, &st) == 0) && (st.st_size > max_dirsize)) { DEBUG_DIR(printf("Directory size %ld, using direct readdir\n", st.st_size)); dirstruct->dir = dir; return (DIR *) dirstruct; } while ((d = (*real_readdir64)(dir)) != NULL) { if (dirstruct->num >= dirstruct->max) { dirstruct->max += ALLOC_STEPSIZE; DEBUG_DIR(printf("Reallocating to size %d\n", dirstruct->max)); dnew = realloc(dirstruct->dp, dirstruct->max * sizeof(struct dir_s)); if (!dnew) goto nomem; dirstruct->dp = dnew; } ds = &dirstruct->dp[dirstruct->num++]; ds->d_ino = d->d_ino; ds->d_off = d->d_off; ds->d_reclen = d->d_reclen; ds->d_type = d->d_type; if ((ds->d_name = malloc(strlen(d->d_name)+1)) == NULL) { dirstruct->num--; goto nomem; } strcpy(ds->d_name, d->d_name); DEBUG_DIR(printf("readdir: %lu %s\n", (unsigned long) d->d_ino, d->d_name)); } (*real_closedir)(dir); qsort(dirstruct->dp, dirstruct->num, sizeof(struct dirent_s), ino_cmp); return ((DIR *) dirstruct); nomem: DEBUG_DIR(printf("No memory, backing off to direct readdir\n")); free_cached_dir(dirstruct); dirstruct->dir = dir; return ((DIR *) dirstruct); } int closedir(DIR *dir) { struct dir_s *dirstruct = (struct dir_s *) dir; if (dirstruct->dir) (*real_closedir)(dirstruct->dir); free_cached_dir(dirstruct); free(dirstruct); return 0; } struct dirent *readdir(DIR *dir) { struct dir_s *dirstruct = (struct dir_s *) dir; struct dirent_s *ds; if (dirstruct->dir) return (*real_readdir)(dirstruct->dir); if (dirstruct->pos >= dirstruct->num) return NULL; ds = &dirstruct->dp[dirstruct->pos++]; dirstruct->ret_dir.d_ino = ds->d_ino; dirstruct->ret_dir.d_off = ds->d_off; dirstruct->ret_dir.d_reclen = ds->d_reclen; dirstruct->ret_dir.d_type = ds->d_type; strncpy(dirstruct->ret_dir.d_name, ds->d_name, sizeof(dirstruct->ret_dir.d_name)); return (&dirstruct->ret_dir); } struct dirent64 *readdir64(DIR *dir) { struct dir_s *dirstruct = (struct dir_s *) dir; struct dirent_s *ds; if (dirstruct->dir) return (*real_readdir64)(dirstruct->dir); if (dirstruct->pos >= dirstruct->num) return NULL; ds = &dirstruct->dp[dirstruct->pos++]; dirstruct->ret_dir64.d_ino = ds->d_ino; dirstruct->ret_dir64.d_off = ds->d_off; dirstruct->ret_dir64.d_reclen = ds->d_reclen; dirstruct->ret_dir64.d_type = ds->d_type; strncpy(dirstruct->ret_dir64.d_name, ds->d_name, sizeof(dirstruct->ret_dir64.d_name)); return (&dirstruct->ret_dir64); } off_t telldir(DIR *dir) { struct dir_s *dirstruct = (struct dir_s *) dir; if (dirstruct->dir) return (*real_telldir)(dirstruct->dir); return ((off_t) dirstruct->pos); } void seekdir(DIR *dir, off_t offset) { struct dir_s *dirstruct = (struct dir_s *) dir; if (dirstruct->dir) { (*real_seekdir)(dirstruct->dir, offset); return; } dirstruct->pos = offset; }