Re: [PATCH 0/2] Test overlayfs readdir cache

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]



On Thu, Apr 22, 2021 at 10:47 AM Amir Goldstein <amir73il@xxxxxxxxx> wrote:
>
> On Thu, Apr 22, 2021 at 10:53 AM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
> >
> > On Thu, Apr 22, 2021 at 8:18 AM Amir Goldstein <amir73il@xxxxxxxxx> wrote:
> > >
> > > On Wed, Apr 21, 2021 at 12:23 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote:
> > > >
> > > > Eryu,
> > > >
> > > > This extends the generic t_dir_offset2 test to verify
> > > > some more test cases and adds a new generic test which
> > > > passes on overlayfs (and other fs) on upstream kernel.
> > > >
> > > > The overlayfs specific test fails on upstream kernel
> > > > and the fix commit is currently in linux-next.
> > > > As usual, you may want to wait with merging until the fix
> > > > commit hits upstream.
> > > >
> > > > Miklos,
> > > >
> > > > I had noticed in the test full logs that readdir of
> > > > a merged dir behaves strangely - when seeking backwards
> > > > to offsets > 0, readdir returns unlinked entries in results.
> > > > The test does not fail on that behavior because the test
> > > > only asserts that this is not allowed after seek to offset 0.
> > > >
> > > > Knowing the implementation of overlayfs readdir cache this is
> > > > not surprising to me, but I wonder if this behavior is POSIX
> > > > compliant, and if not, whether we should document it and/or
> > > > add a failing test for it.
> > > >
> > >
> > > I think the outcome could be worse.
> > > If a copied up entry is unlinked after populating the dir cache
> > > but before ovl_cache_update_ino() then ovl_cache_update_ino()
> > > and subsequently the getdents call will fail with ENOENT.
> > >
> > > This test is not smart enough to cover this case (if it really exists).
> > > I think we need to relax the case of negative lookup result in
> > > ovl_cache_update_ino() and just set p->whiteout without and
> > > continue with no error.
> > >
> > > This can solve several test cases.
> > > In principle, we can "semi-reset" the merge dir cache if the dir was
> > > modified after every seek, not just seek to 0.
> > > By "semi-reset" I mean use the list, but force ovl_cache_update_ino()
> > > to all upper entries, similar to ovl_dir_read_impure().
> > >
> > > OR.. we can just do that unconditionally in ovl_iterate().
> > > The ovl dentry cache for the children will be populated after the first
> > > ovl_iterate() anyway, so maybe the penalty is not so bad?
> >
> > POSIX does allow stale readdir data (not just in case of non-zero seek):
> >
> > "If a file is removed from or added to the directory after the most
> > recent call to opendir() or rewinddir(), whether a subsequent call to
> > readdir() returns an entry for that file is unspecified."
> >
> > If you think about the way readdir(3) is implemented by the libc, this
> > is inevitable.
>
> I see. In that case, I would defer merging this test as it assumes too much
> about readdir behavior (even though applications may expect this behavior).

FWIW, I started writing a readdir stress/validator similar to
fsx-linux but for directories.

It's unfinished and  has performance problem as the directory grows
due to linear searches.

Putting it out in case someone wants to continue working on it, or
just take some ideas.

Thanks,
Miklos
/*
 * Copyright (C) 2021 Miklos Szeredi <miklos@xxxxxxxxxx>
 *
 * This program can be distributed under the terms of the GNU GPLv2.
 */

#include <stdio.h>
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <dirent.h>
#include <string.h>
#include <err.h>
#include <sys/stat.h>
#include <sys/syscall.h>

struct linux_dirent64 {
	uint64_t	d_ino;
	int64_t		d_off;
	unsigned short	d_reclen;
	unsigned char	d_type;
	char		d_name[0];
};

struct entry {
	char *name;
	unsigned char type;
	ino_t ino;
	int add_ver;
	int del_ver;
};

struct list_entry {
	char *name;
	uint64_t ino;
	int64_t off;
	unsigned char type;
	int version;

	struct list_entry *next;
};

struct dir {
	int fd;
	off_t pos;
	int version;
	struct list_entry *list;
};

static struct dir global_dir;

static int version = 1;
static unsigned int num = 0;
static unsigned int num_active = 0;
static struct entry *array = NULL;

static void *oom(void *ptr)
{
	if (!ptr)
		errx(1, "out of memory");
	return ptr;
}

static void add_entry(char *name, unsigned char type)
{
	struct stat st;
	int res;

	res = lstat(name, &st);
	if (res == -1)
		err(1, "failed to stat <%s>", name);

	array = oom(realloc(array, (num + 1) * sizeof(*array)));

	array[num].name = oom(strdup(name));

	array[num].type = type;
	array[num].ino = st.st_ino;
	version++;
	array[num].add_ver = version;
	array[num].del_ver = 0;
	num++;
	num_active++;
}

static int create_entry(void)
{
	unsigned char type;
	unsigned int namelen;
	unsigned int i;
	char name[256];
	mode_t mode;
	int res;

	type = (random() % 7) * 2;
	if (!type)
		type = 1;

	if (type == DT_CHR || type == DT_BLK)
		type = DT_REG;

	mode = type << 12 | 0666;

retry:
	namelen = (random() % 10) ? (random() % 32 + 1) : (random() % 222 + 33);
//	namelen = random() % 30 + 1;

	for (i = 0; i < namelen; i++) {
		unsigned char c;

		c = random() % 94 + 32;
		if (c >= '/')
			c++;
		name[i] = c;
	}
	name[namelen] = '\0';

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		goto retry;

	for (i = 0; i < num; i++) {
		if (strcmp(name, array[i].name) == 0)
			goto retry;
	}

	if (S_ISDIR(mode))
		res = mkdir(name, 0777);
	else if (S_ISLNK(mode))
		res = symlink("target", name);
	else
		res = mknod(name, mode, 1);

	if (res == -1)
		err(1, "failed to create \"%s\", mode 0%o", name, mode);

	add_entry(name, type);

	return 0;
}

static int remove_entry(void)
{
	unsigned int i;
	struct entry *x;
	int res;

	if (!num_active)
		return 0;
retry:
	i = random() % num;
	x = &array[i];
	if (x->del_ver)
		goto retry;

	if (x->type == DT_DIR)
		res = rmdir(x->name);
	else
		res = unlink(x->name);
	if (res == -1)
		err(1, "failed to remove \"%s\"", x->name);

	version++;
	x->del_ver = version;
	num_active--;

	return 0;
}

static void add_list_entry(struct dir *dir, struct list_entry *ent)
{
	struct list_entry **pp;

	for (pp = &dir->list; *pp != NULL; pp = &(*pp)->next) {
		struct list_entry *p = *pp;

		if (ent->off == p->off)
			errx(1, "two entries have the same offset: %li",
				ent->off);
		if (strcmp(ent->name, p->name) == 0)
			errx(1, "two entries have the same name");
	}
	*pp = ent;
}

static void check_entry(struct dir *dir, struct list_entry *ent)
{
	unsigned int i;

	if (strcmp(ent->name, ".") == 0 || strcmp(ent->name, "..") == 0)
		return;

	for (i = 0; i < num; i++) {
		struct entry *x = &array[i];

		if (strcmp(ent->name, x->name) == 0 &&
		    (!x->del_ver || x->del_ver > dir->version)) {
			return;
		}
	}
	errx(1, "unknown entry returned by getdents64() \"%s\"", ent->name);
}

static void check_complete(struct dir *dir)
{
	unsigned int i;

	for (i = 0; i < num; i++) {
		struct entry *x = &array[i];

		if (x->add_ver <= dir->version && !x->del_ver) {
			struct list_entry *p;

			for (p = dir->list; p; p = p->next) {
				if (strcmp(x->name, p->name) == 0)
					break;
			}
			if (!p) {
				errx(1, "\"%s\" not found in complete listing",
				     x->name);
			}
		}
	}
}

static int read_dir(struct dir *dir)
{
	char buf[65536];
	unsigned int size = random() % (sizeof(buf) - 512) + 512;
	int res;
	int nread;
	struct linux_dirent64 *d;
	struct list_entry *ent;
	int bpos;
	int version = version;

	res = syscall(SYS_getdents64, dir->fd, buf, size);
	if (res == -1)
		err(1, "getdents64(%i, %p, %u)", dir->fd, buf, size);
	nread = res;

	if (nread == 0) {
		check_complete(dir);
		return 0;
	}

	for (bpos = 0; bpos < nread; ) {
		d = (struct linux_dirent64 *) (buf + bpos);

		ent = oom(calloc(1, sizeof(*ent)));
		ent->name = oom(strdup(d->d_name));
		ent->ino = d->d_ino;
		ent->type = d->d_type;
		ent->off = dir->pos;
		ent->version = version;
		dir->pos = d->d_off;

		check_entry(dir, ent);

		add_list_entry(dir, ent);

		bpos += d->d_reclen;
	}
	if (bpos != nread)
		errx(1, "invalid readdir length");

	return 0;
}

static int seek_dir(struct dir *dir)
{
	off_t res;
	struct list_entry *p, *next;

	dir->version = version;

	res = lseek(dir->fd, 0, SEEK_SET);
	if (res == (off_t) -1)
		err(1, "rewind");

	for (p = dir->list; p; p = next) {
		next = p->next;
		free(p->name);
		free(p);
	}
	dir->list = NULL;
	dir->pos = 0;

	return 0;
}

static int one(void)
{
	switch (random() % 10) {
	case 0:
	case 1:
		return create_entry();
	case 2:
		return remove_entry();
	case 3:
	case 4:
	case 5:
	case 6:
	case 7:
	case 8:
		return read_dir(&global_dir);
	case 9:
		return seek_dir(&global_dir);
	}
	errx(1, "unreachable");
}

static int run(unsigned int iters)
{
	unsigned int i;
	int res = 0;

	srandom(1);

	global_dir.version = version;
	global_dir.fd = open(".", O_RDONLY | O_DIRECTORY);
	if (global_dir.fd == -1)
		err(1, "failed to open \".\"");

	for (i = 0; i < iters; i++) {
		if (i % 100 == 1)
			printf("[%i] total entries: %u active entries: %u\n",
			       i, num, num_active);
		res = one();
		if (res)
			break;
	}

	close(global_dir.fd);

	return res;
}

int main(int argc, char *argv[])
{
	char *testdir = argv[1];
	int res;

	if (argc < 2)
		errx(1, "usage: %s test_directory", argv[0]);

	res = chdir(testdir);
	if (res == -1)
		err(1, "chdir(%s)", testdir);


	return run(1000000);
}

[Index of Archives]     [Linux Filesystems Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux