[RFC] [PATCH v2 1/1]: nfsd: Change caching from ino/dev to file-handle

From: Krishna Kumar <krkumar2@xxxxxxxxxx>

Implement file handle caching. List of changes:

	1. Remove the readahead implementation and all its users; rename "RA"
	   to "FH" and "parm" to "cache".
	2. Add fields to the fhcache structure to cache the file, svc_export,
	   the expiry list and the expiry time. Modify some other fields
	   (e.g. p_count is now atomic).
	3. Implement a daemon to clean up expired cached FHs (a userspace
	   sketch of the expiry handling follows the diffstat below).
	4. Add four helper functions:
		fh_cache_get: Hold a reference to the dentry and svc_export.
		fh_cache_put: Drop a reference to file, dentry and svc_export.
		fh_get_cached_entries: Return the cached file and svc_export.
		fh_cache_upd: Cache the file and svc_export, and add the entry
			      to the list for the daemon to clean up.
	5. Rewrite nfsd_get_raparms slightly and rename it to nfsd_get_fhcache
	   (a userspace sketch of the lookup follows this list).
	6. Rewrite nfsd_read to use the cache; remove the readahead handling
	   from nfsd_vfs_read.
	7. A file remove from a client makes the server check the cache and
	   drop the reference immediately (a remove done directly on the
	   server still retains the reference for a short time, until the
	   daemon frees up the entry).
	8. Modify init and shutdown slightly.

	(ra_size, ra_depth, nfsd_racache_init and nfsd_racache_shutdown still
	   retain the "ra" prefix for now)

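The sketch below is a standalone userspace illustration of what
nfsd_get_fhcache() in the diff does: hash the FH auth word into a bucket,
return a hit (or claim the first unused slot) after moving it to the front
of its chain, and raise a reference count that the caller drops later. The
types, the hash function and the sizes here are invented for the
illustration; only the diff below is the proposed code.

/*
 * Standalone sketch (not the kernel code): bucketed lookup with
 * move-to-front and reference counting, in the shape of
 * nfsd_get_fhcache().  Types, hash and sizes are made up.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 8

struct entry {
	struct entry *next;
	uint32_t key;		/* stands in for fh_auth[3] / p_auth */
	int count;		/* stands in for p_count */
};

static struct { struct entry *head; } buckets[NBUCKETS];

static unsigned int hash(uint32_t key)
{
	return (key * 0x9e3779b1u) >> 29;	/* top 3 bits -> 8 buckets */
}

/*
 * Return the cached entry for 'key', or claim the first unused slot in
 * its bucket.  The entry is moved to the bucket head and its reference
 * count is raised; the caller drops the count when done, as
 * fh_cache_upd() does in the patch.
 */
static struct entry *lookup(uint32_t key)
{
	struct entry *e, **ep, **freep = NULL;
	struct entry **head = &buckets[hash(key)].head;

	for (ep = head; (e = *ep); ep = &e->next) {
		if (e->key == key)
			goto found;
		if (!freep && e->count == 0)
			freep = ep;	/* first reclaimable slot */
	}
	if (!freep)
		return NULL;		/* every slot in the bucket is busy */
	ep = freep;
	e = *ep;
	e->key = key;			/* claim the slot for this key */
found:
	if (ep != head) {		/* move to the front of the chain */
		*ep = e->next;
		e->next = *head;
		*head = e;
	}
	e->count++;
	return e;
}

int main(void)
{
	/* two entries per bucket, as nfsd_racache_init() guarantees */
	for (int i = 0; i < NBUCKETS; i++)
		for (int j = 0; j < 2; j++) {
			struct entry *e = calloc(1, sizeof(*e));
			e->next = buckets[i].head;
			buckets[i].head = e;
		}

	struct entry *e = lookup(42);
	e->count--;			/* reader finished with the entry */
	printf("second lookup reuses the entry: %s\n",
	       lookup(42) == e ? "yes" : "no");
	return 0;
}

The sketch compiles with any C99 compiler; the kernel version additionally
takes the per-bucket spinlock around the walk and accounts the walk depth
in nfsdstats.
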

Signed-off-by: Krishna Kumar <krkumar2@xxxxxxxxxx>
---
 fs/nfsd/vfs.c |  429 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 338 insertions(+), 91 deletions(-)
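
Before the diff itself, a similar standalone sketch of the expiry handling:
fh_cache_upd() queues each newly cached entry, with an expiry time, on
nfsd_daemon_list, and the nfsd_cacher thread periodically walks the list
from the head in daemon_free_entries(), dropping the references of entries
whose time is up. Because entries are appended with a fixed lifetime, the
list stays ordered by expiry and the reaper can stop at the first entry
that is still fresh (or still referenced). The locking, the jiffies
arithmetic and the kthread plumbing exist only in the diff; the names
below are made up.

/*
 * Standalone sketch (not the kernel code) of the expiry queue behind
 * fh_cache_upd(), nfsd_daemon_list and daemon_free_entries().
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct cached {
	struct cached *next;
	time_t expires;		/* stands in for p_expires */
	int busy;		/* stands in for p_count */
	const char *name;	/* stands in for the cached file/export */
};

static struct cached *head;
static struct cached **tail = &head;

/* fh_cache_upd() analogue: publish the entry and queue it for the reaper */
static void queue_entry(const char *name, int ttl)
{
	struct cached *c = calloc(1, sizeof(*c));

	c->name = name;
	c->expires = time(NULL) + ttl;
	*tail = c;
	tail = &c->next;
}

/* daemon_free_entries() analogue: release entries whose time is up */
static void reap(void)
{
	time_t now = time(NULL);

	while (head && head->expires <= now && !head->busy) {
		struct cached *c = head;

		head = c->next;
		if (!head)
			tail = &head;
		printf("releasing %s\n", c->name);	/* fh_cache_put() */
		free(c);
	}
}

int main(void)
{
	queue_entry("fileA", 0);	/* already expired */
	queue_entry("fileB", 60);	/* still fresh */
	reap();				/* frees only fileA */
	return 0;
}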

diff -ruNp 2.6.29-rc2.org/fs/nfsd/vfs.c 2.6.29-rc2.new/fs/nfsd/vfs.c
--- 2.6.29-rc2.org/fs/nfsd/vfs.c	2009-01-22 13:23:18.000000000 +0530
+++ 2.6.29-rc2.new/fs/nfsd/vfs.c	2009-01-22 13:23:18.000000000 +0530
@@ -55,38 +55,53 @@
 #include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
 
+/* Number of jiffies to cache the file before releasing */
+#define NFSD_CACHE_JIFFIES		100
 
 /*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
+ * This is a cache of file handles to speed up file lookups. It also
+ * avoids repeatedly opening and closing a file while a client reads it.
+ *
  * If you increase the number of cached files very much, you'll need to
  * add a hash table here.
  */
-struct raparms {
-	struct raparms		*p_next;
-	unsigned int		p_count;
-	ino_t			p_ino;
-	dev_t			p_dev;
-	int			p_set;
-	struct file_ra_state	p_ra;
+struct fhcache {
+	struct fhcache		*p_next;
+
+	/* Hashed on this parameter */
+	__u32			p_auth;
+
+	/* Cached information */
+	struct file		*p_filp;
+	struct svc_export	*p_exp;
+
+	/* Refcount for overwrite */
+	atomic_t		p_count;
+
+	/* When this entry expires */
+	unsigned long		p_expires;
+
+	/* List of entries linked to 'nfsd_daemon_list' */
+	struct list_head	p_list;
+
 	unsigned int		p_hindex;
 };
 
-struct raparm_hbucket {
-	struct raparms		*pb_head;
+struct fhcache_hbucket {
+	struct fhcache		*pb_head;
 	spinlock_t		pb_lock;
 } ____cacheline_aligned_in_smp;
 
-#define RAPARM_HASH_BITS	4
-#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
+#define FHPARM_HASH_BITS	8
+#define FHPARM_HASH_SIZE	(1<<FHPARM_HASH_BITS)
+#define FHPARM_HASH_MASK	(FHPARM_HASH_SIZE-1)
+static struct fhcache_hbucket	fhcache_hash[FHPARM_HASH_SIZE];
 
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
@@ -784,51 +799,223 @@ nfsd_sync_dir(struct dentry *dp)
 	return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
 }
 
+/* Daemon to handle expired fh cache entries */
+static struct task_struct	*k_nfsd_task;
+
+/* Synchronization between the daemon and enqueuers */
+static DEFINE_SPINLOCK(k_nfsd_lock);
+
+/* List of FH cache entries that have to be cleaned up when they expire */
+static struct list_head		nfsd_daemon_list;
+
+/*
+ * Returns cached values of 'file' and svc_export; resets these entries
+ * to NULL.
+ */
+static inline void fh_get_cached_entries(struct fhcache *fh,
+					 struct file **filep,
+					 struct svc_export **expp)
+{
+	*filep = fh->p_filp;
+	*expp = fh->p_exp;
+
+	fh->p_filp = NULL;
+	fh->p_exp = NULL;
+}
+
+/*
+ * Hold a reference to dentry and svc_export (the file already has an
+ * extra reference count as it is not closed normally).
+ */
+static inline void fh_cache_get(struct file *file, struct svc_export *exp)
+{
+	dget(file->f_dentry);
+	cache_get(&exp->h);
+}
+
+/* Drop a reference to file, dentry and svc_export */
+static inline void fh_cache_put(struct file *file, struct svc_export *exp)
+{
+	cache_put(&exp->h, &svc_export_cache);
+	dput(file->f_dentry);
+	fput(file);
+}
+
 /*
- * Obtain the readahead parameters for the file
- * specified by (dev, ino).
+ * Hold a reference to dentry and svc_export, and cache both. Add the fh
+ * entry to the list for the daemon to clean up later. Once on the list,
+ * we would rather let it expire prematurely than update it on every read.
  */
+static inline void fh_cache_upd(struct fhcache *fh, struct file *file,
+				struct svc_export *exp)
+{
+	if (fh) {
+		if (!fh->p_filp && file) {
+			struct fhcache_hbucket *fhb;
+
+			fh_cache_get(file, exp);
+
+			fhb = &fhcache_hash[fh->p_hindex];
+
+			spin_lock(&fhb->pb_lock);
+			fh->p_filp = file;
+			fh->p_exp = exp;
+			fh->p_expires = jiffies + NFSD_CACHE_JIFFIES;
 
-static inline struct raparms *
-nfsd_get_raparms(dev_t dev, ino_t ino)
+			spin_lock(&k_nfsd_lock);
+			list_add_tail(&fh->p_list, &nfsd_daemon_list);
+			spin_unlock(&k_nfsd_lock);
+
+			spin_unlock(&fhb->pb_lock);
+		}
+
+		/* Drop our reference */
+		atomic_dec(&fh->p_count);
+	} else if (file)
+		nfsd_close(file);
+}
+
+/* Daemon cache cleanup handler */
+void daemon_free_entries(void)
 {
-	struct raparms	*ra, **rap, **frap = NULL;
-	int depth = 0;
-	unsigned int hash;
-	struct raparm_hbucket *rab;
+	unsigned long now = jiffies;
+
+	spin_lock(&k_nfsd_lock);
+	while (!list_empty(&nfsd_daemon_list)) {
+		struct fhcache *fh = list_entry(nfsd_daemon_list.next,
+                                                struct fhcache, p_list);
+		struct fhcache_hbucket *fhb;
+		struct file *file;
+		struct svc_export *exp;
+
+		if (time_after(fh->p_expires, now) || now != jiffies) {
+			/*
+			 * This (and all subsequent entries) have not expired;
+			 * or we have spent too long in this loop.
+			 */
+			break;
+		}
+
+		fhb = &fhcache_hash[fh->p_hindex];
+
+		/*
+		 * Make sure we do not deadlock with updaters - we can free
+		 * entry next time in case of a race.
+		 */
+		if (!spin_trylock(&fhb->pb_lock)) {
+			/*
+			 * Entry is being used, no need to free this, try later
+			 */
+			break;
+		}
+
+		if (atomic_read(&fh->p_count)) {
+			spin_unlock(&fhb->pb_lock);
+			break;
+		}
 
-	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
-	rab = &raparm_hash[hash];
+		list_del(&fh->p_list);
+		fh_get_cached_entries(fh, &file, &exp);
+		spin_unlock(&fhb->pb_lock);
+		spin_unlock(&k_nfsd_lock);
+
+		fh_cache_put(file, exp);
+		spin_lock(&k_nfsd_lock);
+	}
+	spin_unlock(&k_nfsd_lock);
+}
+
+static int k_nfsd_thread(void *unused)
+{
+	while (!kthread_should_stop()) {
+		schedule_timeout_interruptible(NFSD_CACHE_JIFFIES);
 
-	spin_lock(&rab->pb_lock);
-	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
-		if (ra->p_ino == ino && ra->p_dev == dev)
+		if (kthread_should_stop())
+			break;
+
+		daemon_free_entries();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+/*
+ * Obtain the cached file, export and d_inode values for the FH
+ * specified by fh->auth[3]
+ */
+static inline struct fhcache *
+nfsd_get_fhcache(__u32 auth)
+{
+	struct fhcache		*fh, **fhp, **ffhp = NULL;
+	int			depth = 0;
+	unsigned int		hash;
+	struct fhcache_hbucket	*fhb;
+
+	if (!auth)
+		return NULL;
+
+	hash = jhash_1word(auth, 0xfeedbeef) & FHPARM_HASH_MASK;
+	fhb = &fhcache_hash[hash];
+
+	spin_lock(&fhb->pb_lock);
+	for (fhp = &fhb->pb_head; (fh = *fhp); fhp = &fh->p_next) {
+		if (fh->p_auth == auth) {
+			/* Same inode */
+			if (unlikely(!fh->p_filp)) {
+				/*
+				 * Someone is racing in the same code, and
+				 * this is the first reference to this file.
+				 */
+				spin_unlock(&fhb->pb_lock);
+				return NULL;
+			}
+
+			/*
+			 * Hold an extra reference to dentry/exp since these
+			 * are released in fh_put(). 'file' already has an
+			 * extra hold from the first lookup which was never
+			 * dropped.
+			 */
+			fh_cache_get(fh->p_filp, fh->p_exp);
 			goto found;
+		}
+
 		depth++;
-		if (ra->p_count == 0)
-			frap = rap;
+
+		if (!ffhp && !fh->p_filp) {
+			/*
+			 * This is an unused inode (or a different one), and no
+			 * entry was found till now.
+			 */
+			if (!atomic_read(&fh->p_count)) /* Entry is unused */
+				ffhp = fhp;
+		}
 	}
-	depth = nfsdstats.ra_size*11/10;
-	if (!frap) {	
-		spin_unlock(&rab->pb_lock);
+
+	if (!ffhp) {
+		spin_unlock(&fhb->pb_lock);
 		return NULL;
 	}
-	rap = frap;
-	ra = *frap;
-	ra->p_dev = dev;
-	ra->p_ino = ino;
-	ra->p_set = 0;
-	ra->p_hindex = hash;
+
+	depth = nfsdstats.ra_size*11/10;
+	fhp = ffhp;
+	fh = *ffhp;
+	fh->p_hindex = hash;
+	fh->p_auth = auth;
+
 found:
-	if (rap != &rab->pb_head) {
-		*rap = ra->p_next;
-		ra->p_next   = rab->pb_head;
-		rab->pb_head = ra;
+	if (fhp != &fhb->pb_head) {
+		*fhp = fh->p_next;
+		fh->p_next   = fhb->pb_head;
+		fhb->pb_head = fh;
 	}
-	ra->p_count++;
+
+	atomic_inc(&fh->p_count);
 	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-	spin_unlock(&rab->pb_lock);
-	return ra;
+	spin_unlock(&fhb->pb_lock);
+
+	return fh;
 }
 
 /*
@@ -892,7 +1079,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
 {
 	struct inode *inode;
-	struct raparms	*ra;
 	mm_segment_t	oldfs;
 	__be32		err;
 	int		host_err;
@@ -903,11 +1089,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 	if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
 		goto out;
 
-	/* Get readahead parameters */
-	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
-	if (ra && ra->p_set)
-		file->f_ra = ra->p_ra;
 
 	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
 		struct splice_desc sd = {
@@ -926,16 +1107,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 		set_fs(oldfs);
 	}
 
-	/* Write back readahead params */
-	if (ra) {
-		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-		spin_lock(&rab->pb_lock);
-		ra->p_ra = file->f_ra;
-		ra->p_set = 1;
-		ra->p_count--;
-		spin_unlock(&rab->pb_lock);
-	}
-
 	if (host_err >= 0) {
 		nfsdstats.io_read += host_err;
 		*count = host_err;
@@ -1078,12 +1249,30 @@ nfsd_read(struct svc_rqst *rqstp, struct
 			goto out;
 		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
 	} else {
-		err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
-		if (err)
-			goto out;
-		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
-		nfsd_close(file);
+		struct fhcache	*fh;
+
+		/* Check if this fh is cached */
+		fh = nfsd_get_fhcache(fhp->fh_handle.fh_auth[3]);
+		if (fh && fh->p_filp) {
+			/* Got cached values */
+			file = fh->p_filp;
+			fhp->fh_dentry = file->f_dentry;
+			fhp->fh_export = fh->p_exp;
+			err = fh_verify(rqstp, fhp, S_IFREG, NFSD_MAY_READ);
+		} else {
+			/* Nothing in cache */
+			err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ,
+					&file);
+		}
+
+		if (!err)
+			err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen,
+					    count);
+
+		/* Update cached values if required, and clean up */
+		fh_cache_upd(fh, file, fhp->fh_export);
 	}
+
 out:
 	return err;
 }
@@ -1791,6 +1980,39 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
 		goto out_nfserr;
 
 	if (type != S_IFDIR) { /* It's UNLINK */
+		int i, found = 0;
+
+		for (i = 0 ; i < FHPARM_HASH_SIZE && !found; i++) {
+			struct fhcache_hbucket *fhb = &fhcache_hash[i];
+			struct fhcache *fh;
+
+			spin_lock(&fhb->pb_lock);
+			for (fh = fhb->pb_head; fh; fh = fh->p_next) {
+				if (fh->p_filp &&
+				    fh->p_filp->f_dentry == rdentry) {
+					/* Found the entry for removed file */
+					struct file *file;
+					struct svc_export *exp;
+
+					fh_get_cached_entries(fh, &file, &exp);
+
+					spin_lock(&k_nfsd_lock);
+					list_del(&fh->p_list);
+					spin_unlock(&k_nfsd_lock);
+
+					spin_unlock(&fhb->pb_lock);
+
+					/* Drop reference to this entry */
+					fh_cache_put(file, exp);
+
+					spin_lock(&fhb->pb_lock);
+					found = 1;
+					break;
+				}
+			}
+			spin_unlock(&fhb->pb_lock);
+		}
+
 #ifdef MSNFS
 		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 			(atomic_read(&rdentry->d_count) > 1)) {
@@ -2061,23 +2283,36 @@ nfsd_permission(struct svc_rqst *rqstp, 
 void
 nfsd_racache_shutdown(void)
 {
-	struct raparms *raparm, *last_raparm;
 	unsigned int i;
 
-	dprintk("nfsd: freeing readahead buffers.\n");
+	dprintk("nfsd: freeing FH buffers.\n");
 
-	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-		raparm = raparm_hash[i].pb_head;
-		while(raparm) {
-			last_raparm = raparm;
-			raparm = raparm->p_next;
-			kfree(last_raparm);
+	/* Stop the daemon and free the list entries */
+	kthread_stop(k_nfsd_task);
+	k_nfsd_task = NULL;
+
+	for (i = 0; i < FHPARM_HASH_SIZE; i++) {
+		struct fhcache *fhcache, *last_fhcache;
+
+		fhcache = fhcache_hash[i].pb_head;
+		while(fhcache) {
+			last_fhcache = fhcache;
+			if (fhcache->p_filp) {
+				struct file *file;
+				struct svc_export *exp;
+
+				fh_get_cached_entries(fhcache, &file, &exp);
+				list_del(&fhcache->p_list);
+				fh_cache_put(file, exp);
+			}
+			fhcache = fhcache->p_next;
+			kfree(last_fhcache);
 		}
-		raparm_hash[i].pb_head = NULL;
+		fhcache_hash[i].pb_head = NULL;
 	}
 }
 /*
- * Initialize readahead param cache
+ * Initialize file handle cache
  */
 int
 nfsd_racache_init(int cache_size)
@@ -2085,36 +2320,48 @@ nfsd_racache_init(int cache_size)
 	int	i;
 	int	j = 0;
 	int	nperbucket;
-	struct raparms **raparm = NULL;
+	struct fhcache **fhcache = NULL;
 
 
-	if (raparm_hash[0].pb_head)
+	if (fhcache_hash[0].pb_head)
 		return 0;
-	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
+	nperbucket = DIV_ROUND_UP(cache_size, FHPARM_HASH_SIZE);
 	if (nperbucket < 2)
 		nperbucket = 2;
-	cache_size = nperbucket * RAPARM_HASH_SIZE;
+	cache_size = nperbucket * FHPARM_HASH_SIZE;
 
-	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
+	dprintk("nfsd: allocating %d file handle cache buffers.\n", cache_size);
 
-	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-		spin_lock_init(&raparm_hash[i].pb_lock);
+	for (i = 0; i < FHPARM_HASH_SIZE; i++) {
+		spin_lock_init(&fhcache_hash[i].pb_lock);
 
-		raparm = &raparm_hash[i].pb_head;
+		fhcache = &fhcache_hash[i].pb_head;
 		for (j = 0; j < nperbucket; j++) {
-			*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
-			if (!*raparm)
+			*fhcache = kzalloc(sizeof(struct fhcache), GFP_KERNEL);
+			if (!*fhcache) {
+				dprintk("nfsd: kmalloc failed, freeing file cache buffers\n");
 				goto out_nomem;
-			raparm = &(*raparm)->p_next;
+			}
+			INIT_LIST_HEAD(&(*fhcache)->p_list);
+			fhcache = &(*fhcache)->p_next;
 		}
-		*raparm = NULL;
+		*fhcache = NULL;
 	}
 
 	nfsdstats.ra_size = cache_size;
+
+	INIT_LIST_HEAD(&nfsd_daemon_list);
+	k_nfsd_task = kthread_run(k_nfsd_thread, NULL, "nfsd_cacher");
+
+	if (IS_ERR(k_nfsd_task)) {
+		printk(KERN_ERR "%s: unable to create kernel thread: %ld\n",
+		       __FUNCTION__, PTR_ERR(k_nfsd_task));
+		goto out_nomem;
+	}
+
 	return 0;
 
 out_nomem:
-	dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
 	nfsd_racache_shutdown();
 	return -ENOMEM;
 }
--