Dang, that's a lot of inlines... AFAICS, approx half of fs/nfs/fscache.h should really be moved into fscache.c. Otherwise, this looks a lot less intrusive than previous patches. See inlined comments. On Thu, 2007-08-09 at 17:05 +0100, David Howells wrote: > The attached patch makes it possible for the NFS filesystem to make use of the > network filesystem local caching service (FS-Cache). > > To be able to use this, an updated mount program is required. This can be > obtained from: > > http://people.redhat.com/steved/cachefs/util-linux/ > > To mount an NFS filesystem to use caching, add an "fsc" option to the mount: > > mount warthog:/ /a -o fsc > > Signed-Off-By: David Howells <dhowells@xxxxxxxxxx> > --- > > fs/Kconfig | 8 + > fs/nfs/Makefile | 1 > fs/nfs/client.c | 11 + > fs/nfs/file.c | 38 +++- > fs/nfs/fscache.c | 303 +++++++++++++++++++++++++++++ > fs/nfs/fscache.h | 464 ++++++++++++++++++++++++++++++++++++++++++++ > fs/nfs/inode.c | 16 ++ > fs/nfs/internal.h | 8 + > fs/nfs/read.c | 27 ++- > fs/nfs/super.c | 1 > fs/nfs/sysctl.c | 43 ++++ > fs/nfs/write.c | 3 > include/linux/nfs4_mount.h | 3 > include/linux/nfs_fs.h | 6 + > include/linux/nfs_fs_sb.h | 5 > include/linux/nfs_mount.h | 3 > 16 files changed, 931 insertions(+), 9 deletions(-) > > diff --git a/fs/Kconfig b/fs/Kconfig > index 7feb4cb..76d5d16 100644 > --- a/fs/Kconfig > +++ b/fs/Kconfig > @@ -1600,6 +1600,14 @@ config NFS_V4 > > If unsure, say N. 
> > +config NFS_FSCACHE > + bool "Provide NFS client caching support (EXPERIMENTAL)" > + depends on EXPERIMENTAL > + depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y > + help > + Say Y here if you want NFS data to be cached locally on disc through > + the general filesystem cache manager > + > config NFS_DIRECTIO > bool "Allow direct I/O on NFS files" > depends on NFS_FS > diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile > index b55cb23..c9e7c43 100644 > --- a/fs/nfs/Makefile > +++ b/fs/nfs/Makefile > @@ -16,4 +16,5 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ > nfs4namespace.o > nfs-$(CONFIG_NFS_DIRECTIO) += direct.o > nfs-$(CONFIG_SYSCTL) += sysctl.o > +nfs-$(CONFIG_NFS_FSCACHE) += fscache.o > nfs-objs := $(nfs-y) > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > index a49f9fe..7be7807 100644 > --- a/fs/nfs/client.c > +++ b/fs/nfs/client.c > @@ -137,6 +137,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, > clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; > #endif > > + nfs_fscache_get_client_cookie(clp); > + > return clp; > > error_3: > @@ -168,6 +170,8 @@ static void nfs_free_client(struct nfs_client *clp) > > nfs4_shutdown_client(clp); > > + nfs_fscache_release_client_cookie(clp); > + > /* -EIO all pending I/O */ > if (!IS_ERR(clp->cl_rpcclient)) > rpc_shutdown_client(clp->cl_rpcclient); > @@ -1308,7 +1312,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) > > /* display header on line 1 */ > if (v == &nfs_volume_list) { > - seq_puts(m, "NV SERVER PORT DEV FSID\n"); > + seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); > return 0; > } > /* display one transport per line on subsequent lines */ > @@ -1322,12 +1326,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) > (unsigned long long) server->fsid.major, > (unsigned long long) server->fsid.minor); > > - seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n", > + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s %s\n", > 
clp->cl_nfsversion, > NIPQUAD(clp->cl_addr.sin_addr), > ntohs(clp->cl_addr.sin_port), > dev, > - fsid); > + fsid, > + nfs_server_fscache_state(server)); Please send these changes as a separate patch: they change an existing interface. > > return 0; > } > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > index c87dc71..dfd36e0 100644 > --- a/fs/nfs/file.c > +++ b/fs/nfs/file.c > @@ -34,6 +34,7 @@ > > #include "delegation.h" > #include "iostat.h" > +#include "internal.h" > > #define NFSDBG_FACILITY NFSDBG_FILE > > @@ -259,6 +260,10 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) > status = nfs_revalidate_mapping(inode, file->f_mapping); > if (!status) > status = generic_file_mmap(file, vma); > + > + if (status == 0) > + nfs_fscache_install_vm_ops(inode, vma); > + > return status; Please note that in 2.6.24 the NFS client will be gaining a page_mkwrite() method in order to fix the remaining mmap() races. AFAICS, it should be fairly trivial to merge with your nfs_file_page_mkwrite. > } > > @@ -311,22 +316,51 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse > return status; > } > > +/* > + * partially or wholly invalidate a page > + * - release the private state associated with a page if undergoing complete > + * page invalidation > + * - caller holds page lock > + */ > static void nfs_invalidate_page(struct page *page, unsigned long offset) > { > if (offset != 0) > return; > /* Cancel any unstarted writes on this page */ > nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); > + > + nfs_fscache_invalidate_page(page, page->mapping->host, offset); > + > + /* we can do this here as the bits are only set with the page lock > + * held, and our caller is holding that */ > + if (!page->private) > + ClearPagePrivate(page); Why would PG_private be set at this point? In any case, please send this and other PagePrivate changes as a separate patch. Any changes to the PagePrivate semantics must be made easy to debug. 
> } > > +/* > + * release the private state associated with a page, if the page isn't busy > + * - caller holds page lock > + * - return true (may release) or false (may not) > + */ > static int nfs_release_page(struct page *page, gfp_t gfp) > { > - /* If PagePrivate() is set, then the page is not freeable */ > - return 0; > + /* > + * Avoid deadlock on nfs_wait_on_request(). > + */ > + if (!(gfp & __GFP_FS)) > + return 0; We got rid of this. There should be no race with nfs_wait_on_request(). Either fix up the comment, or remove the test. > + > + if (!nfs_fscache_release_page(page, gfp)) > + return 0; This looks _very_ dubious. Why shouldn't I be able to discard a page just because fscache isn't done writing it out? I may have very good reasons to do so. > + BUG_ON(page->private != 0); > + ClearPagePrivate(page); > + return 1; > } > > static int nfs_launder_page(struct page *page) > { > + nfs_fscache_invalidate_page(page, page->mapping->host, 0); Why? The function of launder_page() is to clean the page, not to truncate it. > return nfs_wb_page(page->mapping->host, page); > } > > diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c > new file mode 100644 > index 0000000..2b2785a > --- /dev/null > +++ b/fs/nfs/fscache.c > @@ -0,0 +1,303 @@ > +/* fscache.c: NFS filesystem cache interface > + * > + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. > + * Written by David Howells (dhowells@xxxxxxxxxx) > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. 
> + */ > + > + > +#include <linux/init.h> > +#include <linux/kernel.h> > +#include <linux/sched.h> > +#include <linux/mm.h> > +#include <linux/nfs_fs.h> > +#include <linux/nfs_fs_sb.h> > +#include <linux/in6.h> > + > +#include "internal.h" > + > +/* > + * Sysctl variables > + */ > +atomic_t nfs_fscache_to_pages; > +atomic_t nfs_fscache_from_pages; > +atomic_t nfs_fscache_uncache_page; > +int nfs_fscache_from_error; > +int nfs_fscache_to_error; > + > +#define NFSDBG_FACILITY NFSDBG_FSCACHE > + > +/* the auxiliary data in the cache (used for coherency management) */ > +struct nfs_fh_auxdata { > + struct timespec i_mtime; > + struct timespec i_ctime; > + loff_t i_size; > +}; > + > +static struct fscache_netfs_operations nfs_cache_ops = { > +}; > + > +struct fscache_netfs nfs_cache_netfs = { > + .name = "nfs", > + .version = 0, > + .ops = &nfs_cache_ops, > +}; > + > +static const uint8_t nfs_cache_ipv6_wrapper_for_ipv4[12] = { > + [0 ... 9] = 0x00, > + [10 ... 11] = 0xff > +}; > + > +struct nfs_server_key { > + uint16_t nfsversion; > + uint16_t port; > + union { > + struct { > + uint8_t ipv6wrapper[12]; > + struct in_addr addr; > + } ipv4_addr; > + struct in6_addr ipv6_addr; > + }; > +}; > + > +static uint16_t nfs_server_get_key(const void *cookie_netfs_data, > + void *buffer, uint16_t bufmax) > +{ > + const struct nfs_client *clp = cookie_netfs_data; > + struct nfs_server_key *key = buffer; > + uint16_t len = 0; > + > + key->nfsversion = clp->cl_nfsversion; > + > + switch (clp->cl_addr.sin_family) { > + case AF_INET: > + key->port = clp->cl_addr.sin_port; > + > + memcpy(&key->ipv4_addr.ipv6wrapper, > + &nfs_cache_ipv6_wrapper_for_ipv4, > + sizeof(key->ipv4_addr.ipv6wrapper)); > + memcpy(&key->ipv4_addr.addr, > + &clp->cl_addr.sin_addr, > + sizeof(key->ipv4_addr.addr)); > + len = sizeof(struct nfs_server_key); > + break; > + > + case AF_INET6: > + key->port = clp->cl_addr.sin_port; > + > + memcpy(&key->ipv6_addr, > + &clp->cl_addr.sin_addr, > + sizeof(key->ipv6_addr)); 
> + len = sizeof(struct nfs_server_key); > + break; > + > + default: > + len = 0; > + printk(KERN_WARNING "NFS: Unknown network family '%d'\n", > + clp->cl_addr.sin_family); > + break; > + } > + > + return len; > +} > + > +/* > + * the root index for the filesystem is defined by nfsd IP address and ports > + */ > +struct fscache_cookie_def nfs_cache_server_index_def = { > + .name = "NFS.servers", > + .type = FSCACHE_COOKIE_TYPE_INDEX, > + .get_key = nfs_server_get_key, > +}; > + > +static uint16_t nfs_fh_get_key(const void *cookie_netfs_data, > + void *buffer, uint16_t bufmax) > +{ > + const struct nfs_inode *nfsi = cookie_netfs_data; > + uint16_t nsize; > + > + /* set the file handle */ > + nsize = nfsi->fh.size; > + memcpy(buffer, nfsi->fh.data, nsize); > + return nsize; > +} > + > +/* > + * get an extra reference on a read context > + * - this function can be absent if the completion function doesn't > + * require a context > + */ > +static void nfs_fh_get_context(void *cookie_netfs_data, void *context) > +{ > + get_nfs_open_context(context); > +} > + > +/* > + * release an extra reference on a read context > + * - this function can be absent if the completion function doesn't > + * require a context > + */ > +static void nfs_fh_put_context(void *cookie_netfs_data, void *context) > +{ > + if (context) > + put_nfs_open_context(context); > +} > + > +/* > + * indication the cookie is no longer uncached > + * - this function is called when the backing store currently caching a cookie > + * is removed > + * - the netfs should use this to clean up any markers indicating cached pages > + * - this is mandatory for any object that may have data > + */ > +static void nfs_fh_now_uncached(void *cookie_netfs_data) > +{ > + struct nfs_inode *nfsi = cookie_netfs_data; > + struct pagevec pvec; > + pgoff_t first; > + int loop, nr_pages; > + > + pagevec_init(&pvec, 0); > + first = 0; > + > + dprintk("NFS: nfs_fh_now_uncached: nfs_inode 0x%p\n", nfsi); > + > + for (;;) { > + /* 
grab a bunch of pages to clean */ > + nr_pages = pagevec_lookup(&pvec, > + nfsi->vfs_inode.i_mapping, > + first, > + PAGEVEC_SIZE - pagevec_count(&pvec)); > + if (!nr_pages) > + break; > + > + for (loop = 0; loop < nr_pages; loop++) > + ClearPageFsCache(pvec.pages[loop]); > + > + first = pvec.pages[nr_pages - 1]->index + 1; > + > + pvec.nr = nr_pages; > + pagevec_release(&pvec); > + cond_resched(); > + } > +} > + > +/* > + * get certain file attributes from the netfs data > + * - this function can be absent for an index > + * - not permitted to return an error > + * - the netfs data from the cookie being used as the source is > + * presented > + */ > +static void nfs_fh_get_attr(const void *cookie_netfs_data, uint64_t *size) > +{ > + const struct nfs_inode *nfsi = cookie_netfs_data; > + > + *size = nfsi->vfs_inode.i_size; > +} > + > +/* > + * get the auxiliary data from netfs data > + * - this function can be absent if the index carries no state data > + * - should store the auxiliary data in the buffer > + * - should return the amount of data stored > + * - not permitted to return an error > + * - the netfs data from the cookie being used as the source is > + * presented > + */ > +static uint16_t nfs_fh_get_aux(const void *cookie_netfs_data, > + void *buffer, uint16_t bufmax) > +{ > + struct nfs_fh_auxdata auxdata; > + const struct nfs_inode *nfsi = cookie_netfs_data; > + > + auxdata.i_size = nfsi->vfs_inode.i_size; > + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; > + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; > + > + if (bufmax > sizeof(auxdata)) > + bufmax = sizeof(auxdata); > + > + memcpy(buffer, &auxdata, bufmax); > + return bufmax; > +} > + > +/* > + * consult the netfs about the state of an object > + * - this function can be absent if the index carries no state data > + * - the netfs data from the cookie being used as the target is > + * presented, as is the auxiliary data > + */ > +static fscache_checkaux_t nfs_fh_check_aux(void *cookie_netfs_data, > + 
const void *data, uint16_t datalen) > +{ > + struct nfs_fh_auxdata auxdata; > + struct nfs_inode *nfsi = cookie_netfs_data; > + > + if (datalen > sizeof(auxdata)) > + return FSCACHE_CHECKAUX_OBSOLETE; > + > + auxdata.i_size = nfsi->vfs_inode.i_size; > + auxdata.i_mtime = nfsi->vfs_inode.i_mtime; > + auxdata.i_ctime = nfsi->vfs_inode.i_ctime; > + > + if (memcmp(data, &auxdata, datalen) != 0) > + return FSCACHE_CHECKAUX_OBSOLETE; > + > + return FSCACHE_CHECKAUX_OKAY; > +} > + > +/* > + * the primary index for each server is simply made up of a series of NFS file > + * handles > + */ > +struct fscache_cookie_def nfs_cache_fh_index_def = { > + .name = "NFS.fh", > + .type = FSCACHE_COOKIE_TYPE_DATAFILE, > + .get_key = nfs_fh_get_key, > + .get_attr = nfs_fh_get_attr, > + .get_aux = nfs_fh_get_aux, > + .check_aux = nfs_fh_check_aux, > + .get_context = nfs_fh_get_context, > + .put_context = nfs_fh_put_context, > + .now_uncached = nfs_fh_now_uncached, > +}; > + > +static int nfs_file_page_mkwrite(struct vm_area_struct *vma, struct page *page) > +{ > + wait_on_page_fscache_write(page); > + return 0; > +} > + > +struct vm_operations_struct nfs_fs_vm_operations = { > + .fault = filemap_fault, > + .page_mkwrite = nfs_file_page_mkwrite, > +}; > + > +/* > + * handle completion of a page being read from the cache > + * - called in process (keventd) context > + */ > +void nfs_readpage_from_fscache_complete(struct page *page, > + void *context, > + int error) > +{ > + dfprintk(FSCACHE, > + "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", > + page, context, error); > + > + /* if the read completes with an error, we just unlock the page and let > + * the VM reissue the readpage */ > + if (!error) { > + SetPageUptodate(page); > + unlock_page(page); > + } else { > + error = nfs_readpage_async(context, page->mapping->host, page); > + if (error) > + unlock_page(page); > + } > +} > diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h > new file mode 100644 > index 0000000..152309c > --- 
/dev/null > +++ b/fs/nfs/fscache.h > @@ -0,0 +1,464 @@ > +/* fscache.h: NFS filesystem cache interface definitions > + * > + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. > + * Written by David Howells (dhowells@xxxxxxxxxx) > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > + > +#ifndef _NFS_FSCACHE_H > +#define _NFS_FSCACHE_H > + > +#include <linux/nfs_fs.h> > +#include <linux/nfs_mount.h> > +#include <linux/nfs4_mount.h> > + > +#ifdef CONFIG_NFS_FSCACHE > +#include <linux/fscache.h> > + > +extern struct fscache_netfs nfs_cache_netfs; > +extern struct fscache_cookie_def nfs_cache_server_index_def; > +extern struct fscache_cookie_def nfs_cache_fh_index_def; > +extern struct vm_operations_struct nfs_fs_vm_operations; > + > +extern void nfs_invalidatepage(struct page *, unsigned long); > +extern int nfs_releasepage(struct page *, gfp_t); > + > +extern atomic_t nfs_fscache_to_pages; > +extern atomic_t nfs_fscache_from_pages; > +extern atomic_t nfs_fscache_uncache_page; > +extern int nfs_fscache_from_error; > +extern int nfs_fscache_to_error; > + > +/* > + * register NFS for caching > + */ > +static inline int nfs_fscache_register(void) > +{ > + return fscache_register_netfs(&nfs_cache_netfs); > +} > + > +/* > + * unregister NFS for caching > + */ > +static inline void nfs_fscache_unregister(void) > +{ > + fscache_unregister_netfs(&nfs_cache_netfs); > +} > + > +/* > + * get the per-client index cookie for an NFS client if the appropriate mount > + * flag was set > + * - we always try and get an index cookie for the client, but get filehandle > + * cookies on a per-superblock basis, depending on the mount flags > + */ > +static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) > +{ > + /* create a cache index for looking 
up filehandles */ > + clp->fscache = fscache_acquire_cookie(nfs_cache_netfs.primary_index, > + &nfs_cache_server_index_def, > + clp); > + dfprintk(FSCACHE,"NFS: get client cookie (0x%p/0x%p)\n", > + clp, clp->fscache); > +} > + > +/* > + * dispose of a per-client cookie > + */ > +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) > +{ > + dfprintk(FSCACHE,"NFS: releasing client cookie (0x%p/0x%p)\n", > + clp, clp->fscache); > + > + fscache_relinquish_cookie(clp->fscache, 0); > + clp->fscache = NULL; > +} > + > +/* > + * indicate the client caching state as readable text > + */ > +static inline const char *nfs_server_fscache_state(struct nfs_server *server) > +{ > + if (server->nfs_client->fscache && (server->flags & NFS_MOUNT_FSCACHE)) > + return "yes"; > + return "no "; > +} > + > +/* > + * get the per-filehandle cookie for an NFS inode > + */ > +static inline void nfs_fscache_init_fh_cookie(struct inode *inode) > +{ > + NFS_I(inode)->fscache = NULL; > + if (S_ISREG(inode->i_mode)) > + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); > +} > + > +/* > + * get the per-filehandle cookie for an NFS inode > + */ > +static inline void nfs_fscache_enable_fh_cookie(struct inode *inode) > +{ > + struct super_block *sb = inode->i_sb; > + struct nfs_inode *nfsi = NFS_I(inode); > + > + if (nfsi->fscache || !NFS_FSCACHE(inode)) > + return; > + > + if ((NFS_SB(sb)->flags & NFS_MOUNT_FSCACHE)) { > + nfsi->fscache = fscache_acquire_cookie( > + NFS_SB(sb)->nfs_client->fscache, > + &nfs_cache_fh_index_def, > + nfsi); > + > + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", > + sb, nfsi, nfsi->fscache); > + } > +} > + > +/* > + * change the filesize associated with a per-filehandle cookie > + */ > +static inline void nfs_fscache_attr_changed(struct inode *inode) > +{ > + fscache_attr_changed(NFS_I(inode)->fscache); > +} > + > +/* > + * replace a per-filehandle cookie due to revalidation detecting a file having > + * changed on the server > + */ > 
+static inline void nfs_fscache_renew_fh_cookie(struct inode *inode) > +{ > + struct nfs_inode *nfsi = NFS_I(inode); > + struct nfs_server *server = NFS_SERVER(inode); > + struct fscache_cookie *old = nfsi->fscache; > + > + if (nfsi->fscache) { > + /* retire the current fscache cache and get a new one */ > + fscache_relinquish_cookie(nfsi->fscache, 1); > + > + nfsi->fscache = fscache_acquire_cookie( > + server->nfs_client->fscache, > + &nfs_cache_fh_index_def, > + nfsi); > + > + dfprintk(FSCACHE, > + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", > + server, nfsi, old, nfsi->fscache); > + } > +} > + > +/* > + * release a per-filehandle cookie > + */ > +static inline void nfs_fscache_release_fh_cookie(struct inode *inode) > +{ > + struct nfs_inode *nfsi = NFS_I(inode); > + > + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", > + nfsi, nfsi->fscache); > + > + fscache_relinquish_cookie(nfsi->fscache, 0); > + nfsi->fscache = NULL; > +} > + > +/* > + * retire a per-filehandle cookie, destroying the data attached to it > + */ > +static inline void nfs_fscache_zap_fh_cookie(struct inode *inode) > +{ > + struct nfs_inode *nfsi = NFS_I(inode); > + > + dfprintk(FSCACHE,"NFS: zapping cookie (0x%p/0x%p)\n", > + nfsi, nfsi->fscache); > + > + fscache_relinquish_cookie(nfsi->fscache, 1); > + nfsi->fscache = NULL; > +} > + > +/* > + * turn off the cache with regard to a filehandle cookie if opened for writing, > + * invalidating all the pages in the page cache relating to the associated > + * inode to clear the per-page caching > + */ > +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) > +{ > + clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); > + > + if (NFS_I(inode)->fscache) { > + dfprintk(FSCACHE, > + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); > + > + /* Need to invalidate any mapped pages that were read in before > + * turning off the cache. 
> + */ > + if (inode->i_mapping && inode->i_mapping->nrpages) > + invalidate_inode_pages2(inode->i_mapping); > + > + nfs_fscache_zap_fh_cookie(inode); > + } > +} > + > +/* > + * decide if we should enable or disable the FS cache for this inode > + * - for now, only regular files that are open read-only will be able to use > + * the cache > + */ > +static inline void nfs_fscache_set_fh_cookie(struct inode *inode, > + struct file *filp) > +{ > + if (NFS_FSCACHE(inode)) { > + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) > + nfs_fscache_disable_fh_cookie(inode); > + else > + nfs_fscache_enable_fh_cookie(inode); > + } > +} > + > +/* > + * install the VM ops for mmap() of an NFS file so that we can hold up writes > + * to pages on shared writable mappings until the store to the cache is > + * complete > + */ > +static inline void nfs_fscache_install_vm_ops(struct inode *inode, > + struct vm_area_struct *vma) > +{ > + if (NFS_I(inode)->fscache) > + vma->vm_ops = &nfs_fs_vm_operations; > +} > + > +/* > + * release the caching state associated with a page, if the page isn't busy > + * interacting with the cache > + * - returns true (can release page) or false (page busy) > + */ > +static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) > +{ > + if (PageFsCacheWrite(page)) { > + if (!(gfp & __GFP_WAIT)) > + return 0; > + wait_on_page_fscache_write(page); > + } > + > + if (PageFsCache(page)) { > + struct nfs_inode *nfsi = NFS_I(page->mapping->host); > + > + BUG_ON(!nfsi->fscache); > + > + dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", > + nfsi->fscache, page, nfsi); > + > + fscache_uncache_page(nfsi->fscache, page); > + atomic_inc(&nfs_fscache_uncache_page); > + ClearPageFsCache(page); > + } > + > + return 1; > +} > + > +/* > + * release the caching state associated with a page if undergoing complete page > + * invalidation > + */ > +static inline void nfs_fscache_invalidate_page(struct page *page, > + struct inode *inode, > + unsigned long 
offset) > +{ > + struct nfs_inode *nfsi = NFS_I(page->mapping->host); > + > + if (PageFsCache(page)) { > + BUG_ON(!nfsi->fscache); > + > + dfprintk(FSCACHE, > + "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", > + nfsi->fscache, page, nfsi); > + > + wait_on_page_fscache_write(page); > + > + if (offset == 0) { > + BUG_ON(!PageLocked(page)); > + if (!PageWriteback(page)) { > + fscache_uncache_page(nfsi->fscache, page); > + atomic_inc(&nfs_fscache_uncache_page); > + ClearPageFsCache(page); > + } > + } > + } > +} > + > +/* > + * store a newly fetched page in fscache > + */ > +static inline void nfs_readpage_to_fscache(struct inode *inode, > + struct page *page, > + int sync) > +{ > + int ret; > + > + if (PageFsCache(page)) { > + dfprintk(FSCACHE, > + "NFS: " > + "readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", > + NFS_I(inode)->fscache, page, page->index, page->flags, > + sync); > + > + ret = fscache_write_page(NFS_I(inode)->fscache, page, > + GFP_KERNEL); > + dfprintk(FSCACHE, > + "NFS: " > + "readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", > + page, page->index, page->flags, ret); > + > + if (ret != 0) { > + fscache_uncache_page(NFS_I(inode)->fscache, page); > + atomic_inc(&nfs_fscache_uncache_page); > + ClearPageFsCache(page); > + nfs_fscache_to_error = ret; > + } else { > + atomic_inc(&nfs_fscache_to_pages); > + } > + } > +} > + > +/* > + * retrieve a page from fscache > + */ > +extern void nfs_readpage_from_fscache_complete(struct page *, void *, int); > + > +static inline > +int nfs_readpage_from_fscache(struct nfs_open_context *ctx, > + struct inode *inode, > + struct page *page) > +{ > + int ret; > + > + if (!NFS_I(inode)->fscache) > + return 1; > + > + dfprintk(FSCACHE, > + "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", > + NFS_I(inode)->fscache, page, page->index, page->flags, inode); > + > + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, > + page, > + nfs_readpage_from_fscache_complete, > + ctx, > + GFP_KERNEL); > + > + 
switch (ret) { > + case 0: /* read BIO submitted (page in fscache) */ > + dfprintk(FSCACHE, > + "NFS: readpage_from_fscache: BIO submitted\n"); > + atomic_inc(&nfs_fscache_from_pages); > + return ret; > + > + case -ENOBUFS: /* inode not in cache */ > + case -ENODATA: /* page not in cache */ > + dfprintk(FSCACHE, > + "NFS: readpage_from_fscache error %d\n", ret); > + return 1; > + > + default: > + dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); > + nfs_fscache_from_error = ret; > + } > + return ret; > +} > + > +/* > + * retrieve a set of pages from fscache > + */ > +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, > + struct inode *inode, > + struct address_space *mapping, > + struct list_head *pages, > + unsigned *nr_pages) > +{ > + int ret, npages = *nr_pages; > + > + if (!NFS_I(inode)->fscache) > + return 1; > + > + dfprintk(FSCACHE, > + "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", > + NFS_I(inode)->fscache, *nr_pages, inode); > + > + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, > + mapping, pages, nr_pages, > + nfs_readpage_from_fscache_complete, > + ctx, > + mapping_gfp_mask(mapping)); > + > + > + switch (ret) { > + case 0: /* read BIO submitted (page in fscache) */ > + BUG_ON(!list_empty(pages)); > + BUG_ON(*nr_pages != 0); > + dfprintk(FSCACHE, > + "NFS: nfs_getpages_from_fscache: BIO submitted\n"); > + > + atomic_add(npages, &nfs_fscache_from_pages); > + return ret; > + > + case -ENOBUFS: /* inode not in cache */ > + case -ENODATA: /* page not in cache */ > + dfprintk(FSCACHE, > + "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); > + return 1; > + > + default: > + dfprintk(FSCACHE, > + "NFS: nfs_getpages_from_fscache: ret %d\n", ret); > + nfs_fscache_from_error = ret; > + } > + > + return ret; > +} > + > +#else /* CONFIG_NFS_FSCACHE */ > +static inline int nfs_fscache_register(void) { return 0; } > +static inline void nfs_fscache_unregister(void) {} > +static inline void 
nfs_fscache_get_client_cookie(struct nfs_client *clp) {} > +static inline void nfs4_fscache_get_client_cookie(struct nfs_client *clp) {} > +static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} > +static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; } > + > +static inline void nfs_fscache_init_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_enable_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_attr_changed(struct inode *inode) {} > +static inline void nfs_fscache_release_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_zap_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_renew_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_disable_fh_cookie(struct inode *inode) {} > +static inline void nfs_fscache_set_fh_cookie(struct inode *inode, struct file *filp) {} > +static inline void nfs_fscache_install_vm_ops(struct inode *inode, struct vm_area_struct *vma) {} > +static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) > +{ > + return 1; /* True: may release page */ > +} > +static inline void nfs_fscache_invalidate_page(struct page *page, > + struct inode *inode, > + unsigned long offset) > +{ > +} > +static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} > +static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, > + struct inode *inode, struct page *page) > +{ > + return -ENOBUFS; > +} > +static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, > + struct inode *inode, > + struct address_space *mapping, > + struct list_head *pages, > + unsigned *nr_pages) > +{ > + return -ENOBUFS; > +} > + > +#endif /* CONFIG_NFS_FSCACHE */ > +#endif /* _NFS_FSCACHE_H */ > diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c > index bca6cdc..8b7a39c 100644 > --- a/fs/nfs/inode.c > +++ b/fs/nfs/inode.c > @@ -88,6 +88,7 @@ void 
nfs_clear_inode(struct inode *inode) > BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); > nfs_zap_acl_cache(inode); > nfs_access_zap_cache(inode); > + nfs_fscache_release_fh_cookie(inode); > } > > /** > @@ -220,6 +221,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) > }; > struct inode *inode = ERR_PTR(-ENOENT); > unsigned long hash; > + int maycache = 1; > > if ((fattr->valid & NFS_ATTR_FATTR) == 0) > goto out_no_inode; > @@ -269,6 +271,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) > else > inode->i_op = &nfs_mountpoint_inode_operations; > inode->i_fop = NULL; > + maycache = 0; > } > } else if (S_ISLNK(inode->i_mode)) > inode->i_op = &nfs_symlink_inode_operations; > @@ -300,6 +303,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) > memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); > nfsi->access_cache = RB_ROOT; > > + nfs_fscache_init_fh_cookie(inode); > + > unlock_new_inode(inode); > } else > nfs_refresh_inode(inode, fattr); > @@ -573,6 +578,7 @@ int nfs_open(struct inode *inode, struct file *filp) > ctx->mode = filp->f_mode; > nfs_file_set_open_context(filp, ctx); > put_nfs_open_context(ctx); > + nfs_fscache_set_fh_cookie(inode, filp); > return 0; > } > > @@ -698,6 +704,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa > } > spin_unlock(&inode->i_lock); > nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); > + nfs_fscache_renew_fh_cookie(inode); > dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", > inode->i_sb->s_id, (long long)NFS_FILEID(inode)); > return 0; > @@ -1000,11 +1007,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) > if (data_stable) { > inode->i_size = new_isize; > invalid |= NFS_INO_INVALID_DATA; > + nfs_fscache_attr_changed(inode); Can't fscache_attr_changed() call kmalloc(GFP_KERNEL)? You are in a spinlocked context here. 
> } > invalid |= NFS_INO_INVALID_ATTR; > } else if (new_isize > cur_isize) { > inode->i_size = new_isize; > invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; > + nfs_fscache_attr_changed(inode); > } > nfsi->cache_change_attribute = now; > dprintk("NFS: isize change on server for file %s/%ld\n", > @@ -1191,6 +1200,10 @@ static int __init init_nfs_fs(void) > { > int err; > > + err = nfs_fscache_register(); > + if (err < 0) > + goto out6; > + > err = nfs_fs_proc_init(); > if (err) > goto out5; > @@ -1237,6 +1250,8 @@ out3: > out4: > nfs_fs_proc_exit(); > out5: > + nfs_fscache_unregister(); > +out6: > return err; > } > > @@ -1247,6 +1262,7 @@ static void __exit exit_nfs_fs(void) > nfs_destroy_readpagecache(); > nfs_destroy_inodecache(); > nfs_destroy_nfspagecache(); > + nfs_fscache_unregister(); > #ifdef CONFIG_PROC_FS > rpc_proc_unregister("nfs"); > #endif > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 76cf55d..fc75251 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -27,6 +27,11 @@ struct nfs_clone_mount { > rpc_authflavor_t authflavor; > }; > > +/* > + * include filesystem caching stuff here > + */ > +#include "fscache.h" Why? internal.h has no dependencies on anything from fscache.h. > + > /* client.c */ > extern struct rpc_program nfs_program; > > @@ -149,6 +154,9 @@ extern int nfs4_path_walk(struct nfs_server *server, > const char *path); > #endif > > +/* read.c */ > +extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); > + Please put this in nfs_fs.h together with the other NFS read exports. 
> /* > * Determine the device name as a string > */ > diff --git a/fs/nfs/read.c b/fs/nfs/read.c > index 19e0563..ad6f516 100644 > --- a/fs/nfs/read.c > +++ b/fs/nfs/read.c > @@ -18,6 +18,7 @@ > #include <linux/sunrpc/clnt.h> > #include <linux/nfs_fs.h> > #include <linux/nfs_page.h> > +#include <linux/nfs_mount.h> > #include <linux/smp_lock.h> > > #include <asm/system.h> > @@ -114,8 +115,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) > } > } > > -static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, > - struct page *page) > +int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, > + struct page *page) > { > LIST_HEAD(one_request); > struct nfs_page *new; > @@ -142,6 +143,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, > > static void nfs_readpage_release(struct nfs_page *req) > { > + struct inode *d_inode = req->wb_context->path.dentry->d_inode; > + > + if (PageUptodate(req->wb_page)) > + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); > + > unlock_page(req->wb_page); > > dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", > @@ -500,8 +506,15 @@ int nfs_readpage(struct file *file, struct page *page) > ctx = get_nfs_open_context((struct nfs_open_context *) > file->private_data); > > + if (!IS_SYNC(inode)) { > + error = nfs_readpage_from_fscache(ctx, inode, page); > + if (error == 0) > + goto out; > + } > + > error = nfs_readpage_async(ctx, inode, page); > > +out: > put_nfs_open_context(ctx); > return error; > out_unlock: > @@ -578,6 +591,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, > } else > desc.ctx = get_nfs_open_context((struct nfs_open_context *) > filp->private_data); > + > + /* attempt to read as many of the pages as possible from the cache > + * - this returns -ENOBUFS immediately if the cookie is negative > + */ > + ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, > + pages, &nr_pages); > + if (ret == 
0) > + goto read_complete; /* all pages were read */ > + > if (rsize < PAGE_CACHE_SIZE) > nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); > else > @@ -588,6 +610,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, > nfs_pageio_complete(&pgio); > npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; > nfs_add_stats(inode, NFSIOS_READPAGES, npages); > +read_complete: > put_nfs_open_context(desc.ctx); > out: > return ret; > diff --git a/fs/nfs/super.c b/fs/nfs/super.c > index b2a851c..7a74677 100644 > --- a/fs/nfs/super.c > +++ b/fs/nfs/super.c > @@ -456,6 +456,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, > { NFS_MOUNT_NOACL, ",noacl", "" }, > { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, > { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, > + { NFS_MOUNT_FSCACHE, ",fsc", "" }, > { 0, NULL, NULL } > }; > const struct proc_nfs_info *nfs_infop; > diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c > index b62481d..e522177 100644 > --- a/fs/nfs/sysctl.c > +++ b/fs/nfs/sysctl.c > @@ -14,6 +14,7 @@ > #include <linux/nfs_fs.h> > > #include "callback.h" > +#include "internal.h" > > static const int nfs_set_port_min = 0; > static const int nfs_set_port_max = 65535; > @@ -58,6 +59,48 @@ static ctl_table nfs_cb_sysctls[] = { > .mode = 0644, > .proc_handler = &proc_dointvec, > }, > +#ifdef CONFIG_NFS_FSCACHE > + { > + .ctl_name = CTL_UNNUMBERED, > + .procname = "fscache_from_error", > + .data = &nfs_fscache_from_error, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = &proc_dointvec, > + }, > + { > + .ctl_name = CTL_UNNUMBERED, > + .procname = "fscache_to_error", > + .data = &nfs_fscache_to_error, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = &proc_dointvec, > + }, > + { > + .ctl_name = CTL_UNNUMBERED, > + .procname = "fscache_uncache_page", > + .data = &nfs_fscache_uncache_page, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = &proc_dointvec, > + 
}, > + { > + .ctl_name = CTL_UNNUMBERED, > + .procname = "fscache_to_pages", > + .data = &nfs_fscache_to_pages, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = &proc_dointvec_minmax, > + }, > + { > + .ctl_name = CTL_UNNUMBERED, > + .procname = "fscache_from_pages", > + .data = &nfs_fscache_from_pages, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = &proc_dointvec, > + }, > +#endif > { .ctl_name = 0 } > }; > > diff --git a/fs/nfs/write.c b/fs/nfs/write.c > index ef97e0c..6576738 100644 > --- a/fs/nfs/write.c > +++ b/fs/nfs/write.c > @@ -147,6 +147,9 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c > return; > nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); > i_size_write(inode, end); > +#ifdef FSCACHE_WRITE_SUPPORT > + nfs_set_fscsize(NFS_SERVER(inode), NFS_I(inode), end); > +#endif I can't see how this belongs in this patch. > } > > /* A writeback failed: mark the page as bad, and invalidate the page cache */ > diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h > index a0dcf66..40595d0 100644 > --- a/include/linux/nfs4_mount.h > +++ b/include/linux/nfs4_mount.h > @@ -66,6 +66,7 @@ struct nfs4_mount_data { > #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ > #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ > #define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ > -#define NFS4_MOUNT_FLAGMASK 0x9033 > +#define NFS4_MOUNT_FSCACHE 0x10000 /* 1 */ > +#define NFS4_MOUNT_FLAGMASK 0x19033 > > #endif > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h > index 9ba4aec..d1d6192 100644 > --- a/include/linux/nfs_fs.h > +++ b/include/linux/nfs_fs.h > @@ -172,6 +172,9 @@ struct nfs_inode { > int delegation_state; > struct rw_semaphore rwsem; > #endif /* CONFIG_NFS_V4*/ > +#ifdef CONFIG_NFS_FSCACHE > + struct fscache_cookie *fscache; > +#endif > struct inode vfs_inode; > }; > > @@ -193,6 +196,7 @@ struct nfs_inode { > #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ > #define NFS_INO_STALE (2) /* possible 
stale inode */ > #define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ > +#define NFS_INO_FSCACHE (4) /* inode can be cached by FS-Cache */ > > static inline struct nfs_inode *NFS_I(struct inode *inode) > { > @@ -218,6 +222,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) > > #define NFS_FLAGS(inode) (NFS_I(inode)->flags) > #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) > +#define NFS_FSCACHE(inode) (test_bit(NFS_INO_FSCACHE, &NFS_FLAGS(inode))) > > #define NFS_FILEID(inode) (NFS_I(inode)->fileid) > > @@ -553,6 +558,7 @@ extern void * nfs_root_data(void); > #define NFSDBG_CALLBACK 0x0100 > #define NFSDBG_CLIENT 0x0200 > #define NFSDBG_MOUNT 0x0400 > +#define NFSDBG_FSCACHE 0x0800 > #define NFSDBG_ALL 0xFFFF > > #ifdef __KERNEL__ > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h > index 0cac49b..abe1043 100644 > --- a/include/linux/nfs_fs_sb.h > +++ b/include/linux/nfs_fs_sb.h > @@ -3,6 +3,7 @@ > > #include <linux/list.h> > #include <linux/backing-dev.h> > +#include <linux/fscache.h> > > struct nfs_iostats; > > @@ -65,6 +66,10 @@ struct nfs_client { > char cl_ipaddr[16]; > unsigned char cl_id_uniquifier; > #endif > + > +#ifdef CONFIG_NFS_FSCACHE > + struct fscache_cookie *fscache; /* client index cache cookie */ > +#endif > }; > > /* > diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h > index a3ade89..36971cd 100644 > --- a/include/linux/nfs_mount.h > +++ b/include/linux/nfs_mount.h > @@ -63,6 +63,7 @@ struct nfs_mount_data { > #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ > #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ > #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ > -#define NFS_MOUNT_FLAGMASK 0xFFFF > +#define NFS_MOUNT_FSCACHE 0x10000 /* 5 */ > +#define NFS_MOUNT_FLAGMASK 0x1FFFF > > #endif > I'm not too happy about the change to the binary mount interface. As far as I'm concerned, the binary interface should really be considered frozen, and should not be touched. 
Instead, feel free to update the text-based mount interface (which can be found in 2.6.23-rc1 and later). Please put any such mount interface changes in a separate patch, together with the Kconfig changes. Cheers, Trond - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html