File linux-2.6-nfs-acl-cache-to-nfs-client.patch of Package kernel
Date: Fri, 29 Sep 2006 12:41:36 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 1/3 ] NFS is revalidating directory entries too often
NFS: Add a new ACL cache to the linux nfs client
From: Trond Myklebust <Trond.Myklebust@netapp.com>
The current ACL cache only allows one ACL at a time to be cached for each
inode. Add a per-inode red-black tree, keyed by RPC credential, in order to
allow more than one to be cached at a time.
This should significantly cut down the access time for shared directories
such as /bin, etc.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
--- linux-2.6.18.i686/fs/nfs/dir.c.001 2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c 2006-09-29 12:13:59.000000000 -0400
@@ -1634,35 +1634,134 @@ out:
return error;
}
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+ put_rpccred(entry->cred);
+ kfree(entry);
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_access_entry *cache = &nfsi->cache_access;
+ struct rb_root *root_node = &nfsi->access_cache;
+ struct rb_node *n, *dispose = NULL;
+ struct nfs_access_entry *entry;
+
+ /* Unhook entries from the cache */
+ while ((n = rb_first(root_node)) != NULL) {
+ entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ rb_erase(n, root_node);
+ n->rb_left = dispose;
+ dispose = n;
+ }
+ nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+ spin_unlock(&inode->i_lock);
- if (cache->cred != cred
- || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
- || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
- return -ENOENT;
- memcpy(res, cache, sizeof(*res));
- return 0;
+ /* Now kill them all! */
+ while (dispose != NULL) {
+ n = dispose;
+ dispose = n->rb_left;
+ nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+ }
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_zap_cache(struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_access_entry *cache = &nfsi->cache_access;
+ spin_lock(&inode->i_lock);
+ /* This will release the spinlock */
+ __nfs_access_zap_cache(inode);
+}
+
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+ struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+ struct nfs_access_entry *entry;
+
+ while (n != NULL) {
+ entry = rb_entry(n, struct nfs_access_entry, rb_node);
- if (cache->cred != set->cred) {
- if (cache->cred)
- put_rpccred(cache->cred);
- cache->cred = get_rpccred(set->cred);
+ if (cred < entry->cred)
+ n = n->rb_left;
+ else if (cred > entry->cred)
+ n = n->rb_right;
+ else
+ return entry;
}
- /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+ return NULL;
+}
+
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_access_entry *cache;
+ int err = -ENOENT;
+
spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out_zap;
+ cache = nfs_access_search_rbtree(inode, cred);
+ if (cache == NULL)
+ goto out;
+ if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+ goto out_stale;
+ res->jiffies = cache->jiffies;
+ res->cred = cache->cred;
+ res->mask = cache->mask;
+ err = 0;
+out:
spin_unlock(&inode->i_lock);
+ return err;
+out_stale:
+ rb_erase(&cache->rb_node, &nfsi->access_cache);
+ spin_unlock(&inode->i_lock);
+ nfs_access_free_entry(cache);
+ return -ENOENT;
+out_zap:
+ /* This will release the spinlock */
+ __nfs_access_zap_cache(inode);
+ return -ENOENT;
+}
+
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+{
+ struct rb_root *root_node = &NFS_I(inode)->access_cache;
+ struct rb_node **p = &root_node->rb_node;
+ struct rb_node *parent = NULL;
+ struct nfs_access_entry *entry;
+
+ spin_lock(&inode->i_lock);
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+ if (set->cred < entry->cred)
+ p = &parent->rb_left;
+ else if (set->cred > entry->cred)
+ p = &parent->rb_right;
+ else
+ goto found;
+ }
+ rb_link_node(&set->rb_node, parent, p);
+ rb_insert_color(&set->rb_node, root_node);
+ spin_unlock(&inode->i_lock);
+ return;
+found:
+ rb_replace_node(parent, &set->rb_node, root_node);
+ spin_unlock(&inode->i_lock);
+ nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+ struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache == NULL)
+ return;
+ RB_CLEAR_NODE(&cache->rb_node);
cache->jiffies = set->jiffies;
+ cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
+
+ nfs_access_add_rbtree(inode, cache);
}
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
--- linux-2.6.18.i686/fs/nfs/inode.c.001 2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/inode.c 2006-09-29 12:16:50.000000000 -0400
@@ -70,21 +70,16 @@ int nfs_write_inode(struct inode *inode,
void nfs_clear_inode(struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct rpc_cred *cred;
-
/*
* The following should never happen...
*/
BUG_ON(nfs_have_writebacks(inode));
- BUG_ON (!list_empty(&nfsi->open_files));
+ BUG_ON (!list_empty(&NFS_I(inode)->open_files));
+ BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
nfs_zap_acl_cache(inode);
- cred = nfsi->cache_access.cred;
- if (cred)
- put_rpccred(cred);
+ nfs_access_zap_cache(inode);
- nfs_fscache_release_fh_cookie(NFS_SERVER(inode), nfsi);
- BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+ nfs_fscache_release_fh_cookie(NFS_SERVER(inode), NFS_I(inode));
}
/**
@@ -291,7 +286,7 @@ nfs_fhget(struct super_block *sb, struct
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- nfsi->cache_access.cred = NULL;
+ nfsi->access_cache = RB_ROOT;
nfs_fscache_get_fh_cookie(sb, nfsi, maycache);
--- linux-2.6.18.i686/include/linux/nfs_fs.h.001 2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/include/linux/nfs_fs.h 2006-09-29 12:14:00.000000000 -0400
@@ -42,6 +42,7 @@
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
+#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/wait.h>
@@ -70,6 +71,7 @@
* NFSv3/v4 Access mode cache entry
*/
struct nfs_access_entry {
+ struct rb_node rb_node;
unsigned long jiffies;
struct rpc_cred * cred;
int mask;
@@ -146,7 +148,7 @@ struct nfs_inode {
*/
atomic_t data_updates;
- struct nfs_access_entry cache_access;
+ struct rb_root access_cache;
#ifdef CONFIG_NFS_V3_ACL
struct posix_acl *acl_access;
struct posix_acl *acl_default;
@@ -301,6 +303,7 @@ extern int nfs_getattr(struct vfsmount *
extern int nfs_permission(struct inode *, int, struct nameidata *);
extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_zap_cache(struct inode *inode);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
extern int nfs_attribute_timeout(struct inode *inode);
Date: Fri, 29 Sep 2006 12:42:27 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 2/3 ] NFS is revalidating directory entries too often
NFS: Add a global LRU list for the ACL cache
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Track inodes that hold cached entries on a global LRU list, in order to allow the addition of a memory shrinker.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
--- linux-2.6.18.i686/fs/nfs/dir.c.002 2006-09-29 12:13:59.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c 2006-09-29 12:20:58.000000000 -0400
@@ -1634,10 +1634,17 @@ out:
return error;
}
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
kfree(entry);
+ smp_mb__before_atomic_dec();
+ atomic_long_dec(&nfs_access_nr_entries);
+ smp_mb__after_atomic_dec();
}
static void __nfs_access_zap_cache(struct inode *inode)
@@ -1651,6 +1658,7 @@ static void __nfs_access_zap_cache(struc
while ((n = rb_first(root_node)) != NULL) {
entry = rb_entry(n, struct nfs_access_entry, rb_node);
rb_erase(n, root_node);
+ list_del(&entry->lru);
n->rb_left = dispose;
dispose = n;
}
@@ -1667,6 +1675,13 @@ static void __nfs_access_zap_cache(struc
void nfs_access_zap_cache(struct inode *inode)
{
+ /* Remove from global LRU init */
+ if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+ spin_lock(&nfs_access_lru_lock);
+ list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+ spin_unlock(&nfs_access_lru_lock);
+ }
+
spin_lock(&inode->i_lock);
/* This will release the spinlock */
__nfs_access_zap_cache(inode);
@@ -1707,12 +1722,14 @@ int nfs_access_get_cached(struct inode *
res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
+ list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
err = 0;
out:
spin_unlock(&inode->i_lock);
return err;
out_stale:
rb_erase(&cache->rb_node, &nfsi->access_cache);
+ list_del(&cache->lru);
spin_unlock(&inode->i_lock);
nfs_access_free_entry(cache);
return -ENOENT;
@@ -1724,7 +1741,8 @@ out_zap:
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
- struct rb_root *root_node = &NFS_I(inode)->access_cache;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct rb_root *root_node = &nfsi->access_cache;
struct rb_node **p = &root_node->rb_node;
struct rb_node *parent = NULL;
struct nfs_access_entry *entry;
@@ -1743,10 +1761,13 @@ static void nfs_access_add_rbtree(struct
}
rb_link_node(&set->rb_node, parent, p);
rb_insert_color(&set->rb_node, root_node);
+ list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
spin_unlock(&inode->i_lock);
return;
found:
rb_replace_node(parent, &set->rb_node, root_node);
+ list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+ list_del(&entry->lru);
spin_unlock(&inode->i_lock);
nfs_access_free_entry(entry);
}
@@ -1762,6 +1783,18 @@ void nfs_access_add_cache(struct inode *
cache->mask = set->mask;
nfs_access_add_rbtree(inode, cache);
+
+ /* Update accounting */
+ smp_mb__before_atomic_inc();
+ atomic_long_inc(&nfs_access_nr_entries);
+ smp_mb__after_atomic_inc();
+
+ /* Add inode to global LRU list */
+ if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+ spin_lock(&nfs_access_lru_lock);
+ list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+ spin_unlock(&nfs_access_lru_lock);
+ }
}
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
--- linux-2.6.18.i686/fs/nfs/inode.c.002 2006-09-29 12:16:50.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/inode.c 2006-09-29 12:20:58.000000000 -0400
@@ -1114,6 +1114,8 @@ static void init_once(void * foo, kmem_c
INIT_LIST_HEAD(&nfsi->dirty);
INIT_LIST_HEAD(&nfsi->commit);
INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
atomic_set(&nfsi->data_updates, 0);
nfsi->ndirty = 0;
--- linux-2.6.18.i686/include/linux/nfs_fs.h.002 2006-09-29 12:14:00.000000000 -0400
+++ linux-2.6.18.i686/include/linux/nfs_fs.h 2006-09-29 12:20:59.000000000 -0400
@@ -72,6 +72,7 @@
*/
struct nfs_access_entry {
struct rb_node rb_node;
+ struct list_head lru;
unsigned long jiffies;
struct rpc_cred * cred;
int mask;
@@ -149,6 +150,8 @@ struct nfs_inode {
atomic_t data_updates;
struct rb_root access_cache;
+ struct list_head access_cache_entry_lru;
+ struct list_head access_cache_inode_lru;
#ifdef CONFIG_NFS_V3_ACL
struct posix_acl *acl_access;
struct posix_acl *acl_default;
@@ -205,6 +208,7 @@ struct nfs_inode {
#define NFS_INO_REVALIDATING (0) /* revalidating attrs */
#define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */
#define NFS_INO_STALE (2) /* possible stale inode */
+#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */
static inline struct nfs_inode *NFS_I(struct inode *inode)
{
Date: Fri, 29 Sep 2006 12:42:56 -0400
From: Steve Dickson <SteveD@redhat.com>
Subject: [RHEL5][PATCH 3/3 ] NFS is revalidating directory entries too often
NFS: Add acl cache shrinker for the VM
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
--- linux-2.6.18.i686/fs/nfs/dir.c.003 2006-09-29 12:20:58.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/dir.c 2006-09-29 12:22:24.000000000 -0400
@@ -1647,6 +1647,50 @@ static void nfs_access_free_entry(struct
smp_mb__after_atomic_dec();
}
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+ LIST_HEAD(head);
+ struct nfs_inode *nfsi;
+ struct nfs_access_entry *cache;
+
+ spin_lock(&nfs_access_lru_lock);
+restart:
+ list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+ struct inode *inode;
+
+ if (nr_to_scan-- == 0)
+ break;
+ inode = igrab(&nfsi->vfs_inode);
+ if (inode == NULL)
+ continue;
+ spin_lock(&inode->i_lock);
+ if (list_empty(&nfsi->access_cache_entry_lru))
+ goto remove_lru_entry;
+ cache = list_entry(nfsi->access_cache_entry_lru.next,
+ struct nfs_access_entry, lru);
+ list_move(&cache->lru, &head);
+ rb_erase(&cache->rb_node, &nfsi->access_cache);
+ if (!list_empty(&nfsi->access_cache_entry_lru))
+ list_move_tail(&nfsi->access_cache_inode_lru,
+ &nfs_access_lru_list);
+ else {
+remove_lru_entry:
+ list_del_init(&nfsi->access_cache_inode_lru);
+ clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ }
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+ goto restart;
+ }
+ spin_unlock(&nfs_access_lru_lock);
+ while (!list_empty(&head)) {
+ cache = list_entry(head.next, struct nfs_access_entry, lru);
+ list_del(&cache->lru);
+ nfs_access_free_entry(cache);
+ }
+ return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
static void __nfs_access_zap_cache(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
--- linux-2.6.18.i686/fs/nfs/internal.h.003 2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/internal.h 2006-09-29 12:22:24.000000000 -0400
@@ -142,6 +142,9 @@ extern int nfs4_proc_fs_locations(struct
struct page *page);
#endif
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
/* inode.c */
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_destroy_inode(struct inode *);
--- linux-2.6.18.i686/fs/nfs/super.c.003 2006-09-29 10:12:21.000000000 -0400
+++ linux-2.6.18.i686/fs/nfs/super.c 2006-09-29 12:22:24.000000000 -0400
@@ -137,6 +137,8 @@ static struct super_operations nfs4_sops
};
#endif
+static struct shrinker *acl_shrinker;
+
/*
* Register the NFS filesystems
*/
@@ -156,6 +158,7 @@ int __init register_nfs_fs(void)
if (ret < 0)
goto error_2;
#endif
+ acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
return 0;
#ifdef CONFIG_NFS_V4
@@ -173,6 +176,8 @@ error_0:
*/
void __exit unregister_nfs_fs(void)
{
+ if (acl_shrinker != NULL)
+ remove_shrinker(acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
nfs_unregister_sysctl();