The patch entitled 'epoll: optimize EPOLL_CTL_DEL using rcu' increases the size of 'struct epitem' over 128 bytes as it adds a 'struct rcu_head' field to 'struct epitem', in order to make use of RCU. This patch restores the size of 'struct epitem' back to <= 128 bytes. This size restriction and enforcement was originally brought in by commit 39732ca5af4b09f4db561149041ddad7211019a5. The idea of this patch is to use the 'struct rb_node rbn', which is embedded in the 'struct epitem' as the 'rcu_head' callback point. This is ok, since the 'rbn' is no longer in use when we schedule the item to be freed with RCU and its access is guarded by ep->mtx. Further, the RCU reader does not access the rbn field. I've also added a build-time check to ensure that the size of 'struct rb_node' is >= the size of 'struct rcu_head'. Note, I've kept this separate from 'epoll: optimize EPOLL_CTL_DEL using rcu' in order to make clear the hack-ish nature of this thing. Tested-by: Nathan Zimmer <nzimmer@xxxxxxx> Signed-off-by: Jason Baron <jbaron@xxxxxxxxxx> --- fs/eventpoll.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index daaec16..2f06737 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -168,11 +168,7 @@ struct epitem { /* The structure that describe the interested events and the source fd */ struct epoll_event event; - /* - * Free the epitem using rcu to enable a CTL_DEL to happen in parallel - * with reverse path checks. - */ - struct rcu_head rcu; + /* The fllink is in use. 
Since rcu can't do 'list_del_init()' */ int on_list; }; @@ -682,9 +678,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, return error; } -static void epi_rcu_free(struct rcu_head *head) +static void epi_rcu_free(struct rcu_head *rcu) { - struct epitem *epi = container_of(head, struct epitem, rcu); + struct epitem *epi = (struct epitem *)((char *)rcu - + offsetof(struct epitem, rbn)); kmem_cache_free(epi_cache, epi); } @@ -723,9 +720,15 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) spin_unlock_irqrestore(&ep->lock, flags); wakeup_source_unregister(ep_wakeup_source(epi)); - - /* At this point it is safe to free the eventpoll item */ - call_rcu(&epi->rcu, epi_rcu_free); + /* + * At this point it is safe to free the eventpoll item. + * Use epi->rbn, instead of struct rcu_head, since we + * are trying to minimize the size of 'struct epitem'. + * The 'rbn' field is no longer in use. Protected + * by ep->mtx. the rcu read side, reverse_path_check_proc(), + * does not make use of the rbn field. 
+ */ + call_rcu((struct rcu_head *)&epi->rbn, epi_rcu_free); atomic_long_dec(&ep->user->epoll_watches); @@ -2126,6 +2129,15 @@ static int __init eventpoll_init(void) /* Initialize the structure used to perform file's f_op->poll() calls */ ep_nested_calls_init(&poll_readywalk_ncalls); + /* + * We can have many thousands of epitems, so prevent this from + * using an extra cache line on 64-bit (and smaller) CPUs + */ + BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); + + /* make sure the overloading continues to work */ + BUILD_BUG_ON(sizeof(struct rb_node) < sizeof(struct rcu_head)); + /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -- 1.8.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html