On Mon, Oct 26, 2020 at 12:08 PM Willem de Bruijn <willemdebruijn.kernel@xxxxxxxxx> wrote:
>
> From: Willem de Bruijn <willemb@xxxxxxxxxx>
>
> The underlying hrtimer is programmed with nanosecond resolution.
>
> Use cases such as datacenter networking operate on timescales well
> below milliseconds. Setting shorter timeouts bounds tail latency.
>
> Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
> interpretation of argument timeout in epoll_wait from msec to nsec.
>
> The new eventpoll state fits in existing 4B of padding when busy poll
> is compiled in (the default), and reads the same cacheline.
>
> Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx>

Acked-by: Soheil Hassas Yeganeh <soheil@xxxxxxxxxx>

Thanks for adding the feature!

> ---
>
> Selftest for now at github. Can follow-up for kselftests.
> https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
> ---
>  fs/eventpoll.c                 | 26 +++++++++++++++++++-------
>  include/uapi/linux/eventpoll.h |  1 +
>  2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 4df61129566d..1216b909d155 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -225,6 +225,9 @@ struct eventpoll {
>  	unsigned int napi_id;
>  #endif
>
> +	/* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
> +	unsigned int nstimeout:1;
> +
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  	/* tracks wakeup nests for lockdep validation */
>  	u8 nests;
> @@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
>  	return esed.res;
>  }
>
> -static inline struct timespec64 ep_set_mstimeout(long ms)
> +static inline struct timespec64 ep_set_nstimeout(long ns)
>  {
> -	struct timespec64 now, ts = {
> -		.tv_sec = ms / MSEC_PER_SEC,
> -		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
> -	};
> +	struct timespec64 now, ts;
>
> +	ts = ns_to_timespec64(ns);
>  	ktime_get_ts64(&now);
>  	return timespec64_add_safe(now, ts);
>  }
>
> +static inline struct timespec64 ep_set_mstimeout(long ms)
> +{
> +	return ep_set_nstimeout(NSEC_PER_MSEC * ms);
> +}
> +
>  /**
>   * ep_poll - Retrieves ready events, and delivers them to the caller supplied
>   *           event buffer.
> @@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  	lockdep_assert_irqs_enabled();
>
>  	if (timeout > 0) {
> -		struct timespec64 end_time = ep_set_mstimeout(timeout);
> +		struct timespec64 end_time;
> +
> +		end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
> +					   ep_set_mstimeout(timeout);
>
>  		slack = select_estimate_accuracy(&end_time);
>  		to = &expires;
> @@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
>  	/* Check the EPOLL_* constant for consistency.  */
>  	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
>
> -	if (flags & ~EPOLL_CLOEXEC)
> +	if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
>  		return -EINVAL;
>  	/*
>  	 * Create the internal data structure ("struct eventpoll").
> @@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
>  	error = ep_alloc(&ep);
>  	if (error < 0)
>  		return error;
> +
> +	ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
> +
>  	/*
>  	 * Creates all the items needed to setup an eventpoll file. That is,
>  	 * a file structure and a free file descriptor.
> diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
> index 8a3432d0f0dc..f6ef9c9f8ac2 100644
> --- a/include/uapi/linux/eventpoll.h
> +++ b/include/uapi/linux/eventpoll.h
> @@ -21,6 +21,7 @@
>
>  /* Flags for epoll_create1.  */
>  #define EPOLL_CLOEXEC O_CLOEXEC
> +#define EPOLL_NSTIMEO 0x1
>
>  /* Valid opcodes to issue to sys_epoll_ctl() */
>  #define EPOLL_CTL_ADD 1
> --
> 2.29.0.rc1.297.gfa9743e501-goog
>