On Mon, 17 Jan 2011 11:04:17 +0900 FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx> wrote: > On Sun, 16 Jan 2011 18:24:59 +0200 > Alexander Nezhinsky <alexandern@xxxxxxxxxxxx> wrote: > > > On 01/15/2011 01:14 PM, FUJITA Tomonori wrote: > > >> Re-implementing the time-based work scheduler. This patch implements > > >> a timer-based scheme. > > > > > You need this patch for more precise timer, right? In another way, the > > > problem that you try to fix is that the current timer is too > > > unexpected. > > > > Yes, the jiffies mechanism relied on the timeout values passed to > > epoll_wait(), but the events could pop up before the timed has expired. > > So under load, the intervals between jiffies counts are practically zero. > > On the other hand, event handlers can take indefinite times, but this > > would be undetected by the jiffies count. > > > > In the new scheme, the timer never "underflows" - at least the requested > > interval is alwys guaranteed. Other event handlers can still delay > > the actual work scheduling, but after they return all overdue work units > > are submitted for execution immediately. > > > > The underlying timer fires every 250ms, setting the expected accuracy of > > scheduling. I guess that all event handlers practically complete well > > within this interval, so we can fairly count on delays bounded by, > > let's say, ~300ms. > > Even if the timer fires every 250ms, there is no guarantee that the > file descriptor is handled immediately, right? > > Adding the better timer is fine by me. Can we use timerfd instead of > using another thread? timerfd is exactly what we want here. > > We still need to care about too old kernels that don't support > timerfd? Here's an example to use timerfd. We need to keep the old jiffies code for old kernels and non Linux but timerfd-capable kernels can enjoy timerfd. diff --git a/usr/tgtd.c b/usr/tgtd.c index 2fd4959..13ff65c 100644 --- a/usr/tgtd.c +++ b/usr/tgtd.c @@ -337,7 +337,7 @@ static void event_loop(void) retry: sched_remains = tgt_exec_scheduled(); - timeout = sched_remains ? 0 : TGTD_TICK_PERIOD * 1000; + timeout = sched_remains ? 0 : -1; nevent = epoll_wait(ep_fd, events, ARRAY_SIZE(events), timeout); if (nevent < 0) { @@ -350,8 +350,7 @@ retry: tev = (struct event_data *) events[i].data.ptr; tev->handler(tev->fd, events[i].events, tev->data); } - } else - schedule(); + } if (system_active) goto retry; @@ -517,12 +516,18 @@ int main(int argc, char **argv) } } + err = work_timer_start(); + if (err) + exit(1); + bs_init(); event_loop(); lld_exit(); + work_timer_stop(); + ipc_exit(); log_close(); diff --git a/usr/work.c b/usr/work.c index 3080a59..2373e78 100644 --- a/usr/work.c +++ b/usr/work.c @@ -1,8 +1,9 @@ /* - * bogus scheduler + * work scheduler, loosely timer-based * * Copyright (C) 2006-2007 FUJITA Tomonori <tomof@xxxxxxx> * Copyright (C) 2006-2007 Mike Christie <michaelc@xxxxxxxxxxx> + * Copyright (C) 2011 Alexander Nezhinsky <alexandern@xxxxxxxxxxxx> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -21,27 +22,107 @@ */ #include <stdlib.h> #include <stdint.h> +#include <sys/epoll.h> +#include <sys/timerfd.h> #include "list.h" #include "util.h" #include "log.h" #include "work.h" +#include "tgtd.h" + +#define WORK_TIMER_INT_MSEC 250 +#define WORK_TIMER_INT_NSEC (WORK_TIMER_INT_MSEC * 1000 * 1000) + +static int timer_fd = -1; +static unsigned int current_time; -static unsigned int jiffies; static LIST_HEAD(active_work_list); static LIST_HEAD(inactive_work_list); +static void execute_work(void); + +static void work_timer_evt_handler(int fd, int events, void *data) +{ + unsigned long long s; + static int first; + int ret; + + if (!first) { + first++; + return; + } + + ret = read(timer_fd, &s, sizeof(s)); + if (ret < 0) { + if (ret == -EAGAIN) + return; + eprintf("Failed to read from pipe, %m\n"); + return; + } + + current_time += s * WORK_TIMER_INT_MSEC; + + execute_work(); +} + +int work_timer_start(void) +{ + struct itimerspec new, old; + int ret; + + if (timer_fd != -1) + return 0; + + timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK); + if (timer_fd < 0) { + eprintf("the system doesn't support timerfd"); + goto timer_err; + } + + new.it_value.tv_sec = 0; + new.it_value.tv_nsec = 1; + + new.it_interval.tv_sec = 0; + new.it_interval.tv_nsec = WORK_TIMER_INT_NSEC; + + ret = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &new, &old); + if (ret < 0) { + eprintf("the system doesn't support timerfd"); + close(timer_fd); + goto timer_err; + } + + ret = tgt_event_add(timer_fd, EPOLLIN, + work_timer_evt_handler, NULL); + if (ret) { + eprintf("failed to add timer event, fd:%d\n", timer_fd); + goto timer_err; + } + + return 0; +timer_err: + work_timer_stop(); + return -1; +} + +int work_timer_stop(void) +{ + if (timer_fd == -1) + return 0; + + tgt_event_del(timer_fd); + close(timer_fd); + + return 0; +} + void add_work(struct tgt_work *work, unsigned int second) { - unsigned int when; struct tgt_work *ent; if (second) { - when = second / TGTD_TICK_PERIOD; - if (!when) - when = 1; - - work->when = when + jiffies; + work->when = current_time + second * 1000; list_for_each_entry(ent, &inactive_work_list, entry) { if (before(work->when, ent->when)) @@ -49,8 +130,10 @@ void add_work(struct tgt_work *work, unsigned int second) } list_add_tail(&work->entry, &ent->entry); - } else + } else { list_add_tail(&work->entry, &active_work_list); + execute_work(); + } } void del_work(struct tgt_work *work) @@ -58,20 +141,16 @@ void del_work(struct tgt_work *work) list_del_init(&work->entry); } -/* - * this function is called only when the system is idle. So this - * scheduler is pretty bogus. Your job would be delayed unexpectedly. - */ -void schedule(void) +static void execute_work() { struct tgt_work *work, *n; list_for_each_entry_safe(work, n, &inactive_work_list, entry) { - if (after(jiffies, work->when)) { - list_del(&work->entry); - list_add_tail(&work->entry, &active_work_list); - } else + if (before(current_time, work->when)) break; + + list_del(&work->entry); + list_add_tail(&work->entry, &active_work_list); } while (!list_empty(&active_work_list)) { @@ -80,6 +159,4 @@ void schedule(void) list_del_init(&work->entry); work->func(work->data); } - - jiffies++; } diff --git a/usr/work.h b/usr/work.h index 3d5e75e..7b1876a 100644 --- a/usr/work.h +++ b/usr/work.h @@ -1,8 +1,6 @@ #ifndef __SCHED_H #define __SCHED_H -#define TGTD_TICK_PERIOD 2 - struct tgt_work { struct list_head entry; void (*func)(void *); @@ -10,7 +8,9 @@ struct tgt_work { unsigned int when; }; -extern void schedule(void); +extern int work_timer_start(void); +extern int work_timer_stop(void); + extern void add_work(struct tgt_work *work, unsigned int second); extern void del_work(struct tgt_work *work); -- To unsubscribe from this list: send the line "unsubscribe stgt" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html