Introduce attributes and functions in res_counter to implement
throttling-based cgroup subsystems.

The following attributes have been added to struct res_counter:
 * @policy: the limiting policy / algorithm (stored as bit 0 of the new
   @flags field)
 * @capacity: the maximum capacity of the resource; the unit of measurement
   depends on the particular resource (stored in @max_usage for ratelimited
   counters)
 * @timestamp: timestamp of the last accounted resource request

Currently the available policies are token-bucket and leaky-bucket; the
@capacity attribute is only used by the token-bucket policy (to represent
the bucket size).

The following function has been implemented to return the amount of time
(in jiffies) a cgroup should be throttled to remain within the defined
resource limits:

  unsigned long long
  res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val);

[ Note: only the interfaces needed by the cgroup IO controller are
  implemented right now ]

Signed-off-by: Andrea Righi <righi.andrea@xxxxxxxxx>
---
 include/linux/res_counter.h |   81 +++++++++++++++++++++++++++++++++----------
 kernel/res_counter.c        |   62 +++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 18 deletions(-)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 4c5bcf6..c18cee2 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -14,38 +14,50 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/jiffies.h>
 
 /*
- * The core object. the cgroup that wishes to account for some
- * resource may include this counter into its structures and use
- * the helpers described beyond
+ * res_counter flags
+ *
+ * bit 0 -- ratelimiting policy: leaky bucket / token bucket
  */
+#define RES_COUNTER_POLICY	0
+
+#define res_counter_flagged(rc, flag)	((rc)->flags & (1UL << (flag)))
+/* The various policies that can be used for ratelimiting resources */
+#define RATELIMIT_LEAKY_BUCKET	0
+#define RATELIMIT_TOKEN_BUCKET	1
+
+/**
+ * struct res_counter - the core object to account cgroup resources
+ *
+ * @flags:	resource counter attributes
+ * @usage:	the current resource consumption level
+ * @max_usage:	the maximal value of the usage from the counter creation,
+ *		or the maximum capacity of the resource (for ratelimited
+ *		resources)
+ * @limit:	the limit that usage cannot exceed
+ * @failcnt:	the number of unsuccessful attempts to consume the resource
+ * @timestamp:	timestamp of the last accounted resource request
+ * @lock:	the lock to protect all of the above
+ * @parent:	Parent counter, used for hierarchical resource accounting
+ *
+ * The cgroup that wishes to account for some resource may include this counter
+ * into its structures and use the helpers described beyond.
+ */
 
 struct res_counter {
-	/*
-	 * the current resource consumption level
-	 */
+	unsigned long flags;
 	unsigned long long usage;
-	/*
-	 * the maximal value of the usage from the counter creation
-	 */
 	unsigned long long max_usage;
-	/*
-	 * the limit that usage cannot exceed
-	 */
 	unsigned long long limit;
-	/*
-	 * the number of unsuccessful attempts to consume the resource
-	 */
 	unsigned long long failcnt;
+	unsigned long long timestamp;
 	/*
 	 * the lock to protect all of the above.
 	 * the routines below consider this to be IRQ-safe
 	 */
 	spinlock_t lock;
-	/*
-	 * Parent counter, used for hierarchial resource accounting
-	 */
 	struct res_counter *parent;
 };
 
@@ -84,6 +96,7 @@ enum {
 	RES_USAGE,
 	RES_MAX_USAGE,
 	RES_LIMIT,
+	RES_TIMESTAMP,
 	RES_FAILCNT,
 };
 
@@ -130,6 +143,15 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 	return false;
 }
 
+static inline unsigned long long
+res_counter_ratelimit_delta_t(struct res_counter *res)
+{
+	return (long long)get_jiffies_64() - (long long)res->timestamp;
+}
+
+unsigned long long
+res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val);
+
 /*
  * Helper function to detect if the cgroup is within it's limit or
  * not. It's currently called from cgroup_rss_prepare()
@@ -163,6 +185,29 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt)
 	spin_unlock_irqrestore(&cnt->lock, flags);
 }
 
+static inline int
+res_counter_ratelimit_set_limit(struct res_counter *cnt,
+			unsigned long policy,
+			unsigned long long limit, unsigned long long max)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cnt->lock, flags);
+	cnt->limit = limit;
+	/*
+	 * In ratelimited res_counter max_usage is used to save the token
+	 * bucket capacity.
+	 */
+	cnt->max_usage = max;
+	cnt->flags = 0;
+	if (policy == RATELIMIT_TOKEN_BUCKET)
+		set_bit(RES_COUNTER_POLICY, &cnt->flags);
+	cnt->timestamp = get_jiffies_64();
+	cnt->usage = 0;
+	spin_unlock_irqrestore(&cnt->lock, flags);
+	return 0;
+}
+
 static inline int res_counter_set_limit(struct res_counter *cnt,
 		unsigned long long limit)
 {
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bf8e753..f6d97a2 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 #include <linux/parser.h>
+#include <linux/jiffies.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/res_counter.h>
@@ -20,6 +21,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 	spin_lock_init(&counter->lock);
 	counter->limit = (unsigned long long)LLONG_MAX;
 	counter->parent = parent;
+	counter->timestamp = get_jiffies_64();
 }
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -99,6 +101,8 @@ res_counter_member(struct res_counter *counter, int member)
 		return &counter->max_usage;
 	case RES_LIMIT:
 		return &counter->limit;
+	case RES_TIMESTAMP:
+		return &counter->timestamp;
 	case RES_FAILCNT:
 		return &counter->failcnt;
 	};
@@ -163,3 +167,61 @@ int res_counter_write(struct res_counter *counter, int member,
 	spin_unlock_irqrestore(&counter->lock, flags);
 	return 0;
 }
+
+/* Note: called with res->lock held */
+static unsigned long long
+ratelimit_leaky_bucket(struct res_counter *res, ssize_t val)
+{
+	unsigned long long delta, t;
+
+	res->usage += val;
+	delta = res_counter_ratelimit_delta_t(res);
+	if (!delta)
+		return 0;
+	t = res->usage * USEC_PER_SEC;
+	t = usecs_to_jiffies(div64_u64(t, res->limit)); /* limit is 64-bit */
+	if (t > delta)
+		return t - delta;
+	/* Reset i/o statistics */
+	res->usage = 0;
+	res->timestamp = get_jiffies_64();
+	return 0;
+}
+
+/* Note: called with res->lock held */
+static unsigned long long
+ratelimit_token_bucket(struct res_counter *res, ssize_t val)
+{
+	unsigned long long delta;
+	long long tok;
+
+	res->usage -= val;
+	delta = jiffies_to_msecs(res_counter_ratelimit_delta_t(res));
+	res->timestamp = get_jiffies_64();
+	tok = (long long)res->usage * MSEC_PER_SEC;
+	if (delta) {
+		long long max = (long long)res->max_usage * MSEC_PER_SEC;
+
+		tok += delta * res->limit;
+		tok = min_t(long long, tok, max); /* cap at bucket capacity */
+		res->usage = (unsigned long long)div_s64(tok, MSEC_PER_SEC);
+	}
+	return (tok < 0) ? msecs_to_jiffies(div64_u64(-tok, res->limit)) : 0;
+}
+
+unsigned long long
+res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val)
+{
+	unsigned long long sleep = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&res->lock, flags);
+	if (res->limit) {
+		if (res_counter_flagged(res, RES_COUNTER_POLICY))
+			sleep = ratelimit_token_bucket(res, val);
+		else
+			sleep = ratelimit_leaky_bucket(res, val);
+	}
+	spin_unlock_irqrestore(&res->lock, flags);
+	return sleep;
+}
-- 
1.6.0.4

_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/containers