Re: [tip:sched/core] sched: Lower chances of cputime scaling overflow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Apr 12, 2013 at 09:55:56AM +0200, Peter Zijlstra wrote:
> > The above is totally untested, but each step is pretty damn simple and
> > fairly cheap. Sure, it's a loop, but it's bounded to 32 (cheap)
> > iterations, and the normal case is that it's not done at all, or done
> > only a few times.
> 
> Right it gets gradually heavier the bigger the numbers get; which is
> more and more unlikely.
> 
> > And the advantage is that the end result is always that simple
> > 32x32/32 case that we started out with as the common case.
> > 
> > I dunno. Maybe I'm overlooking something, and the above is horrible,
> > but the above seems reasonably efficient if not optimal, and
> > *understandable*.
> 
> I suppose that entirely matters on what one is used to ;-) I had to
> stare rather hard at it for a little while.
> 
> But yes, you take it one step further and are willing to ditch rtime
> bits too and I suppose that's fine.
> 
> Should work,.. Stanislaw could you stick this into your userspace
> thingy and verify the numbers are sane enough? 

It works fine - gives relative error less than 0.1% for very big
numbers.

For the record I'm attaching test program and script.

Thanks
Stanislaw

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <strings.h>
#include <stdint.h>

typedef uint64_t u64;
typedef uint32_t u32;

static u64 div_u64_u32(u64 a, u32 b)
{
	return a / b;
}

static u64 scale_stime(u64 stime, u64 rtime, u64 total)
{

    /* We know one of the values has a bit set in the high 32 bits */
    for (;;) {
        /* Make sure "rtime" is the bigger of stime/rtime */
        if (stime > rtime) {
            u64 tmp = rtime; rtime = stime; stime = tmp;
       	}

        /* Do we need to balance stime/rtime bits? */
        if (rtime >> 32) {
            if (stime >> 31)
                goto drop_precision;

            /* We can grow rtime and shrink stime and try to make them both fit */
            stime <<= 1;
            rtime >>= 1;
            continue;
       	}

        /* stime/rtime fits in 32 bits, how about total? */
        if (!(total >> 32))
            break;

drop_precision:
        /* We drop from rtime, it has more bits than stime */
        rtime >>= 1;
        total >>= 1;
    }

	if (!total)
		return stime;
		
/* Make sure gcc understands that this is a 32x32->64 multiply,
 * followed by a 64/32->64 divide */
    return div_u64_u32((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
}
 
int main(int argc, char *argv[])
{
	u64 rtime, total, stime, scaled;

	if (argc != 4)
		return;

	rtime = strtoll(argv[1], NULL, 10);
	total = strtoll(argv[2], NULL, 10);
	stime = strtoll(argv[3], NULL, 10);

	assert (total >= stime);

	scaled = scale_stime(stime, rtime, total);
	printf("%llu\n", scaled);

	return 0;
}
#!/usr/bin/python

import subprocess
import random
import math

def kernel_scale (rtime, total, stime):
	p = subprocess.Popen("./scale_stime5 " + str(rtime) + " " + str(total) + " " + str(stime) , shell=True, stdout=subprocess.PIPE)
	return int(p.stdout.read())

def python_scale (rtime, total, stime):
	return (stime * rtime) / total

max_rtime = 10*4096*364*24*60*60*1000;  # 10 years for 4096 threads

fail=False
K=10000
for i in range(0, K):
	rtime = random.randrange(max_rtime)
	total = int(random.uniform(0.1, 1.9) * rtime)

	for n in range(1, 100):
		stime = (n * total / 100)
		r1 = kernel_scale(rtime, total, stime)
		r2 = python_scale(rtime, total, stime)
		if (float(abs(r1 - r2)) / float(r2)) > 0.001:
			print "FAIL!"
			print "rtime: " + str(rtime)
			print "total: " + str(total)
			print "stime: " + str(stime)
			print "kernel: " + str(r1)
			print "python: " + str(r2)

			fail=True
			break
	if fail:
		break;
	if (i % 100) == 99:
		print str(i/100) + "/" + str(K/100) + " OK"
		

[Index of Archives]     [Linux Stable Commits]     [Linux Stable Kernel]     [Linux Kernel]     [Linux USB Devel]     [Linux Video &Media]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux