[RFC] Separate time support for using cpu timer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Background
----------

The current arch/mips/kernel/time.c has been stretched over time to the
point that it now looks like convoluted spaghetti to me:

1) it was originally designed to be flexible so that we could support
   all imaginable timer sources and timer setups:
	. use cpu timer (count/compare pair)
	. use board timer, with cpu count and known frequency
	. use board timer, with cpu count and unknown frequency
	. use board timer, with cpu count and unknown frequency, and
	  we can use 64bit division
	. jiffy interrupt through do_IRQ
	. jiffy interrupt through ll_timer_interrupt
	.....

2) introduction of 32bit SMP causes more complexity

3) the hpt_xxx stuff introduces another abstraction layer between the hw
   timer (where the cpu timer is treated as one kind of hw timer) and the
   time subsystem.


Solution
--------

All the boards that I am really concerned with right now have cpu count/compare
registers.  I believe this will be even more the case in the future.

Therefore I would like to propose separate time support for systems that use
the cpu timer as their system timer.

As you can see from the patch, the new code is much simpler.


The hidden agenda
-----------------

OK, I admit there is another motivation in all of this.  Linux is moving
towards higher resolution timers.  For example, see the introduction of high resolution
posix timers (http://sourceforge.net/projects/high-res-timers/).  Having a MIPS common
time routine based on the cpu timer makes it much easier to support
such a feature for MIPS boards.  We don't need to mess with individual board timers
anymore.

In addition, I think that in the 2.7 time frame Linux needs to replace its ancient jiffy
time system with a natively higher resolution time system.  A MIPS cpu timer based
routine would evolve much better into the future.


The patch
---------

Attached is the patch for the UP case.  I will post an additional patch for the SMP
case later.

The patch is currently designed to be a drop-in replacement for arch/mips/kernel/time.c.
As you can see from the patch, you will only need to modify the Kconfig to select
CPU_TIMER for the qualified boards.

Comments?

Jun
diff -Nru linux/arch/mips/Kconfig.orig linux/arch/mips/Kconfig
--- linux/arch/mips/Kconfig.orig	2004-04-19 16:33:42.000000000 -0700
+++ linux/arch/mips/Kconfig	2004-04-19 16:42:18.000000000 -0700
@@ -320,6 +320,7 @@
 config DDB5477
 	bool "Support for NEC DDB Vrc-5477"
 	select IRQ_CPU
+	select CPU_TIMER
 	help
 	  This enables support for the R5432-based NEC DDB Vrc-5477,
 	  or Rockhopper/SolutionGear boards with R5432/R5500 CPUs.
@@ -516,6 +517,7 @@
 config SIBYTE_SWARM
 	bool "BCM91250A-SWARM"
 	select SIBYTE_SB1250
+	select CPU_TIMER
 
 config SIBYTE_SENTOSA
 	bool "BCM91250E-Sentosa"
@@ -800,6 +802,13 @@
 	  byte order. These modes require different kernels. Say Y if your
 	  machine is little endian, N if it's a big endian machine.
 
+config CPU_TIMER
+	bool
+	help
+	  If the CPU has count/compare registers (most do), and they are used
+	  as the system timer, the board can select this option to use the
+	  alternative time routines in cpu-timer.c.
+
 config IRQ_CPU
 	bool
 
diff -Nru linux/arch/mips/kernel/cpu-timer.c.orig linux/arch/mips/kernel/cpu-timer.c
--- linux/arch/mips/kernel/cpu-timer.c.orig	2004-04-19 16:33:42.000000000 -0700
+++ linux/arch/mips/kernel/cpu-timer.c	2004-04-19 17:01:13.000000000 -0700
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2004 MontaVista Software Inc.
+ * Author: Jun Sun, jsun@xxxxxxxxxx or jsun@xxxxxxxxxx
+ *
+ * This routine provides time routines for boards that use cpu count/compare
+ * as their system timer.  A couple of requirements:
+ *   . Must have count/compare register and use them as your system timer
+ *     (obviously)
+ *   . Timer interrupt must go through do_IRQ() or ll_timer_interrupt()
+ *   . You must know or calibrate cpu timer frequency.
+ *
+ * See more in Documentation/mips/time.README.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cpu.h>
+#include <asm/cpu-features.h>
+#include <asm/div64.h>
+#include <asm/hardirq.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+#include <asm/debug.h>
+
+/*
+ * USECS_PER_JIFFY is the whole number of usecs in one jiffy.  It expands
+ * (via TICK_SIZE) to tick_nsec / 1000, so any fractional usec is dropped
+ * by the integer division.  TICK_SIZE is defined after its first use,
+ * which is fine because macros are only expanded where they are used.
+ */
+#define USECS_PER_JIFFY		TICK_SIZE
+
+#define TICK_SIZE	(tick_nsec / 1000)
+
+u64 jiffies_64 = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);
+
+/*
+ * forward reference
+ */
+extern volatile unsigned long wall_jiffies;
+
+/*
+ * Default (null) RTC ops: reads report 2000-01-01 00:00:00, writes are no-ops.
+ */
+static unsigned long null_rtc_get_time(void)
+{
+	return mktime(2000, 1, 1, 0, 0, 0);
+}
+
+static int null_rtc_set_time(unsigned long sec)
+{
+	return 0;
+}
+
+unsigned long (*rtc_get_time)(void) = null_rtc_get_time;
+int (*rtc_set_time)(unsigned long) = null_rtc_set_time;
+int (*rtc_set_mmss)(unsigned long);
+
+
+/* usecs per counter cycle, scaled by 2^32 (computed in time_init()) */
+static unsigned int sll32_usecs_per_cycle;
+
+/* number of counter cycles in one jiffy (computed in time_init()) */
+static unsigned long cycles_per_jiffy;
+
+/* cp0 count value at which the most recently handled tick was due */
+static unsigned int last_count;
+
+/* xtime.tv_sec recorded when the RTC was last updated from xtime */
+static long last_rtc_update;
+
+/* number of missed timer interrupts, counted in timer_interrupt() */
+int missed_timer_count;
+
+
+/*
+ * Return the time elapsed since the last handled timer tick, in usecs.
+ * The result is always strictly less than USECS_PER_JIFFY.
+ */
+static unsigned long get_intra_jiffy_offset(void)
+{
+	u32 count;
+	unsigned long res;
+
+	/* Sample the cp0 cycle counter */
+	count = read_c0_count();
+
+	/*
+	 * .. relative to previous jiffy (32 bits is enough).
+	 * This routine should be protected by xtime_lock.  No race condition.
+	 * In SMP case, count may occasionally be behind last_count.
+	 */
+	/*
+	 * Signed 32-bit difference: wrap-safe on both 32 and 64bit kernels.
+	 */
+	if ((s32)(count - last_count) > 0)
+		count -= last_count;
+	else
+		count = 0;
+
+	__asm__("multu	%1,%2"
+		: "=h" (res)
+		: "r" (count), "r" (sll32_usecs_per_cycle)
+		: "lo", "accum");
+
+	/*
+	 * res is the high word of the product, i.e. whole usecs.  Clamp it
+	 * to just below one jiffy so that the time we report is monotonic.
+	 */
+	if (res >= USECS_PER_JIFFY)
+		res = USECS_PER_JIFFY - 1;
+
+	return res;
+}
+
+/*
+ * Fill *tv with the current wall time: xtime plus the intra-jiffy offset.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+	unsigned long seq;
+	unsigned long lost;
+	unsigned long usec, sec;
+	unsigned long max_ntp_tick;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+
+		usec = get_intra_jiffy_offset();
+
+		lost = jiffies - wall_jiffies;
+
+		/*
+		 * If time_adjust is negative then NTP is slowing the clock
+		 * so make sure not to go into the next possible interval.
+		 * Better to lose some accuracy than have time go backwards.
+		 */
+		if (unlikely(time_adjust < 0)) {
+			max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
+			usec = min(usec, max_ntp_tick);
+
+			if (lost)
+				usec += lost * max_ntp_tick;
+		} else if (unlikely(lost))
+			usec += lost * (USEC_PER_SEC / HZ);
+
+		sec = xtime.tv_sec;
+		usec += (xtime.tv_nsec / 1000);
+	} while (read_seqretry(&xtime_lock, seq));
+
+	while (usec >= 1000000) {
+		usec -= 1000000;
+		sec++;
+	}
+	
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
+int do_settimeofday(struct timespec *tv)
+{
+	time_t wtm_sec, sec = tv->tv_sec;
+	long wtm_nsec, nsec = tv->tv_nsec;
+
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irq(&xtime_lock);
+
+	/*
+	 * xtime holds the wall time as of its most recent update, not "now".
+	 * Work out the correction do_gettimeofday() would add on top of it
+	 * (intra-jiffy offset plus any jiffies not yet folded into xtime)
+	 * and subtract that from the requested time before storing it.
+	 */
+	nsec -= get_intra_jiffy_offset() * NSEC_PER_USEC;
+	nsec -= (jiffies - wall_jiffies) * tick_nsec;
+
+	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+	set_normalized_timespec(&xtime, sec, nsec);
+	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+	time_adjust = 0;			/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+
+	write_sequnlock_irq(&xtime_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+
+/*
+ * local_timer_interrupt() does the per-CPU part of timer handling.  In
+ * this UP version that is only the kernel profiling histogram update.
+ *
+ * In UP mode, it is invoked from the (global) timer_interrupt.
+ *
+ * In SMP mode, it might be invoked by a per-CPU timer interrupt, or
+ * a broadcast inter-processor interrupt which itself is triggered
+ * by the global timer interrupt.
+ */
+void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		if (prof_buffer && current->pid) {
+			unsigned long pc = regs->cp0_epc;
+
+			pc -= (unsigned long) _stext;
+			pc >>= prof_shift;
+			/*
+			 * Don't ignore out-of-bounds pc values silently,
+			 * put them into the last histogram slot, so if
+			 * present, they will show up as a sharp peak.
+			 */
+			if (pc > prof_len - 1)
+				pc = prof_len - 1;
+			atomic_inc((atomic_t *)&prof_buffer[pc]);
+		}
+	}
+}
+
+/*
+ * Global timer interrupt handler (irqaction->handler, called via do_IRQ).
+ * Catches up on missed ticks; counter math is 32-bit so wraps are safe.
+ */
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int compare;
+
+	db_assert(smp_processor_id() == 0);
+
+	write_seqlock(&xtime_lock);
+
+	missed_timer_count--;
+	do {
+		missed_timer_count++;
+
+		/* Ack this timer interrupt and set the next one.  */
+		last_count += cycles_per_jiffy;
+		compare = last_count + cycles_per_jiffy;
+		write_c0_compare(compare);
+
+		do_timer(regs);
+
+	} while ((s32)(read_c0_count() - compare) >= 0);
+
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * CMOS clock accordingly every ~11 minutes. rtc_set_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 */
+	if ((time_status & STA_UNSYNC) == 0 &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
+	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
+		if (rtc_set_mmss(xtime.tv_sec) == 0) {
+			last_rtc_update = xtime.tv_sec;
+		} else {
+			/* do it again in 60 s */
+			last_rtc_update = xtime.tv_sec - 600;
+		}
+	}
+
+	write_sequnlock(&xtime_lock);
+
+	/*
+	 * We call local_timer_interrupt() to do the per-CPU work
+	 * (profiling) outside of xtime_lock.
+	 */
+	local_timer_interrupt(irq, dev_id, regs);
+
+	return IRQ_HANDLED;
+}
+
+asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs)
+{
+	irq_enter();
+	kstat_this_cpu.irqs[irq]++;
+
+	/* we keep interrupts disabled the whole time; dev_id is unused */
+	timer_interrupt(irq, NULL, regs);
+
+	irq_exit();
+}
+
+/*
+ * time_init() - it does the following things.
+ *
+ * .) board_time_init() (or in board setup routine) -
+ * 	a) set up RTC routines,
+ *      b) calibrate and set the mips_hpt_frequency
+ * .) set rtc_set_mmss if it is not set by board code
+ * .) set up xtime based on rtc_get_time().
+ * .) init wall_to_monotonic
+ * .) calculate cached variables; reset cp0 count and program compare
+ * .) board_timer_setup() -
+ * 	. If you use ll_timer_interrupt(), do
+ *			set_c0_status(IE_IRQ5);
+ *		
+ *	. Otherwise if you are using IRQ_CPU, do
+ *		setup_irq(CPU_IRQ_BASE + 7, irq)
+ *
+ *	. If you are not using ll_timer_interrupt() (i.e., go through
+ *	  do_IRQ()) and you are not using IRQ_CPU, you can work around,
+ *	  but you probably really should ask yourself why.
+ */
+
+void (*board_time_init)(void);
+void (*board_timer_setup)(struct irqaction *irq);
+
+unsigned int mips_hpt_frequency;
+
+static struct irqaction timer_irqaction = {
+	.handler = timer_interrupt,
+	.flags = SA_INTERRUPT,
+	.name = "timer",
+};
+
+void __init time_init(void)
+{
+	if (board_time_init)
+		board_time_init();
+
+	db_assert(mips_hpt_frequency != 0);
+
+	if (!rtc_set_mmss)
+		rtc_set_mmss = rtc_set_time;
+
+	xtime.tv_sec = rtc_get_time();
+	xtime.tv_nsec = 0;
+
+	set_normalized_timespec(&wall_to_monotonic,
+	                        -xtime.tv_sec, -xtime.tv_nsec);
+
+	/* Calculate cached parameters (cycles per jiffy, rounded).  */
+	cycles_per_jiffy = (mips_hpt_frequency + HZ / 2) / HZ;
+
+	/* sll32_usecs_per_cycle = 10^6 * 2^32 / mips_hpt_frequency, rounded */
+	{ 
+		u64 div = ((u64)1000000 << 32) + mips_hpt_frequency / 2;
+		do_div(div, mips_hpt_frequency);
+		sll32_usecs_per_cycle = div;
+	}
+
+	/* Report the high precision timer rate for a reference.  */
+	printk("Using %u.%03u MHz cpu timer.\n",
+		       ((mips_hpt_frequency + 500) / 1000) / 1000,
+		       ((mips_hpt_frequency + 500) / 1000) % 1000);
+
+	/* initialize cp0 count and compare: first tick is one jiffy away */
+	write_c0_compare(cycles_per_jiffy);
+	write_c0_count(0);
+	last_count = 0;
+
+	/*
+	 * Call board specific timer interrupt setup.
+	 *
+	 * This pointer must be set up in the machine setup routine: unlike
+	 * board_time_init it is called without a NULL check.
+	 * Even if a machine chooses to use a low-level timer interrupt,
+	 * it still needs to set up the timer_irqaction.
+	 * In that case, it might be better to set timer_irqaction.handler
+	 * to be NULL function so that we are sure the high-level code
+	 * is not invoked accidentally.
+	 */
+	board_timer_setup(&timer_irqaction);
+}
+
+#define FEBRUARY		2
+#define STARTOFTIME		1970
+#define SECDAY			86400L
+#define SECYR			(SECDAY * 365)
+#define leapyear(y)		((!((y) % 4) && ((y) % 100)) || !((y) % 400))
+#define days_in_year(y)		(leapyear(y) ? 366 : 365)
+#define days_in_month(m)	(month_days[(m) - 1])
+
+static int month_days[12] = {
+	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+void to_tm(unsigned long tim, struct rtc_time *tm)
+{
+	long hms, day, gday;
+	int i;
+
+	gday = day = tim / SECDAY;
+	hms = tim % SECDAY;
+
+	/* Hours, minutes, seconds come straight from the seconds in the day */
+	tm->tm_hour = hms / 3600;
+	tm->tm_min = (hms % 3600) / 60;
+	tm->tm_sec = (hms % 3600) % 60;
+
+	/* Peel off whole years; tm_year ends up as the full year number */
+	for (i = STARTOFTIME; day >= days_in_year(i); i++)
+		day -= days_in_year(i);
+	tm->tm_year = i;
+
+	/* Peel off whole months; month_days[Feb] is 29 only during the scan */
+	if (leapyear(tm->tm_year))
+		days_in_month(FEBRUARY) = 29;
+	for (i = 1; day >= days_in_month(i); i++)
+		day -= days_in_month(i);
+	days_in_month(FEBRUARY) = 28;
+	tm->tm_mon = i - 1;		/* tm_mon starts from 0 to 11 */
+
+	/* Days are what is left over (+1) from all that. */
+	tm->tm_mday = day + 1;
+
+	/*
+	 * Determine the day of week from the days since 1970/1/1
+	 */
+	tm->tm_wday = (gday + 4) % 7;	/* 1970/1/1 was Thursday */
+}
+
+EXPORT_SYMBOL(to_tm);
+EXPORT_SYMBOL(rtc_set_time);
+EXPORT_SYMBOL(rtc_get_time);
diff -Nru linux/arch/mips/kernel/Makefile.orig linux/arch/mips/kernel/Makefile
--- linux/arch/mips/kernel/Makefile.orig	2004-04-19 16:33:42.000000000 -0700
+++ linux/arch/mips/kernel/Makefile	2004-04-19 16:42:18.000000000 -0700
@@ -6,7 +6,13 @@
 
 obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 		   ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
-		   time.o traps.o unaligned.o
+		   traps.o unaligned.o
+
+ifdef CONFIG_CPU_TIMER
+obj-y				+= cpu-timer.o
+else
+obj-y				+= time.o
+endif
 
 ifdef CONFIG_MODULES
 obj-y				+= mips_ksyms.o
diff -Nru linux/arch/mips/kernel/proc.c.orig linux/arch/mips/kernel/proc.c
--- linux/arch/mips/kernel/proc.c.orig	2004-04-19 16:33:42.000000000 -0700
+++ linux/arch/mips/kernel/proc.c	2004-04-19 16:42:18.000000000 -0700
@@ -78,7 +78,9 @@
 	[CPU_SR71000]	"Sandcraft SR71000"
 };
 
-
+#if defined(CONFIG_CPU_TIMER)
+extern int missed_timer_count;
+#endif
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	unsigned int version = current_cpu_data.processor_id;
@@ -121,6 +123,10 @@
 	seq_printf(m, fmt, 'D', vced_count);
 	seq_printf(m, fmt, 'I', vcei_count);
 
+#if defined(CONFIG_CPU_TIMER)
+	seq_printf(m, "missed timers\t\t: %d\n", missed_timer_count);
+#endif
+
 	return 0;
 }
 

[Index of Archives]     [Linux MIPS Home]     [LKML Archive]     [Linux ARM Kernel]     [Linux ARM]     [Linux]     [Git]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

  Powered by Linux