Re: [RFC PATCH REBASED 2/3] x86: Move fpu_counter into ARCH specific thread_struct

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 09/17/2013 04:04 PM, Vineet Gupta wrote:
> [+cc: x86 folks] as the mailer skipped the CC list.
> 
> On 09/17/2013 11:48 AM, Vineet Gupta wrote:
>> Only a couple of arches (sh/x86) use fpu_counter in task_struct so it
>> can be moved out into ARCH specific thread_struct, reducing the size of
>> task_struct for other arches.
>>
>> Compile tested i386_defconfig + gcc 4.7.3
>>
>> Signed-off-by: Vineet Gupta <vgupta@xxxxxxxxxxxx>
>> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
>> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
>> Cc: x86@xxxxxxxxxx
>> Cc: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
>> Cc: Borislav Petkov <bp@xxxxxxx>
>> Cc: Vincent Palatin <vpalatin@xxxxxxxxxxxx>
>> Cc: Len Brown <len.brown@xxxxxxxxx>
>> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
>> Cc: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
>> Cc: Pekka Riikonen <priikone@xxxxxx>
>> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
>> Cc: Dave Jones <davej@xxxxxxxxxx>
>> Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
>> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
>> Cc: linux-kernel@xxxxxxxxxxxxxxx
>> ---
>>  arch/x86/include/asm/fpu-internal.h | 10 +++++-----
>>  arch/x86/include/asm/processor.h    |  9 +++++++++
>>  arch/x86/kernel/i387.c              |  2 +-
>>  arch/x86/kernel/process_32.c        |  4 ++--
>>  arch/x86/kernel/process_64.c        |  2 +-
>>  arch/x86/kernel/traps.c             |  2 +-
>>  6 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
>> index 4d0bda7..c49a613 100644
>> --- a/arch/x86/include/asm/fpu-internal.h
>> +++ b/arch/x86/include/asm/fpu-internal.h
>> @@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk)
>>  	 * Forget coprocessor state..
>>  	 */
>>  	preempt_disable();
>> -	tsk->fpu_counter = 0;
>> +	tsk->thread.fpu_counter = 0;
>>  	__drop_fpu(tsk);
>>  	clear_used_math();
>>  	preempt_enable();
>> @@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
>>  	 * or if the past 5 consecutive context-switches used math.
>>  	 */
>>  	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
>> -					     new->fpu_counter > 5);
>> +					     new->thread.fpu_counter > 5);
>>  	if (__thread_has_fpu(old)) {
>>  		if (!__save_init_fpu(old))
>>  			cpu = ~0;
>> @@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
>>  
>>  		/* Don't change CR0.TS if we just switch! */
>>  		if (fpu.preload) {
>> -			new->fpu_counter++;
>> +			new->thread.fpu_counter++;
>>  			__thread_set_has_fpu(new);
>>  			prefetch(new->thread.fpu.state);
>>  		} else if (!use_eager_fpu())
>>  			stts();
>>  	} else {
>> -		old->fpu_counter = 0;
>> +		old->thread.fpu_counter = 0;
>>  		old->thread.fpu.last_cpu = ~0;
>>  		if (fpu.preload) {
>> -			new->fpu_counter++;
>> +			new->thread.fpu_counter++;
>>  			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
>>  				fpu.preload = 0;
>>  			else
>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index 987c75e..7b034a4 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -488,6 +488,15 @@ struct thread_struct {
>>  	unsigned long		iopl;
>>  	/* Max allowed port in the bitmap, in bytes: */
>>  	unsigned		io_bitmap_max;
>> +	/*
>> +	 * fpu_counter contains the number of consecutive context switches
>> +	 * in which the FPU is used. If this is over a threshold, the lazy FPU
>> +	 * saving becomes unlazy to save the trap. This is an unsigned char
>> +	 * so that after 256 times the counter wraps and the behavior turns
>> +	 * lazy again; this is to deal with bursty apps that only use the FPU
>> +	 * for a short time.
>> +	 */
>> +	unsigned char fpu_counter;
>>  };
>>  
>>  /*
>> diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
>> index 5d576ab..e8368c6 100644
>> --- a/arch/x86/kernel/i387.c
>> +++ b/arch/x86/kernel/i387.c
>> @@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk)
>>  		__save_init_fpu(tsk);
>>  		__thread_fpu_end(tsk);
>>  	} else
>> -		tsk->fpu_counter = 0;
>> +		tsk->thread.fpu_counter = 0;
>>  	preempt_enable();
>>  }
>>  EXPORT_SYMBOL(unlazy_fpu);
>> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
>> index 884f98f..6af43b0 100644
>> --- a/arch/x86/kernel/process_32.c
>> +++ b/arch/x86/kernel/process_32.c
>> @@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>  		childregs->orig_ax = -1;
>>  		childregs->cs = __KERNEL_CS | get_kernel_rpl();
>>  		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
>> -		p->fpu_counter = 0;
>> +		p->thread.fpu_counter = 0;
>>  		p->thread.io_bitmap_ptr = NULL;
>>  		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
>>  		return 0;
>> @@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>  	p->thread.ip = (unsigned long) ret_from_fork;
>>  	task_user_gs(p) = get_user_gs(current_pt_regs());
>>  
>> -	p->fpu_counter = 0;
>> +	p->thread.fpu_counter = 0;
>>  	p->thread.io_bitmap_ptr = NULL;
>>  	tsk = current;
>>  	err = -ENOMEM;
>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>> index bb1dc51..bbab295 100644
>> --- a/arch/x86/kernel/process_64.c
>> +++ b/arch/x86/kernel/process_64.c
>> @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>  	p->thread.sp = (unsigned long) childregs;
>>  	p->thread.usersp = me->thread.usersp;
>>  	set_tsk_thread_flag(p, TIF_FORK);
>> -	p->fpu_counter = 0;
>> +	p->thread.fpu_counter = 0;
>>  	p->thread.io_bitmap_ptr = NULL;
>>  
>>  	savesegment(gs, p->thread.gsindex);
>> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>> index 8c8093b..64b980f 100644
>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -653,7 +653,7 @@ void math_state_restore(void)
>>  		return;
>>  	}
>>  
>> -	tsk->fpu_counter++;
>> +	tsk->thread.fpu_counter++;
>>  }
>>  EXPORT_SYMBOL_GPL(math_state_restore);
>>  
>>
> 

Comments, please! Can I get an ACK/NAK on this while waiting for the SH folks to
respond on a similar change? It seems to be a straightforward mechanical change.

-Vineet
--
To unsubscribe from this list: send the line "unsubscribe linux-arch" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Newbies]     [x86 Platform Driver]     [Netdev]     [Linux Wireless]     [Netfilter]     [Bugtraq]     [Linux Filesystems]     [Yosemite Discussion]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]

  Powered by Linux