On Fri, Jan 08, 2016 at 01:18:03PM -0800, Tony Luck wrote:
> Make use of the EXTABLE_FAULT exception table entries. This routine
> returns a structure to indicate the result of the copy:
>
> struct mcsafe_ret {
> 	u64 trapnr;
> 	u64 remain;
> };
>
> If the copy is successful, then both 'trapnr' and 'remain' are zero.
>
> If we faulted during the copy, then 'trapnr' will say which type
> of trap (X86_TRAP_PF or X86_TRAP_MC) and 'remain' says how many
> bytes were not copied.
>
> Note that this is probably the first of several copy functions.
> We can make new ones for non-temporal cache handling etc.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
>  arch/x86/include/asm/string_64.h |   8 +++
>  arch/x86/kernel/x8664_ksyms_64.c |   2 +
>  arch/x86/lib/memcpy_64.S         | 134 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 144 insertions(+)

...

> diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
> index 16698bba87de..f576acad485e 100644
> --- a/arch/x86/lib/memcpy_64.S
> +++ b/arch/x86/lib/memcpy_64.S
> @@ -177,3 +177,137 @@ ENTRY(memcpy_orig)
>  .Lend:
>  	retq
>  ENDPROC(memcpy_orig)
> +
> +#ifndef CONFIG_UML
> +/*
> + * __mcsafe_copy - memory copy with machine check exception handling
> + * Note that we only catch machine checks when reading the source addresses.
> + * Writes to target are posted and don't generate machine checks.
> + */
> +ENTRY(__mcsafe_copy)
> +	cmpl $8,%edx
> +	jb 20f		/* less than 8 bytes, go to byte copy loop */
> +
> +	/* check for bad alignment of source */
> +	movl %esi,%ecx

You can save yourself this MOV here in what is, I'm assuming, the
general likely case where @src is aligned, and do:

	/* check for bad alignment of source */
	testl $7,%esi		/* already aligned? */
	jz 102f

	movl %esi,%ecx
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
0:	movb (%rsi),%al
	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 0b

> +	andl $7,%ecx
> +	jz 102f		/* already aligned */

Please move side-comments over the line they're referring to.

> +	subl $8,%ecx
> +	negl %ecx
> +	subl %ecx,%edx
> +0:	movb (%rsi),%al
> +	movb %al,(%rdi)
> +	incq %rsi
> +	incq %rdi
> +	decl %ecx
> +	jnz 0b
> +102:
> +	movl %edx,%ecx
> +	andl $63,%edx
> +	shrl $6,%ecx
> +	jz 17f

Please add a \n after the JMPs for better readability - those blocks
are dense as it is. They could use some comments too.

> +1:	movq (%rsi),%r8
> +2:	movq 1*8(%rsi),%r9
> +3:	movq 2*8(%rsi),%r10
> +4:	movq 3*8(%rsi),%r11
> +	mov %r8,(%rdi)
> +	mov %r9,1*8(%rdi)
> +	mov %r10,2*8(%rdi)
> +	mov %r11,3*8(%rdi)

You can say "movq" here too, for consistency.

> +9:	movq 4*8(%rsi),%r8
> +10:	movq 5*8(%rsi),%r9
> +11:	movq 6*8(%rsi),%r10
> +12:	movq 7*8(%rsi),%r11

Why aren't we pushing %r12-%r15 on the stack after the "jz 17f" above
and using them too, thus copying a whole cacheline in one go?

We would need to restore them when we're done with the cacheline-wise
shuffle, of course.

> +	mov %r8,4*8(%rdi)
> +	mov %r9,5*8(%rdi)
> +	mov %r10,6*8(%rdi)
> +	mov %r11,7*8(%rdi)
> +	leaq 64(%rsi),%rsi
> +	leaq 64(%rdi),%rdi
> +	decl %ecx
> +	jnz 1b

...

--
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
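
[Editor's note: for illustration only, a minimal sketch of the
cacheline-wide loop suggested above, assuming %r12-%r15 are pushed
right after the "jz 17f". The numeric labels are illustrative, every
numbered load would still need its exception table entry, and the
fault fixup paths (not shown) would also have to pop the saved
registers before computing 'remain':]

	movl %edx,%ecx
	andl $63,%edx		/* %edx = tail bytes (< 64) */
	shrl $6,%ecx		/* %ecx = number of whole 64-byte chunks */
	jz 17f

	/* %r12-%r15 are callee-saved, so park them on the stack */
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15

	/* read one whole cacheline per iteration... */
1:	movq 0*8(%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq 4*8(%rsi),%r12
6:	movq 5*8(%rsi),%r13
7:	movq 6*8(%rsi),%r14
8:	movq 7*8(%rsi),%r15

	/* ... then write it out */
	movq %r8,0*8(%rdi)
	movq %r9,1*8(%rdi)
	movq %r10,2*8(%rdi)
	movq %r11,3*8(%rdi)
	movq %r12,4*8(%rdi)
	movq %r13,5*8(%rdi)
	movq %r14,6*8(%rdi)
	movq %r15,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b

	/* done with the cacheline-wise shuffle, restore */
	popq %r15
	popq %r14
	popq %r13
	popq %r12

The trade-off is four extra push/pop pairs per call against half the
loop iterations for large copies.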
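[Editor's note: since the commit message spells out the mcsafe_ret
semantics, here is a hedged sketch of what a caller could look like.
The wrapper name, the memcpy-style argument order, and the header
locations are assumptions; only struct mcsafe_ret, X86_TRAP_PF /
X86_TRAP_MC, and the "both zero on success" convention come from the
patch description:]

	#include <linux/errno.h>
	#include <linux/printk.h>
	#include <linux/string.h>	/* assumed home of the __mcsafe_copy() prototype */
	#include <asm/traps.h>		/* X86_TRAP_PF, X86_TRAP_MC */

	/* hypothetical helper: 0 on success, -EIO on #MC, -EFAULT on #PF */
	static int example_copy_from_pmem(void *dst, const void *src, size_t len)
	{
		struct mcsafe_ret ret = __mcsafe_copy(dst, src, len);

		if (!ret.trapnr)
			return 0;	/* trapnr and remain are both zero on success */

		pr_debug("copy trapped: trapnr=%llu, %llu bytes not copied\n",
			 ret.trapnr, ret.remain);

		return ret.trapnr == X86_TRAP_MC ? -EIO : -EFAULT;
	}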