Re: [patch 02/14] tmpfs: fix regressions from wider use of ZERO_PAGE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Apr 24, 2022 at 12:54:57PM -0700, Linus Torvalds wrote:
> I suspect it's a %rax vs %rcx confusion again, but with your "patch on
> top of earlier patch" I didn't go and sort it out.

Finally had some quiet time to stare at this.

So when we enter the function, we shift %rcx to get the number of
qword-sized quantities to zero:

SYM_FUNC_START(clear_user_original)
	mov %rcx,%rax
	shr $3,%rcx		# qwords	<---

and we zero in qword quantities merrily:

        # do the qwords first
        .p2align 4
0:      movq $0,(%rdi)
        lea 8(%rdi),%rdi
        dec %rcx
        jnz   0b 


but when we encounter the fault here, we return *%rcx* - not %rcx << 3
- latter being the *bytes* leftover which we *actually* need to return
when we encounter the #PF.

So, we need to shift back when we fault during the qword-sized zeroing,
i.e., full function below, see label 3 there.

With that, strace looks good too:

openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3
mmap(NULL, 196608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ffff7dc5000
munmap(0x7ffff7dd5000, 65536)           = 0
read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 65536) = 16
exit_group(16)                          = ?
+++ exited with 16 +++

As to the byte-exact deal, I'll put it on my TODO to play with it later
and see how much asm we can shed from this simplification so thanks for
the pointers!

/* 
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_original)
        mov %rcx,%rax
        shr $3,%rcx             # qwords
        and $7,%rax             # rest bytes
        test %rcx,%rcx
        jz 1f

        # do the qwords first
        .p2align 4
0:      movq $0,(%rdi)
        lea 8(%rdi),%rdi
        dec %rcx
        jnz   0b

1:      test %rax,%rax
        jz 3f

        # now do the rest bytes
2:      movb $0,(%rdi)
        inc %rdi
        decl %eax
        jnz  2b

3:
        # convert qwords back into bytes to return to caller
        shl $3, %rcx

4:
        xorl %eax,%eax
        RET

        _ASM_EXTABLE_UA(0b, 3b)

        /*
         * The %rcx value gets fixed up with EX_TYPE_UCOPY_LEN (which basically ends
         * up doing "%rcx = %rcx*8 + %rax" in ex_handler_ucopy_len() for the exception
         * case). That is, we use %rax above at label 2: for simpler asm but the number
         * of uncleared bytes will land in %rcx, as expected by the caller.
         *
         * %rax at label 3: still needs to be cleared in the exception case because this
         * is called from inline asm and the compiler expects %rax to be zero when exiting
         * the inline asm, in case it might reuse it somewhere.
         */
        _ASM_EXTABLE_TYPE_REG(2b, 4b, EX_TYPE_UCOPY_LEN8, %rax)


Btw, I'm wondering if using descriptive label names would make this function even more
understandable:

/*      
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_original)
        mov %rcx,%rax
        shr $3,%rcx             # qwords
        and $7,%rax             # rest bytes
        test %rcx,%rcx
        jz .Lrest_bytes

        # do the qwords first
        .p2align 4

.Lqwords:
        movq $0,(%rdi)
        lea 8(%rdi),%rdi
        dec %rcx
        jnz .Lqwords
        
.Lrest_bytes:
        test %rax,%rax
        jz .Lexit
        
        # now do the rest bytes
.Lbytes:
        movb $0,(%rdi)
        inc %rdi
        decl %eax
        jnz .Lbytes
        
.Lqwords_exit:
        # convert qwords back into bytes to return to caller
        shl $3, %rcx

.Lexit:
        xorl %eax,%eax
        RET
        
        _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exit)
      
        /*
         * The %rcx value gets fixed up with EX_TYPE_UCOPY_LEN (which basically ends
         * up doing "%rcx = %rcx*8 + %rax" in ex_handler_ucopy_len() for the exception
         * case). That is, we use %rax above at label 2: for simpler asm but the number
         * of uncleared bytes will land in %rcx, as expected by the caller.
         *
         * %rax at label 3: still needs to be cleared in the exception case because this
         * is called from inline asm and the compiler expects %rax to be zero when exiting
         * the inline asm, in case it might reuse it somewhere.
         */
        _ASM_EXTABLE_TYPE_REG(.Lbytes, .Lexit, EX_TYPE_UCOPY_LEN8, %rax)
SYM_FUNC_END(clear_user_original) 
EXPORT_SYMBOL(clear_user_original)


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux