Re: [PATCH 2/2] mm/page_alloc: avoid counting event if no successful allocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 7/9/21 8:26 PM, Mel Gorman wrote:
[Please note: This e-mail is from an EXTERNAL e-mail address]

On Fri, Jul 09, 2021 at 06:28:55PM +0800, Yanfei Xu wrote:
While the nr_populated is non-zero, however the nr_account might be
zero if allocating fails. In this case, not to count event can save
some cycles.


The much more likely path is that nr_account is positive so we avoid a
branch in the common case.


Got it.

And this commit extract the check of "page_array" from a while
statement to avoid unnecessary checks for it.


I'm surprised the compiler does not catch that page_array is invariant
for the loop. Did you check if gcc generates different code is page_array
is explicitly checked once instead of putting it in the loop?


Hm.. In fact, the page_array always doesn't appear in the loop due to the compiler's optimization. And I just confirmed the assembly.

not apply my patch:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100: e8 0b 73 df ff callq ffffffff8105e410 <__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 58             sub    $0x58,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c4                mov    %edi,-0x3c(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d4                mov    %ecx,-0x2c(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129:       4c 89 4d c8             mov    %r9,-0x38(%rbp)
ffffffff8126712d: 0f 8e 7c 05 00 00 jle ffffffff812676af <__alloc_pages_bulk+0x5af>
ffffffff81267133:       4d 85 c9                test   %r9,%r9
ffffffff81267136: 0f 84 84 05 00 00 je ffffffff812676c0 <__alloc_pages_bulk+0x5c0>
ffffffff8126713c:       49 83 39 00             cmpq   $0x0,(%r9)
ffffffff81267140: 0f 84 7a 05 00 00 je ffffffff812676c0 <__alloc_pages_bulk+0x5c0>
ffffffff81267146:       49 8d 41 08             lea    0x8(%r9),%rax
ffffffff8126714a:       89 ca                   mov    %ecx,%edx
ffffffff8126714c:       45 31 e4                xor    %r12d,%r12d
ffffffff8126714f: eb 0b jmp ffffffff8126715c <__alloc_pages_bulk+0x5c>
ffffffff81267151:       48 83 c0 08             add    $0x8,%rax
ffffffff81267155:       48 83 78 f8 00          cmpq   $0x0,-0x8(%rax)
ffffffff8126715a: 74 1c je ffffffff81267178 <__alloc_pages_bulk+0x78>
ffffffff8126715c:       41 83 c4 01             add    $0x1,%r12d
ffffffff81267160:       44 39 e2                cmp    %r12d,%edx
ffffffff81267163: 75 ec jne ffffffff81267151 <__alloc_pages_bulk+0x51>
ffffffff81267165:       48 63 45 d4             movslq -0x2c(%rbp),%rax
ffffffff81267169:       48 83 c4 58             add    $0x58,%rsp
ffffffff8126716d:       5b                      pop    %rbx
ffffffff8126716e:       41 5c                   pop    %r12
ffffffff81267170:       41 5d                   pop    %r13
ffffffff81267172:       41 5e                   pop    %r14
ffffffff81267174:       41 5f                   pop    %r15
ffffffff81267176:       5d                      pop    %rbp
ffffffff81267177:       c3                      retq
ffffffff81267178:       8b 45 d4                mov    -0x2c(%rbp),%eax
ffffffff8126717b:       44 29 e0                sub    %r12d,%eax
ffffffff8126717e:       83 f8 01                cmp    $0x1,%eax
ffffffff81267181: 0f 84 47 04 00 00 je ffffffff812675ce <__alloc_pages_bulk+0x4ce> ffffffff81267187: 8b 0d 53 22 a4 01 mov 0x1a42253(%rip),%ecx # ffffffff82ca93e0 <page_group_by_mobility_disabled>
ffffffff8126718d:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267191:       44 8b 7d c4             mov    -0x3c(%rbp),%r15d
ffffffff81267195: 44 23 3d 58 22 a4 01 and 0x1a42258(%rip),%r15d # ffffffff82ca93f4 <gfp_allowed_mask>


applied my patch:

ffffffff81267100 <__alloc_pages_bulk>:
ffffffff81267100: e8 0b 73 df ff callq ffffffff8105e410 <__fentry__>
ffffffff81267105:       55                      push   %rbp
ffffffff81267106:       48 89 e5                mov    %rsp,%rbp
ffffffff81267109:       41 57                   push   %r15
ffffffff8126710b:       41 56                   push   %r14
ffffffff8126710d:       41 55                   push   %r13
ffffffff8126710f:       41 54                   push   %r12
ffffffff81267111:       53                      push   %rbx
ffffffff81267112:       48 83 ec 60             sub    $0x60,%rsp
ffffffff81267116:       85 c9                   test   %ecx,%ecx
ffffffff81267118:       89 7d c8                mov    %edi,-0x38(%rbp)
ffffffff8126711b:       89 75 9c                mov    %esi,-0x64(%rbp)
ffffffff8126711e:       48 89 55 a0             mov    %rdx,-0x60(%rbp)
ffffffff81267122:       89 4d d0                mov    %ecx,-0x30(%rbp)
ffffffff81267125:       4c 89 45 a8             mov    %r8,-0x58(%rbp)
ffffffff81267129: 0f 8e 91 05 00 00 jle ffffffff812676c0 <__alloc_pages_bulk+0x5c0>
ffffffff8126712f:       4d 85 c9                test   %r9,%r9
ffffffff81267132:       4d 89 cf                mov    %r9,%r15
ffffffff81267135: 0f 84 4d 05 00 00 je ffffffff81267688 <__alloc_pages_bulk+0x588>
ffffffff8126713b:       8d 49 ff                lea    -0x1(%rcx),%ecx
ffffffff8126713e:       31 c0                   xor    %eax,%eax
ffffffff81267140: eb 03 jmp ffffffff81267145 <__alloc_pages_bulk+0x45>
ffffffff81267142:       48 89 d0                mov    %rdx,%rax
ffffffff81267145:       49 83 3c c7 00          cmpq   $0x0,(%r15,%rax,8)
ffffffff8126714a:       41 89 c4                mov    %eax,%r12d
ffffffff8126714d: 74 0d je ffffffff8126715c <__alloc_pages_bulk+0x5c>
ffffffff8126714f:       44 8d 60 01             lea    0x1(%rax),%r12d
ffffffff81267153:       48 39 c1                cmp    %rax,%rcx
ffffffff81267156:       48 8d 50 01             lea    0x1(%rax),%rdx
ffffffff8126715a: 75 e6 jne ffffffff81267142 <__alloc_pages_bulk+0x42>
ffffffff8126715c:       8b 45 d0                mov    -0x30(%rbp),%eax
ffffffff8126715f:       41 39 c4                cmp    %eax,%r12d
ffffffff81267162: 0f 84 19 04 00 00 je ffffffff81267581 <__alloc_pages_bulk+0x481>
ffffffff81267168:       44 29 e0                sub    %r12d,%eax
ffffffff8126716b:       83 f8 01                cmp    $0x1,%eax
ffffffff8126716e: 0f 84 66 04 00 00 je ffffffff812675da <__alloc_pages_bulk+0x4da>
ffffffff81267174:       48 63 45 9c             movslq -0x64(%rbp),%rax
ffffffff81267178: 8b 0d 62 22 a4 01 mov 0x1a42262(%rip),%ecx # ffffffff82ca93e0 <page_group_by_mobility_disabled>
ffffffff8126717e:       8b 5d c8                mov    -0x38(%rbp),%ebx
ffffffff81267181: 23 1d 6d 22 a4 01 and 0x1a4226d(%rip),%ebx # ffffffff82ca93f4 <gfp_allowed_mask>


Thanks,
Yanfei

--
Mel Gorman
SUSE Labs





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux