Greg raised the question of whether the inline function is really as efficient as a macro. I tried the following definitions: #define to_gbphy_dev(d) container_of(d, struct gbphy_device, dev) static inline struct gbphy_device *extra_to_gbphy_dev(const struct device *_dev) { return container_of(_dev, struct gbphy_device, dev); } And the following uses: ssize_t macro_protocol_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gbphy_device *gbphy_dev = to_gbphy_dev(dev); return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id); } ssize_t extra_protocol_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gbphy_device *gbphy_dev = extra_to_gbphy_dev(dev); return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf); } They are a little bit different to avoid too much compiler optimization. After doing make drivers/staging/greybus/gbphy.s, I get similar looking code in both cases: Macro version: .type macro_protocol_id_show, @function macro_protocol_id_show: endbr64 1: call __fentry__ .section __mcount_loc, "a",@progbits .quad 1b .previous pushq %rbp # movq %rdx, %rbp # tmp96, buf pushq %rbx # # drivers/staging/greybus/gbphy.c:40: { movq %rdi, %rbx # tmp95, dev # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id); call __sanitizer_cov_trace_pc # # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id); movq -32(%rbx), %rax # MEM[(struct gbphy_device *)dev_7(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_7(D) + -40B].cport_desc # drivers/staging/greybus/gbphy.c:43: return sprintf(buf, "%c macro 0x%02x\n", *buf, gbphy_dev->cport_desc->protocol_id); movzbl 0(%rbp), %edx # *buf_9(D), *buf_9(D) movq %rbp, %rdi # buf, movq $.LC18, %rsi #, movzbl 3(%rax), %ecx # _1->protocol_id, _1->protocol_id call sprintf # # drivers/staging/greybus/gbphy.c:44: } movl $13, %eax #, popq %rbx # popq %rbp # jmp __x86_return_thunk .size macro_protocol_id_show, .-macro_protocol_id_show Function version: .type extra_protocol_id_show, @function extra_protocol_id_show: endbr64 1: call __fentry__ .section __mcount_loc, "a",@progbits .quad 1b .previous pushq %rbp # movq %rdx, %rbp # tmp96, buf pushq %rbx # # drivers/staging/greybus/gbphy.c:47: { movq %rdi, %rbx # tmp95, dev # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf); call __sanitizer_cov_trace_pc # # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf); movq -32(%rbx), %rax # MEM[(struct gbphy_device *)dev_8(D) + -40B].cport_desc, MEM[(struct gbphy_device *)dev_8(D) + -40B].cport_desc # drivers/staging/greybus/gbphy.c:50: return sprintf(buf, "extra 0x%02x %c\n", gbphy_dev->cport_desc->protocol_id, *buf); movzbl 0(%rbp), %ecx # *buf_9(D), *buf_9(D) movq %rbp, %rdi # buf, movq $.LC19, %rsi #, movzbl 3(%rax), %edx # _3->protocol_id, _3->protocol_id call sprintf # # drivers/staging/greybus/gbphy.c:51: } movl $13, %eax #, popq %rbx # popq %rbp # jmp __x86_return_thunk .size extra_protocol_id_show, .-extra_protocol_id_show Both seem to access the memory directly. Maybe the example is too simple, and the compiler is more likely to optimize aggressively? julia