Re: [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 07/26/2017 02:26 PM, H. Peter Anvin wrote:
   \
   static inline void outs##bwl(int port, const void *addr, unsigned
long count) \
   {

This will clash with a fix I did to add a "memory" clobber
for the traditional implementation, see
https://patchwork.kernel.org/patch/9854573/

Is it even worth leaving these as inline functions?
Given the speed of IO cycles it is unlikely that the cost of calling
a real
function will be significant.
The code bloat reduction will be significant.

I think the smallest code would be the original "rep insb" etc, which
should be smaller than a function call, unlike the loop. Then again,
there is a rather small number of affected device drivers, almost all
of them for ancient hardware that you won't even build in a 64-bit
x86 kernel, see the list below. The only user I found that is
actually
still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the
early
console.


There are some indirect user of string I/O functions. The following
functions
defined in lib/iomap.c calls rep version of ins and outs.

- ioread8_rep, ioread16_rep, ioread32_rep
- iowrite8_rep, iowrite16_rep, iowrite32_rep

I found that several drivers use above functions.

Here is one approach to convert it into non-inline functions. In this
approach,
I have added a new file arch/x86/kernel/io.c which provides non rep
version of
string I/O routines. The file gets built and used only when
AMD_MEM_ENCRYPT is
enabled. On positive side, if we don't build kernel with
AMD_MEM_ENCRYPT support
then we use inline routines, when AMD_MEM_ENCRYPT is built then we make
a function
call. Inside the function we unroll only when SEV is active.

Do you see any issue with this approach ? thanks

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e080a39..104927d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -323,8 +323,9 @@ static inline unsigned type in##bwl##_p(int port)
                \
      unsigned type value = in##bwl(port);                            \
      slow_down_io();                                                 \
      return value;                                                   \
-}
\
-
\
+}
+
+#define BUILDIO_REP(bwl, bw, type)
\
static inline void outs##bwl(int port, const void *addr, unsigned long
count) \
{
\
      asm volatile("rep; outs" #bwl                                   \
@@ -335,12 +336,31 @@ static inline void ins##bwl(int port, void *addr,
unsigned long count)    \
{
\
      asm volatile("rep; ins" #bwl                                    \
                   : "+D"(addr), "+c"(count) : "d"(port));            \
-}
+}
\
BUILDIO(b, b, char)
  BUILDIO(w, w, short)
  BUILDIO(l, , int)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern void outsb_try_rep(int port, const void *addr, unsigned long
count);
+extern void insb_try_rep(int port, void *addr, unsigned long count);
+extern void outsw_try_rep(int port, const void *addr, unsigned long
count);
+extern void insw_try_rep(int port, void *addr, unsigned long count);
+extern void outsl_try_rep(int port, const void *addr, unsigned long
count);
+extern void insl_try_rep(int port, void *addr, unsigned long count);
+#define outsb  outsb_try_rep
+#define insb   insb_try_rep
+#define outsw  outsw_try_rep
+#define insw   insw_try_rep
+#define outsl  outsl_try_rep
+#define insl   insl_try_rep
+#else
+BUILDIO_REP(b, b, char)
+BUILDIO_REP(w, w, short)
+BUILDIO_REP(l, , int)
+#endif
+
  extern void *xlate_dev_mem_ptr(phys_addr_t phys);
  extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a01892b..3b6e2a3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -42,6 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
  obj-$(CONFIG_COMPAT)   += signal_compat.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += io.o
obj-y                  += traps.o irq.o irq_$(BITS).o
dumpstack_$(BITS).o
  obj-y                  += time.o ioport.o dumpstack.o nmi.o
  obj-$(CONFIG_MODIFY_LDT_SYSCALL)       += ldt.o
diff --git a/arch/x86/kernel/io.c b/arch/x86/kernel/io.c
new file mode 100644
index 0000000..f58afa9
--- /dev/null
+++ b/arch/x86/kernel/io.c
@@ -0,0 +1,87 @@
+#include <linux/types.h>
+#include <linux/io.h>
+#include <asm/io.h>
+
+void outsb_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned char *value = (unsigned char *)addr;
+               while (count) {
+                       outb(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsb" : "+S"(addr), "+c"(count) :
"d"(port));
+       }
+}
+
+void insb_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned char *value = (unsigned char *)addr;
+               while (count) {
+                       *value = inb(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insb" : "+D"(addr), "+c"(count) :
"d"(port));
+       }
+}
+
+void outsw_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned short *value = (unsigned short *)addr;
+               while (count) {
+                       outw(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsw" : "+S"(addr), "+c"(count) :
"d"(port));
+       }
+}
+void insw_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned short *value = (unsigned short *)addr;
+               while (count) {
+                       *value = inw(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insw" : "+D"(addr), "+c"(count) :
"d"(port));
+       }
+}
+
+void outsl_try_rep(int port, const void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned int *value = (unsigned int *)addr;
+               while (count) {
+                       outl(*value, port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; outsl" : "+S"(addr), "+c"(count) :
"d"(port));
+       }
+}
+
+void insl_try_rep(int port, void *addr, unsigned long count)
+{
+       if (sev_active()) {
+               unsigned int *value = (unsigned int *)addr;
+               while (count) {
+                       *value = inl(port);
+                       value++;
+                       count--;
+               }
+       } else {
+               asm volatile("rep; insl" : "+D"(addr), "+c"(count) :
"d"(port));
+       }
+}

What the heck?


I am not sure if I understand your concern.

Are you commenting on amount of code duplication ? If so, I can certainly improve
and use the similar macro used into header file to generate the functions body.

Are you commenting that we should not attempt to make those functions non-inline
and you prefer them to stay inline ?

thanks



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux