Martin J. Bligh wrote: >So I'd really, >really rather merge stuff together in a smaller group first - and yes, >mea culpa for throwing stuff out on LKML to start with ... I'll assign >that to frustration from mindless hours of patch refactoring. > > Agree. How about a file by file from 2.6.13-rc4-mm1 (prior to the code movement changes) and then work our differences into updated patches for those code movement changes to rc5-mm1. This alleviates a lot of grief on Andrew's part, and lets us have a better united front when pushing the code movement to LKML again (as you all saw, there was definitely some object to this). We've got selected movement from the following: include/asm-i386/desc.h -> include/asm-i386/mach-default/mach_desc.h include/asm-i386/io.h -> include/asm-i386/mach-default/mach_io.h include/asm-i386/msr.h -> include/asm-i386/mach-default/mach_msr.h include/asm-i386/processor.h -> include/asm-i386/mach-default/mach_processor.h include/asm-i386/system.h -> include/asm-i386/mach-default/mach_system.h include/asm-i386/tlbflush.h -> include/asm-i386/mach-default/mach_tlbflush.h And two oddballs: include/asm-i386/pgtable* -> various include/asm-i386/ptrace.h -> include/asm-i386/mach-default/mach_segment.h Want to start at the top? Here's my patches for descriptor related goo. -------------- next part -------------- Introduce a write acessor for updating the current LDT. This is required for hypervisors like Xen that do not allow LDT pages to be directly written. Testing - here's a fun little LDT test that can be trivially modified to test limits as well. /* * Copyright (c) 2005, Zachary Amsden (zach@xxxxxxxxxx) * This is licensed under the GPL. */ #include <stdio.h> #include <signal.h> #include <asm/ldt.h> #include <asm/segment.h> #include <sys/types.h> #include <unistd.h> #include <sys/mman.h> #define __KERNEL__ #include <asm/page.h> void main(void) { struct user_desc desc; char *code; unsigned long long tsc; code = (char *)mmap(0, 8192, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); desc.entry_number = 0; desc.base_addr = code; desc.limit = 1; desc.seg_32bit = 1; desc.contents = MODIFY_LDT_CONTENTS_CODE; desc.read_exec_only = 0; desc.limit_in_pages = 1; desc.seg_not_present = 0; desc.useable = 1; if (modify_ldt(1, &desc, sizeof(desc)) != 0) { perror("modify_ldt"); } printf("code base is 0x%08x\n", (unsigned)code); code[0x0ffe] = 0x0f; /* rdtsc */ code[0x0fff] = 0x31; code[0x1000] = 0xcb; /* lret */ __asm__ __volatile("lcall $7,$0xffe" : "=A" (tsc)); printf("TSC is 0x%016llx\n", tsc); } Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx> Index: linux-2.6.13/arch/i386/kernel/ldt.c =================================================================== --- linux-2.6.13.orig/arch/i386/kernel/ldt.c 2005-08-03 15:44:24.000000000 -0700 +++ linux-2.6.13/arch/i386/kernel/ldt.c 2005-08-03 15:48:53.000000000 -0700 @@ -177,7 +177,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) { struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2, *lp; + __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -205,8 +205,6 @@ goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { @@ -223,8 +221,7 @@ /* Install the new entry ... */ install: - *lp = entry_1; - *(lp+1) = entry_2; + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); error = 0; out_unlock: Index: linux-2.6.13/include/asm-i386/desc.h =================================================================== --- linux-2.6.13.orig/include/asm-i386/desc.h 2005-08-03 15:44:24.000000000 -0700 +++ linux-2.6.13/include/asm-i386/desc.h 2005-08-03 16:17:25.000000000 -0700 @@ -96,6 +96,13 @@ (info)->seg_not_present == 1 && \ (info)->useable == 0 ) +static inline void write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b) +{ + __u32 *lp = (__u32 *)((char *)ldt + entry*8); + *lp = entry_a; + *(lp+1) = entry_b; +} + #if TLS_SIZE != 24 # error update this code. #endif -------------- next part -------------- i386 Transparent paravirtualization subarch patch #5 This change encapsulates descriptor and task register management. Diffs against: 2.6.13-rc4-mm1 Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx> Index: linux-2.6.13/include/asm-i386/desc.h =================================================================== --- linux-2.6.13.orig/include/asm-i386/desc.h 2005-08-03 16:24:09.000000000 -0700 +++ linux-2.6.13/include/asm-i386/desc.h 2005-08-03 16:31:40.000000000 -0700 @@ -27,19 +27,6 @@ extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) - -#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) -#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr)) -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt)) - -#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) -#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr)) -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt)) - /* * This is the ldt that every process will get unless we need * something other than this. @@ -58,19 +45,10 @@ "rorl $16,%1" \ : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) -{ - _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[entry], (int)addr, - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); -} +#include <mach_desc.h> #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) -{ - _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); -} - #define LDT_entry_a(info) \ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) @@ -96,24 +74,6 @@ (info)->seg_not_present == 1 && \ (info)->useable == 0 ) -static inline void write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b) -{ - __u32 *lp = (__u32 *)((char *)ldt + entry*8); - *lp = entry_a; - *(lp+1) = entry_b; -} - -#if TLS_SIZE != 24 -# error update this code. -#endif - -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) -{ -#define C(i) per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] - C(0); C(1); C(2); -#undef C -} - static inline void clear_LDT(void) { int cpu = get_cpu(); Index: linux-2.6.13/include/asm-i386/mach-default/mach_desc.h =================================================================== --- linux-2.6.13.orig/include/asm-i386/mach-default/mach_desc.h 2005-08-03 16:31:40.000000000 -0700 +++ linux-2.6.13/include/asm-i386/mach-default/mach_desc.h 2005-08-03 16:32:52.000000000 -0700 @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2005, VMware, Inc. + * Copyright (C) 1992-2004, Linus Torvalds and authors + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __MACH_DESC_H +#define __MACH_DESC_H + +#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) +#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) + +#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) +#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) +#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr)) +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt)) + +#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) +#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) +#define store_tr(tr) __asm__ ("str %0":"=mr" (tr)) +#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt)) + +static inline unsigned int get_TR_desc(void) +{ + unsigned int tr; + __asm__ ("str %w0":"=q" (tr)); + return tr; +} + +static inline unsigned int get_LDT_desc(void) +{ + unsigned int ldt; + __asm__ ("sldt %w0":"=q" (ldt)); + return ldt; +} + +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) +{ + _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[entry], (int)addr, + offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); +} + +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) +{ + _set_tssldt_desc(&per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); +} + +static inline void write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b) +{ + __u32 *lp = (__u32 *)((char *)ldt + entry*8); + *lp = entry_a; + *(lp+1) = entry_b; +} + +#if TLS_SIZE != 24 +# error update this code. +#endif + +static inline void load_TLS(struct thread_struct *t, unsigned int cpu) +{ +#define C(i) per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); +#undef C +} + +#endif -------------- next part -------------- When reviewing GDT updates, I found the code: set_tss_desc(cpu,t); /* This just modifies memory; ... */ per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; This second line is unnecessary, since set_tss_desc() has already cleared the busy bit. Commented disassembly, line 1: c028b8bd: 8b 0c 86 mov (%esi,%eax,4),%ecx c028b8c0: 01 cb add %ecx,%ebx c028b8c2: 8d 0c 39 lea (%ecx,%edi,1),%ecx => %ecx = per_cpu(cpu_gdt_table, cpu) c028b8c5: 8d 91 80 00 00 00 lea 0x80(%ecx),%edx => %edx = &per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS] c028b8cb: 66 c7 42 00 73 20 movw $0x2073,0x0(%edx) c028b8d1: 66 89 5a 02 mov %bx,0x2(%edx) c028b8d5: c1 cb 10 ror $0x10,%ebx c028b8d8: 88 5a 04 mov %bl,0x4(%edx) c028b8db: c6 42 05 89 movb $0x89,0x5(%edx) => ((char *)%edx)[5] = 0x89 (equivalent) ((char *)per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS])[5] = 0x89 c028b8df: c6 42 06 00 movb $0x0,0x6(%edx) c028b8e3: 88 7a 07 mov %bh,0x7(%edx) c028b8e6: c1 cb 10 ror $0x10,%ebx => other bits Commented disassembly, line 2: c028b8e9: 8b 14 86 mov (%esi,%eax,4),%edx c028b8ec: 8d 04 3a lea (%edx,%edi,1),%eax => %eax = per_cpu(cpu_gdt_table, cpu) c028b8ef: 81 a0 84 00 00 00 ff andl $0xfffffdff,0x84(%eax) => per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; (equivalent) ((char *)per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS])[5] &= 0xfd Note that (0x89 & ~0xfd) == 0; i.e, set_tss_desc(cpu,t) has already stored the type field in the GDT with the busy bit clear. Eliminating redundant and obscure code is always a good thing; in fact, I pointed out this same optimization many moons ago in arch/i386/setup.c, back when it used to be called that. Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx> Index: linux-2.6.13/arch/i386/power/cpu.c =================================================================== --- linux-2.6.13.orig/arch/i386/power/cpu.c 2005-08-02 17:06:21.000000000 -0700 +++ linux-2.6.13/arch/i386/power/cpu.c 2005-08-03 15:27:57.000000000 -0700 @@ -84,7 +84,6 @@ struct tss_struct * t = &per_cpu(init_tss, cpu); set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */