With the ARMv8.2-LVA architecture extension, arm64 hardware that
implements this extension can support up to 52-bit virtual addresses.
It is especially useful for having a 52-bit user-space virtual address
space while the kernel can still retain 48-bit/52-bit virtual
addressing.

Since at the moment we enable the support of this extension in the
kernel via a CONFIG flag (CONFIG_ARM64_VA_BITS_52), there is no clear
mechanism in user-space to determine this CONFIG flag value and use it
to determine the kernel-space VA address range values.

'makedumpfile' can instead use the 'TCR_EL1.T1SZ' value from
vmcoreinfo, which indicates the size offset of the memory region
addressed by TTBR1_EL1 (and hence can be used to determine the
vabits_actual value).

Using the vmcoreinfo variable exported by kernel commit bbdbc11804ff
("arm64/crash_core: Export TCR_EL1.T1SZ in vmcoreinfo"), user-space
can use the following computation to determine whether an address lies
in the linear map range (for newer kernels, >= 5.4):

  #define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1)))

Note that for the --mem-usage case we need to calculate the
vabits_actual value before the vmcoreinfo read functionality is ready,
so we can instead read the architecture register ID_AA64MMFR2_EL1
directly to see if the underlying hardware supports 52-bit addressing
and set vabits_actual accordingly:

  read_id_aa64mmfr2_el1();
  if (hardware supports 52-bit addressing)
          vabits_actual = 52;
  else
          vabits_actual = va_bits value calculated via _stext symbol;

Also make sure that the page_offset, is_linear_addr(addr) and __pa()
calculations work for both older (< 5.4) and newer (>= 5.4) kernels.
I have tested several combinations with both kernel categories [e.g.
with different VA (39, 42, 48 and 52-bit) and PA (48 and 52-bit)
combinations] on at least 3 different boards.

Unfortunately, this means that we need to call
'populate_kernel_version()' earlier than 'get_page_offset_arm64()', as
'info->kernel_version' would otherwise remain uninitialized before its
first use.
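For reviewers, here is a minimal, self-contained sketch of the
T1SZ-to-vabits_actual conversion this patch relies on. It is not part
of the patch; the helper name 't1sz_to_vabits_actual' and the sample
T1SZ values are purely illustrative:

  #include <stdio.h>

  /* vabits_actual is derived from TCR_EL1.T1SZ as (64 - T1SZ); per the
   * TCR_EL1 description in the ARMv8 Architecture Reference Manual,
   * T1SZ is the size offset of the region addressed by TTBR1_EL1.
   */
  static int t1sz_to_vabits_actual(unsigned long t1sz)
  {
          return 64 - (int)t1sz;
  }

  int main(void)
  {
          /* Illustrative values: T1SZ = 16 -> 48-bit VA, T1SZ = 12 -> 52-bit VA */
          printf("T1SZ=16 -> vabits_actual=%d\n", t1sz_to_vabits_actual(16));
          printf("T1SZ=12 -> vabits_actual=%d\n", t1sz_to_vabits_actual(12));
          return 0;
  }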
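Similarly, a standalone sketch (again not part of the patch) of how
the PAGE_OFFSET value and the linear-map test differ between the two
kernel generations, mirroring the get_page_offset_arm64() and
is_linear_addr() logic in the diff below; vabits_actual = 48 and the
test address are example values only:

  #include <stdio.h>

  /* Older kernels (< 5.4) place the linear map in the upper half of the
   * kernel VA space; newer kernels (>= 5.4, flipped VA layout) place it
   * at the bottom.
   */
  static unsigned long page_offset(int vabits_actual, int newer_kernel)
  {
          if (newer_kernel)
                  return -(1UL << vabits_actual);
          return 0xffffffffffffffffUL - (1UL << (vabits_actual - 1)) + 1;
  }

  /* The top VA bit flips meaning across the two layouts: set means
   * linear on older kernels, clear means linear on newer ones.
   */
  static int is_linear_addr(unsigned long addr, int vabits_actual,
                            int newer_kernel)
  {
          unsigned long bit = addr & (1UL << (vabits_actual - 1));

          return newer_kernel ? !bit : !!bit;
  }

  int main(void)
  {
          int va = 48;    /* example vabits_actual */

          printf("old PAGE_OFFSET: %lx\n", page_offset(va, 0)); /* ffff800000000000 */
          printf("new PAGE_OFFSET: %lx\n", page_offset(va, 1)); /* ffff000000000000 */
          printf("linear (new)? %d\n",
                 is_linear_addr(0xffff000012345678UL, va, 1));  /* 1 */
          return 0;
  }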
This patch is in accordance with the ARMv8 Architecture Reference
Manual.

Cc: Kazuhito Hagio <k-hagio@xxxxxxxxxxxxx>
Cc: John Donnelly <john.p.donnelly@xxxxxxxxxx>
Cc: kexec@xxxxxxxxxxxxxxxxxxx
Signed-off-by: Bhupesh Sharma <bhsharma@xxxxxxxxxx>
---
 arch/arm64.c   | 233 ++++++++++++++++++++++++++++++++++++++++++-------
 common.h       |  10 +++
 makedumpfile.c |   4 +-
 makedumpfile.h |   6 +-
 4 files changed, 218 insertions(+), 35 deletions(-)

diff --git a/arch/arm64.c b/arch/arm64.c
index 709e0a506916..ccaa8641ca66 100644
--- a/arch/arm64.c
+++ b/arch/arm64.c
@@ -19,10 +19,23 @@
 #ifdef __aarch64__
 
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
 #include "../elf_info.h"
 #include "../makedumpfile.h"
 #include "../print_info.h"
 
+/* ID_AA64MMFR2_EL1 related helpers: */
+#define ID_AA64MMFR2_LVA_SHIFT	16
+#define ID_AA64MMFR2_LVA_MASK	(0xf << ID_AA64MMFR2_LVA_SHIFT)
+
+/* CPU feature ID registers */
+#define get_cpu_ftr(id) ({						\
+		unsigned long __val;					\
+		asm volatile("mrs %0, " __stringify(id) : "=r" (__val));\
+		__val;							\
+})
+
 typedef struct {
 	unsigned long pgd;
 } pgd_t;
@@ -47,6 +60,7 @@ typedef struct {
 static int lpa_52_bit_support_available;
 static int pgtable_level;
 static int va_bits;
+static int vabits_actual;
 static unsigned long kimage_voffset;
 
 #define SZ_4K			4096
@@ -58,7 +72,6 @@ static unsigned long kimage_voffset;
 #define PAGE_OFFSET_42		((0xffffffffffffffffUL) << 42)
 #define PAGE_OFFSET_47		((0xffffffffffffffffUL) << 47)
 #define PAGE_OFFSET_48		((0xffffffffffffffffUL) << 48)
-#define PAGE_OFFSET_52		((0xffffffffffffffffUL) << 52)
 
 #define pgd_val(x)		((x).pgd)
 #define pud_val(x)		(pgd_val((x).pgd))
@@ -219,13 +232,25 @@ pmd_page_paddr(pmd_t pmd)
 #define pte_index(vaddr)		(((vaddr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
 #define pte_offset(dir, vaddr)		(pmd_page_paddr((*dir)) + pte_index(vaddr) * sizeof(pte_t))
 
+/*
+ * The linear kernel range starts at the bottom of the virtual address
+ * space. Testing the top bit for the start of the region is a
+ * sufficient check and avoids having to worry about the tag.
+ */
+#define is_linear_addr(addr)	((info->kernel_version < KERNEL_VERSION(5, 4, 0)) ? \
+		(!!((unsigned long)(addr) & (1UL << (vabits_actual - 1)))) : \
+		(!((unsigned long)(addr) & (1UL << (vabits_actual - 1)))))
+
 static unsigned long long
 __pa(unsigned long vaddr)
 {
 	if (kimage_voffset == NOT_FOUND_NUMBER ||
-			(vaddr >= PAGE_OFFSET))
-		return (vaddr - PAGE_OFFSET + info->phys_base);
-	else
+			is_linear_addr(vaddr)) {
+		if (info->kernel_version < KERNEL_VERSION(5, 4, 0))
+			return ((vaddr & ~PAGE_OFFSET) + info->phys_base);
+		else
+			return (vaddr + info->phys_base - PAGE_OFFSET);
+	} else
 		return (vaddr - kimage_voffset);
 }
 
@@ -254,6 +279,7 @@ static int calculate_plat_config(void)
 			(PAGESIZE() == SZ_64K && va_bits == 42)) {
 		pgtable_level = 2;
 	} else if ((PAGESIZE() == SZ_64K && va_bits == 48) ||
+			(PAGESIZE() == SZ_64K && va_bits == 52) ||
 			(PAGESIZE() == SZ_4K && va_bits == 39) ||
 			(PAGESIZE() == SZ_16K && va_bits == 47)) {
 		pgtable_level = 3;
@@ -288,8 +314,14 @@ get_phys_base_arm64(void)
 		return TRUE;
 	}
 
+	/* Ignore the 1st PT_LOAD */
 	if (get_num_pt_loads() && PAGE_OFFSET) {
-		for (i = 0;
+		/* Note that the following loop starts with i = 1.
+		 * This is required to make sure that the following logic
+		 * works both for old and newer kernels (with flipped
+		 * VA space, i.e. >= 5.4.0)
+		 */
+		for (i = 1;
 		    get_pt_load(i, &phys_start, NULL, &virt_start, NULL);
 		    i++) {
 			if (virt_start != NOT_KV_ADDR
@@ -346,6 +378,139 @@ get_stext_symbol(void)
 	return(found ? kallsym : FALSE);
 }
 
+static int
+get_va_bits_from_stext_arm64(void)
+{
+	ulong _stext;
+
+	_stext = get_stext_symbol();
+	if (!_stext) {
+		ERRMSG("Can't get the symbol of _stext.\n");
+		return FALSE;
+	}
+
+	/* Derive va_bits as per arch/arm64/Kconfig. Note that this is a
+	 * best-case approximation at the moment, as there can be
+	 * inconsistencies in this calculation (e.g., for the
+	 * 52-bit kernel VA case, the 48th bit is set in
+	 * the _stext symbol).
+	 *
+	 * So, we need to rely on the vabits_actual symbol in the
+	 * vmcoreinfo, or read it via system register, for an accurate
+	 * value of the virtual addressing supported by the underlying
+	 * kernel.
+	 */
+	if ((_stext & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
+		va_bits = 48;
+	} else if ((_stext & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
+		va_bits = 47;
+	} else if ((_stext & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
+		va_bits = 42;
+	} else if ((_stext & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
+		va_bits = 39;
+	} else if ((_stext & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
+		va_bits = 36;
+	} else {
+		ERRMSG("Cannot find a proper _stext for calculating VA_BITS\n");
+		return FALSE;
+	}
+
+	DEBUG_MSG("va_bits : %d (approximation via _stext)\n", va_bits);
+
+	return TRUE;
+}
+
+/* Note that the ID_AA64MMFR2_EL1 architecture register can be read
+ * only when we give an .arch hint to gcc/binutils, so we use the gcc
+ * construct '__attribute__ ((target ("arch=armv8.2-a")))' here, which
+ * emits an .arch directive (see the AArch64 Target selection directives
+ * documentation from ARM for details). This is required only for
+ * this function, to make sure it compiles well with gcc/binutils.
+ */
+__attribute__ ((target ("arch=armv8.2-a")))
+static unsigned long
+read_id_aa64mmfr2_el1(void)
+{
+	return get_cpu_ftr(ID_AA64MMFR2_EL1);
+}
+
+static int
+get_vabits_actual_from_id_aa64mmfr2_el1(void)
+{
+	int l_vabits_actual;
+	unsigned long val;
+
+	/* Check if the ID_AA64MMFR2_EL1 CPU-ID register indicates
+	 * ARMv8.2/LVA support:
+	 * VARange, bits [19:16]
+	 * From ARMv8.2:
+	 * Indicates support for a larger virtual address.
+	 * Defined values are:
+	 * 0b0000 VMSAv8-64 supports 48-bit VAs.
+	 * 0b0001 VMSAv8-64 supports 52-bit VAs when using the 64KB
+	 *        page size. The other translation granules support
+	 *        48-bit VAs.
+	 *
+	 * See the ARMv8 ARM for more details.
+	 */
+	if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+		ERRMSG("arm64 CPUID registers unavailable.\n");
+		return ERROR;
+	}
+
+	val = read_id_aa64mmfr2_el1();
+	val = (val & ID_AA64MMFR2_LVA_MASK) >> ID_AA64MMFR2_LVA_SHIFT;
+
+	if ((val == 0x1) && (PAGESIZE() == SZ_64K))
+		l_vabits_actual = 52;
+	else
+		l_vabits_actual = 48;
+
+	return l_vabits_actual;
+}
+
+static void
+get_page_offset_arm64(void)
+{
+	/* Check if 'vabits_actual' is initialized yet.
+	 * If not, our best bet is to read the ID_AA64MMFR2_EL1 CPU-ID
+	 * register.
+	 */
+	if (!vabits_actual) {
+		vabits_actual = get_vabits_actual_from_id_aa64mmfr2_el1();
+		if ((vabits_actual == ERROR) || (vabits_actual != 52)) {
+			/* If we cannot read the ID_AA64MMFR2_EL1 arch
+			 * register or if this register does not indicate
+			 * support for a larger virtual address, our last
+			 * option is to use the VA_BITS to calculate the
+			 * PAGE_OFFSET value, i.e. vabits_actual = VA_BITS.
+			 */
+			vabits_actual = va_bits;
+			DEBUG_MSG("vabits_actual : %d (approximation via va_bits)\n",
+					vabits_actual);
+		} else
+			DEBUG_MSG("vabits_actual : %d (via id_aa64mmfr2_el1)\n",
+					vabits_actual);
+	}
+
+	if (!populate_kernel_version()) {
+		ERRMSG("Cannot get information about current kernel\n");
+		return;
+	}
+
+	/* See arch/arm64/include/asm/memory.h for more details of
+	 * the PAGE_OFFSET calculation.
+	 */
+	if (info->kernel_version < KERNEL_VERSION(5, 4, 0))
+		info->page_offset = ((0xffffffffffffffffUL) -
+				((1UL) << (vabits_actual - 1)) + 1);
+	else
+		info->page_offset = (-(1UL << vabits_actual));
+
+	DEBUG_MSG("page_offset : %lx (via vabits_actual)\n",
+			info->page_offset);
+}
+
 int
 get_machdep_info_arm64(void)
 {
@@ -360,8 +525,33 @@ get_machdep_info_arm64(void)
 	/* Check if va_bits is still not initialized. If still 0, call
 	 * get_versiondep_info() to initialize the same.
 	 */
+	if (NUMBER(VA_BITS) != NOT_FOUND_NUMBER) {
+		va_bits = NUMBER(VA_BITS);
+		DEBUG_MSG("va_bits : %d (vmcoreinfo)\n",
+				va_bits);
+	}
+
+	/* Check if va_bits is still not initialized. If still 0, call
+	 * get_versiondep_info() to initialize the same from _stext
+	 * symbol.
+	 */
 	if (!va_bits)
-		get_versiondep_info_arm64();
+		if (get_va_bits_from_stext_arm64() == FALSE)
+			return FALSE;
+
+	/* See TCR_EL1, Translation Control Register (EL1) register
+	 * description in the ARMv8 Architecture Reference Manual.
+	 * Basically, we can use the TCR_EL1.T1SZ
+	 * value to determine the virtual addressing range supported
+	 * in the kernel-space (i.e. vabits_actual).
+	 */
+	if (NUMBER(TCR_EL1_T1SZ) != NOT_FOUND_NUMBER) {
+		vabits_actual = 64 - NUMBER(TCR_EL1_T1SZ);
+		DEBUG_MSG("vabits_actual : %d (vmcoreinfo)\n",
+				vabits_actual);
+	}
+
+	get_page_offset_arm64();
 
 	if (!calculate_plat_config()) {
 		ERRMSG("Can't determine platform config values\n");
@@ -399,34 +589,11 @@ get_xen_info_arm64(void)
 int
 get_versiondep_info_arm64(void)
 {
-	ulong _stext;
-
-	_stext = get_stext_symbol();
-	if (!_stext) {
-		ERRMSG("Can't get the symbol of _stext.\n");
-		return FALSE;
-	}
-
-	/* Derive va_bits as per arch/arm64/Kconfig */
-	if ((_stext & PAGE_OFFSET_36) == PAGE_OFFSET_36) {
-		va_bits = 36;
-	} else if ((_stext & PAGE_OFFSET_39) == PAGE_OFFSET_39) {
-		va_bits = 39;
-	} else if ((_stext & PAGE_OFFSET_42) == PAGE_OFFSET_42) {
-		va_bits = 42;
-	} else if ((_stext & PAGE_OFFSET_47) == PAGE_OFFSET_47) {
-		va_bits = 47;
-	} else if ((_stext & PAGE_OFFSET_48) == PAGE_OFFSET_48) {
-		va_bits = 48;
-	} else {
-		ERRMSG("Cannot find a proper _stext for calculating VA_BITS\n");
-		return FALSE;
-	}
-
-	info->page_offset = (0xffffffffffffffffUL) << (va_bits - 1);
+	if (!va_bits)
+		if (get_va_bits_from_stext_arm64() == FALSE)
+			return FALSE;
 
-	DEBUG_MSG("va_bits : %d\n", va_bits);
-	DEBUG_MSG("page_offset : %lx\n", info->page_offset);
+	get_page_offset_arm64();
 
 	return TRUE;
 }
diff --git a/common.h b/common.h
index 6e2f657a79c7..1901df195e9d 100644
--- a/common.h
+++ b/common.h
@@ -50,5 +50,15 @@
 #define NOT_PADDR	(ULONGLONG_MAX)
 #define BADADDR		((ulong)(-1))
 
+/* Indirect stringification. Doing two levels allows the parameter to be a
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ *
+ * Copied from linux source: 'include/linux/stringify.h'
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
 #endif  /* COMMON_H */
diff --git a/makedumpfile.c b/makedumpfile.c
index 4c4251ea8719..5ab82fd3cf14 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1133,7 +1133,7 @@ fallback_to_current_page_size(void)
 	return TRUE;
 }
 
-static int populate_kernel_version(void)
+int populate_kernel_version(void)
 {
 	struct utsname utsname;
 
@@ -2323,6 +2323,7 @@ write_vmcoreinfo_data(void)
 	WRITE_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR);
 #ifdef __aarch64__
 	WRITE_NUMBER("VA_BITS", VA_BITS);
+	WRITE_NUMBER_UNSIGNED("TCR_EL1_T1SZ", TCR_EL1_T1SZ);
 	WRITE_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET);
 	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
 #endif
@@ -2729,6 +2730,7 @@ read_vmcoreinfo(void)
 	READ_NUMBER("KERNEL_IMAGE_SIZE", KERNEL_IMAGE_SIZE);
 #ifdef __aarch64__
 	READ_NUMBER("VA_BITS", VA_BITS);
+	READ_NUMBER_UNSIGNED("TCR_EL1_T1SZ", TCR_EL1_T1SZ);
 	READ_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET);
 	READ_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
 #endif
diff --git a/makedumpfile.h b/makedumpfile.h
index 03fb4ce06872..dc65f002bad6 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -974,7 +974,9 @@ unsigned long long vaddr_to_paddr_arm64(unsigned long vaddr);
 int get_versiondep_info_arm64(void);
 int get_xen_basic_info_arm64(void);
 int get_xen_info_arm64(void);
-#define paddr_to_vaddr_arm64(X) (((X) - info->phys_base) | PAGE_OFFSET)
+#define paddr_to_vaddr_arm64(X) ((info->kernel_version < KERNEL_VERSION(5, 4, 0)) ? \
+		((X) - (info->phys_base - PAGE_OFFSET)) : \
+		(((X) - info->phys_base) | PAGE_OFFSET))
 
 #define find_vmemmap()		stub_false()
 #define vaddr_to_paddr(X)	vaddr_to_paddr_arm64(X)
@@ -1938,6 +1940,7 @@ struct number_table {
 	long	KERNEL_IMAGE_SIZE;
 #ifdef __aarch64__
 	long	VA_BITS;
+	unsigned long	TCR_EL1_T1SZ;
 	unsigned long	PHYS_OFFSET;
 	unsigned long	kimage_voffset;
 #endif
@@ -2389,5 +2392,6 @@ ulong htol(char *s, int flags);
 int hexadecimal(char *s, int count);
 int decimal(char *s, int count);
 int file_exists(char *file);
+int populate_kernel_version(void);
 
 #endif /* MAKEDUMPFILE_H */
--
2.26.2

_______________________________________________
kexec mailing list
kexec@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/kexec