Hi Raphael, On Fri, May 20, 2016 at 03:11:06AM +0200, Raphael Poggi wrote: > Hello, > > This patch series introduces a basic support for arm64. > > [PATCH 1/3] arch: add minimal aarch64 support : > Introduce new architecture by creating a new root directory, > I choose this approach for now, because it is simpler for now, maybe later we will merge this in arch/arm. I just applied this series to a temporary branch, did a cp -r arch/arm64/* arch/arm and committed everything, see the result below. This of course breaks arm32 support, but it nicely reveals all places that need fixup for arm64. How about we proceed like this: We continue to work on the patch below. We continuously find proper solutions for the different places that need fixup. For every item that is fixed properly we apply the resulting patch to mainline and rebase the big work-in-progress patch on top of it. This way the patch should continuously get smaller until we finally have working arm32 and arm64 support in a single architecture. One of the first things we'll need is ARM32/ARM64 Kconfig options which we can use for the different lib/ functions to depend on (maybe create a lib32/ and a lib64/ directory?) This should already make the work-in-progress patch much smaller. What do you think? 
Sascha ----------------------------8<------------------------------ >From 79e852820d19e3620bfe63b87161317e616546d5 Mon Sep 17 00:00:00 2001 From: Sascha Hauer <s.hauer@xxxxxxxxxxxxxx> Date: Mon, 23 May 2016 08:47:36 +0200 Subject: [PATCH] wip Signed-off-by: Sascha Hauer <s.hauer@xxxxxxxxxxxxxx> --- arch/arm/Kconfig | 276 ++------------ arch/arm/Makefile | 190 +--------- arch/arm/boards/Makefile | 1 + arch/arm/boards/virt/Kconfig | 8 + arch/arm/boards/virt/Makefile | 1 + arch/arm/boards/virt/env/bin/_update | 36 ++ arch/arm/boards/virt/env/bin/boot | 38 ++ arch/arm/boards/virt/env/bin/init | 20 + arch/arm/boards/virt/env/bin/update_kernel | 8 + arch/arm/boards/virt/env/bin/update_root | 8 + arch/arm/boards/virt/env/config | 38 ++ arch/arm/boards/virt/env/init/mtdparts-nor | 11 + arch/arm/boards/virt/init.c | 67 ++++ arch/arm/configs/virt_defconfig | 53 +++ arch/arm/cpu/Kconfig | 102 +---- arch/arm/cpu/Makefile | 29 +- arch/arm/cpu/cache-armv8.S | 168 +++++++++ arch/arm/cpu/cache.c | 69 +--- arch/arm/cpu/cpu.c | 44 +-- arch/arm/cpu/cpuinfo.c | 86 ++--- arch/arm/cpu/entry.c | 5 +- arch/arm/cpu/exceptions.S | 313 ++++++---------- arch/arm/cpu/interrupts.c | 91 ++--- arch/arm/cpu/lowlevel.S | 89 ++--- arch/arm/cpu/mmu.c | 578 ++++++++--------------------- arch/arm/cpu/mmu.h | 155 ++++++++ arch/arm/cpu/start.c | 12 +- arch/arm/cpu/uncompress.c | 2 +- arch/arm/include/asm/barebox-arm.h | 2 +- arch/arm/include/asm/bitops.h | 192 ++-------- arch/arm/include/asm/boarddata.h | 5 + arch/arm/include/asm/cache-l2x0.h | 8 - arch/arm/include/asm/cache.h | 4 +- arch/arm/include/asm/errata.h | 9 - arch/arm/include/asm/gic.h | 128 +++++++ arch/arm/include/asm/mmu.h | 6 +- arch/arm/include/asm/pgtable.h | 5 +- arch/arm/include/asm/ptrace.h | 111 +----- arch/arm/include/asm/system.h | 173 +++++---- arch/arm/include/asm/system_info.h | 73 ++-- arch/arm/lib/Makefile | 13 - arch/arm/lib/armlinux.c | 6 - arch/arm/lib/barebox.lds.S | 5 +- arch/arm/lib/bootm.c | 109 +++--- 
arch/arm/lib/copy_template.S | 438 +++++++++------------- arch/arm/lib/memcpy.S | 84 +++-- arch/arm/lib/memset.S | 305 +++++++++------ arch/arm/lib/runtime-offset.S | 18 +- arch/arm/mach-virt/Kconfig | 15 + arch/arm/mach-virt/Makefile | 3 + arch/arm/mach-virt/devices.c | 30 ++ arch/arm/mach-virt/include/mach/debug_ll.h | 24 ++ arch/arm/mach-virt/include/mach/devices.h | 13 + arch/arm/mach-virt/lowlevel.c | 19 + arch/arm/mach-virt/reset.c | 24 ++ 55 files changed, 2008 insertions(+), 2312 deletions(-) create mode 100644 arch/arm/boards/virt/Kconfig create mode 100644 arch/arm/boards/virt/Makefile create mode 100644 arch/arm/boards/virt/env/bin/_update create mode 100644 arch/arm/boards/virt/env/bin/boot create mode 100644 arch/arm/boards/virt/env/bin/init create mode 100644 arch/arm/boards/virt/env/bin/update_kernel create mode 100644 arch/arm/boards/virt/env/bin/update_root create mode 100644 arch/arm/boards/virt/env/config create mode 100644 arch/arm/boards/virt/env/init/mtdparts-nor create mode 100644 arch/arm/boards/virt/init.c create mode 100644 arch/arm/configs/virt_defconfig create mode 100644 arch/arm/cpu/cache-armv8.S create mode 100644 arch/arm/include/asm/boarddata.h create mode 100644 arch/arm/include/asm/gic.h create mode 100644 arch/arm/mach-virt/Kconfig create mode 100644 arch/arm/mach-virt/Makefile create mode 100644 arch/arm/mach-virt/devices.c create mode 100644 arch/arm/mach-virt/include/mach/debug_ll.h create mode 100644 arch/arm/mach-virt/include/mach/devices.h create mode 100644 arch/arm/mach-virt/lowlevel.c create mode 100644 arch/arm/mach-virt/reset.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1fc887b..34085f6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,15 +22,6 @@ config ARM_USE_COMPRESSED_DTB select UNCOMPRESS select LZO_DECOMPRESS -config ARCH_BCM283X - bool - select GPIOLIB - select CLKDEV_LOOKUP - select COMMON_CLK - select CLOCKSOURCE_BCM283X - select ARM_AMBA - select HAS_DEBUG_LL - menu "System Type" config 
BUILTIN_DTB @@ -50,236 +41,18 @@ config BUILTIN_DTB_NAME choice prompt "ARM system type" -config ARCH_AT91 - bool "Atmel AT91" - select GPIOLIB - select CLKDEV_LOOKUP - select HAS_DEBUG_LL - select HAVE_MACH_ARM_HEAD - select HAVE_CLK - select PINCTRL_AT91 - -config ARCH_BCM2835 - bool "Broadcom BCM2835 boards" - select ARCH_BCM283X - select CPU_ARM1176 - -config ARCH_BCM2836 - bool "Broadcom BCM2836 boards" - select ARCH_BCM283X - select CPU_V7 - -config ARCH_CLPS711X - bool "Cirrus Logic EP711x/EP721x/EP731x" - select CLKDEV_LOOKUP - select CLOCKSOURCE_CLPS711X - select COMMON_CLK - select CPU_32v4T - select GPIOLIB - select MFD_SYSCON - -config ARCH_DAVINCI - bool "TI Davinci" - select CPU_ARM926T - select HAS_DEBUG_LL - select GPIOLIB - -config ARCH_DIGIC - bool "Canon DIGIC-based cameras" - select CPU_ARM946E - select HAS_DEBUG_LL - select CLOCKSOURCE_DIGIC - select GPIOLIB - help - Support for Canon's digital cameras that use the DIGIC4 chip. - -config ARCH_EP93XX - bool "Cirrus Logic EP93xx" - select CPU_ARM920T - select GENERIC_GPIO - -config ARCH_HIGHBANK - bool "Calxeda Highbank" +config ARCH_VIRT + bool "ARM QEMU virt boards" select HAS_DEBUG_LL - select HAS_POWEROFF - select ARCH_HAS_L2X0 - select CPU_V7 + select CPU_V8 + select SYS_SUPPORTS_64BIT_KERNEL select ARM_AMBA - select AMBA_SP804 - select CLKDEV_LOOKUP - select COMMON_CLK - select GPIOLIB - -config ARCH_IMX - bool "Freescale iMX-based" - select GPIOLIB - select COMMON_CLK - select CLKDEV_LOOKUP - select WATCHDOG_IMX_RESET_SOURCE - select HAS_DEBUG_LL - -config ARCH_MVEBU - bool "Marvell EBU platforms" - select COMMON_CLK - select COMMON_CLK_OF_PROVIDER - select CLKDEV_LOOKUP - select GPIOLIB - select HAS_DEBUG_LL - select HAVE_PBL_MULTI_IMAGES - select HW_HAS_PCI - select MVEBU_MBUS - select OFTREE - select OF_ADDRESS_PCI - select PINCTRL - -config ARCH_MXS - bool "Freescale i.MX23/28 (mxs) based" - select GPIOLIB - select GENERIC_GPIO - select COMMON_CLK - select CLKDEV_LOOKUP - select 
HAS_DEBUG_LL - -config ARCH_NETX - bool "Hilscher NetX based" - select CPU_ARM926T - -config ARCH_NOMADIK - bool "STMicroelectronics Nomadik" - select CPU_ARM926T - select CLOCKSOURCE_NOMADIK - select HAVE_CLK - help - Support for the Nomadik platform by ST-Ericsson - -config ARCH_OMAP - bool "TI OMAP" - select HAS_DEBUG_LL - select GPIOLIB - -config ARCH_PXA - bool "Intel/Marvell PXA based" - select GENERIC_GPIO - select HAS_POWEROFF - -config ARCH_ROCKCHIP - bool "Rockchip RX3xxx" - select CPU_V7 - select ARM_SMP_TWD - select COMMON_CLK - select CLKDEV_LOOKUP - select COMMON_CLK_OF_PROVIDER - select GPIOLIB - select PINCTRL - select PINCTRL_ROCKCHIP - select OFTREE - select HAVE_PBL_MULTI_IMAGES - select HAS_DEBUG_LL - select ARCH_HAS_L2X0 - -config ARCH_SOCFPGA - bool "Altera SOCFPGA cyclone5" - select HAS_DEBUG_LL - select ARM_SMP_TWD - select CPU_V7 - select COMMON_CLK - select CLKDEV_LOOKUP - select GPIOLIB - select HAVE_PBL_MULTI_IMAGES - select OFDEVICE if !ARCH_SOCFPGA_XLOAD - select OFTREE if !ARCH_SOCFPGA_XLOAD - -config ARCH_S3C24xx - bool "Samsung S3C2410, S3C2440" - select ARCH_SAMSUNG - select CPU_ARM920T - select GENERIC_GPIO - -config ARCH_S5PCxx - bool "Samsung S5PC110, S5PV210" - select ARCH_SAMSUNG - select CPU_V7 - select GENERIC_GPIO - -config ARCH_S3C64xx - bool "Samsung S3C64xx" - select ARCH_SAMSUNG - select CPU_V6 - select GENERIC_GPIO - -config ARCH_VERSATILE - bool "ARM Versatile boards (ARM926EJ-S)" - select GPIOLIB - select HAVE_CLK - select HAS_DEBUG_LL - -config ARCH_VEXPRESS - bool "ARM Vexpres boards" - select HAS_DEBUG_LL - select CPU_V7 - select ARM_AMBA - select AMBA_SP804 - select CLKDEV_LOOKUP - select COMMON_CLK - -config ARCH_TEGRA - bool "NVIDIA Tegra" - select CPU_V7 - select HAS_DEBUG_LL - select HW_HAS_PCI - select COMMON_CLK - select COMMON_CLK_OF_PROVIDER - select CLKDEV_LOOKUP - select GPIOLIB - select GPIO_TEGRA - select HAVE_DEFAULT_ENVIRONMENT_NEW - select HAVE_PBL_MULTI_IMAGES - select OFDEVICE - select OFTREE - 
select RELOCATABLE - select RESET_CONTROLLER - select PINCTRL - -config ARCH_UEMD - bool "RC Module UEMD Platform" - select CPU_ARM1176 - select COMMON_CLK - select COMMON_CLK_OF_PROVIDER - select CLKDEV_LOOKUP - select OFDEVICE - select OFTREE - select CLOCKSOURCE_UEMD - select HAS_DEBUG_LL - -config ARCH_ZYNQ - bool "Xilinx Zynq-based boards" - select HAS_DEBUG_LL + select HAVE_CONFIGURABLE_MEMORY_LAYOUT endchoice source arch/arm/cpu/Kconfig -source arch/arm/mach-at91/Kconfig -source arch/arm/mach-bcm283x/Kconfig -source arch/arm/mach-clps711x/Kconfig -source arch/arm/mach-davinci/Kconfig -source arch/arm/mach-digic/Kconfig -source arch/arm/mach-ep93xx/Kconfig -source arch/arm/mach-highbank/Kconfig -source arch/arm/mach-imx/Kconfig -source arch/arm/mach-mxs/Kconfig -source arch/arm/mach-mvebu/Kconfig -source arch/arm/mach-netx/Kconfig -source arch/arm/mach-nomadik/Kconfig -source arch/arm/mach-omap/Kconfig -source arch/arm/mach-pxa/Kconfig -source arch/arm/mach-rockchip/Kconfig -source arch/arm/mach-samsung/Kconfig -source arch/arm/mach-socfpga/Kconfig -source arch/arm/mach-versatile/Kconfig -source arch/arm/mach-vexpress/Kconfig -source arch/arm/mach-tegra/Kconfig -source arch/arm/mach-uemd/Kconfig -source arch/arm/mach-zynq/Kconfig +source arch/arm/mach-virt/Kconfig config ARM_ASM_UNIFIED bool @@ -292,20 +65,6 @@ config AEABI To use this you need GCC version 4.0.0 or later. -config THUMB2_BAREBOX - select ARM_ASM_UNIFIED - select AEABI - depends on !ARCH_TEGRA && !ARCH_AT91 - depends on CPU_V7 && !CPU_32v4T && !CPU_32v5 && !CPU_32v6 - bool "Compile barebox in thumb-2 mode (read help)" - help - This enables compilation of barebox in thumb-2 mode which generates - ~25% smaller binaries. ARM assembly code needs some fixups to be able - to work correctly in thumb-2 mode. the barebox core should have these - fixups since most assembly code is derived from the Kernel. However, - your board lowlevel init code may break in thumb-2 mode. You have been - warned. 
- config ARM_BOARD_APPEND_ATAG bool "Let board specific code to add ATAGs to be passed to the kernel" depends on ARM_LINUX @@ -315,6 +74,29 @@ config ARM_BOARD_APPEND_ATAG endmenu +choice + prompt "Barebox code model" + help + You should only select this option if you have a workload that + actually benefits from 64-bit processing or if your machine has + large memory. You will only be presented a single option in this + menu if your system does not support both 32-bit and 64-bit modes. + +config 32BIT + bool "32-bit barebox" + depends on CPU_SUPPORTS_32BIT_KERNEL && SYS_SUPPORTS_32BIT_KERNEL + help + Select this option if you want to build a 32-bit barebox. + +config 64BIT + bool "64-bit barebox" + depends on CPU_SUPPORTS_64BIT_KERNEL && SYS_SUPPORTS_64BIT_KERNEL + select ARCH_DMA_ADDR_T_64BIT + help + Select this option if you want to build a 64-bit barebox. + +endchoice + menu "ARM specific settings" config ARM_OPTIMZED_STRING_FUNCTIONS diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 5ccdb83..ad250c4 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -1,7 +1,6 @@ CPPFLAGS += -D__ARM__ -fno-strict-aliasing -# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: -CPPFLAGS +=$(call cc-option,-marm,) +CPPFLAGS +=$(call cc-option,-maarch64,) ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) CPPFLAGS += -mbig-endian @@ -13,91 +12,27 @@ AS += -EL LD += -EL endif -# Unaligned access is not supported when MMU is disabled, so given how -# at least some of the code would be executed with MMU off, lets be -# conservative and instruct the compiler not to generate any unaligned -# accesses -CFLAGS += -mno-unaligned-access - - # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. 
-arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) -arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) -arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t - -# This selects how we optimise for the processor. -tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi -tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale +arch-$(CONFIG_CPU_64v8) := -D__LINUX_ARM_ARCH__=8 $(call cc-option,-march=armv8-a) -ifeq ($(CONFIG_AEABI),y) -CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -else -CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) -endif +CFLAGS_ABI :=-mabi=lp64 ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif -ifeq ($(CONFIG_THUMB2_BAREBOX),y) -AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) -CFLAGS_THUMB2 :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) -AFLAGS_THUMB2 :=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb -endif +CPPFLAGS += $(CFLAGS_ABI) $(arch-y) $(tune-y) -CPPFLAGS += $(CFLAGS_ABI) $(arch-y) $(tune-y) -msoft-float $(CFLAGS_THUMB2) -AFLAGS += -include asm/unified.h -msoft-float $(AFLAGS_THUMB2) +AFLAGS += -include asm/unified.h # Machine directory name. This list is sorted alphanumerically # by CONFIG_* macro name. 
-machine-$(CONFIG_ARCH_AT91) := at91 -machine-$(CONFIG_ARCH_BCM283X) := bcm283x -machine-$(CONFIG_ARCH_CLPS711X) := clps711x -machine-$(CONFIG_ARCH_DAVINCI) := davinci -machine-$(CONFIG_ARCH_DIGIC) := digic -machine-$(CONFIG_ARCH_EP93XX) := ep93xx -machine-$(CONFIG_ARCH_HIGHBANK) := highbank -machine-$(CONFIG_ARCH_IMX) := imx -machine-$(CONFIG_ARCH_MXS) := mxs -machine-$(CONFIG_ARCH_MVEBU) := mvebu -machine-$(CONFIG_ARCH_NOMADIK) := nomadik -machine-$(CONFIG_ARCH_NETX) := netx -machine-$(CONFIG_ARCH_OMAP) := omap -machine-$(CONFIG_ARCH_PXA) := pxa -machine-$(CONFIG_ARCH_ROCKCHIP) := rockchip -machine-$(CONFIG_ARCH_SAMSUNG) := samsung -machine-$(CONFIG_ARCH_SOCFPGA) := socfpga -machine-$(CONFIG_ARCH_VERSATILE) := versatile -machine-$(CONFIG_ARCH_VEXPRESS) := vexpress -machine-$(CONFIG_ARCH_TEGRA) := tegra -machine-$(CONFIG_ARCH_UEMD) := uemd -machine-$(CONFIG_ARCH_ZYNQ) := zynq +machine-$(CONFIG_ARCH_VIRT) := virt -# Board directory name. This list is sorted alphanumerically -# by CONFIG_* macro name. -# -# DO NOT ADD NEW ENTRIES TO THIS LIST! -# Add to arch/arm/boards/Makefile instead. -# -# These are here only because they have a board specific config.h. -# TODO: Get rid of board specific config.h and move these to -# arch/arm/boards/Makefile aswell. 
-board-$(CONFIG_MACH_A9M2410) += a9m2410 -board-$(CONFIG_MACH_A9M2440) += a9m2440 -board-$(CONFIG_MACH_AT91RM9200EK) += at91rm9200ek -board-$(CONFIG_MACH_MINI2440) += friendlyarm-mini2440 -board-$(CONFIG_MACH_MINI6410) += friendlyarm-mini6410 -board-$(CONFIG_MACH_PCM027) += phytec-phycore-pxa270 -board-$(CONFIG_MACH_TINY210) += friendlyarm-tiny210 -board-$(CONFIG_MACH_TINY6410) += friendlyarm-tiny6410 - machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y)) ifeq ($(KBUILD_SRC),) @@ -141,106 +76,7 @@ endif barebox.s5p: $(KBUILD_BINARY) $(Q)scripts/s5p_cksum $< barebox.s5p -ifeq ($(CONFIG_ARCH_S5PCxx),y) -KBUILD_IMAGE := barebox.s5p -endif - -quiet_cmd_mlo ?= IFT $@ - cmd_mlo ?= scripts/omap_signGP -o MLO -l $(TEXT_BASE) -c $< - -MLO: $(KBUILD_BINARY) - $(call if_changed,mlo) - -ifeq ($(CONFIG_OMAP_BUILD_IFT),y) -KBUILD_IMAGE := MLO -endif - -quiet_cmd_davinci_ubl_image = UBL-IMG $@ - cmd_davinci_ubl_image = set -e; \ - scripts/mkublheader $< > $@; \ - cat $< >> $@ - -barebox.ubl: $(KBUILD_BINARY) FORCE - $(call if_changed,davinci_ubl_image) - -ifeq ($(CONFIG_ARCH_DAVINCI),y) -KBUILD_IMAGE := barebox.ubl -endif - -quiet_cmd_am35xx_spi_image = SPI-IMG $@ - cmd_am35xx_spi_image = scripts/mk-omap-image -s -a $(TEXT_BASE) $< > $@ - -barebox.spi: $(KBUILD_BINARY) FORCE - $(call if_changed,am35xx_spi_image) - -MLO.spi: MLO FORCE - $(call if_changed,am35xx_spi_image) - -ifeq ($(CONFIG_OMAP_BUILD_SPI),y) -KBUILD_IMAGE := MLO.spi -endif - -quiet_cmd_zynq_image = ZYNQ-IMG $@ - cmd_zynq_image = scripts/zynq_mkimage $< $@ - -barebox.zynq: $(KBUILD_BINARY) FORCE - $(call if_changed,zynq_image) - -ifeq ($(machine-y),zynq) -KBUILD_IMAGE := barebox.zynq -endif - -quiet_cmd_canon_a1100_image = DD $@ - cmd_canon_a1100_image = scripts/canon-a1100-image $< $@ || \ - echo "WARNING: Couldn't create Canon A1100 image due to previous errors." 
-barebox.canon-a1100.bin: $(KBUILD_BINARY) FORCE - $(call if_changed,canon_a1100_image) - -ifeq ($(CONFIG_MACH_CANON_A1100),y) -KBUILD_IMAGE := barebox.canon-a1100.bin -endif - -KWBIMAGE_OPTS = \ - -c -i $(srctree)/$(BOARD)/kwbimage.cfg -d $(TEXT_BASE) -e $(TEXT_BASE) - -quiet_cmd_kwbimage = KWB $@ - cmd_kwbimage = scripts/kwbimage -p $< $(KWBIMAGE_OPTS) -o $@ || \ - echo "WARNING: Couldn't create KWB image due to previous errors." - -quiet_cmd_kwbimage_uart = KWBUART $@ - cmd_kwbimage_uart = scripts/kwbimage -m uart -p $< $(KWBIMAGE_OPTS) -o $@ || \ - echo "WARNING Couldn't create KWB image due to previous errors." - -barebox.kwb: $(KBUILD_BINARY) FORCE - $(call if_changed,kwbimage) - -barebox.kwbuart: $(KBUILD_BINARY) FORCE - $(call if_changed,kwbimage_uart) - -ifeq ($(CONFIG_ARCH_MVEBU),y) -KBUILD_IMAGE := barebox.kwb barebox.kwbuart -endif - -barebox.imximg: $(KBUILD_BINARY) FORCE - $(call if_changed,imx_image,$(CFG_$(@F)),) - boarddir = $(srctree)/arch/arm/boards -imxcfg-$(CONFIG_MACH_FREESCALE_MX53_SMD) += $(boarddir)/freescale-mx53-smd/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_CCMX51) += $(boarddir)/ccxmx51/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_TX51) += $(boarddir)/karo-tx51/flash-header-karo-tx51.imxcfg -imxcfg-$(CONFIG_MACH_GUF_VINCELL) += $(boarddir)/guf-vincell/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX51SD) += $(boarddir)/eukrea_cpuimx51/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_FREESCALE_MX25_3STACK) += $(boarddir)/freescale-mx25-3ds/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_FREESCALE_MX35_3STACK) += $(boarddir)/freescale-mx35-3ds/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_TQMA53) += $(boarddir)/tqma53/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX25) += $(boarddir)/eukrea_cpuimx25/flash-header.imxcfg -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX35) += $(boarddir)/eukrea_cpuimx35/flash-header.imxcfg -imxcfg-$(CONFIG_TX53_REV_1011) += $(boarddir)/karo-tx53/flash-header-tx53-rev1011.imxcfg -imxcfg-$(CONFIG_TX53_REV_XX30) += 
$(boarddir)/karo-tx53/flash-header-tx53-revxx30.imxcfg -ifneq ($(imxcfg-y),) -CFG_barebox.imximg := $(imxcfg-y) -KBUILD_IMAGE := barebox.imximg -endif pbl := arch/arm/pbl $(pbl)/zbarebox.S $(pbl)/zbarebox.bin $(pbl)/zbarebox: barebox.bin FORCE @@ -249,19 +85,8 @@ $(pbl)/zbarebox.S $(pbl)/zbarebox.bin $(pbl)/zbarebox: barebox.bin FORCE archclean: $(MAKE) $(clean)=$(pbl) -dts := arch/arm/dts - -%.dtb: scripts - $(Q)$(MAKE) $(build)=$(dts) $(dts)/$@ - KBUILD_IMAGE ?= $(KBUILD_BINARY) -archprepare: maketools -maketools: - $(Q)$(MAKE) $(build)=arch/arm/tools include/generated/mach-types.h - -PHONY += maketools - ifneq ($(board-y),) BOARD := arch/arm/boards/$(board-y)/ else @@ -276,9 +101,6 @@ endif common-y += $(BOARD) arch/arm/boards/ $(MACH) common-y += arch/arm/lib/ arch/arm/cpu/ -common-y += arch/arm/crypto/ - -common-$(CONFIG_OFTREE) += arch/arm/dts/ lds-y := arch/arm/lib/barebox.lds diff --git a/arch/arm/boards/Makefile b/arch/arm/boards/Makefile index 9241b66..f9cb059 100644 --- a/arch/arm/boards/Makefile +++ b/arch/arm/boards/Makefile @@ -135,3 +135,4 @@ obj-$(CONFIG_MACH_VIRT2REAL) += virt2real/ obj-$(CONFIG_MACH_ZEDBOARD) += avnet-zedboard/ obj-$(CONFIG_MACH_ZYLONITE) += zylonite/ obj-$(CONFIG_MACH_VARISCITE_MX6) += variscite-mx6/ +obj-$(CONFIG_MACH_VIRT) += virt/ diff --git a/arch/arm/boards/virt/Kconfig b/arch/arm/boards/virt/Kconfig new file mode 100644 index 0000000..b239127 --- /dev/null +++ b/arch/arm/boards/virt/Kconfig @@ -0,0 +1,8 @@ + +if MACH_VIRT + +config ARCH_TEXT_BASE + hex + default 0x40000000 + +endif diff --git a/arch/arm/boards/virt/Makefile b/arch/arm/boards/virt/Makefile new file mode 100644 index 0000000..eb072c0 --- /dev/null +++ b/arch/arm/boards/virt/Makefile @@ -0,0 +1 @@ +obj-y += init.o diff --git a/arch/arm/boards/virt/env/bin/_update b/arch/arm/boards/virt/env/bin/_update new file mode 100644 index 0000000..014bce3 --- /dev/null +++ b/arch/arm/boards/virt/env/bin/_update @@ -0,0 +1,36 @@ +#!/bin/sh + +if [ -z "$part" -o -z 
"$image" ]; then + echo "define \$part and \$image" + exit 1 +fi + +if [ ! -e "$part" ]; then + echo "Partition $part does not exist" + exit 1 +fi + +if [ $# = 1 ]; then + image=$1 +fi + +if [ x$ip = xdhcp ]; then + dhcp +fi + +ping $eth0.serverip +if [ $? -ne 0 ] ; then + echo "update aborted" + exit 1 +fi + +unprotect $part + +echo +echo "erasing partition $part" +erase $part + +echo +echo "flashing $image to $part" +echo +tftp $image $part diff --git a/arch/arm/boards/virt/env/bin/boot b/arch/arm/boards/virt/env/bin/boot new file mode 100644 index 0000000..3859dc1 --- /dev/null +++ b/arch/arm/boards/virt/env/bin/boot @@ -0,0 +1,38 @@ +#!/bin/sh + +. /env/config + +if [ x$1 = xflash ]; then + root=flash + kernel=flash +fi + +if [ x$1 = xnet ]; then + root=net + kernel=net +fi + +if [ x$ip = xdhcp ]; then + bootargs="$bootargs ip=dhcp" +else + bootargs="$bootargs ip=$eth0.ipaddr:$eth0.serverip:$eth0.gateway:$eth0.netmask:::" +fi + +if [ x$root = xflash ]; then + bootargs="$bootargs root=$rootpart rootfstype=jffs2" +else + bootargs="$bootargs root=/dev/nfs nfsroot=$eth0.serverip:$nfsroot,v3,tcp" +fi + +bootargs="$bootargs mtdparts=physmap-flash.0:$mtdparts" + +if [ $kernel = net ]; then + if [ x$ip = xdhcp ]; then + dhcp + fi + tftp $uimage uImage || exit 1 + bootm uImage +else + bootm /dev/nor0.kernel +fi + diff --git a/arch/arm/boards/virt/env/bin/init b/arch/arm/boards/virt/env/bin/init new file mode 100644 index 0000000..48e2139 --- /dev/null +++ b/arch/arm/boards/virt/env/bin/init @@ -0,0 +1,20 @@ +#!/bin/sh + +PATH=/env/bin +export PATH + +. /env/config +addpart /dev/nor0 $mtdparts + +echo +echo -n "Hit any key to stop autoboot: " +timeout -a $autoboot_timeout +if [ $? 
!= 0 ]; then + echo + echo "type update_kernel [<imagename>] to update kernel into flash" + echo "type udate_root [<imagename>] to update rootfs into flash" + echo + exit +fi + +boot \ No newline at end of file diff --git a/arch/arm/boards/virt/env/bin/update_kernel b/arch/arm/boards/virt/env/bin/update_kernel new file mode 100644 index 0000000..1ad95fc --- /dev/null +++ b/arch/arm/boards/virt/env/bin/update_kernel @@ -0,0 +1,8 @@ +#!/bin/sh + +. /env/config + +image=$uimage +part=/dev/nor0.kernel + +. /env/bin/_update $1 diff --git a/arch/arm/boards/virt/env/bin/update_root b/arch/arm/boards/virt/env/bin/update_root new file mode 100644 index 0000000..b757a5b --- /dev/null +++ b/arch/arm/boards/virt/env/bin/update_root @@ -0,0 +1,8 @@ +#!/bin/sh + +. /env/config + +image=$jffs2 +part=/dev/nor0.root + +. /env/bin/_update $1 diff --git a/arch/arm/boards/virt/env/config b/arch/arm/boards/virt/env/config new file mode 100644 index 0000000..6c0abda --- /dev/null +++ b/arch/arm/boards/virt/env/config @@ -0,0 +1,38 @@ +#!/bin/sh + +# use 'dhcp' to do dhcp in barebox and in kernel +# use 'none' if you want to skip kernel ip autoconfiguration +ip=dhcp +global.dhcp.vendor_id=barebox-${global.hostname} + +# or set your networking parameters here +#eth0.ipaddr=a.b.c.d +#eth0.netmask=a.b.c.d +#eth0.gateway=a.b.c.d +#eth0.serverip=a.b.c.d + +# can be either 'nfs', 'tftp' or 'nor' +kernel_loc=tftp +# can be either 'net', 'nor' or 'initrd' +rootfs_loc=initrd + +# can be either 'jffs2' or 'ubifs' +rootfs_type=ubifs +rootfsimage=root.$rootfs_type + +kernelimage=zImage +#kernelimage=uImage +#kernelimage=Image +#kernelimage=Image.lzo + +nfsroot="$eth0.serverip:/opt/work/busybox/arm9/rootfs_arm" + +nor_parts="256k(barebox)ro,64k(bareboxenv),1536k(kernel),-(root)" +rootfs_mtdblock_nor=3 + +autoboot_timeout=3 + +bootargs="console=ttyAMA0,115200n8 CONSOLE=/dev/ttyAMA0" + +# set a fancy prompt (if support is compiled in) +PS1="\e[1;31m[barebox@\h]:\w\e[0m\n# " diff --git 
a/arch/arm/boards/virt/env/init/mtdparts-nor b/arch/arm/boards/virt/env/init/mtdparts-nor new file mode 100644 index 0000000..3307596 --- /dev/null +++ b/arch/arm/boards/virt/env/init/mtdparts-nor @@ -0,0 +1,11 @@ +#!/bin/sh + +if [ "$1" = menu ]; then + init-menu-add-entry "$0" "NOR partitions" + exit +fi + +mtdparts="2048k@0(nor0.barebox)ro,256k(nor0.barebox-env),256k(nor0.barebox-logo),256k(nor0.barebox-logo2),5120k(nor0.kernel),-(nor0.root)" +kernelname="application-flash" + +mtdparts-add -d nor0 -k ${kernelname} -p ${mtdparts} diff --git a/arch/arm/boards/virt/init.c b/arch/arm/boards/virt/init.c new file mode 100644 index 0000000..9626067 --- /dev/null +++ b/arch/arm/boards/virt/init.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016 Raphaël Poggi <poggi.raph@xxxxxxxxx> + * + * GPLv2 only + */ + +#include <common.h> +#include <init.h> +#include <asm/armlinux.h> +#include <asm/system_info.h> +#include <mach/devices.h> +#include <environment.h> +#include <linux/sizes.h> +#include <io.h> +#include <globalvar.h> +#include <asm/mmu.h> + +static int virt_mem_init(void) +{ + virt_add_ddram(SZ_512M); + + add_cfi_flash_device(0, 0x00000000, SZ_4M, 0); + + devfs_add_partition("nor0", 0x00000, 0x40000, DEVFS_PARTITION_FIXED, "self0"); + devfs_add_partition("nor0", 0x40000, 0x20000, DEVFS_PARTITION_FIXED, "env0"); + + return 0; +} +mem_initcall(virt_mem_init); + +static int virt_console_init(void) +{ + virt_register_uart(0); + + return 0; +} +console_initcall(virt_console_init); + +static int virt_core_init(void) +{ + char *hostname = "virt"; + + if (cpu_is_cortex_a53()) + hostname = "virt-a53"; + else if (cpu_is_cortex_a57()) + hostname = "virt-a57"; + + barebox_set_model("ARM QEMU virt"); + barebox_set_hostname(hostname); + + return 0; +} +postcore_initcall(virt_core_init); + +static int virt_mmu_enable(void) +{ + /* Mapping all periph range */ + arch_remap_range(0x09000000, 0x01000000, PMD_SECT_DEF_CACHED); + + /* Mapping all flash range */ + arch_remap_range(0x00000000, 
0x08000000, PMD_SECT_DEF_CACHED); + + mmu_enable(); + + return 0; +} +postmmu_initcall(virt_mmu_enable); diff --git a/arch/arm/configs/virt_defconfig b/arch/arm/configs/virt_defconfig new file mode 100644 index 0000000..ae928a2 --- /dev/null +++ b/arch/arm/configs/virt_defconfig @@ -0,0 +1,53 @@ +CONFIG_AEABI=y +CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS=y +CONFIG_BAREBOX_MAX_IMAGE_SIZE=0x05000000 +CONFIG_BAREBOX_MAX_BARE_INIT_SIZE=0x01000000 +CONFIG_MEMORY_LAYOUT_FIXED=y +CONFIG_STACK_BASE=0x60000000 +CONFIG_MALLOC_BASE=0x50000000 +CONFIG_PROMPT="virt: " +CONFIG_HUSH_FANCY_PROMPT=y +CONFIG_CMDLINE_EDITING=y +CONFIG_AUTO_COMPLETE=y +CONFIG_MENU=y +CONFIG_PASSWORD=y +CONFIG_PARTITION=y +CONFIG_DEFAULT_ENVIRONMENT_GENERIC_NEW=y +CONFIG_DEFAULT_ENVIRONMENT_PATH="arch/arm/boards/virt/env" +CONFIG_DEBUG_INFO=y +# CONFIG_CMD_ARM_CPUINFO is not set +CONFIG_LONGHELP=y +# CONFIG_CMD_BOOTM is not set +# CONFIG_CMD_BOOTU is not set +# CONFIG_CMD_MOUNT is not set +# CONFIG_CMD_UMOUNT is not set +# CONFIG_CMD_CAT is not set +# CONFIG_CMD_CD is not set +# CONFIG_CMD_CP is not set +# CONFIG_CMD_LS is not set +# CONFIG_CMD_MKDIR is not set +# CONFIG_CMD_PWD is not set +# CONFIG_CMD_RM is not set +# CONFIG_CMD_RMDIR is not set +# CONFIG_CMD_FALSE is not set +# CONFIG_CMD_TEST is not set +# CONFIG_CMD_TRUE is not set +# CONFIG_CMD_CLEAR is not set +# CONFIG_CMD_ECHO is not set +CONFIG_CMD_CRC=y +CONFIG_CMD_CRC_CMP=y +# CONFIG_CMD_MD is not set +# CONFIG_CMD_MEMCMP is not set +# CONFIG_CMD_MEMCPY is not set +# CONFIG_CMD_MEMSET is not set +# CONFIG_CMD_MW is not set +CONFIG_SERIAL_AMBA_PL011=y +# CONFIG_SPI is not set +CONFIG_MTD=y +CONFIG_DRIVER_CFI=y +CONFIG_DRIVER_CFI_BANK_WIDTH_8=y +CONFIG_CFI_BUFFER_WRITE=y +CONFIG_NAND=y +# CONFIG_FS_RAMFS is not set +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/cpu/Kconfig b/arch/arm/cpu/Kconfig index 4f5d9b6..86d64a4 100644 --- a/arch/arm/cpu/Kconfig +++ b/arch/arm/cpu/Kconfig @@ -1,6 +1,6 @@ comment "Processor 
Type" -config CPU_32 +config CPU_64 bool default y @@ -8,90 +8,13 @@ config CPU_32 # which CPUs we support in the kernel image, and the compiler instruction # optimiser behaviour. -# ARM1176 -config CPU_ARM1176 +# ARMv8 +config CPU_V8 bool - select CPU_V6 + select CPU_64v8 + select CPU_SUPPORTS_64BIT_KERNEL -# ARM920T -config CPU_ARM920T - bool - select CPU_32v4T - help - The ARM920T is licensed to be produced by numerous vendors, - and is used in the Maverick EP9312 and the Samsung S3C2410. - - More information on the Maverick EP9312 at - <http://www.cirrus.com/en/products/ep9312.html>. - - Say Y if you want support for the ARM920T processor. - Otherwise, say N. - -# ARM926T -config CPU_ARM926T - bool - select CPU_32v5 - help - This is a variant of the ARM920. It has slightly different - instruction sequences for cache and TLB operations. Curiously, - there is no documentation on it at the ARM corporate website. - - Say Y if you want support for the ARM926T processor. - Otherwise, say N. - -# ARM946E-S -config CPU_ARM946E - bool - select CPU_32v4T - help - ARM946E-S is a member of the ARM9E-S family of high- - performance, 32-bit system-on-chip processor solutions. - The TCM and ARMv5TE 32-bit instruction set is supported. - - Say Y if you want support for the ARM946E-S processor. - Otherwise, say N. - -# Feroceon -config CPU_FEROCEON - bool - select CPU_32v5 - help - This is a Marvell implementation of an ARMv5TE compatible - ARM core, used in the Marvell Kirkwood SoC family. - -# ARMv6 -config CPU_V6 - bool - select CPU_32v6 - -# ARMv7 -config CPU_V7 - bool - select CPU_32v7 - -config CPU_XSC3 - bool - select CPU_32v4T - help - Select code specific to PXA3xx variants - -# Xscale PXA25x, PXA27x -config CPU_XSCALE - bool - select CPU_32v4T - -# Figure out what processor architecture version we should be using. -# This defines the compiler instruction set which depends on the machine type. 
-config CPU_32v4T - bool - -config CPU_32v5 - bool - -config CPU_32v6 - bool - -config CPU_32v7 +config CPU_64v8 bool comment "processor features" @@ -117,10 +40,15 @@ config BOOT_ENDIANNESS_SWITCH Currently implemented only by "bootz" command. -config ARCH_HAS_L2X0 +config SYS_SUPPORTS_32BIT_KERNEL + bool + +config SYS_SUPPORTS_64BIT_KERNEL + bool + +config CPU_SUPPORTS_32BIT_KERNEL bool -config CACHE_L2X0 - bool "Enable L2x0 PrimeCell" - depends on MMU && ARCH_HAS_L2X0 +config CPU_SUPPORTS_64BIT_KERNEL + bool diff --git a/arch/arm/cpu/Makefile b/arch/arm/cpu/Makefile index 854df60e..fe6e7af 100644 --- a/arch/arm/cpu/Makefile +++ b/arch/arm/cpu/Makefile @@ -1,38 +1,25 @@ obj-y += cpu.o obj-$(CONFIG_ARM_EXCEPTIONS) += exceptions.o obj-$(CONFIG_ARM_EXCEPTIONS) += interrupts.o -obj-y += start.o setupc.o entry.o +obj-y += start.o entry.o # # Any variants can be called as start-armxyz.S # obj-$(CONFIG_CMD_ARM_CPUINFO) += cpuinfo.o obj-$(CONFIG_CMD_ARM_MMUINFO) += mmuinfo.o -obj-$(CONFIG_OFDEVICE) += dtb.o -obj-$(CONFIG_MMU) += mmu.o cache.o mmu-early.o -pbl-$(CONFIG_MMU) += mmu-early.o +obj-$(CONFIG_MMU) += mmu.o cache.o -ifeq ($(CONFIG_MMU),) -obj-y += no-mmu.o -endif - -obj-$(CONFIG_CPU_32v4T) += cache-armv4.o -pbl-$(CONFIG_CPU_32v4T) += cache-armv4.o -obj-$(CONFIG_CPU_32v5) += cache-armv5.o -pbl-$(CONFIG_CPU_32v5) += cache-armv5.o -obj-$(CONFIG_CPU_32v6) += cache-armv6.o -pbl-$(CONFIG_CPU_32v6) += cache-armv6.o -AFLAGS_cache-armv7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_CPU_32v7) += cache-armv7.o -AFLAGS_pbl-cache-armv7.o :=-Wa,-march=armv7-a -pbl-$(CONFIG_CPU_32v7) += cache-armv7.o -obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +AFLAGS_cache-armv8.o :=-Wa,-march=armv8-a +obj-$(CONFIG_CPU_64v8) += cache-armv8.o +AFLAGS_pbl-cache-armv8.o :=-Wa,-march=armv8-a +pbl-$(CONFIG_CPU_64v8) += cache-armv8.o pbl-y += setupc.o entry.o pbl-$(CONFIG_PBL_SINGLE_IMAGE) += start-pbl.o pbl-$(CONFIG_PBL_MULTI_IMAGES) += uncompress.o -obj-y += common.o cache.o -pbl-y += common.o cache.o +obj-y 
+= cache.o +pbl-y += cache.o lwl-y += lowlevel.o diff --git a/arch/arm/cpu/cache-armv8.S b/arch/arm/cpu/cache-armv8.S new file mode 100644 index 0000000..82b2f81 --- /dev/null +++ b/arch/arm/cpu/cache-armv8.S @@ -0,0 +1,168 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@xxxxxxxxxxxxxx> + * + * This file is based on sample code from ARMv8 ARM. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <config.h> +#include <linux/linkage.h> +#include <init.h> + +/* + * void v8_flush_dcache_level(level) + * + * clean and invalidate one level cache. + * + * x0: cache level + * x1: 0 flush & invalidate, 1 invalidate only + * x2~x9: clobbered + */ +.section .text.v8_flush_dcache_level +ENTRY(v8_flush_dcache_level) + lsl x12, x0, #1 + msr csselr_el1, x12 /* select cache level */ + isb /* sync change of cssidr_el1 */ + mrs x6, ccsidr_el1 /* read the new cssidr_el1 */ + and x2, x6, #7 /* x2 <- log2(cache line size)-4 */ + add x2, x2, #4 /* x2 <- log2(cache line size) */ + mov x3, #0x3ff + and x3, x3, x6, lsr #3 /* x3 <- max number of #ways */ + clz w5, w3 /* bit position of #ways */ + mov x4, #0x7fff + and x4, x4, x6, lsr #13 /* x4 <- max number of #sets */ + /* x12 <- cache level << 1 */ + /* x2 <- line length offset */ + /* x3 <- number of cache ways - 1 */ + /* x4 <- number of cache sets - 1 */ + /* x5 <- bit position of #ways */ + +loop_set: + mov x6, x3 /* x6 <- working copy of #ways */ +loop_way: + lsl x7, x6, x5 + orr x9, x12, x7 /* map way and level to cisw value */ + lsl x7, x4, x2 + orr x9, x9, x7 /* map set number to cisw value */ + tbz w1, #0, 1f + dc isw, x9 + b 2f +1: dc cisw, x9 /* clean & invalidate by set/way */ +2: subs x6, x6, #1 /* decrement the way */ + b.ge loop_way + subs x4, x4, #1 /* decrement the set */ + b.ge loop_set + + ret +ENDPROC(v8_flush_dcache_level) + +/* + * void v8_flush_dcache_all(int invalidate_only) + * + * x0: 0 flush & invalidate, 1 invalidate only + * + * clean and invalidate all data cache by SET/WAY. 
+ */ +.section .text.v8_dcache_all +ENTRY(v8_dcache_all) + mov x1, x0 + dsb sy + mrs x10, clidr_el1 /* read clidr_el1 */ + lsr x11, x10, #24 + and x11, x11, #0x7 /* x11 <- loc */ + cbz x11, finished /* if loc is 0, exit */ + mov x15, x30 + mov x0, #0 /* start flush at cache level 0 */ + /* x0 <- cache level */ + /* x10 <- clidr_el1 */ + /* x11 <- loc */ + /* x15 <- return address */ + +loop_level: + lsl x12, x0, #1 + add x12, x12, x0 /* x0 <- tripled cache level */ + lsr x12, x10, x12 + and x12, x12, #7 /* x12 <- cache type */ + cmp x12, #2 + b.lt skip /* skip if no cache or icache */ + bl v8_flush_dcache_level /* x1 = 0 flush, 1 invalidate */ +skip: + add x0, x0, #1 /* increment cache level */ + cmp x11, x0 + b.gt loop_level + + mov x0, #0 + msr csselr_el1, x0 /* restore csselr_el1 */ + dsb sy + isb + mov x30, x15 + +finished: + ret +ENDPROC(v8_dcache_all) + +.section .text.v8_flush_dcache_all +ENTRY(v8_flush_dcache_all) + mov x16, x30 + mov x0, #0 + bl v8_dcache_all + mov x30, x16 + ret +ENDPROC(v8_flush_dcache_all) + +.section .text.v8_invalidate_dcache_all +ENTRY(v8_invalidate_dcache_all) + mov x16, x30 + mov x0, #0x1 + bl v8_dcache_all + mov x30, x16 + ret +ENDPROC(v8_invalidate_dcache_all) + +/* + * void v8_flush_dcache_range(start, end) + * + * clean & invalidate data cache in the range + * + * x0: start address + * x1: end address + */ +.section .text.v8_flush_dcache_range +ENTRY(v8_flush_dcache_range) + mrs x3, ctr_el0 + lsr x3, x3, #16 + and x3, x3, #0xf + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 /* clean & invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(v8_flush_dcache_range) + +/* + * void v8_invalidate_icache_all(void) + * + * invalidate all tlb entries. 
+ */ +.section .text.v8_invalidate_icache_all +ENTRY(v8_invalidate_icache_all) + ic ialluis + isb sy + ret +ENDPROC(v8_invalidate_icache_all) + +.section .text.v8_flush_l3_cache +ENTRY(v8_flush_l3_cache) + mov x0, #0 /* return status as success */ + ret +ENDPROC(v8_flush_l3_cache) + .weak v8_flush_l3_cache diff --git a/arch/arm/cpu/cache.c b/arch/arm/cpu/cache.c index 27ead1c..8465cf9 100644 --- a/arch/arm/cpu/cache.c +++ b/arch/arm/cpu/cache.c @@ -32,10 +32,7 @@ struct cache_fns *cache_fns; .mmu_cache_flush = arch##_mmu_cache_flush, \ }; -DEFINE_CPU_FNS(v4) -DEFINE_CPU_FNS(v5) -DEFINE_CPU_FNS(v6) -DEFINE_CPU_FNS(v7) +DEFINE_CPU_FNS(v8) void __dma_clean_range(unsigned long start, unsigned long end) { @@ -78,29 +75,9 @@ void __mmu_cache_flush(void) int arm_set_cache_functions(void) { switch (cpu_architecture()) { -#ifdef CONFIG_CPU_32v4T - case CPU_ARCH_ARMv4T: - cache_fns = &cache_fns_armv4; + case CPU_ARCH_ARMv8: + cache_fns = &cache_fns_armv8; break; -#endif -#ifdef CONFIG_CPU_32v5 - case CPU_ARCH_ARMv5: - case CPU_ARCH_ARMv5T: - case CPU_ARCH_ARMv5TE: - case CPU_ARCH_ARMv5TEJ: - cache_fns = &cache_fns_armv5; - break; -#endif -#ifdef CONFIG_CPU_32v6 - case CPU_ARCH_ARMv6: - cache_fns = &cache_fns_armv6; - break; -#endif -#ifdef CONFIG_CPU_32v7 - case CPU_ARCH_ARMv7: - cache_fns = &cache_fns_armv7; - break; -#endif default: while(1); } @@ -115,49 +92,19 @@ int arm_set_cache_functions(void) void arm_early_mmu_cache_flush(void) { switch (arm_early_get_cpu_architecture()) { -#ifdef CONFIG_CPU_32v4T - case CPU_ARCH_ARMv4T: - v4_mmu_cache_flush(); - return; -#endif -#ifdef CONFIG_CPU_32v5 - case CPU_ARCH_ARMv5: - case CPU_ARCH_ARMv5T: - case CPU_ARCH_ARMv5TE: - case CPU_ARCH_ARMv5TEJ: - v5_mmu_cache_flush(); + case CPU_ARCH_ARMv8: +// v7_mmu_cache_flush(); return; -#endif -#ifdef CONFIG_CPU_32v6 - case CPU_ARCH_ARMv6: - v6_mmu_cache_flush(); - return; -#endif -#ifdef CONFIG_CPU_32v7 - case CPU_ARCH_ARMv7: - v7_mmu_cache_flush(); - return; -#endif } } -void 
v7_mmu_cache_invalidate(void); +//void v7_mmu_cache_invalidate(void); void arm_early_mmu_cache_invalidate(void) { switch (arm_early_get_cpu_architecture()) { - case CPU_ARCH_ARMv4T: - case CPU_ARCH_ARMv5: - case CPU_ARCH_ARMv5T: - case CPU_ARCH_ARMv5TE: - case CPU_ARCH_ARMv5TEJ: - case CPU_ARCH_ARMv6: - asm volatile("mcr p15, 0, %0, c7, c6, 0\n" : : "r"(0)); - return; -#ifdef CONFIG_CPU_32v7 - case CPU_ARCH_ARMv7: - v7_mmu_cache_invalidate(); + case CPU_ARCH_ARMv8: +// v7_mmu_cache_invalidate(); return; -#endif } } diff --git a/arch/arm/cpu/cpu.c b/arch/arm/cpu/cpu.c index eb12166..19cd944 100644 --- a/arch/arm/cpu/cpu.c +++ b/arch/arm/cpu/cpu.c @@ -40,11 +40,8 @@ */ void icache_enable(void) { - u32 r; - - r = get_cr(); - r |= CR_I; - set_cr(r); + v8_invalidate_icache_all(); + set_sctlr(get_sctlr() | CR_I); } /** @@ -52,11 +49,7 @@ void icache_enable(void) */ void icache_disable(void) { - u32 r; - - r = get_cr(); - r &= ~CR_I; - set_cr(r); + set_sctlr(get_sctlr() & ~CR_I); } /** @@ -65,26 +58,7 @@ void icache_disable(void) */ int icache_status(void) { - return (get_cr () & CR_I) != 0; -} - -/* - * SoC like the ux500 have the l2x0 always enable - * with or without MMU enable - */ -struct outer_cache_fns outer_cache; - -/* - * Clean and invalide caches, disable MMU - */ -void mmu_disable(void) -{ - __mmu_cache_flush(); - if (outer_cache.disable) { - outer_cache.flush_all(); - outer_cache.disable(); - } - __mmu_cache_off(); + return (get_sctlr() & CR_I) != 0; } /** @@ -96,18 +70,8 @@ void mmu_disable(void) */ static void arch_shutdown(void) { - uint32_t r; - mmu_disable(); flush_icache(); - /* - * barebox normally does not use interrupts, but some functionalities - * (eg. OMAP4_USBBOOT) require them enabled. So be sure interrupts are - * disabled before exiting. 
- */ - __asm__ __volatile__("mrs %0, cpsr" : "=r"(r)); - r |= PSR_I_BIT; - __asm__ __volatile__("msr cpsr, %0" : : "r"(r)); } archshutdown_exitcall(arch_shutdown); diff --git a/arch/arm/cpu/cpuinfo.c b/arch/arm/cpu/cpuinfo.c index 8b22e9b..2306101 100644 --- a/arch/arm/cpu/cpuinfo.c +++ b/arch/arm/cpu/cpuinfo.c @@ -21,21 +21,10 @@ #include <complete.h> #define CPU_ARCH_UNKNOWN 0 -#define CPU_ARCH_ARMv3 1 -#define CPU_ARCH_ARMv4 2 -#define CPU_ARCH_ARMv4T 3 -#define CPU_ARCH_ARMv5 4 -#define CPU_ARCH_ARMv5T 5 -#define CPU_ARCH_ARMv5TE 6 -#define CPU_ARCH_ARMv5TEJ 7 -#define CPU_ARCH_ARMv6 8 -#define CPU_ARCH_ARMv7 9 - -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A7 0xC070 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 +#define CPU_ARCH_ARMv8 10 + +#define ARM_CPU_PART_CORTEX_A53 0xD034 +#define ARM_CPU_PART_CORTEX_A57 0xD070 static void decode_cache(unsigned long size) { @@ -61,22 +50,22 @@ static int do_cpuinfo(int argc, char *argv[]) int cpu_arch; __asm__ __volatile__( - "mrc p15, 0, %0, c0, c0, 0 @ read control reg\n" + "mrs %0, midr_el1\n" : "=r" (mainid) : : "memory"); - __asm__ __volatile__( - "mrc p15, 0, %0, c0, c0, 1 @ read control reg\n" - : "=r" (cache) - : - : "memory"); - - __asm__ __volatile__( - "mrc p15, 0, %0, c1, c0, 0 @ read control reg\n" - : "=r" (cr) - : - : "memory"); +// __asm__ __volatile__( +// "mrc p15, 0, %0, c0, c0, 1 @ read control reg\n" +// : "=r" (cache) +// : +// : "memory"); +// +// __asm__ __volatile__( +// "mrc p15, 0, %0, c1, c0, 0 @ read control reg\n" +// : "=r" (cr) +// : +// : "memory"); switch (mainid >> 24) { case 0x41: @@ -111,8 +100,8 @@ static int do_cpuinfo(int argc, char *argv[]) /* Revised CPUID format. 
Read the Memory Model Feature * Register 0 and check for VMSAv7 or PMSAv7 */ - asm("mrc p15, 0, %0, c0, c1, 4" - : "=r" (mmfr0)); +// asm("mrc p15, 0, %0, c0, c1, 4" +// : "=r" (mmfr0)); if ((mmfr0 & 0x0000000f) >= 0x00000003 || (mmfr0 & 0x000000f0) >= 0x00000030) cpu_arch = CPU_ARCH_ARMv7; @@ -152,6 +141,9 @@ static int do_cpuinfo(int argc, char *argv[]) case CPU_ARCH_ARMv7: architecture = "v7"; break; + case CPU_ARCH_ARMv8: + architecture = "v8"; + break; case CPU_ARCH_UNKNOWN: default: architecture = "Unknown"; @@ -181,29 +173,31 @@ static int do_cpuinfo(int argc, char *argv[]) case ARM_CPU_PART_CORTEX_A15: part = "Cortex-A15"; break; + case ARM_CPU_PART_CORTEX_A53: + part = "Cortex-A53"; default: part = "unknown"; } printf("core: %s r%up%u\n", part, major, minor); } - if (cache & (1 << 24)) { - /* separate I/D cache */ - printf("I-cache: "); - decode_cache(cache & 0xfff); - printf("D-cache: "); - decode_cache((cache >> 12) & 0xfff); - } else { - /* unified I/D cache */ - printf("cache: "); - decode_cache(cache & 0xfff); - } - - printf("Control register: "); - for (i = 0; i < ARRAY_SIZE(crbits); i++) - if (cr & (1 << i)) - printf("%s ", crbits[i]); - printf("\n"); +// if (cache & (1 << 24)) { +// /* separate I/D cache */ +// printf("I-cache: "); +// decode_cache(cache & 0xfff); +// printf("D-cache: "); +// decode_cache((cache >> 12) & 0xfff); +// } else { +// /* unified I/D cache */ +// printf("cache: "); +// decode_cache(cache & 0xfff); +// } + +// printf("Control register: "); +// for (i = 0; i < ARRAY_SIZE(crbits); i++) +// if (cr & (1 << i)) +// printf("%s ", crbits[i]); +// printf("\n"); return 0; } diff --git a/arch/arm/cpu/entry.c b/arch/arm/cpu/entry.c index 0cdcfec..a029f09 100644 --- a/arch/arm/cpu/entry.c +++ b/arch/arm/cpu/entry.c @@ -1,7 +1,6 @@ #include <types.h> #include <asm/cache.h> -#include <asm/barebox-arm.h> #include "entry.h" @@ -24,10 +23,10 @@ * be fine. 
*/ -void __naked __noreturn barebox_arm_entry(unsigned long membase, +void __noreturn barebox_arm_entry(unsigned long membase, unsigned long memsize, void *boarddata) { - arm_setup_stack(arm_mem_stack(membase, membase + memsize) + STACK_SIZE - 16); + arm_setup_stack(membase + memsize - 16); arm_early_mmu_cache_invalidate(); if (IS_ENABLED(CONFIG_PBL_MULTI_IMAGES)) diff --git a/arch/arm/cpu/exceptions.S b/arch/arm/cpu/exceptions.S index eda0d6a..5812025 100644 --- a/arch/arm/cpu/exceptions.S +++ b/arch/arm/cpu/exceptions.S @@ -1,220 +1,119 @@ -#include <config.h> -#include <linux/linkage.h> -#include <asm-generic/memory_layout.h> - /* - ************************************************************************* - * - * Interrupt handling + * (C) Copyright 2013 + * David Feng <fenghua@xxxxxxxxxxxxxx> * - ************************************************************************* + * SPDX-License-Identifier: GPL-2.0+ */ -@ -@ IRQ stack frame. -@ -#define S_FRAME_SIZE 72 - -#define S_OLD_R0 68 -#define S_PSR 64 -#define S_PC 60 -#define S_LR 56 -#define S_SP 52 - -#define S_IP 48 -#define S_FP 44 -#define S_R10 40 -#define S_R9 36 -#define S_R8 32 -#define S_R7 28 -#define S_R6 24 -#define S_R5 20 -#define S_R4 16 -#define S_R3 12 -#define S_R2 8 -#define S_R1 4 -#define S_R0 0 - -#define MODE_SVC 0x13 +#include <config.h> +#include <asm/ptrace.h> +#include <linux/linkage.h> /* - * use bad_save_user_regs for abort/prefetch/undef/swi ... - * use irq_save_user_regs / irq_restore_user_regs for IRQ/FIQ handling + * Enter Exception. + * This will save the processor state that is ELR/X0~X30 + * to the stack frame. 
*/ - - .macro bad_save_user_regs - sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - r12} @ Calling r0-r12 - ldr r2, =abort_stack - ldmia r2, {r2 - r3} @ get pc, cpsr - add r0, sp, #S_FRAME_SIZE @ restore sp_SVC - - add r5, sp, #S_SP - mov r1, lr - stmia r5, {r0 - r3} @ save sp_SVC, lr_SVC, pc, cpsr - mov r0, sp - .endm - - .macro irq_save_user_regs - sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - r12} @ Calling r0-r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Calling SP, LR - str lr, [r8, #0] @ Save calling PC - mrs r6, spsr - str r6, [r8, #4] @ Save CPSR - str r0, [r8, #8] @ Save OLD_R0 - mov r0, sp - .endm - - .macro irq_restore_user_regs - ldmia sp, {r0 - lr}^ @ Calling r0 - lr - mov r0, r0 - ldr lr, [sp, #S_PC] @ Get PC - add sp, sp, #S_FRAME_SIZE - subs pc, lr, #4 @ return & move spsr_svc into cpsr - .endm - - .macro get_bad_stack - ldr r13, =abort_stack - str lr, [r13] @ save caller lr / spsr - mrs lr, spsr - str lr, [r13, #4] - - mov r13, #MODE_SVC @ prepare SVC-Mode - @ msr spsr_c, r13 - msr spsr, r13 - mov lr, pc - movs pc, lr - .endm - - .macro try_data_abort - ldr r13, =arm_ignore_data_abort @ check try mode - ldr r13, [r13] - cmp r13, #0 - beq do_abort_\@ - ldr r13, =arm_data_abort_occurred - str r13, [r13] - mrs r13, spsr @ read saved CPSR - tst r13, #1<<5 @ check Thumb mode - subeq lr, #4 @ next ARM instr - subne lr, #6 @ next Thumb instr - movs pc, lr -do_abort_\@: - .endm - - .macro get_irq_stack @ setup IRQ stack - ldr sp, IRQ_STACK_START - .endm - - .macro get_fiq_stack @ setup FIQ stack - ldr sp, FIQ_STACK_START - .endm +.macro exception_entry + stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x17, x18, [sp, #-16]! + stp x15, x16, [sp, #-16]! + stp x13, x14, [sp, #-16]! + stp x11, x12, [sp, #-16]! + stp x9, x10, [sp, #-16]! + stp x7, x8, [sp, #-16]! + stp x5, x6, [sp, #-16]! + stp x3, x4, [sp, #-16]! + stp x1, x2, [sp, #-16]! 
+ + /* Could be running at EL3/EL2/EL1 */ + mrs x11, CurrentEL + cmp x11, #0xC /* Check EL3 state */ + b.eq 1f + cmp x11, #0x8 /* Check EL2 state */ + b.eq 2f + cmp x11, #0x4 /* Check EL1 state */ + b.eq 3f +3: mrs x1, esr_el3 + mrs x2, elr_el3 + b 0f +2: mrs x1, esr_el2 + mrs x2, elr_el2 + b 0f +1: mrs x1, esr_el1 + mrs x2, elr_el1 +0: + stp x2, x0, [sp, #-16]! + mov x0, sp +.endm /* - * exception handlers + * Exception vectors. */ - .section ".text","ax" - .arm - - .align 5 -undefined_instruction: - get_bad_stack - bad_save_user_regs - bl do_undefined_instruction - - .align 5 -software_interrupt: - get_bad_stack - bad_save_user_regs - bl do_software_interrupt - - .align 5 -prefetch_abort: - get_bad_stack - bad_save_user_regs - bl do_prefetch_abort - - .align 5 -data_abort: - try_data_abort - get_bad_stack - bad_save_user_regs - bl do_data_abort - - .align 5 -irq: - get_bad_stack - bad_save_user_regs - bl do_irq - - .align 5 -fiq: - get_bad_stack - bad_save_user_regs - bl do_fiq - -#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_ARM_EXCEPTIONS) -/* - * With relocatable binary support the runtime exception vectors do not match - * the addresses in the binary. 
We have to fix them up during runtime - */ -ENTRY(arm_fixup_vectors) - ldr r0, =undefined_instruction - ldr r1, =_undefined_instruction - str r0, [r1] - ldr r0, =software_interrupt - ldr r1, =_software_interrupt - str r0, [r1] - ldr r0, =prefetch_abort - ldr r1, =_prefetch_abort - str r0, [r1] - ldr r0, =data_abort - ldr r1, =_data_abort - str r0, [r1] - ldr r0, =irq - ldr r1, =_irq - str r0, [r1] - ldr r0, =fiq - ldr r1, =_fiq - str r0, [r1] - bx lr -ENDPROC(arm_fixup_vectors) -#endif - -.section .text_exceptions -.globl extable -extable: -1: b 1b /* barebox_arm_reset_vector */ -#ifdef CONFIG_ARM_EXCEPTIONS - ldr pc, _undefined_instruction /* undefined instruction */ - ldr pc, _software_interrupt /* software interrupt (SWI) */ - ldr pc, _prefetch_abort /* prefetch abort */ - ldr pc, _data_abort /* data abort */ -1: b 1b /* (reserved) */ - ldr pc, _irq /* irq (interrupt) */ - ldr pc, _fiq /* fiq (fast interrupt) */ -.globl _undefined_instruction -_undefined_instruction: .word undefined_instruction -.globl _software_interrupt -_software_interrupt: .word software_interrupt -.globl _prefetch_abort -_prefetch_abort: .word prefetch_abort -.globl _data_abort -_data_abort: .word data_abort -.globl _irq -_irq: .word irq -.globl _fiq -_fiq: .word fiq -#else -1: b 1b /* undefined instruction */ -1: b 1b /* software interrupt (SWI) */ -1: b 1b /* prefetch abort */ -1: b 1b /* data abort */ -1: b 1b /* (reserved) */ -1: b 1b /* irq (interrupt) */ -1: b 1b /* fiq (fast interrupt) */ -#endif + .align 11 + .globl vectors +vectors: + .align 7 + b _do_bad_sync /* Current EL Synchronous Thread */ + + .align 7 + b _do_bad_irq /* Current EL IRQ Thread */ + + .align 7 + b _do_bad_fiq /* Current EL FIQ Thread */ + + .align 7 + b _do_bad_error /* Current EL Error Thread */ + + .align 7 + b _do_sync /* Current EL Synchronous Handler */ + + .align 7 + b _do_irq /* Current EL IRQ Handler */ + + .align 7 + b _do_fiq /* Current EL FIQ Handler */ + + .align 7 + b _do_error /* Current EL Error 
Handler */ + + +_do_bad_sync: + exception_entry + bl do_bad_sync + +_do_bad_irq: + exception_entry + bl do_bad_irq + +_do_bad_fiq: + exception_entry + bl do_bad_fiq + +_do_bad_error: + exception_entry + bl do_bad_error + +_do_sync: + exception_entry + bl do_sync + +_do_irq: + exception_entry + bl do_irq + +_do_fiq: + exception_entry + bl do_fiq + +_do_error: + exception_entry + bl do_error .section .data .align 4 diff --git a/arch/arm/cpu/interrupts.c b/arch/arm/cpu/interrupts.c index fb4bb78..d42a5b1 100644 --- a/arch/arm/cpu/interrupts.c +++ b/arch/arm/cpu/interrupts.c @@ -27,54 +27,8 @@ #include <asm/ptrace.h> #include <asm/unwind.h> -/** - * Display current register set content - * @param[in] regs Guess what - */ -void show_regs (struct pt_regs *regs) -{ - unsigned long flags; - const char *processor_modes[] = { - "USER_26", "FIQ_26", "IRQ_26", "SVC_26", - "UK4_26", "UK5_26", "UK6_26", "UK7_26", - "UK8_26", "UK9_26", "UK10_26", "UK11_26", - "UK12_26", "UK13_26", "UK14_26", "UK15_26", - "USER_32", "FIQ_32", "IRQ_32", "SVC_32", - "UK4_32", "UK5_32", "UK6_32", "ABT_32", - "UK8_32", "UK9_32", "UK10_32", "UND_32", - "UK12_32", "UK13_32", "UK14_32", "SYS_32", - }; - - flags = condition_codes (regs); - - printf ("pc : [<%08lx>] lr : [<%08lx>]\n" - "sp : %08lx ip : %08lx fp : %08lx\n", - instruction_pointer (regs), - regs->ARM_lr, regs->ARM_sp, regs->ARM_ip, regs->ARM_fp); - printf ("r10: %08lx r9 : %08lx r8 : %08lx\n", - regs->ARM_r10, regs->ARM_r9, regs->ARM_r8); - printf ("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n", - regs->ARM_r7, regs->ARM_r6, regs->ARM_r5, regs->ARM_r4); - printf ("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n", - regs->ARM_r3, regs->ARM_r2, regs->ARM_r1, regs->ARM_r0); - printf ("Flags: %c%c%c%c", - flags & PSR_N_BIT ? 'N' : 'n', - flags & PSR_Z_BIT ? 'Z' : 'z', - flags & PSR_C_BIT ? 'C' : 'c', flags & PSR_V_BIT ? 'V' : 'v'); - printf (" IRQs %s FIQs %s Mode %s%s\n", - interrupts_enabled (regs) ? 
"on" : "off", - fast_interrupts_enabled (regs) ? "on" : "off", - processor_modes[processor_mode (regs)], - thumb_mode (regs) ? " (T)" : ""); -#ifdef CONFIG_ARM_UNWIND - unwind_backtrace(regs); -#endif -} - static void __noreturn do_exception(struct pt_regs *pt_regs) { - show_regs(pt_regs); - panic(""); } @@ -121,14 +75,6 @@ void do_prefetch_abort (struct pt_regs *pt_regs) */ void do_data_abort (struct pt_regs *pt_regs) { - u32 far; - - asm volatile ("mrc p15, 0, %0, c6, c0, 0" : "=r" (far) : : "cc"); - - printf("unable to handle %s at address 0x%08x\n", - far < PAGE_SIZE ? "NULL pointer dereference" : - "paging request", far); - do_exception(pt_regs); } @@ -156,6 +102,43 @@ void do_irq (struct pt_regs *pt_regs) do_exception(pt_regs); } +void do_bad_sync(struct pt_regs *pt_regs) +{ + printf("bad sync\n"); + do_exception(pt_regs); +} + +void do_bad_irq(struct pt_regs *pt_regs) +{ + printf("bad irq\n"); + do_exception(pt_regs); +} + +void do_bad_fiq(struct pt_regs *pt_regs) +{ + printf("bad fiq\n"); + do_exception(pt_regs); +} + +void do_bad_error(struct pt_regs *pt_regs) +{ + printf("bad error\n"); + do_exception(pt_regs); +} + +void do_sync(struct pt_regs *pt_regs) +{ + printf("sync exception\n"); + do_exception(pt_regs); +} + + +void do_error(struct pt_regs *pt_regs) +{ + printf("error exception\n"); + do_exception(pt_regs); +} + extern volatile int arm_ignore_data_abort; extern volatile int arm_data_abort_occurred; diff --git a/arch/arm/cpu/lowlevel.S b/arch/arm/cpu/lowlevel.S index e5baa12..0691b2b 100644 --- a/arch/arm/cpu/lowlevel.S +++ b/arch/arm/cpu/lowlevel.S @@ -1,60 +1,43 @@ #include <linux/linkage.h> #include <init.h> #include <asm/system.h> +#include <asm/gic.h> +#include <asm-generic/memory_layout.h> .section ".text_bare_init_","ax" + ENTRY(arm_cpu_lowlevel_init) - /* save lr, since it may be banked away with a processor mode change */ - mov r2, lr - /* set the cpu to SVC32 mode, mask irq and fiq */ - mrs r12, cpsr - bic r12, r12, #0x1f - orr r12, r12, 
#0xd3 - msr cpsr, r12 - -#if __LINUX_ARM_ARCH__ >= 6 - /* - * ICIALLU: Invalidate all instruction caches to PoU, - * includes flushing of branch predictors. - * Even if the i-cache is off it might contain stale entries - * that are better discarded before enabling the cache. - * Architectually this is even possible after a cold reset. - */ - mcr p15, 0, r12, c7, c5, 0 - /* DSB, ensure completion of the invalidation */ - mcr p15, 0, r12, c7, c10, 4 - /* - * ISB, ensure instruction fetch path is in sync. - * Note that the ARM Architecture Reference Manual, ARMv7-A and ARMv7-R - * edition (ARM DDI 0406C.c) doesn't define this instruction in the - * ARMv6 part (D12.7.10). It only has: "Support of additional - * operations is IMPLEMENTATION DEFINED". - * But an earlier version of the ARMARM (ARM DDI 0100I) does define it - * as "Flush prefetch buffer (PrefetchFlush)". - */ - mcr p15, 0, r12, c7, c5, 4 -#endif - - /* disable MMU stuff and data/unified caches */ - mrc p15, 0, r12, c1, c0, 0 /* SCTLR */ - bic r12, r12, #(CR_M | CR_C | CR_B) - bic r12, r12, #(CR_S | CR_R | CR_V) - - /* enable instruction cache */ - orr r12, r12, #CR_I - -#if __LINUX_ARM_ARCH__ >= 6 - orr r12, r12, #CR_U - bic r12, r12, #CR_A -#else - orr r12, r12, #CR_A -#endif - -#ifdef __ARMEB__ - orr r12, r12, #CR_B -#endif - - mcr p15, 0, r12, c1, c0, 0 /* SCTLR */ - - mov pc, r2 + adr x0, vectors + mrs x1, CurrentEL + cmp x1, #0xC /* Check EL3 state */ + b.eq 1f + cmp x1, #0x8 /* Check EL2 state */ + b.eq 2f + cmp x1, #0x4 /* Check EL1 state */ + b.eq 3f + +1: + msr vbar_el3, x0 + mov x0, #1 /* Non-Secure EL0/1 */ + orr x0, x0, #(1 << 10) /* 64-bit EL2 */ + msr scr_el3, x0 + msr cptr_el3, xzr + b done + +2: + msr vbar_el2, x0 + mov x0, #0x33ff /* Enable FP/SIMD */ + msr cptr_el2, x0 + b done + + +3: + msr vbar_el1, x0 + mov x0, #(3 << 20) /* Enable FP/SIMD */ + msr cpacr_el1, x0 + b done + +done: + ret + ENDPROC(arm_cpu_lowlevel_init) diff --git a/arch/arm/cpu/mmu.c b/arch/arm/cpu/mmu.c index 
a31bce4..b171f80 100644 --- a/arch/arm/cpu/mmu.c +++ b/arch/arm/cpu/mmu.c @@ -32,54 +32,8 @@ #include "mmu.h" -static unsigned long *ttb; - -static void create_sections(unsigned long virt, unsigned long phys, int size_m, - unsigned int flags) -{ - int i; - - phys >>= 20; - virt >>= 20; - - for (i = size_m; i > 0; i--, virt++, phys++) - ttb[virt] = (phys << 20) | flags; - - __mmu_cache_flush(); -} - -/* - * Do it the simple way for now and invalidate the entire - * tlb - */ -static inline void tlb_invalidate(void) -{ - asm volatile ( - "mov r0, #0\n" - "mcr p15, 0, r0, c7, c10, 4; @ drain write buffer\n" - "mcr p15, 0, r0, c8, c6, 0; @ invalidate D TLBs\n" - "mcr p15, 0, r0, c8, c5, 0; @ invalidate I TLBs\n" - : - : - : "r0" - ); -} - -#define PTE_FLAGS_CACHED_V7 (PTE_EXT_TEX(1) | PTE_BUFFERABLE | PTE_CACHEABLE) -#define PTE_FLAGS_WC_V7 PTE_EXT_TEX(1) -#define PTE_FLAGS_UNCACHED_V7 (0) -#define PTE_FLAGS_CACHED_V4 (PTE_SMALL_AP_UNO_SRW | PTE_BUFFERABLE | PTE_CACHEABLE) -#define PTE_FLAGS_UNCACHED_V4 PTE_SMALL_AP_UNO_SRW - -/* - * PTE flags to set cached and uncached areas. - * This will be determined at runtime. - */ -static uint32_t pte_flags_cached; -static uint32_t pte_flags_wc; -static uint32_t pte_flags_uncached; - -#define PTE_MASK ((1 << 12) - 1) +static uint64_t *ttb; +static int free_idx; static void arm_mmu_not_initialized_error(void) { @@ -92,329 +46,175 @@ static void arm_mmu_not_initialized_error(void) panic("MMU not initialized\n"); } + /* - * Create a second level translation table for the given virtual address. - * We initially create a flat uncached mapping on it. - * Not yet exported, but may be later if someone finds use for it. 
+ * Do it the simple way for now and invalidate the entire + * tlb */ -static u32 *arm_create_pte(unsigned long virt) +static inline void tlb_invalidate(void) { - u32 *table; - int i; + unsigned int el = current_el(); - table = memalign(0x400, 0x400); + dsb(); - if (!ttb) - arm_mmu_not_initialized_error(); - - ttb[virt >> 20] = (unsigned long)table | PMD_TYPE_TABLE; - - for (i = 0; i < 256; i++) { - table[i] = virt | PTE_TYPE_SMALL | pte_flags_uncached; - virt += PAGE_SIZE; - } + if (el == 1) + __asm__ __volatile__("tlbi alle1\n\t" : : : "memory"); + else if (el == 2) + __asm__ __volatile__("tlbi alle2\n\t" : : : "memory"); + else if (el == 3) + __asm__ __volatile__("tlbi alle3\n\t" : : : "memory"); - return table; + dsb(); + isb(); } -static u32 *find_pte(unsigned long adr) +static int level2shift(int level) { - u32 *table; - - if (!ttb) - arm_mmu_not_initialized_error(); - - if ((ttb[adr >> 20] & PMD_TYPE_MASK) != PMD_TYPE_TABLE) { - struct memory_bank *bank; - int i = 0; + /* Page is 12 bits wide, every level translates 9 bits */ + return (12 + 9 * (3 - level)); +} - /* - * This should only be called for page mapped memory inside our - * memory banks. It's a bug to call it with section mapped memory - * locations. 
- */ - pr_crit("%s: TTB for address 0x%08lx is not of type table\n", - __func__, adr); - pr_crit("Memory banks:\n"); - for_each_memory_bank(bank) - pr_crit("#%d 0x%08lx - 0x%08lx\n", i, bank->start, - bank->start + bank->size - 1); - BUG(); - } +static uint64_t level2mask(int level) +{ + uint64_t mask = -EINVAL; - /* find the coarse page table base address */ - table = (u32 *)(ttb[adr >> 20] & ~0x3ff); + if (level == 1) + mask = L1_ADDR_MASK; + else if (level == 2) + mask = L2_ADDR_MASK; + else if (level == 3) + mask = L3_ADDR_MASK; - /* find second level descriptor */ - return &table[(adr >> PAGE_SHIFT) & 0xff]; + return mask; } -static void dma_flush_range(unsigned long start, unsigned long end) +static int pte_type(uint64_t *pte) { - __dma_flush_range(start, end); - if (outer_cache.flush_range) - outer_cache.flush_range(start, end); + return *pte & PMD_TYPE_MASK; } -static void dma_inv_range(unsigned long start, unsigned long end) +static void set_table(uint64_t *pt, uint64_t *table_addr) { - if (outer_cache.inv_range) - outer_cache.inv_range(start, end); - __dma_inv_range(start, end); + uint64_t val; + + val = PMD_TYPE_TABLE | (uint64_t)table_addr; + *pt = val; } -static int __remap_range(void *_start, size_t size, u32 pte_flags) +static uint64_t *create_table(void) { - unsigned long start = (unsigned long)_start; - u32 *p; - int numentries, i; - - numentries = size >> PAGE_SHIFT; - p = find_pte(start); - - for (i = 0; i < numentries; i++) { - p[i] &= ~PTE_MASK; - p[i] |= pte_flags | PTE_TYPE_SMALL; - } + uint64_t *new_table = ttb + free_idx * GRANULE_SIZE; - dma_flush_range((unsigned long)p, - (unsigned long)p + numentries * sizeof(u32)); + /* Mark all entries as invalid */ + memset(new_table, 0, GRANULE_SIZE); - tlb_invalidate(); + free_idx++; - return 0; + return new_table; } -int arch_remap_range(void *start, size_t size, unsigned flags) +static uint64_t *get_level_table(uint64_t *pte) { - u32 pte_flags; - - switch (flags) { - case MAP_CACHED: - pte_flags = 
pte_flags_cached; - break; - case MAP_UNCACHED: - pte_flags = pte_flags_uncached; - break; - default: - return -EINVAL; + uint64_t *table = (uint64_t *)(*pte & XLAT_ADDR_MASK); + + if (pte_type(pte) != PMD_TYPE_TABLE) { + table = create_table(); + set_table(pte, table); } - return __remap_range(start, size, pte_flags); + return table; } -void *map_io_sections(unsigned long phys, void *_start, size_t size) +static uint64_t *find_pte(uint64_t addr) { - unsigned long start = (unsigned long)_start, sec; + uint64_t *pte; + uint64_t block_shift; + uint64_t idx; + int i; - phys >>= 20; - for (sec = start; sec < start + size; sec += (1 << 20)) - ttb[sec >> 20] = (phys++ << 20) | PMD_SECT_DEF_UNCACHED; + pte = ttb; - dma_flush_range((unsigned long)ttb, (unsigned long)ttb + 0x4000); - tlb_invalidate(); - return _start; + for (i = 1; i < 4; i++) { + block_shift = level2shift(i); + idx = (addr & level2mask(i)) >> block_shift; + pte += idx; + + if ((pte_type(pte) != PMD_TYPE_TABLE) || (block_shift <= GRANULE_SIZE_SHIFT)) + break; + else + pte = (uint64_t *)(*pte & XLAT_ADDR_MASK); + } + + return pte; } -/* - * remap the memory bank described by mem cachable and - * bufferable - */ -static int arm_mmu_remap_sdram(struct memory_bank *bank) +static void map_region(uint64_t virt, uint64_t phys, uint64_t size, uint64_t attr) { - unsigned long phys = (unsigned long)bank->start; - unsigned long ttb_start = phys >> 20; - unsigned long ttb_end = (phys >> 20) + (bank->size >> 20); - unsigned long num_ptes = bank->size >> 12; - int i, pte; - u32 *ptes; + uint64_t block_size; + uint64_t block_shift; + uint64_t *pte; + uint64_t idx; + uint64_t addr; + uint64_t *table; + int level; - pr_debug("remapping SDRAM from 0x%08lx (size 0x%08lx)\n", - phys, bank->size); + if (!ttb) + arm_mmu_not_initialized_error(); - /* - * We replace each 1MiB section in this range with second level page - * tables, therefore we must have 1Mib aligment here. 
- */ - if ((phys & (SZ_1M - 1)) || (bank->size & (SZ_1M - 1))) - return -EINVAL; + addr = virt; - ptes = xmemalign(PAGE_SIZE, num_ptes * sizeof(u32)); + attr &= ~(PMD_TYPE_SECT); - pr_debug("ptes: 0x%p ttb_start: 0x%08lx ttb_end: 0x%08lx\n", - ptes, ttb_start, ttb_end); + while (size) { + table = ttb; + for (level = 1; level < 4; level++) { + block_shift = level2shift(level); + idx = (addr & level2mask(level)) >> block_shift; + block_size = (1 << block_shift); - for (i = 0; i < num_ptes; i++) { - ptes[i] = (phys + i * PAGE_SIZE) | PTE_TYPE_SMALL | - pte_flags_cached; - } + pte = table + idx; - pte = 0; + if (level == 3) + attr |= PMD_TYPE_PAGE; + else + attr |= PMD_TYPE_SECT; - for (i = ttb_start; i < ttb_end; i++) { - ttb[i] = (unsigned long)(&ptes[pte]) | PMD_TYPE_TABLE | - (0 << 4); - pte += 256; - } + if (size >= block_size && IS_ALIGNED(addr, block_size)) { + *pte = phys | attr; + addr += block_size; + phys += block_size; + size -= block_size; + break; - dma_flush_range((unsigned long)ttb, (unsigned long)ttb + 0x4000); - dma_flush_range((unsigned long)ptes, - (unsigned long)ptes + num_ptes * sizeof(u32)); + } - tlb_invalidate(); + table = get_level_table(pte); + } - return 0; + } } -/* - * We have 8 exception vectors and the table consists of absolute - * jumps, so we need 8 * 4 bytes for the instructions and another - * 8 * 4 bytes for the addresses. - */ -#define ARM_VECTORS_SIZE (sizeof(u32) * 8 * 2) - -#define ARM_HIGH_VECTORS 0xffff0000 -#define ARM_LOW_VECTORS 0x0 -/** - * create_vector_table - create a vector table at given address - * @adr - The address where the vector table should be created - * - * After executing this function the vector table is found at the - * virtual address @adr. 
- */ -static void create_vector_table(unsigned long adr) +static void create_sections(uint64_t virt, uint64_t phys, uint64_t size_m, uint64_t flags) { - struct resource *vectors_sdram; - void *vectors; - u32 *exc; - int idx; - - vectors_sdram = request_sdram_region("vector table", adr, SZ_4K); - if (vectors_sdram) { - /* - * The vector table address is inside the SDRAM physical - * address space. Use the existing identity mapping for - * the vector table. - */ - pr_debug("Creating vector table, virt = phys = 0x%08lx\n", adr); - vectors = (void *)vectors_sdram->start; - } else { - /* - * The vector table address is outside of SDRAM. Create - * a secondary page table for the section and map - * allocated memory to the vector address. - */ - vectors = xmemalign(PAGE_SIZE, PAGE_SIZE); - pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n", - vectors, adr); - exc = arm_create_pte(adr); - idx = (adr & (SZ_1M - 1)) >> PAGE_SHIFT; - exc[idx] = (u32)vectors | PTE_TYPE_SMALL | pte_flags_cached; - } - arm_fixup_vectors(); - - memset(vectors, 0, PAGE_SIZE); - memcpy(vectors, __exceptions_start, __exceptions_stop - __exceptions_start); + map_region(virt, phys, size_m, flags); } -/** - * set_vector_table - let CPU use the vector table at given address - * @adr - The address of the vector table - * - * Depending on the CPU the possibilities differ. ARMv7 and later allow - * to map the vector table to arbitrary addresses. Other CPUs only allow - * vectors at 0xffff0000 or at 0x0. 
- */ -static int set_vector_table(unsigned long adr) +void *map_io_sections(uint64_t phys, void *_start, size_t size) { - u32 cr; - - if (cpu_architecture() >= CPU_ARCH_ARMv7) { - pr_debug("Vectors are at 0x%08lx\n", adr); - set_vbar(adr); - return 0; - } - if (adr == ARM_HIGH_VECTORS) { - cr = get_cr(); - cr |= CR_V; - set_cr(cr); - cr = get_cr(); - if (cr & CR_V) { - pr_debug("Vectors are at 0x%08lx\n", adr); - return 0; - } else { - return -EINVAL; - } - } - - if (adr == ARM_LOW_VECTORS) { - cr = get_cr(); - cr &= ~CR_V; - set_cr(cr); - cr = get_cr(); - if (cr & CR_V) { - return -EINVAL; - } else { - pr_debug("Vectors are at 0x%08lx\n", adr); - return 0; - } - } + map_region((uint64_t)_start, phys, (uint64_t)size, PMD_SECT_DEF_UNCACHED); - return -EINVAL; + tlb_invalidate(); + return _start; } -static void create_zero_page(void) -{ - struct resource *zero_sdram; - u32 *zero; - - zero_sdram = request_sdram_region("zero page", 0x0, SZ_4K); - if (zero_sdram) { - /* - * Here we would need to set the second level page table - * entry to faulting. This is not yet implemented. - */ - pr_debug("zero page is in SDRAM area, currently not supported\n"); - } else { - zero = arm_create_pte(0x0); - zero[0] = 0; - pr_debug("Created zero page\n"); - } -} -/* - * Map vectors and zero page - */ -static void vectors_init(void) +int arch_remap_range(void *_start, size_t size, unsigned flags) { - /* - * First try to use the vectors where they actually are, works - * on ARMv7 and later. - */ - if (!set_vector_table((unsigned long)__exceptions_start)) { - arm_fixup_vectors(); - create_zero_page(); - return; - } - - /* - * Next try high vectors at 0xffff0000. - */ - if (!set_vector_table(ARM_HIGH_VECTORS)) { - create_zero_page(); - create_vector_table(ARM_HIGH_VECTORS); - return; - } + map_region((uint64_t)_start, (uint64_t)_start, (uint64_t)size, flags); - /* - * As a last resort use low vectors at 0x0. 
With this we can't - * set the zero page to faulting and can't catch NULL pointer - * exceptions. - */ - set_vector_table(ARM_LOW_VECTORS); - create_vector_table(ARM_LOW_VECTORS); + return 0; } /* @@ -423,7 +223,6 @@ static void vectors_init(void) static int mmu_init(void) { struct memory_bank *bank; - int i; if (list_empty(&memory_banks)) /* @@ -434,56 +233,31 @@ static int mmu_init(void) */ panic("MMU: No memory bank found! Cannot continue\n"); - arm_set_cache_functions(); - - if (cpu_architecture() >= CPU_ARCH_ARMv7) { - pte_flags_cached = PTE_FLAGS_CACHED_V7; - pte_flags_wc = PTE_FLAGS_WC_V7; - pte_flags_uncached = PTE_FLAGS_UNCACHED_V7; - } else { - pte_flags_cached = PTE_FLAGS_CACHED_V4; - pte_flags_wc = PTE_FLAGS_UNCACHED_V4; - pte_flags_uncached = PTE_FLAGS_UNCACHED_V4; - } - - if (get_cr() & CR_M) { - /* - * Early MMU code has already enabled the MMU. We assume a - * flat 1:1 section mapping in this case. - */ - asm volatile ("mrc p15,0,%0,c2,c0,0" : "=r"(ttb)); - - /* Clear unpredictable bits [13:0] */ - ttb = (unsigned long *)((unsigned long)ttb & ~0x3fff); - + if (get_sctlr() & CR_M) { + ttb = (uint64_t *)get_ttbr(1); if (!request_sdram_region("ttb", (unsigned long)ttb, SZ_16K)) /* - * This can mean that: - * - the early MMU code has put the ttb into a place - * which we don't have inside our available memory - * - Somebody else has occupied the ttb region which means - * the ttb will get corrupted. - */ + * This can mean that: + * - the early MMU code has put the ttb into a place + * which we don't have inside our available memory + * - Somebody else has occupied the ttb region which means + * the ttb will get corrupted. 
+ */ pr_crit("Critical Error: Can't request SDRAM region for ttb at %p\n", - ttb); + ttb); } else { - ttb = memalign(0x10000, 0x4000); - } + ttb = memalign(0x1000, SZ_16K); + free_idx = 1; - pr_debug("ttb: 0x%p\n", ttb); + memset(ttb, 0, GRANULE_SIZE); - /* Set the ttb register */ - asm volatile ("mcr p15,0,%0,c2,c0,0" : : "r"(ttb) /*:*/); + set_ttbr_tcr_mair(current_el(), (uint64_t)ttb, TCR_FLAGS, MEMORY_ATTR); + } - /* Set the Domain Access Control Register */ - i = 0x3; - asm volatile ("mcr p15,0,%0,c3,c0,0" : : "r"(i) /*:*/); + pr_debug("ttb: 0x%p\n", ttb); /* create a flat mapping using 1MiB sections */ - create_sections(0, 0, PAGE_SIZE, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | - PMD_TYPE_SECT); - - vectors_init(); + create_sections(0, 0, GRANULE_SIZE, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT); /* * First remap sdram cached using sections. @@ -491,92 +265,70 @@ static int mmu_init(void) * below */ for_each_memory_bank(bank) - create_sections(bank->start, bank->start, bank->size >> 20, - PMD_SECT_DEF_CACHED); - - __mmu_cache_on(); - - /* - * Now that we have the MMU and caches on remap sdram again using - * page tables - */ - for_each_memory_bank(bank) - arm_mmu_remap_sdram(bank); + create_sections(bank->start, bank->start, bank->size, PMD_SECT_DEF_CACHED); return 0; } mmu_initcall(mmu_init); -void *dma_alloc_coherent(size_t size, dma_addr_t *dma_handle) +void mmu_enable(void) { - void *ret; - - size = PAGE_ALIGN(size); - ret = xmemalign(PAGE_SIZE, size); - if (dma_handle) - *dma_handle = (dma_addr_t)ret; - - dma_inv_range((unsigned long)ret, (unsigned long)ret + size); + if (!ttb) + arm_mmu_not_initialized_error(); - __remap_range(ret, size, pte_flags_uncached); + if (!(get_sctlr() & CR_M)) { - return ret; + isb(); + set_sctlr(get_sctlr() | CR_M | CR_C | CR_I); + } } -void *dma_alloc_writecombine(size_t size, dma_addr_t *dma_handle) +void mmu_disable(void) { - void *ret; + unsigned int sctlr; + + if (!ttb) + arm_mmu_not_initialized_error(); - size = 
PAGE_ALIGN(size); - ret = xmemalign(PAGE_SIZE, size); - if (dma_handle) - *dma_handle = (dma_addr_t)ret; + sctlr = get_sctlr(); + sctlr &= ~(CR_M | CR_C | CR_I); - dma_inv_range((unsigned long)ret, (unsigned long)ret + size); + tlb_invalidate(); - __remap_range(ret, size, pte_flags_wc); + dsb(); + isb(); - return ret; -} + set_sctlr(sctlr); -unsigned long virt_to_phys(volatile void *virt) -{ - return (unsigned long)virt; + dsb(); + isb(); } -void *phys_to_virt(unsigned long phys) +void mmu_early_enable(uint64_t membase, uint64_t memsize, uint64_t _ttb) { - return (void *)phys; -} + ttb = (uint64_t *)_ttb; -void dma_free_coherent(void *mem, dma_addr_t dma_handle, size_t size) -{ - size = PAGE_ALIGN(size); - __remap_range(mem, size, pte_flags_cached); + memset(ttb, 0, GRANULE_SIZE); + free_idx = 1; + + set_ttbr_tcr_mair(current_el(), (uint64_t)ttb, TCR_FLAGS, MEMORY_ATTR); - free(mem); + create_sections(0, 0, 4096, PMD_SECT_AP_WRITE | + PMD_SECT_AP_READ | PMD_TYPE_SECT); + + create_sections(membase, membase, memsize, PMD_SECT_AP_WRITE | + PMD_SECT_AP_READ | PMD_TYPE_SECT | PMD_SECT_WB); + + isb(); + set_sctlr(get_sctlr() | CR_M); } -void dma_sync_single_for_cpu(unsigned long address, size_t size, - enum dma_data_direction dir) +unsigned long virt_to_phys(volatile void *virt) { - if (dir != DMA_TO_DEVICE) { - if (outer_cache.inv_range) - outer_cache.inv_range(address, address + size); - __dma_inv_range(address, address + size); - } + return (unsigned long)virt; } -void dma_sync_single_for_device(unsigned long address, size_t size, - enum dma_data_direction dir) +void *phys_to_virt(unsigned long phys) { - if (dir == DMA_FROM_DEVICE) { - __dma_inv_range(address, address + size); - if (outer_cache.inv_range) - outer_cache.inv_range(address, address + size); - } else { - __dma_clean_range(address, address + size); - if (outer_cache.clean_range) - outer_cache.clean_range(address, address + size); - } + return (void *)phys; } diff --git a/arch/arm/cpu/mmu.h 
b/arch/arm/cpu/mmu.h index 79ebc80..a20adec 100644 --- a/arch/arm/cpu/mmu.h +++ b/arch/arm/cpu/mmu.h @@ -1,6 +1,159 @@ #ifndef __ARM_MMU_H #define __ARM_MMU_H +#define UL(x) _AC(x, UL) + +#define UNUSED_DESC 0x6EbAAD0BBADbA6E0 + +#define VA_START 0x0 +#define BITS_PER_VA 33 + +/* Granule size of 4KB is being used */ +#define GRANULE_SIZE_SHIFT 12 +#define GRANULE_SIZE (1 << GRANULE_SIZE_SHIFT) +#define XLAT_ADDR_MASK ((1UL << BITS_PER_VA) - GRANULE_SIZE) +#define GRANULE_SIZE_MASK ((1 << GRANULE_SIZE_SHIFT) - 1) + +#define BITS_RESOLVED_PER_LVL (GRANULE_SIZE_SHIFT - 3) +#define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2) +#define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1) +#define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0) + + +#define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT) +#define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT) +#define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT) + +/* These macros give the size of the region addressed by each entry of a xlat + table at any given level */ +#define L3_XLAT_SIZE (1UL << L3_ADDR_SHIFT) +#define L2_XLAT_SIZE (1UL << L2_ADDR_SHIFT) +#define L1_XLAT_SIZE (1UL << L1_ADDR_SHIFT) + +#define GRANULE_MASK GRANULE_SIZE + +/* + * Memory types + */ +#define MT_DEVICE_NGNRNE 0 +#define MT_DEVICE_NGNRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 + +#define MEMORY_ATTRIBUTES ((0x00 << (MT_DEVICE_NGNRNE*8)) | \ + (0x04 << (MT_DEVICE_NGNRE*8)) | \ + (0x0c << (MT_DEVICE_GRE*8)) | \ + (0x44 << (MT_NORMAL_NC*8)) | \ + (UL(0xff) << (MT_NORMAL*8))) + +/* + * Hardware page table definitions. + * + * Level 2 descriptor (PMD). 
+ */ +#define PMD_TYPE_MASK (3 << 0) +#define PMD_TYPE_FAULT (0 << 0) +#define PMD_TYPE_TABLE (3 << 0) +#define PMD_TYPE_PAGE (3 << 0) +#define PMD_TYPE_SECT (1 << 0) + +/* + * Section + */ +#define PMD_SECT_NON_SHARE (0 << 8) +#define PMD_SECT_OUTER_SHARE (2 << 8) +#define PMD_SECT_INNER_SHARE (3 << 8) +#define PMD_SECT_AF (1 << 10) +#define PMD_SECT_NG (1 << 11) +#define PMD_SECT_PXN (UL(1) << 53) +#define PMD_SECT_UXN (UL(1) << 54) + +/* + * AttrIndx[2:0] + */ +#define PMD_ATTRINDX(t) ((t) << 2) +#define PMD_ATTRINDX_MASK (7 << 2) + +/* + * TCR flags. + */ +#define TCR_T0SZ(x) ((64 - (x)) << 0) +#define TCR_IRGN_NC (0 << 8) +#define TCR_IRGN_WBWA (1 << 8) +#define TCR_IRGN_WT (2 << 8) +#define TCR_IRGN_WBNWA (3 << 8) +#define TCR_IRGN_MASK (3 << 8) +#define TCR_ORGN_NC (0 << 10) +#define TCR_ORGN_WBWA (1 << 10) +#define TCR_ORGN_WT (2 << 10) +#define TCR_ORGN_WBNWA (3 << 10) +#define TCR_ORGN_MASK (3 << 10) +#define TCR_SHARED_NON (0 << 12) +#define TCR_SHARED_OUTER (2 << 12) +#define TCR_SHARED_INNER (3 << 12) +#define TCR_TG0_4K (0 << 14) +#define TCR_TG0_64K (1 << 14) +#define TCR_TG0_16K (2 << 14) +#define TCR_EL1_IPS_BITS (UL(3) << 32) /* 42 bits physical address */ +#define TCR_EL2_IPS_BITS (3 << 16) /* 42 bits physical address */ +#define TCR_EL3_IPS_BITS (3 << 16) /* 42 bits physical address */ + +#define TCR_EL1_RSVD (1 << 31) +#define TCR_EL2_RSVD (1 << 31 | 1 << 23) +#define TCR_EL3_RSVD (1 << 31 | 1 << 23) + +#define TCR_FLAGS (TCR_TG0_4K | \ + TCR_SHARED_OUTER | \ + TCR_SHARED_INNER | \ + TCR_IRGN_WBWA | \ + TCR_ORGN_WBWA | \ + TCR_T0SZ(BITS_PER_VA)) + +#define MEMORY_ATTR (PMD_SECT_AF | PMD_SECT_INNER_SHARE | \ + PMD_ATTRINDX(MT_NORMAL) | \ + PMD_TYPE_SECT) + +#ifndef __ASSEMBLY__ + +static inline void set_ttbr_tcr_mair(int el, uint64_t table, uint64_t tcr, uint64_t attr) +{ + asm volatile("dsb sy"); + if (el == 1) { + asm volatile("msr ttbr0_el1, %0" : : "r" (table) : "memory"); + asm volatile("msr tcr_el1, %0" : : "r" (tcr) : "memory"); + asm 
volatile("msr mair_el1, %0" : : "r" (attr) : "memory"); + } else if (el == 2) { + asm volatile("msr ttbr0_el2, %0" : : "r" (table) : "memory"); + asm volatile("msr tcr_el2, %0" : : "r" (tcr) : "memory"); + asm volatile("msr mair_el2, %0" : : "r" (attr) : "memory"); + } else if (el == 3) { + asm volatile("msr ttbr0_el3, %0" : : "r" (table) : "memory"); + asm volatile("msr tcr_el3, %0" : : "r" (tcr) : "memory"); + asm volatile("msr mair_el3, %0" : : "r" (attr) : "memory"); + } else { + hang(); + } + asm volatile("isb"); +} + +static inline uint64_t get_ttbr(int el) +{ + uint64_t val; + if (el == 1) { + asm volatile("mrs %0, ttbr0_el1" : "=r" (val)); + } else if (el == 2) { + asm volatile("mrs %0, ttbr0_el2" : "=r" (val)); + } else if (el == 3) { + asm volatile("mrs %0, ttbr0_el3" : "=r" (val)); + } else { + hang(); + } + + return val; +} +#endif + #ifdef CONFIG_MMU void __mmu_cache_on(void); void __mmu_cache_off(void); @@ -11,4 +164,6 @@ static inline void __mmu_cache_off(void) {} static inline void __mmu_cache_flush(void) {} #endif +void mmu_early_enable(uint64_t membase, uint64_t memsize, uint64_t _ttb); + #endif /* __ARM_MMU_H */ diff --git a/arch/arm/cpu/start.c b/arch/arm/cpu/start.c index e037d91..1d017bc 100644 --- a/arch/arm/cpu/start.c +++ b/arch/arm/cpu/start.c @@ -31,7 +31,7 @@ #include <malloc.h> #include <debug_ll.h> -#include "mmu-early.h" +#include "mmu.h" unsigned long arm_stack_top; static unsigned long arm_head_bottom; @@ -151,7 +151,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase, relocate_to_adr(barebox_base); } - setup_c(); +// setup_c(); barrier(); @@ -170,7 +170,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase, } else { pr_debug("enabling MMU, ttb @ 0x%08lx\n", ttb); arm_early_mmu_cache_invalidate(); - mmu_early_enable(membase, memsize, ttb); + mmu_early_enable((uint64_t)membase, (uint64_t)memsize, (uint64_t)ttb); } } @@ -193,7 +193,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase, if (totalsize) 
{ unsigned long mem = arm_mem_boarddata(membase, endmem, totalsize); - pr_debug("found %s in boarddata, copying to 0x%08lx\n", + pr_debug("found %s in boarddata, copying to 0x%lu\n", name, mem); barebox_boarddata = memcpy((void *)mem, boarddata, totalsize); @@ -229,7 +229,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase, #ifndef CONFIG_PBL_IMAGE -void __naked __section(.text_entry) start(void) +void __section(.text_entry) start(void) { barebox_arm_head(); } @@ -239,7 +239,7 @@ void __naked __section(.text_entry) start(void) * First function in the uncompressed image. We get here from * the pbl. The stack already has been set up by the pbl. */ -void __naked __section(.text_entry) start(unsigned long membase, +void __section(.text_entry) start(unsigned long membase, unsigned long memsize, void *boarddata) { barebox_non_pbl_start(membase, memsize, boarddata); diff --git a/arch/arm/cpu/uncompress.c b/arch/arm/cpu/uncompress.c index b8e2e9f..5bcce6b 100644 --- a/arch/arm/cpu/uncompress.c +++ b/arch/arm/cpu/uncompress.c @@ -60,7 +60,7 @@ void __noreturn barebox_multi_pbl_start(unsigned long membase, * to the current address. Otherwise it may be a readonly location. * Copy and relocate to the start of the memory in this case. 
*/ - if (pc > membase && pc - membase < memsize) + if (pc > membase && pc < membase + memsize) relocate_to_current_adr(); else relocate_to_adr(membase); diff --git a/arch/arm/include/asm/barebox-arm.h b/arch/arm/include/asm/barebox-arm.h index 8e7b45c..6713326 100644 --- a/arch/arm/include/asm/barebox-arm.h +++ b/arch/arm/include/asm/barebox-arm.h @@ -97,7 +97,7 @@ void *barebox_arm_boot_dtb(void); static inline unsigned long arm_mem_stack(unsigned long membase, unsigned long endmem) { - return endmem - SZ_64K - STACK_SIZE; + return endmem - STACK_SIZE; } static inline unsigned long arm_mem_ttb(unsigned long membase, diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 138ebe2..ac85a0a 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -1,184 +1,48 @@ /* - * Copyright 1995, Russell King. - * Various bits and pieces copyrights include: - * Linus Torvalds (test_bit). + * Copyright (C) 2012 ARM Ltd. * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * - * Please note that the code in this file should never be included - * from user space. Many of these are not implemented in assembler - * since they would be too costly. Also, they require priviledged - * instructions (which are not available from user mode) to ensure - * that they are atomic. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#ifndef __ASM_BITOPS_H +#define __ASM_BITOPS_H -#ifndef __ASM_ARM_BITOPS_H -#define __ASM_ARM_BITOPS_H +#include <linux/compiler.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif /* - * Functions equivalent of ops.h - */ -static inline void __set_bit(int nr, volatile void *addr) -{ - ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7)); -} - -static inline void __clear_bit(int nr, volatile void *addr) -{ - ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7)); -} - -static inline void __change_bit(int nr, volatile void *addr) -{ - ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7)); -} - -static inline int __test_and_set_bit(int nr, volatile void *addr) -{ - unsigned int mask = 1 << (nr & 7); - unsigned int oldval; - - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval | mask; - return oldval & mask; -} - -static inline int __test_and_clear_bit(int nr, volatile void *addr) -{ - unsigned int mask = 1 << (nr & 7); - unsigned int oldval; - - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval & ~mask; - return oldval & mask; -} - -static inline int __test_and_change_bit(int nr, volatile void *addr) -{ - unsigned int mask = 1 << (nr & 7); - unsigned int oldval; - - oldval = ((unsigned char *) addr)[nr >> 3]; - ((unsigned char *) addr)[nr >> 3] = oldval ^ mask; - return oldval & mask; -} - -/* - * This routine doesn't need to be atomic. - */ -static inline int test_bit(int nr, const void * addr) -{ - return ((unsigned char *) addr)[nr >> 3] & (1U << (nr & 7)); -} - -#define set_bit(x, y) __set_bit(x, y) -#define clear_bit(x, y) __clear_bit(x, y) -#define change_bit(x, y) __change_bit(x, y) -#define test_and_set_bit(x, y) __test_and_set_bit(x, y) -#define test_and_clear_bit(x, y) __test_and_clear_bit(x, y) -#define test_and_change_bit(x, y) __test_and_change_bit(x, y) - -#ifndef __ARMEB__ -/* - * These are the little endian definitions. 
+ * Little endian assembly atomic bitops. */ -extern int _find_first_zero_bit_le(const void *p, unsigned size); -extern int _find_next_zero_bit_le(const void *p, int size, int offset); -extern int _find_first_bit_le(const unsigned long *p, unsigned size); -extern int _find_next_bit_le(const unsigned long *p, int size, int offset); -#define find_first_zero_bit(p, sz) _find_first_zero_bit_le(p, sz) -#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_le(p, sz, off) -#define find_first_bit(p, sz) _find_first_bit_le(p, sz) -#define find_next_bit(p, sz, off) _find_next_bit_le(p, sz, off) +extern void set_bit(int nr, volatile unsigned long *p); +extern void clear_bit(int nr, volatile unsigned long *p); +extern void change_bit(int nr, volatile unsigned long *p); +extern int test_and_set_bit(int nr, volatile unsigned long *p); +extern int test_and_clear_bit(int nr, volatile unsigned long *p); +extern int test_and_change_bit(int nr, volatile unsigned long *p); -#define WORD_BITOFF_TO_LE(x) ((x)) - -#else /* ! __ARMEB__ */ - -/* - * These are the big endian definitions. 
- */ -extern int _find_first_zero_bit_be(const void *p, unsigned size); -extern int _find_next_zero_bit_be(const void *p, int size, int offset); -extern int _find_first_bit_be(const unsigned long *p, unsigned size); -extern int _find_next_bit_be(const unsigned long *p, int size, int offset); -#define find_first_zero_bit(p, sz) _find_first_zero_bit_be(p, sz) -#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_be(p, sz, off) -#define find_first_bit(p, sz) _find_first_bit_be(p, sz) -#define find_next_bit(p, sz, off) _find_next_bit_be(p, sz, off) - -#define WORD_BITOFF_TO_LE(x) ((x) ^ 0x18) - -#endif /* __ARMEB__ */ - -#if defined(__LINUX_ARM_ARCH__) && (__LINUX_ARM_ARCH__ >= 5) -static inline int constant_fls(int x) -{ - int r = 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} - -/* - * On ARMv5 and above those functions can be implemented around - * the clz instruction for much better code efficiency. - */ -#define fls(x) \ - (__builtin_constant_p(x) ? constant_fls(x) : \ - ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; })) -#define ffs(x) ({ unsigned long __t = (x); fls(__t &-__t); }) -#define __ffs(x) (ffs(x) - 1) -#define ffz(x) __ffs(~(x)) -#else /* ! __ARM__USE_GENERIC_FF */ -/* - * ffz = Find First Zero in word. Undefined if no zero exists, - * so code should check against ~0UL first.. 
- */ -static inline unsigned long ffz(unsigned long word) -{ - int k; - - word = ~word; - k = 31; - if (word & 0x0000ffff) { k -= 16; word <<= 16; } - if (word & 0x00ff0000) { k -= 8; word <<= 8; } - if (word & 0x0f000000) { k -= 4; word <<= 4; } - if (word & 0x30000000) { k -= 2; word <<= 2; } - if (word & 0x40000000) { k -= 1; } - return k; -} #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls.h> -#endif /* __ARM__USE_GENERIC_FF */ + +#include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls64.h> +#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/hweight.h> -#endif /* _ARM_BITOPS_H */ +#include <asm-generic/bitops/ops.h> + +#endif /* __ASM_BITOPS_H */ diff --git a/arch/arm/include/asm/boarddata.h b/arch/arm/include/asm/boarddata.h new file mode 100644 index 0000000..8c3c5f0 --- /dev/null +++ b/arch/arm/include/asm/boarddata.h @@ -0,0 +1,5 @@ +#ifndef __ASM_BOARDDATA_H +#define __ASM_BOARDDATA_H + + +#endif /* __ASM_BOARDDATA_H */ diff --git a/arch/arm/include/asm/cache-l2x0.h b/arch/arm/include/asm/cache-l2x0.h index 9bb245b..963dd99 100644 --- a/arch/arm/include/asm/cache-l2x0.h +++ b/arch/arm/include/asm/cache-l2x0.h @@ -56,14 +56,6 @@ #define L2X0_LINE_TAG 0xF30 #define L2X0_DEBUG_CTRL 0xF40 #define L2X0_PREFETCH_CTRL 0xF60 -#define L2X0_DOUBLE_LINEFILL_EN (1 << 30) -#define L2X0_INSTRUCTION_PREFETCH_EN (1 << 29) -#define L2X0_DATA_PREFETCH_EN (1 << 28) -#define L2X0_DOUBLE_LINEFILL_ON_WRAP_READ_DIS (1 << 27) -#define L2X0_PREFETCH_DROP_EN (1 << 24) -#define L2X0_INCR_DOUBLE_LINEFILL_EN (1 << 23) -#define L2X0_ESCLUSIVE_SEQUENCE_EN (1 << 21) - #define L2X0_POWER_CTRL 0xF80 #define L2X0_DYNAMIC_CLK_GATING_EN (1 << 1) #define L2X0_STNDBY_MODE_EN (1 << 0) diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index 2f6eab0..5a524f3 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -1,9 +1,11 @@ 
#ifndef __ASM_CACHE_H #define __ASM_CACHE_H +extern void v8_invalidate_icache_all(void); + static inline void flush_icache(void) { - asm volatile("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); + v8_invalidate_icache_all(); } int arm_set_cache_functions(void); diff --git a/arch/arm/include/asm/errata.h b/arch/arm/include/asm/errata.h index 98137b5..9525823 100644 --- a/arch/arm/include/asm/errata.h +++ b/arch/arm/include/asm/errata.h @@ -77,12 +77,3 @@ static inline void enable_arm_errata_794072_war(void) "mcr p15, 0, r0, c15, c0, 1\n" ); } - -static inline void enable_arm_errata_845369_war(void) -{ - __asm__ __volatile__ ( - "mrc p15, 0, r0, c15, c0, 1\n" - "orr r0, r0, #1 << 22\n" - "mcr p15, 0, r0, c15, c0, 1\n" - ); -} diff --git a/arch/arm/include/asm/gic.h b/arch/arm/include/asm/gic.h new file mode 100644 index 0000000..c7c17e3 --- /dev/null +++ b/arch/arm/include/asm/gic.h @@ -0,0 +1,128 @@ +#ifndef __GIC_H__ +#define __GIC_H__ + +/* Generic Interrupt Controller Definitions */ +//#ifdef CONFIG_GICV3 +//#define GICD_BASE (0x2f000000) +//#define GICR_BASE (0x2f100000) +//#else + +//#if defined(CONFIG_TARGET_VEXPRESS64_BASE_FVP) || \ + defined(CONFIG_TARGET_VEXPRESS64_BASE_FVP_DRAM) +//#define GICD_BASE (0x2f000000) +//#define GICC_BASE (0x2c000000) +//#elif CONFIG_TARGET_VEXPRESS64_JUNO +#define GIC_DIST_BASE (0x2C010000) +#define GIC_CPU_BASE (0x2C02f000) +//#else +//#error "Unknown board variant" +//#endif +//#endif /* !CONFIG_GICV3 */ + +/* Register offsets for the ARM generic interrupt controller (GIC) */ + +#define GIC_DIST_OFFSET 0x1000 +#define GIC_CPU_OFFSET_A9 0x0100 +#define GIC_CPU_OFFSET_A15 0x2000 + +/* Distributor Registers */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_SEIR 0x0068 +#define GICD_IGROUPRn 0x0080 +#define GICD_ISENABLERn 0x0100 +#define 
GICD_ICENABLERn 0x0180 +#define GICD_ISPENDRn 0x0200 +#define GICD_ICPENDRn 0x0280 +#define GICD_ISACTIVERn 0x0300 +#define GICD_ICACTIVERn 0x0380 +#define GICD_IPRIORITYRn 0x0400 +#define GICD_ITARGETSRn 0x0800 +#define GICD_ICFGR 0x0c00 +#define GICD_IGROUPMODRn 0x0d00 +#define GICD_NSACRn 0x0e00 +#define GICD_SGIR 0x0f00 +#define GICD_CPENDSGIRn 0x0f10 +#define GICD_SPENDSGIRn 0x0f20 +#define GICD_IROUTERn 0x6000 + +/* Cpu Interface Memory Mapped Registers */ +#define GICC_CTLR 0x0000 +#define GICC_PMR 0x0004 +#define GICC_BPR 0x0008 +#define GICC_IAR 0x000C +#define GICC_EOIR 0x0010 +#define GICC_RPR 0x0014 +#define GICC_HPPIR 0x0018 +#define GICC_ABPR 0x001c +#define GICC_AIAR 0x0020 +#define GICC_AEOIR 0x0024 +#define GICC_AHPPIR 0x0028 +#define GICC_APRn 0x00d0 +#define GICC_NSAPRn 0x00e0 +#define GICC_IIDR 0x00fc +#define GICC_DIR 0x1000 + +/* ReDistributor Registers for Control and Physical LPIs */ +#define GICR_CTLR 0x0000 +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR 0x0010 +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_SEIR 0x0068 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00a0 +#define GICR_INVALLR 0x00b0 +#define GICR_SYNCR 0x00c0 +#define GICR_MOVLPIR 0x0100 +#define GICR_MOVALLR 0x0110 + +/* ReDistributor Registers for SGIs and PPIs */ +#define GICR_IGROUPRn 0x0080 +#define GICR_ISENABLERn 0x0100 +#define GICR_ICENABLERn 0x0180 +#define GICR_ISPENDRn 0x0200 +#define GICR_ICPENDRn 0x0280 +#define GICR_ISACTIVERn 0x0300 +#define GICR_ICACTIVERn 0x0380 +#define GICR_IPRIORITYRn 0x0400 +#define GICR_ICFGR0 0x0c00 +#define GICR_ICFGR1 0x0c04 +#define GICR_IGROUPMODRn 0x0d00 +#define GICR_NSACRn 0x0e00 + +/* Cpu Interface System Registers */ +#define ICC_IAR0_EL1 S3_0_C12_C8_0 +#define ICC_IAR1_EL1 S3_0_C12_C12_0 +#define ICC_EOIR0_EL1 S3_0_C12_C8_1 +#define ICC_EOIR1_EL1 S3_0_C12_C12_1 +#define ICC_HPPIR0_EL1 S3_0_C12_C8_2 +#define 
ICC_HPPIR1_EL1 S3_0_C12_C12_2 +#define ICC_BPR0_EL1 S3_0_C12_C8_3 +#define ICC_BPR1_EL1 S3_0_C12_C12_3 +#define ICC_DIR_EL1 S3_0_C12_C11_1 +#define ICC_PMR_EL1 S3_0_C4_C6_0 +#define ICC_RPR_EL1 S3_0_C12_C11_3 +#define ICC_CTLR_EL1 S3_0_C12_C12_4 +#define ICC_CTLR_EL3 S3_6_C12_C12_4 +#define ICC_SRE_EL1 S3_0_C12_C12_5 +#define ICC_SRE_EL2 S3_4_C12_C9_5 +#define ICC_SRE_EL3 S3_6_C12_C12_5 +#define ICC_IGRPEN0_EL1 S3_0_C12_C12_6 +#define ICC_IGRPEN1_EL1 S3_0_C12_C12_7 +#define ICC_IGRPEN1_EL3 S3_6_C12_C12_7 +#define ICC_SEIEN_EL1 S3_0_C12_C13_0 +#define ICC_SGI0R_EL1 S3_0_C12_C11_7 +#define ICC_SGI1R_EL1 S3_0_C12_C11_5 +#define ICC_ASGI1R_EL1 S3_0_C12_C11_6 + +#endif /* __GIC_H__ */ diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 8de6544..8a1d80a 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -13,9 +13,7 @@ struct arm_memory; -static inline void mmu_enable(void) -{ -} +void mmu_enable(void); void mmu_disable(void); static inline void arm_create_section(unsigned long virt, unsigned long phys, int size_m, unsigned int flags) @@ -30,7 +28,7 @@ static inline void setup_dma_coherent(unsigned long offset) #define ARCH_HAS_REMAP #define MAP_ARCH_DEFAULT MAP_CACHED int arch_remap_range(void *_start, size_t size, unsigned flags); -void *map_io_sections(unsigned long physaddr, void *start, size_t size); +void *map_io_sections(uint64_t phys, void *_start, size_t size); #else #define MAP_ARCH_DEFAULT MAP_UNCACHED static inline void *map_io_sections(unsigned long phys, void *start, size_t size) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index fd1521d..e4a3c53 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -18,8 +18,9 @@ */ #define PMD_TYPE_MASK (3 << 0) #define PMD_TYPE_FAULT (0 << 0) -#define PMD_TYPE_TABLE (1 << 0) -#define PMD_TYPE_SECT (2 << 0) +#define PMD_TYPE_TABLE (3 << 0) +#define PMD_TYPE_PAGE (3 << 0) +#define PMD_TYPE_SECT (1 << 0) #define 
PMD_BIT4 (1 << 4) #define PMD_DOMAIN(x) ((x) << 5) #define PMD_PROTECTION (1 << 9) /* v5 */ diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 022d365..450b63a 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -20,124 +20,15 @@ /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 0x00000001 -/* - * PSR bits - */ -#define USR26_MODE 0x00000000 -#define FIQ26_MODE 0x00000001 -#define IRQ26_MODE 0x00000002 -#define SVC26_MODE 0x00000003 -#define USR_MODE 0x00000010 -#define FIQ_MODE 0x00000011 -#define IRQ_MODE 0x00000012 -#define SVC_MODE 0x00000013 -#define ABT_MODE 0x00000017 -#define UND_MODE 0x0000001b -#define SYSTEM_MODE 0x0000001f -#define MODE32_BIT 0x00000010 -#define MODE_MASK 0x0000001f -#define PSR_T_BIT 0x00000020 -#define PSR_F_BIT 0x00000040 -#define PSR_I_BIT 0x00000080 -#define PSR_A_BIT 0x00000100 -#define PSR_E_BIT 0x00000200 -#define PSR_J_BIT 0x01000000 -#define PSR_Q_BIT 0x08000000 -#define PSR_V_BIT 0x10000000 -#define PSR_C_BIT 0x20000000 -#define PSR_Z_BIT 0x40000000 -#define PSR_N_BIT 0x80000000 -#define PCMASK 0 - #ifndef __ASSEMBLY__ /* this struct defines the way the registers are stored on the stack during a system call. 
*/ struct pt_regs { - long uregs[18]; + long uregs[31]; }; -#define ARM_cpsr uregs[16] -#define ARM_pc uregs[15] -#define ARM_lr uregs[14] -#define ARM_sp uregs[13] -#define ARM_ip uregs[12] -#define ARM_fp uregs[11] -#define ARM_r10 uregs[10] -#define ARM_r9 uregs[9] -#define ARM_r8 uregs[8] -#define ARM_r7 uregs[7] -#define ARM_r6 uregs[6] -#define ARM_r5 uregs[5] -#define ARM_r4 uregs[4] -#define ARM_r3 uregs[3] -#define ARM_r2 uregs[2] -#define ARM_r1 uregs[1] -#define ARM_r0 uregs[0] -#define ARM_ORIG_r0 uregs[17] - -#ifdef __KERNEL__ - -#define user_mode(regs) \ - (((regs)->ARM_cpsr & 0xf) == 0) - -#ifdef CONFIG_ARM_THUMB -#define thumb_mode(regs) \ - (((regs)->ARM_cpsr & PSR_T_BIT)) -#else -#define thumb_mode(regs) (0) -#endif - -#define processor_mode(regs) \ - ((regs)->ARM_cpsr & MODE_MASK) - -#define interrupts_enabled(regs) \ - (!((regs)->ARM_cpsr & PSR_I_BIT)) - -#define fast_interrupts_enabled(regs) \ - (!((regs)->ARM_cpsr & PSR_F_BIT)) - -#define condition_codes(regs) \ - ((regs)->ARM_cpsr & (PSR_V_BIT | PSR_C_BIT | PSR_Z_BIT | PSR_N_BIT)) - -/* Are the current registers suitable for user mode? - * (used to maintain security in signal handlers) - */ -static inline int valid_user_regs(struct pt_regs *regs) -{ - if ((regs->ARM_cpsr & 0xf) == 0 && - (regs->ARM_cpsr & (PSR_F_BIT | PSR_I_BIT)) == 0) - return 1; - - /* - * Force CPSR to something logical... 
- */ - regs->ARM_cpsr &= (PSR_V_BIT | PSR_C_BIT | PSR_Z_BIT | PSR_N_BIT | - 0x10); - - return 0; -} - -#endif /* __KERNEL__ */ - #endif /* __ASSEMBLY__ */ -#ifndef __ASSEMBLY__ -#define pc_pointer(v) \ - ((v) & ~PCMASK) - -#define instruction_pointer(regs) \ - (pc_pointer((regs)->ARM_pc)) - -#ifdef __KERNEL__ -extern void show_regs(struct pt_regs *); - -#define predicate(x) (x & 0xf0000000) -#define PREDICATE_ALWAYS 0xe0000000 - -#endif - -#endif /* __ASSEMBLY__ */ - #endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index b118a42..04a79c4 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -1,96 +1,125 @@ #ifndef __ASM_ARM_SYSTEM_H #define __ASM_ARM_SYSTEM_H -#if __LINUX_ARM_ARCH__ >= 7 #define isb() __asm__ __volatile__ ("isb" : : : "memory") -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory") +#define dsb() __asm__ __volatile__ ("dsb sy" : : : "memory") #define dmb() __asm__ __volatile__ ("dmb" : : : "memory") -#elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ - : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ - : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory") -#elif defined(CONFIG_CPU_FA526) -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ - : : "r" (0) : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ - : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") -#else -#define isb() __asm__ __volatile__ ("" : : : "memory") -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ - : : "r" (0) : "memory") -#define dmb() __asm__ __volatile__ ("" : : : "memory") -#endif /* - * CR1 bits (CP#15 CR1) + * SCTLR_EL1/SCTLR_EL2/SCTLR_EL3 bits definitions */ -#define CR_M (1 << 0) /* MMU enable */ -#define CR_A (1 << 1) /* Alignment abort 
enable */ -#define CR_C (1 << 2) /* Dcache enable */ -#define CR_W (1 << 3) /* Write buffer enable */ -#define CR_P (1 << 4) /* 32-bit exception handler */ -#define CR_D (1 << 5) /* 32-bit data address range */ -#define CR_L (1 << 6) /* Implementation defined */ -#define CR_B (1 << 7) /* Big endian */ -#define CR_S (1 << 8) /* System MMU protection */ -#define CR_R (1 << 9) /* ROM MMU protection */ -#define CR_F (1 << 10) /* Implementation defined */ -#define CR_Z (1 << 11) /* Implementation defined */ -#define CR_I (1 << 12) /* Icache enable */ -#define CR_V (1 << 13) /* Vectors relocated to 0xffff0000 */ -#define CR_RR (1 << 14) /* Round Robin cache replacement */ -#define CR_L4 (1 << 15) /* LDR pc can set T bit */ -#define CR_DT (1 << 16) -#define CR_IT (1 << 18) -#define CR_ST (1 << 19) -#define CR_FI (1 << 21) /* Fast interrupt (lower latency mode) */ -#define CR_U (1 << 22) /* Unaligned access operation */ -#define CR_XP (1 << 23) /* Extended page tables */ -#define CR_VE (1 << 24) /* Vectored interrupts */ -#define CR_EE (1 << 25) /* Exception (Big) Endian */ -#define CR_TRE (1 << 28) /* TEX remap enable */ -#define CR_AFE (1 << 29) /* Access flag enable */ -#define CR_TE (1 << 30) /* Thumb exception enable */ +#define CR_M (1 << 0) /* MMU enable */ +#define CR_A (1 << 1) /* Alignment abort enable */ +#define CR_C (1 << 2) /* Dcache enable */ +#define CR_SA (1 << 3) /* Stack Alignment Check Enable */ +#define CR_I (1 << 12) /* Icache enable */ +#define CR_WXN (1 << 19) /* Write Permision Imply XN */ +#define CR_EE (1 << 25) /* Exception (Big) Endian */ + +#ifndef CONFIG_SYS_FULL_VA +#define PGTABLE_SIZE (0x10000) +#else +#define PGTABLE_SIZE CONFIG_SYS_PGTABLE_SIZE +#endif + +/* 2MB granularity */ +#define MMU_SECTION_SHIFT 21 +#define MMU_SECTION_SIZE (1 << MMU_SECTION_SHIFT) #ifndef __ASSEMBLY__ -static inline unsigned int get_cr(void) + +enum dcache_option { + DCACHE_OFF = 0x3, +}; + +#define wfi() \ + ({asm volatile( \ + "wfi" : : : "memory"); \ + }) + 
+static inline unsigned int current_el(void) { - unsigned int val; - asm volatile ("mrc p15, 0, %0, c1, c0, 0 @ get CR" : "=r" (val) : : "cc"); - return val; + unsigned int el; + asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc"); + return el >> 2; } -static inline void set_cr(unsigned int val) +static inline unsigned int get_sctlr(void) { - asm volatile("mcr p15, 0, %0, c1, c0, 0 @ set CR" - : : "r" (val) : "cc"); - isb(); + unsigned int el, val; + + el = current_el(); + if (el == 1) + asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc"); + else if (el == 2) + asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc"); + else + asm volatile("mrs %0, sctlr_el3" : "=r" (val) : : "cc"); + + return val; } -#ifdef CONFIG_CPU_32v7 -static inline unsigned int get_vbar(void) +static inline void set_sctlr(unsigned int val) { - unsigned int vbar; - asm volatile("mrc p15, 0, %0, c12, c0, 0 @ get VBAR" - : "=r" (vbar) : : "cc"); - return vbar; + unsigned int el; + + el = current_el(); + if (el == 1) + asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc"); + else if (el == 2) + asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc"); + else + asm volatile("msr sctlr_el3, %0" : : "r" (val) : "cc"); + + asm volatile("isb"); } -static inline void set_vbar(unsigned int vbar) +static inline unsigned long read_mpidr(void) { - asm volatile("mcr p15, 0, %0, c12, c0, 0 @ set VBAR" - : : "r" (vbar) : "cc"); - isb(); + unsigned long val; + + asm volatile("mrs %0, mpidr_el1" : "=r" (val)); + + return val; } -#else -static inline unsigned int get_vbar(void) { return 0; } -static inline void set_vbar(unsigned int vbar) {} -#endif -#endif +#define BSP_COREID 0 + +void __asm_flush_dcache_all(void); +void __asm_invalidate_dcache_all(void); +void __asm_flush_dcache_range(u64 start, u64 end); +void __asm_invalidate_tlb_all(void); +void __asm_invalidate_icache_all(void); +int __asm_flush_l3_cache(void); + +void armv8_switch_to_el2(void); +void armv8_switch_to_el1(void); +void gic_init(void); 
+void gic_send_sgi(unsigned long sgino); +void wait_for_wakeup(void); +void protect_secure_region(void); +void smp_kick_all_cpus(void); + +void flush_l3_cache(void); + +/* + *Issue a hypervisor call in accordance with ARM "SMC Calling convention", + * DEN0028A + * + * @args: input and output arguments + * + */ +void hvc_call(struct pt_regs *args); + +/* + *Issue a secure monitor call in accordance with ARM "SMC Calling convention", + * DEN0028A + * + * @args: input and output arguments + * + */ +void smc_call(struct pt_regs *args); + +#endif /* __ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_H */ diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h index 0761848..f595aae 100644 --- a/arch/arm/include/asm/system_info.h +++ b/arch/arm/include/asm/system_info.h @@ -13,6 +13,7 @@ #define CPU_ARCH_ARMv5TEJ 7 #define CPU_ARCH_ARMv6 8 #define CPU_ARCH_ARMv7 9 +#define CPU_ARCH_ARMv8 10 #define CPU_IS_ARM720 0x41007200 #define CPU_IS_ARM720_MASK 0xff00fff0 @@ -41,6 +42,12 @@ #define CPU_IS_CORTEX_A15 0x410fc0f0 #define CPU_IS_CORTEX_A15_MASK 0xff0ffff0 +#define CPU_IS_CORTEX_A53 0x410fd034 +#define CPU_IS_CORTEX_A53_MASK 0xff0ffff0 + +#define CPU_IS_CORTEX_A57 0x411fd070 +#define CPU_IS_CORTEX_A57_MASK 0xff0ffff0 + #define CPU_IS_PXA250 0x69052100 #define CPU_IS_PXA250_MASK 0xfffff7f0 @@ -112,6 +119,20 @@ #define cpu_is_cortex_a15() (0) #endif + +#ifdef CONFIG_CPU_64v8 +#ifdef ARM_ARCH +#define ARM_MULTIARCH +#else +#define ARM_ARCH CPU_ARCH_ARMv8 +#endif +#define cpu_is_cortex_a53() cpu_is_arm(CORTEX_A53) +#define cpu_is_cortex_a57() cpu_is_arm(CORTEX_A57) +#else +#define cpu_is_cortex_a53() (0) +#define cpu_is_cortex_a57() (0) +#endif + #ifndef __ASSEMBLY__ #ifdef ARM_MULTIARCH @@ -124,31 +145,33 @@ static inline int arm_early_get_cpu_architecture(void) { int cpu_arch; - if ((read_cpuid_id() & 0x0008f000) == 0) { - cpu_arch = CPU_ARCH_UNKNOWN; - } else if ((read_cpuid_id() & 0x0008f000) == 0x00007000) { - cpu_arch = (read_cpuid_id() & (1 << 
23)) ? CPU_ARCH_ARMv4T : CPU_ARCH_ARMv3; - } else if ((read_cpuid_id() & 0x00080000) == 0x00000000) { - cpu_arch = (read_cpuid_id() >> 16) & 7; - if (cpu_arch) - cpu_arch += CPU_ARCH_ARMv3; - } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) { - unsigned int mmfr0; - - /* Revised CPUID format. Read the Memory Model Feature - * Register 0 and check for VMSAv7 or PMSAv7 */ - asm("mrc p15, 0, %0, c0, c1, 4" - : "=r" (mmfr0)); - if ((mmfr0 & 0x0000000f) >= 0x00000003 || - (mmfr0 & 0x000000f0) >= 0x00000030) - cpu_arch = CPU_ARCH_ARMv7; - else if ((mmfr0 & 0x0000000f) == 0x00000002 || - (mmfr0 & 0x000000f0) == 0x00000020) - cpu_arch = CPU_ARCH_ARMv6; - else - cpu_arch = CPU_ARCH_UNKNOWN; - } else - cpu_arch = CPU_ARCH_UNKNOWN; +// if ((read_cpuid_id() & 0x0008f000) == 0) { +// cpu_arch = CPU_ARCH_UNKNOWN; +// } else if ((read_cpuid_id() & 0x0008f000) == 0x00007000) { +// cpu_arch = (read_cpuid_id() & (1 << 23)) ? CPU_ARCH_ARMv4T : CPU_ARCH_ARMv3; +// } else if ((read_cpuid_id() & 0x00080000) == 0x00000000) { +// cpu_arch = (read_cpuid_id() >> 16) & 7; +// if (cpu_arch) +// cpu_arch += CPU_ARCH_ARMv3; +// } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) { +// unsigned int mmfr0; +// +// /* Revised CPUID format. 
Read the Memory Model Feature +// * Register 0 and check for VMSAv7 or PMSAv7 */ +// asm("mrc p15, 0, %0, c0, c1, 4" +// : "=r" (mmfr0)); +// if ((mmfr0 & 0x0000000f) >= 0x00000003 || +// (mmfr0 & 0x000000f0) >= 0x00000030) +// cpu_arch = CPU_ARCH_ARMv7; +// else if ((mmfr0 & 0x0000000f) == 0x00000002 || +// (mmfr0 & 0x000000f0) == 0x00000020) +// cpu_arch = CPU_ARCH_ARMv6; +// else +// cpu_arch = CPU_ARCH_UNKNOWN; +// } else +// cpu_arch = CPU_ARCH_UNKNOWN; + + cpu_arch = CPU_ARCH_ARMv8; return cpu_arch; } diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index e1c6f5b..5b9d4a5 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -3,24 +3,11 @@ obj-$(CONFIG_BOOTM) += bootm.o obj-$(CONFIG_CMD_BOOTZ) += bootz.o obj-$(CONFIG_CMD_BOOTU) += bootu.o obj-y += div0.o -obj-y += findbit.o -obj-y += io.o -obj-y += io-readsb.o -obj-y += io-readsw-armv4.o -obj-y += io-readsl.o -obj-y += io-writesb.o -obj-y += io-writesw-armv4.o -obj-y += io-writesl.o -obj-y += lib1funcs.o -obj-y += ashrdi3.o -obj-y += ashldi3.o -obj-y += lshrdi3.o obj-y += runtime-offset.o pbl-y += runtime-offset.o obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memcpy.o obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o obj-$(CONFIG_ARM_UNWIND) += unwind.o -obj-$(CONFIG_ARM_SEMIHOSTING) += semihosting-trap.o semihosting.o obj-$(CONFIG_MODULES) += module.o extra-y += barebox.lds diff --git a/arch/arm/lib/armlinux.c b/arch/arm/lib/armlinux.c index 47b9bd3..21a2292 100644 --- a/arch/arm/lib/armlinux.c +++ b/arch/arm/lib/armlinux.c @@ -270,12 +270,6 @@ void start_linux(void *adr, int swap, unsigned long initrd_address, architecture = armlinux_get_architecture(); shutdown_barebox(); - if (swap) { - u32 reg; - __asm__ __volatile__("mrc p15, 0, %0, c1, c0" : "=r" (reg)); - reg ^= CR_B; /* swap big-endian flag */ - __asm__ __volatile__("mcr p15, 0, %0, c1, c0" :: "r" (reg)); - } kernel(0, architecture, params); } diff --git a/arch/arm/lib/barebox.lds.S b/arch/arm/lib/barebox.lds.S index 
6dc8bd2..240699f 100644 --- a/arch/arm/lib/barebox.lds.S +++ b/arch/arm/lib/barebox.lds.S @@ -20,8 +20,8 @@ #include <asm-generic/barebox.lds.h> -OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm") -OUTPUT_ARCH(arm) +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) ENTRY(start) SECTIONS { @@ -43,7 +43,6 @@ SECTIONS __bare_init_start = .; *(.text_bare_init*) __bare_init_end = .; - . = ALIGN(4); __exceptions_start = .; KEEP(*(.text_exceptions*)) __exceptions_stop = .; diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index f6024c8..1913d5f 100644 --- a/arch/arm/lib/bootm.c +++ b/arch/arm/lib/bootm.c @@ -67,55 +67,6 @@ static int sdram_start_and_size(unsigned long *start, unsigned long *size) return 0; } -static int get_kernel_addresses(size_t image_size, - int verbose, unsigned long *load_address, - unsigned long *mem_free) -{ - unsigned long mem_start, mem_size; - int ret; - size_t image_decomp_size; - unsigned long spacing; - - ret = sdram_start_and_size(&mem_start, &mem_size); - if (ret) - return ret; - - /* - * We don't know the exact decompressed size so just use a conservative - * default of 4 times the size of the compressed image. - */ - image_decomp_size = PAGE_ALIGN(image_size * 4); - - /* - * By default put oftree/initrd close behind compressed kernel image to - * avoid placing it outside of the kernels lowmem region. - */ - spacing = SZ_1M; - - if (*load_address == UIMAGE_INVALID_ADDRESS) { - /* - * Place the kernel at an address where it does not need to - * relocate itself before decompression. 
- */ - *load_address = mem_start + image_decomp_size; - if (verbose) - printf("no OS load address, defaulting to 0x%08lx\n", - *load_address); - } else if (*load_address <= mem_start + image_decomp_size) { - /* - * If the user/image specified an address where the kernel needs - * to relocate itself before decompression we need to extend the - * spacing to allow this relocation to happen without - * overwriting anything placed behind the kernel. - */ - spacing += image_decomp_size; - } - - *mem_free = PAGE_ALIGN(*load_address + image_size + spacing); - - return 0; -} - static int __do_bootm_linux(struct image_data *data, unsigned long free_mem, int swap) { unsigned long kernel; @@ -173,20 +124,38 @@ static int __do_bootm_linux(struct image_data *data, unsigned long free_mem, int static int do_bootm_linux(struct image_data *data) { - unsigned long load_address, mem_free; + unsigned long load_address, mem_start, mem_size, mem_free; int ret; - load_address = data->os_address; - - ret = get_kernel_addresses(bootm_get_os_size(data), - bootm_verbose(data), &load_address, &mem_free); + ret = sdram_start_and_size(&mem_start, &mem_size); if (ret) return ret; + load_address = data->os_address; + + if (load_address == UIMAGE_INVALID_ADDRESS) { + /* + * Just use a conservative default of 4 times the size of the + * compressed image, to avoid the need for the kernel to + * relocate itself before decompression. + */ + load_address = mem_start + PAGE_ALIGN( + bootm_get_os_size(data) * 4); + if (bootm_verbose(data)) + printf("no OS load address, defaulting to 0x%08lx\n", + load_address); + } + ret = bootm_load_os(data, load_address); if (ret) return ret; + /* + * put oftree/initrd close behind compressed kernel image to avoid + * placing it outside of the kernels lowmem. 
+ */ + mem_free = PAGE_ALIGN(data->os_res->end + SZ_1M); + return __do_bootm_linux(data, mem_free, 0); } @@ -282,7 +251,11 @@ static int do_bootz_linux(struct image_data *data) u32 end, start; size_t image_size; unsigned long load_address = data->os_address; - unsigned long mem_free; + unsigned long mem_start, mem_size, mem_free; + + ret = sdram_start_and_size(&mem_start, &mem_size); + if (ret) + return ret; fd = open(data->os_file, O_RDONLY); if (fd < 0) { @@ -318,12 +291,20 @@ static int do_bootz_linux(struct image_data *data) } image_size = end - start; - load_address = data->os_address; - ret = get_kernel_addresses(image_size, bootm_verbose(data), - &load_address, &mem_free); - if (ret) - return ret; + if (load_address == UIMAGE_INVALID_ADDRESS) { + /* + * Just use a conservative default of 4 times the size of the + * compressed image, to avoid the need for the kernel to + * relocate itself before decompression. + */ + data->os_address = mem_start + PAGE_ALIGN(image_size * 4); + + load_address = data->os_address; + if (bootm_verbose(data)) + printf("no OS load address, defaulting to 0x%08lx\n", + load_address); + } data->os_res = request_sdram_region("zimage", load_address, image_size); if (!data->os_res) { @@ -359,6 +340,12 @@ static int do_bootz_linux(struct image_data *data) close(fd); + /* + * put oftree/initrd close behind compressed kernel image to avoid + * placing it outside of the kernels lowmem. 
+ */ + mem_free = PAGE_ALIGN(data->os_res->end + SZ_1M); + return __do_bootm_linux(data, mem_free, swap); err_out: @@ -575,7 +562,7 @@ static int armlinux_register_image_handler(void) register_image_handler(&aimage_handler); binfmt_register(&binfmt_aimage_hook); } - if (IS_BUILTIN(CONFIG_FITIMAGE)) + if (IS_BUILTIN(CONFIG_CMD_BOOTM_FITIMAGE)) register_image_handler(&arm_fit_handler); binfmt_register(&binfmt_arm_zimage_hook); binfmt_register(&binfmt_barebox_hook); diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index d8eb063..cc9a842 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -1,268 +1,192 @@ /* - * linux/arch/arm/lib/copy_template.s + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. * - * Code template for optimized memory copy functions + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * Theory of operation - * ------------------- - * - * This file provides the core code for a forward memory copy used in - * the implementation of memcopy(), copy_to_user() and copy_from_user(). - * - * The including file must define the following accessor macros - * according to the need of the given function: - * - * ldr1w ptr reg abort - * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. 
- * - * ldr4w ptr reg1 reg2 reg3 reg4 abort - * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The'abort' argument is used for fixup tables. - * - * ldr1b ptr reg cond abort - * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. - * - * str1w ptr reg abort - * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * str1b ptr reg cond abort - * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * - * enter reg1 reg2 + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. - * - * exit reg1 reg2 - * - * Restore registers with the values previously saved with the - * 'preserv' macro. Called upon code termination. - * - * LDR1W_SHIFT - * STR1W_SHIFT - * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * than one 32bit instruction in Thumb-2) + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ - enter r4, lr - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 -#if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT -#endif - addne pc, pc, ip @ C is always clear here - b 7f -6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f - -#if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT -#elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT -#endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f - - exit r4, pc - -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, 
lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f - - - .macro forward_copy_shift pull push - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, pull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 - - /* - * Abort preamble and completion macros. - * If a fixup handler is required then those macros must surround it. - * It is assumed that the fixup code will handle the private part of - * the exit macro. 
+ * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest */ - - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} -21: - .endm - - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm - - +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. 
+ */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. + */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ +.Lcpy_body_large: + /* pre-get 64 bytes data. 
*/ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 5123691..cfed319 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -1,64 +1,74 @@ /* - * linux/arch/arm/lib/memcpy.S + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ #include <linux/linkage.h> #include <asm/assembler.h> -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val .endm - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val .endm - .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val .endm - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val .endm - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val .endm - .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 + .macro str1 ptr, regB, val + str \ptr, [\regB], \val .endm - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val .endm - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val .endm - .text - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - + .weak memcpy ENTRY(memcpy) - #include "copy_template.S" - + ret ENDPROC(memcpy) - diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index c4d2672..380a540 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -1,124 +1,215 @@ /* - * linux/arch/arm/lib/memset.S + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. 
* - * Copyright (C) 1995-2000 Russell King + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * ASM optimised string functions + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + #include <linux/linkage.h> #include <asm/assembler.h> - .text - .align 5 - -ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? - mov ip, r0 @ preserve r0 as return value - bne 6f @ 1 /* - * we know that the pointer in ip is aligned to a word boundary. - */ -1: orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 - cmp r2, #16 - blt 4f - -#if ! CALGN(1)+0 - -/* - * We need an 2 extra registers for this loop - use r8 and the LR - */ - stmfd sp!, {r8, lr} - mov r8, r1 - mov lr, r1 - -2: subs r2, r2, #64 - stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} - bgt 2b - ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. 
-/* - * No need to correct the count; we're only testing bits from now on + * Fill in the buffer with character c (alignment handled by the hardware) + * + * Parameters: + * x0 - buf + * x1 - c + * x2 - n + * Returns: + * x0 - buf */ - tst r2, #32 - stmneia ip!, {r1, r3, r8, lr} - stmneia ip!, {r1, r3, r8, lr} - tst r2, #16 - stmneia ip!, {r1, r3, r8, lr} - ldmfd sp!, {r8, lr} - -#else +dstin .req x0 +val .req w1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +zva_len_x .req x5 +zva_len .req w5 +zva_bits_x .req x6 + +A_l .req x7 +A_lw .req w7 +dst .req x8 +tmp3w .req w9 +tmp3 .req x9 + + .weak memset +ENTRY(memset) + mov dst, dstin /* Preserve return value. */ + and A_lw, val, #255 + orr A_lw, A_lw, A_lw, lsl #8 + orr A_lw, A_lw, A_lw, lsl #16 + orr A_l, A_l, A_l, lsl #32 + + cmp count, #15 + b.hi .Lover16_proc + /*All store maybe are non-aligned..*/ + tbz count, #3, 1f + str A_l, [dst], #8 +1: + tbz count, #2, 2f + str A_lw, [dst], #4 +2: + tbz count, #1, 3f + strh A_lw, [dst], #2 +3: + tbz count, #0, 4f + strb A_lw, [dst] +4: + ret + +.Lover16_proc: + /*Whether the start address is aligned with 16.*/ + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq .Laligned /* - * This version aligns the destination pointer in order to write - * whole cache lines at once. 
- */ - - stmfd sp!, {r4-r8, lr} - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - mov r8, r1 - mov lr, r1 - - cmp r2, #96 - tstgt ip, #31 - ble 3f - - and r8, ip, #31 - rsb r8, r8, #32 - sub r2, r2, r8 - movs r8, r8, lsl #(32 - 4) - stmcsia ip!, {r4, r5, r6, r7} - stmmiia ip!, {r4, r5} - tst r8, #(1 << 30) - mov r8, r1 - strne r1, [ip], #4 - -3: subs r2, r2, #64 - stmgeia ip!, {r1, r3-r8, lr} - stmgeia ip!, {r1, r3-r8, lr} - bgt 3b - ldmeqfd sp!, {r4-r8, pc} - - tst r2, #32 - stmneia ip!, {r1, r3-r8, lr} - tst r2, #16 - stmneia ip!, {r4-r7} - ldmfd sp!, {r4-r8, lr} - -#endif - -4: tst r2, #8 - stmneia ip!, {r1, r3} - tst r2, #4 - strne r1, [ip], #4 +* The count is not less than 16, we can use stp to store the start 16 bytes, +* then adjust the dst aligned with 16.This process will make the current +* memory address at alignment boundary. +*/ + stp A_l, A_l, [dst] /*non-aligned store..*/ + /*make the dst aligned..*/ + sub count, count, tmp2 + add dst, dst, tmp2 + +.Laligned: + cbz A_l, .Lzero_mem + +.Ltail_maybe_long: + cmp count, #64 + b.ge .Lnot_short +.Ltail63: + ands tmp1, count, #0x30 + b.eq 3f + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + stp A_l, A_l, [dst], #16 +1: + stp A_l, A_l, [dst], #16 +2: + stp A_l, A_l, [dst], #16 /* - * When we get here, we've got less than 4 bytes to zero. We - * may have an unaligned pointer as well. - */ -5: tst r2, #2 - strneb r1, [ip], #1 - strneb r1, [ip], #1 - tst r2, #1 - strneb r1, [ip], #1 - mov pc, lr - -6: subs r2, r2, #4 @ 1 do we have enough - blt 5b @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [ip], #1 @ 1 - strleb r1, [ip], #1 @ 1 - strb r1, [ip], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) - b 1b +* The last store length is less than 16,use stp to write last 16 bytes. +* It will lead some bytes written twice and the access is non-aligned. +*/ +3: + ands count, count, #15 + cbz count, 4f + add dst, dst, count + stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. 
*/ +4: + ret + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line, this ensures the entire loop is in one line. + */ +.Lnot_short: + sub dst, dst, #16/* Pre-bias. */ + sub count, count, #64 +1: + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + stp A_l, A_l, [dst, #48] + stp A_l, A_l, [dst, #64]! + subs count, count, #64 + b.ge 1b + tst count, #0x3f + add dst, dst, #16 + b.ne .Ltail63 +.Lexitfunc: + ret + + /* + * For zeroing memory, check to see if we can use the ZVA feature to + * zero entire 'cache' lines. + */ +.Lzero_mem: + cmp count, #63 + b.le .Ltail63 + /* + * For zeroing small amounts of memory, it's not worth setting up + * the line-clear code. + */ + cmp count, #128 + b.lt .Lnot_short /*count is at least 128 bytes*/ + + mrs tmp1, dczid_el0 + tbnz tmp1, #4, .Lnot_short + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len + + ands tmp3w, zva_len, #63 + /* + * ensure the zva_len is not less than 64. + * It is not meaningful to use ZVA if the block size is less than 64. + */ + b.ne .Lnot_short +.Lzero_by_line: + /* + * Compute how far we need to go to become suitably aligned. We're + * already at quad-word alignment. + */ + cmp count, zva_len_x + b.lt .Lnot_short /* Not enough to reach alignment. */ + sub zva_bits_x, zva_len_x, #1 + neg tmp2, dst + ands tmp2, tmp2, zva_bits_x + b.eq 2f /* Already aligned. */ + /* Not aligned, check that there's enough to copy after alignment.*/ + sub tmp1, count, tmp2 + /* + * grantee the remain length to be ZVA is bigger than 64, + * avoid to make the 2f's process over mem range.*/ + cmp tmp1, #64 + ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ + b.lt .Lnot_short + /* + * We know that there's at least 64 bytes to zero and that it's safe + * to overrun by 64 bytes. 
+ */ + mov count, tmp1 +1: + stp A_l, A_l, [dst] + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + subs tmp2, tmp2, #64 + stp A_l, A_l, [dst, #48] + add dst, dst, #64 + b.ge 1b + /* We've overrun a bit, so adjust dst downwards.*/ + add dst, dst, tmp2 +2: + sub count, count, zva_len_x +3: + dc zva, dst + add dst, dst, zva_len_x + subs count, count, zva_len_x + b.ge 3b + ands count, count, zva_bits_x + b.ne .Ltail_maybe_long + ret ENDPROC(memset) - diff --git a/arch/arm/lib/runtime-offset.S b/arch/arm/lib/runtime-offset.S index f10c4c8..e368baa 100644 --- a/arch/arm/lib/runtime-offset.S +++ b/arch/arm/lib/runtime-offset.S @@ -8,11 +8,11 @@ * we are currently running at. */ ENTRY(get_runtime_offset) -1: adr r0, 1b - ldr r1, linkadr - subs r0, r1, r0 -THUMB( subs r0, r0, #1) - mov pc, lr +1: adr x0, 1b + adr x1, get_runtime_offset + subs x0, x1, x0 + subs x0, x0, #1 + ret linkadr: .word get_runtime_offset @@ -28,7 +28,7 @@ __ld_var_base: */ .macro ld_var_entry name ENTRY(__ld_var_\name) - ldr r0, __\name + ldr x0, __\name b 1f __\name: .word \name - __ld_var_base ENDPROC(__ld_var_\name) @@ -47,6 +47,6 @@ ld_var_entry __image_end #endif 1: - ldr r1, =__ld_var_base - adds r0, r0, r1 - mov pc, lr + ldr x1, =__ld_var_base + adds x0, x0, x1 + ret diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig new file mode 100644 index 0000000..1f43606 --- /dev/null +++ b/arch/arm/mach-virt/Kconfig @@ -0,0 +1,15 @@ +if ARCH_VIRT + +config ARCH_TEXT_BASE + hex + default 0x40000000 + +choice + prompt "ARM Board type" + +config MACH_VIRT + bool "ARM QEMU virt" + +endchoice + +endif diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile new file mode 100644 index 0000000..3924a10 --- /dev/null +++ b/arch/arm/mach-virt/Makefile @@ -0,0 +1,3 @@ +obj-y += devices.o reset.o + +lwl-y += lowlevel.o diff --git a/arch/arm/mach-virt/devices.c b/arch/arm/mach-virt/devices.c new file mode 100644 index 0000000..999f463 --- /dev/null +++ 
b/arch/arm/mach-virt/devices.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016 Raphaël Poggi <poggi.raph@xxxxxxxxx> + * + * GPLv2 only + */ + +#include <common.h> +#include <linux/amba/bus.h> +#include <asm/memory.h> +#include <mach/devices.h> +#include <linux/ioport.h> + +void virt_add_ddram(u32 size) +{ + arm_add_mem_device("ram0", 0x40000000, size); +} + +void virt_register_uart(unsigned id) +{ + resource_size_t start; + + switch (id) { + case 0: + start = 0x09000000; + break; + default: + return; + } + amba_apb_device_add(NULL, "uart-pl011", id, start, 4096, NULL, 0); +} diff --git a/arch/arm/mach-virt/include/mach/debug_ll.h b/arch/arm/mach-virt/include/mach/debug_ll.h new file mode 100644 index 0000000..89b0692 --- /dev/null +++ b/arch/arm/mach-virt/include/mach/debug_ll.h @@ -0,0 +1,24 @@ +/* + * Copyright 2013 Jean-Christophe PLAGNIOL-VILLARD <plagniol@xxxxxxxxxxxx> + * + * GPLv2 only + */ + +#ifndef __MACH_DEBUG_LL_H__ +#define __MACH_DEBUG_LL_H__ + +#include <linux/amba/serial.h> +#include <io.h> + +#define DEBUG_LL_PHYS_BASE 0x10000000 +#define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 + +#ifdef MP +#define DEBUG_LL_UART_ADDR DEBUG_LL_PHYS_BASE +#else +#define DEBUG_LL_UART_ADDR DEBUG_LL_PHYS_BASE_RS1 +#endif + +#include <asm/debug_ll_pl011.h> + +#endif diff --git a/arch/arm/mach-virt/include/mach/devices.h b/arch/arm/mach-virt/include/mach/devices.h new file mode 100644 index 0000000..9872c61 --- /dev/null +++ b/arch/arm/mach-virt/include/mach/devices.h @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2016 Raphaël Poggi <poggi.raph@xxxxxxxxx> + * + * GPLv2 only + */ + +#ifndef __ASM_ARCH_DEVICES_H__ +#define __ASM_ARCH_DEVICES_H__ + +void virt_add_ddram(u32 size); +void virt_register_uart(unsigned id); + +#endif /* __ASM_ARCH_DEVICES_H__ */ diff --git a/arch/arm/mach-virt/lowlevel.c b/arch/arm/mach-virt/lowlevel.c new file mode 100644 index 0000000..6f695a5 --- /dev/null +++ b/arch/arm/mach-virt/lowlevel.c @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2013 Jean-Christophe 
PLAGNIOL-VILLARD <plagnio@xxxxxxxxxxxx> + * + * GPLv2 only + */ + +#include <common.h> +#include <linux/sizes.h> +#include <asm/barebox-arm-head.h> +#include <asm/barebox-arm.h> +#include <asm/system_info.h> + +void barebox_arm_reset_vector(void) +{ + arm_cpu_lowlevel_init(); + arm_setup_stack(STACK_BASE); + + barebox_arm_entry(0x40000000, SZ_512M, NULL); +} diff --git a/arch/arm/mach-virt/reset.c b/arch/arm/mach-virt/reset.c new file mode 100644 index 0000000..fb895eb --- /dev/null +++ b/arch/arm/mach-virt/reset.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2016 Raphaël Poggi <poggi.raph@xxxxxxxxx> + * + * GPLv2 only + */ + +#include <common.h> +#include <io.h> +#include <init.h> +#include <restart.h> +#include <mach/devices.h> + +static void virt_reset_soc(struct restart_handler *rst) +{ + hang(); +} + +static int restart_register_feature(void) +{ + restart_handler_register_fn(virt_reset_soc); + + return 0; +} +coredevice_initcall(restart_register_feature); -- 2.8.0.rc3 -- Pengutronix e.K. | | Industrial Linux Solutions | http://www.pengutronix.de/ | Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | _______________________________________________ barebox mailing list barebox@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/barebox