diff options
Diffstat (limited to 'arch')
379 files changed, 9941 insertions, 1914 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 1feb169274fe..af2cc6eabcc7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP config HAVE_ARCH_JUMP_LABEL bool -config HAVE_ARCH_MUTEX_CPU_RELAX - bool - config HAVE_RCU_TABLE_FREE bool diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index f158197ac5b0..b6a8c2dfbe6e 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + smp_mb(); } diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 32420824375b..30c9baffa96f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -43,7 +43,7 @@ * Because it essentially checks if buffer end is within limit and @len is * non-ngeative, which implies that buffer start will be within limit too. * - * The reason for rewriting being, for majorit yof cases, @len is generally + * The reason for rewriting being, for majority of cases, @len is generally * compile time constant, causing first sub-expression to be compile time * subsumed. * @@ -53,7 +53,7 @@ * */ #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ - (((addr)+(sz)) <= get_fs())) + ((addr) <= (get_fs() - (sz)))) #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ likely(__user_ok((addr), (sz)))) diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 333238564b67..5d76706139dd 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -102,7 +102,7 @@ static int genregs_set(struct task_struct *target, REG_IGNORE_ONE(pad2); REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ REG_IGNORE_ONE(efa); /* efa update invalid */ - REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */ + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ return ret; } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index ee6ef2f60a28..7e95e1a86510 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct rt_sigframe __user *sf; unsigned int magic; - int err; struct pt_regs *regs = current_pt_regs(); /* Always make any pending restarted system calls return -EINTR */ @@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) goto badframe; - err = restore_usr_regs(regs, sf); - err |= __get_user(magic, &sf->sigret_magic); - if (err) + if (__get_user(magic, &sf->sigret_magic)) goto badframe; if (unlikely(is_do_ss_needed(magic))) if (restore_altstack(&sf->uc.uc_stack)) goto badframe; + if (restore_usr_regs(regs, sf)) + goto badframe; + /* Don't restart from sigreturn */ syscall_wont_restart(regs); @@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, return 1; /* + * w/o SA_SIGINFO, struct ucontext is partially populated (only + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation + * during signal handler execution. This works for SA_SIGINFO as well + * although the semantics are now overloaded (the same reg state can be + * inspected by userland: but are they allowed to fiddle with it ? + */ + err |= stash_usr_regs(sf, regs, set); + + /* * SA_SIGINFO requires 3 args to signal handler: * #1: sig-no (common to any handler) * #2: struct siginfo @@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, magic = MAGIC_SIGALTSTK; } - /* - * w/o SA_SIGINFO, struct ucontext is partially populated (only - * uc_mcontext/uc_sigmask) for kernel's normal user state preservation - * during signal handler execution. This works for SA_SIGINFO as well - * although the semantics are now overloaded (the same reg state can be - * inspected by userland: but are they allowed to fiddle with it ? - */ - err |= stash_usr_regs(sf, regs, set); err |= __put_user(magic, &sf->sigret_magic); if (err) return err; diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 0e51e69cf30d..3fde7de3ea67 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -227,12 +227,9 @@ void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu) { struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu); - clockevents_calc_mult_shift(clk, arc_get_core_freq(), 5); - - clk->max_delta_ns = clockevent_delta2ns(ARC_TIMER_MAX, clk); clk->cpumask = cpumask_of(cpu); - - clockevents_register_device(clk); + clockevents_config_and_register(clk, arc_get_core_freq(), + 0, ARC_TIMER_MAX); /* * setup the per-cpu timer IRQ handler - for all cpus diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 28d170060747..7ff5b5c183bb 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -245,6 +245,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; + + /* handle zero-overhead-loop */ + if ((regs->ret == regs->lp_end) && (regs->lp_count)) { + regs->ret = regs->lp_start; + regs->lp_count--; + } } return 0; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3f7714d8d2d2..9f2d203162b2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS @@ -51,6 +52,8 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 @@ -481,6 +484,7 @@ config ARCH_IXP4XX bool "IXP4xx-based" depends on MMU select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_REQUIRE_GPIOLIB select CLKSRC_MMIO select CPU_XSCALE @@ -692,7 +696,6 @@ config ARCH_SA1100 select GENERIC_CLOCKEVENTS select HAVE_IDE select ISA - select NEED_MACH_GPIO_H select NEED_MACH_MEMORY_H select SPARSE_IRQ help @@ -1091,11 +1094,6 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. -config XSCALE_PMU - bool - depends on CPU_XSCALE - default y - config MULTI_IRQ_HANDLER bool help @@ -1549,6 +1547,32 @@ config MCPM for (multi-)cluster based systems, such as big.LITTLE based systems. +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support selections for the big.LITTLE + system architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G @@ -2217,8 +2241,7 @@ config NEON config KERNEL_MODE_NEON bool "Support for NEON in kernel mode" - default n - depends on NEON + depends on NEON && AEABI help Say Y to include support for NEON in kernel mode. diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 9762c84b4198..a8f305b07f29 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -318,6 +318,7 @@ choice config DEBUG_MSM_UART1 bool "Kernel low-level debugging messages via MSM UART1" depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50 + select DEBUG_MSM_UART help Say Y here if you want the debug print routines to direct their output to the first serial port on MSM devices. @@ -325,6 +326,7 @@ choice config DEBUG_MSM_UART2 bool "Kernel low-level debugging messages via MSM UART2" depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50 + select DEBUG_MSM_UART help Say Y here if you want the debug print routines to direct their output to the second serial port on MSM devices. @@ -332,6 +334,7 @@ choice config DEBUG_MSM_UART3 bool "Kernel low-level debugging messages via MSM UART3" depends on ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50 + select DEBUG_MSM_UART help Say Y here if you want the debug print routines to direct their output to the third serial port on MSM devices. @@ -340,6 +343,7 @@ choice bool "Kernel low-level debugging messages via MSM 8660 UART" depends on ARCH_MSM8X60 select MSM_HAS_DEBUG_UART_HS + select DEBUG_MSM_UART help Say Y here if you want the debug print routines to direct their output to the serial port on MSM 8660 devices. @@ -348,10 +352,20 @@ choice bool "Kernel low-level debugging messages via MSM 8960 UART" depends on ARCH_MSM8960 select MSM_HAS_DEBUG_UART_HS + select DEBUG_MSM_UART help Say Y here if you want the debug print routines to direct their output to the serial port on MSM 8960 devices. + config DEBUG_MSM8974_UART + bool "Kernel low-level debugging messages via MSM 8974 UART" + depends on ARCH_MSM8974 + select MSM_HAS_DEBUG_UART_HS + select DEBUG_MSM_UART + help + Say Y here if you want the debug print routines to direct + their output to the serial port on MSM 8974 devices. + config DEBUG_MVEBU_UART bool "Kernel low-level debugging messages via MVEBU UART (old bootloaders)" depends on ARCH_MVEBU @@ -834,6 +848,20 @@ choice options; the platform specific options are deprecated and will be soon removed. + config DEBUG_LL_UART_EFM32 + bool "Kernel low-level debugging via efm32 UART" + depends on ARCH_EFM32 + help + Say Y here if you want the debug print routines to direct + their output to an UART or USART port on efm32 based + machines. Use the following addresses for DEBUG_UART_PHYS: + + 0x4000c000 | USART0 + 0x4000c400 | USART1 + 0x4000c800 | USART2 + 0x4000e000 | UART0 + 0x4000e400 | UART1 + config DEBUG_LL_UART_PL01X bool "Kernel low-level debugging via ARM Ltd PL01x Primecell UART" help @@ -880,11 +908,16 @@ config DEBUG_STI_UART bool depends on ARCH_STI +config DEBUG_MSM_UART + bool + depends on ARCH_MSM + config DEBUG_LL_INCLUDE string default "debug/8250.S" if DEBUG_LL_UART_8250 || DEBUG_UART_8250 default "debug/pl01x.S" if DEBUG_LL_UART_PL01X || DEBUG_UART_PL01X default "debug/exynos.S" if DEBUG_EXYNOS_UART + default "debug/efm32.S" if DEBUG_LL_UART_EFM32 default "debug/icedcc.S" if DEBUG_ICEDCC default "debug/imx.S" if DEBUG_IMX1_UART || \ DEBUG_IMX25_UART || \ @@ -895,11 +928,7 @@ config DEBUG_LL_INCLUDE DEBUG_IMX53_UART ||\ DEBUG_IMX6Q_UART || \ DEBUG_IMX6SL_UART - default "debug/msm.S" if DEBUG_MSM_UART1 || \ - DEBUG_MSM_UART2 || \ - DEBUG_MSM_UART3 || \ - DEBUG_MSM8660_UART || \ - DEBUG_MSM8960_UART + default "debug/msm.S" if DEBUG_MSM_UART default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART default "debug/sirf.S" if DEBUG_SIRFPRIMA2_UART1 || DEBUG_SIRFMARCO_UART1 default "debug/sti.S" if DEBUG_STI_UART @@ -951,6 +980,7 @@ config DEBUG_UART_PHYS default 0x20064000 if DEBUG_RK29_UART1 || DEBUG_RK3X_UART2 default 0x20068000 if DEBUG_RK29_UART2 || DEBUG_RK3X_UART3 default 0x20201000 if DEBUG_BCM2835 + default 0x4000e400 if DEBUG_LL_UART_EFM32 default 0x40090000 if ARCH_LPC32XX default 0x40100000 if DEBUG_PXA_UART1 default 0x42000000 if ARCH_GEMINI @@ -981,6 +1011,7 @@ config DEBUG_UART_PHYS default 0xfff36000 if DEBUG_HIGHBANK_UART default 0xfffff700 if ARCH_IOP33X depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ + DEBUG_LL_UART_EFM32 || \ DEBUG_UART_8250 || DEBUG_UART_PL01X config DEBUG_UART_VIRT diff --git a/arch/arm/Makefile b/arch/arm/Makefile index a37a50f575a2..25f45256f098 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,6 +16,7 @@ LDFLAGS := LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 +LDFLAGS_MODULE += --be8 endif OBJCOPYFLAGS :=-O binary -R .comment -S @@ -296,10 +297,15 @@ archprepare: # Convert bzImage to zImage bzImage: zImage -zImage Image xipImage bootpImage uImage: vmlinux +BOOT_TARGETS = zImage Image xipImage bootpImage uImage +INSTALL_TARGETS = zinstall uinstall install + +PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS) + +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ -zinstall uinstall install: vmlinux +$(INSTALL_TARGETS): $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ %.dtb: | scripts diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 84aa2caf07ed..ec2f8065f955 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -95,24 +95,24 @@ initrd: @test "$(INITRD)" != "" || \ (echo You must specify INITRD; exit -1) -install: $(obj)/Image - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +install: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/Image System.map "$(INSTALL_PATH)" -zinstall: $(obj)/zImage - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +zinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/zImage System.map "$(INSTALL_PATH)" -uinstall: $(obj)/uImage - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ +uinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/uImage System.map "$(INSTALL_PATH)" zi: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/zImage System.map "$(INSTALL_PATH)" i: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ $(obj)/Image System.map "$(INSTALL_PATH)" subdir- := bootp compressed dts diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 75189f13cf54..066b03480b63 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -135,6 +135,7 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 mrs r9, cpsr #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly @@ -699,9 +700,7 @@ __armv4_mmu_cache_on: mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x0030 -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables bl __common_mmu_cache_on mov r0, #0 mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs @@ -728,9 +727,7 @@ __armv7_mmu_cache_on: orr r0, r0, #1 << 22 @ U (v6 unaligned access model) @ (needed for ARM1176) #ifdef CONFIG_MMU -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index cc0f1fb61753..802720e3e8fd 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -41,6 +41,8 @@ dtb-$(CONFIG_ARCH_AT91) += sama5d33ek.dtb dtb-$(CONFIG_ARCH_AT91) += sama5d34ek.dtb dtb-$(CONFIG_ARCH_AT91) += sama5d35ek.dtb +dtb-$(CONFIG_ARCH_ATLAS6) += atlas6-evb.dtb + dtb-$(CONFIG_ARCH_BCM2835) += bcm2835-rpi-b.dtb dtb-$(CONFIG_ARCH_BCM) += bcm11351-brt.dtb \ bcm28155-ap.dtb @@ -183,6 +185,7 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \ am335x-evm.dtb \ am335x-evmsk.dtb \ am335x-bone.dtb \ + am335x-boneblack.dtb \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ am43x-epos-evm.dtb diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi new file mode 100644 index 000000000000..2f66deda9f5c --- /dev/null +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + model = "TI AM335x BeagleBone"; + compatible = "ti,am335x-bone", "ti,am33xx"; + + cpus { + cpu@0 { + cpu0-supply = <&dcdc2_reg>; + }; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256 MB */ + }; + + am33xx_pinmux: pinmux@44e10800 { + pinctrl-names = "default"; + pinctrl-0 = <&clkout2_pin>; + + user_leds_s0: user_leds_s0 { + pinctrl-single,pins = < + 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ + 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ + 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ + 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ + >; + }; + + i2c0_pins: pinmux_i2c0_pins { + pinctrl-single,pins = < + 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ + 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ + >; + }; + + uart0_pins: pinmux_uart0_pins { + pinctrl-single,pins = < + 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ + 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ + >; + }; + + clkout2_pin: pinmux_clkout2_pin { + pinctrl-single,pins = < + 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ + >; + }; + + cpsw_default: cpsw_default { + pinctrl-single,pins = < + /* Slave 1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ + 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ + 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ + 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ + 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ + 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ + 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ + 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ + 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ + 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ + 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ + 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ + 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ + >; + }; + + cpsw_sleep: cpsw_sleep { + pinctrl-single,pins = < + /* Slave 1 reset value */ + 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + + davinci_mdio_default: davinci_mdio_default { + pinctrl-single,pins = < + /* MDIO */ + 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ + 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ + >; + }; + + davinci_mdio_sleep: davinci_mdio_sleep { + pinctrl-single,pins = < + /* MDIO reset value */ + 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + }; + + ocp { + uart0: serial@44e09000 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + + status = "okay"; + }; + + musb: usb@47400000 { + status = "okay"; + + control@44e10000 { + status = "okay"; + }; + + usb-phy@47401300 { + status = "okay"; + }; + + usb-phy@47401b00 { + status = "okay"; + }; + + usb@47401000 { + status = "okay"; + }; + + usb@47401800 { + status = "okay"; + dr_mode = "host"; + }; + + dma-controller@07402000 { + status = "okay"; + }; + }; + + i2c0: i2c@44e0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + + status = "okay"; + clock-frequency = <400000>; + + tps: tps@24 { + reg = <0x24>; + }; + + }; + }; + + leds { + pinctrl-names = "default"; + pinctrl-0 = <&user_leds_s0>; + + compatible = "gpio-leds"; + + led@2 { + label = "beaglebone:green:heartbeat"; + gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + default-state = "off"; + }; + + led@3 { + label = "beaglebone:green:mmc0"; + gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "mmc0"; + default-state = "off"; + }; + + led@4 { + label = "beaglebone:green:usr2"; + gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led@5 { + label = "beaglebone:green:usr3"; + gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; +}; + +/include/ "tps65217.dtsi" + +&tps { + regulators { + dcdc1_reg: regulator@0 { + regulator-always-on; + }; + + dcdc2_reg: regulator@1 { + /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ + regulator-name = "vdd_mpu"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1325000>; + regulator-boot-on; + regulator-always-on; + }; + + dcdc3_reg: regulator@2 { + /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ + regulator-name = "vdd_core"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1150000>; + regulator-boot-on; + regulator-always-on; + }; + + ldo1_reg: regulator@3 { + regulator-always-on; + }; + + ldo2_reg: regulator@4 { + regulator-always-on; + }; + + ldo3_reg: regulator@5 { + regulator-always-on; + }; + + ldo4_reg: regulator@6 { + regulator-always-on; + }; + }; +}; + +&cpsw_emac0 { + phy_id = <&davinci_mdio>, <0>; + phy-mode = "mii"; +}; + +&cpsw_emac1 { + phy_id = <&davinci_mdio>, <1>; + phy-mode = "mii"; +}; + +&mac { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&cpsw_default>; + pinctrl-1 = <&cpsw_sleep>; + +}; + +&davinci_mdio { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&davinci_mdio_default>; + pinctrl-1 = <&davinci_mdio_sleep>; +}; diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts index d318987d44a1..7993c489982c 100644 --- a/arch/arm/boot/dts/am335x-bone.dts +++ b/arch/arm/boot/dts/am335x-bone.dts @@ -8,258 +8,4 @@ /dts-v1/; #include "am33xx.dtsi" - -/ { - model = "TI AM335x BeagleBone"; - compatible = "ti,am335x-bone", "ti,am33xx"; - - cpus { - cpu@0 { - cpu0-supply = <&dcdc2_reg>; - }; - }; - - memory { - device_type = "memory"; - reg = <0x80000000 0x10000000>; /* 256 MB */ - }; - - am33xx_pinmux: pinmux@44e10800 { - pinctrl-names = "default"; - pinctrl-0 = <&clkout2_pin>; - - user_leds_s0: user_leds_s0 { - pinctrl-single,pins = < - 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ - 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ - 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ - 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ - >; - }; - - i2c0_pins: pinmux_i2c0_pins { - pinctrl-single,pins = < - 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ - 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ - >; - }; - - uart0_pins: pinmux_uart0_pins { - pinctrl-single,pins = < - 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ - 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ - >; - }; - - clkout2_pin: pinmux_clkout2_pin { - pinctrl-single,pins = < - 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ - >; - }; - - cpsw_default: cpsw_default { - pinctrl-single,pins = < - /* Slave 1 */ - 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ - 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ - 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ - 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ - 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ - 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ - 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ - 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ - 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ - 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ - 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ - 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ - 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ - >; - }; - - cpsw_sleep: cpsw_sleep { - pinctrl-single,pins = < - /* Slave 1 reset value */ - 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - - davinci_mdio_default: davinci_mdio_default { - pinctrl-single,pins = < - /* MDIO */ - 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ - 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ - >; - }; - - davinci_mdio_sleep: davinci_mdio_sleep { - pinctrl-single,pins = < - /* MDIO reset value */ - 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - }; - - ocp { - uart0: serial@44e09000 { - pinctrl-names = "default"; - pinctrl-0 = <&uart0_pins>; - - status = "okay"; - }; - - musb: usb@47400000 { - status = "okay"; - - control@44e10000 { - status = "okay"; - }; - - usb-phy@47401300 { - status = "okay"; - }; - - usb-phy@47401b00 { - status = "okay"; - }; - - usb@47401000 { - status = "okay"; - }; - - usb@47401800 { - status = "okay"; - dr_mode = "host"; - }; - - dma-controller@07402000 { - status = "okay"; - }; - }; - - i2c0: i2c@44e0b000 { - pinctrl-names = "default"; - pinctrl-0 = <&i2c0_pins>; - - status = "okay"; - clock-frequency = <400000>; - - tps: tps@24 { - reg = <0x24>; - }; - - }; - }; - - leds { - pinctrl-names = "default"; - pinctrl-0 = <&user_leds_s0>; - - compatible = "gpio-leds"; - - led@2 { - label = "beaglebone:green:heartbeat"; - gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "heartbeat"; - default-state = "off"; - }; - - led@3 { - label = "beaglebone:green:mmc0"; - gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "mmc0"; - default-state = "off"; - }; - - led@4 { - label = "beaglebone:green:usr2"; - gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - - led@5 { - label = "beaglebone:green:usr3"; - gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - }; -}; - -/include/ "tps65217.dtsi" - -&tps { - regulators { - dcdc1_reg: regulator@0 { - regulator-always-on; - }; - - dcdc2_reg: regulator@1 { - /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ - regulator-name = "vdd_mpu"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1325000>; - regulator-boot-on; - regulator-always-on; - }; - - dcdc3_reg: regulator@2 { - /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ - regulator-name = "vdd_core"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1150000>; - regulator-boot-on; - regulator-always-on; - }; - - ldo1_reg: regulator@3 { - regulator-always-on; - }; - - ldo2_reg: regulator@4 { - regulator-always-on; - }; - - ldo3_reg: regulator@5 { - regulator-always-on; - }; - - ldo4_reg: regulator@6 { - regulator-always-on; - }; - }; -}; - -&cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; - phy-mode = "mii"; -}; - -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; - phy-mode = "mii"; -}; - -&mac { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&cpsw_default>; - pinctrl-1 = <&cpsw_sleep>; - -}; - -&davinci_mdio { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&davinci_mdio_default>; - pinctrl-1 = <&davinci_mdio_sleep>; -}; +#include "am335x-bone-common.dtsi" diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts new file mode 100644 index 000000000000..197cadf72d2c --- /dev/null +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +#include "am33xx.dtsi" +#include "am335x-bone-common.dtsi" + +&ldo3_reg { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; +}; diff --git a/arch/arm/boot/dts/armada-370-netgear-rn102.dts b/arch/arm/boot/dts/armada-370-netgear-rn102.dts index 05e4485a8225..8ac2ac1f69cc 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts @@ -27,6 +27,25 @@ }; soc { + ranges = <MBUS_ID(0xf0, 0x01) 0 0xd0000000 0x100000 + MBUS_ID(0x01, 0xe0) 0 0xfff00000 0x100000>; + + pcie-controller { + status = "okay"; + + /* Connected to Marvell SATA controller */ + pcie@1,0 { + /* Port 0, Lane 0 */ + status = "okay"; + }; + + /* Connected to FL1009 USB 3.0 controller */ + pcie@2,0 { + /* Port 1, Lane 0 */ + status = "okay"; + }; + }; + internal-regs { serial@12000 { clock-frequency = <200000000>; @@ -57,6 +76,11 @@ marvell,pins = "mpp56"; marvell,function = "gpio"; }; + + poweroff: poweroff { + marvell,pins = "mpp8"; + marvell,function = "gpio"; + }; }; mdio { @@ -89,22 +113,6 @@ pwm_polarity = <0>; }; }; - - pcie-controller { - status = "okay"; - - /* Connected to Marvell SATA controller */ - pcie@1,0 { - /* Port 0, Lane 0 */ - status = "okay"; - }; - - /* Connected to FL1009 USB 3.0 controller */ - pcie@2,0 { - /* Port 1, Lane 0 */ - status = "okay"; - }; - }; }; }; @@ -160,7 +168,7 @@ button@1 { label = "Power Button"; linux,code = <116>; /* KEY_POWER */ - gpios = <&gpio1 30 1>; + gpios = <&gpio1 30 0>; }; button@2 { @@ -176,4 +184,11 @@ }; }; + gpio_poweroff { + compatible = "gpio-poweroff"; + pinctrl-0 = <&poweroff>; + pinctrl-names = "default"; + gpios = <&gpio0 8 1>; + }; + }; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index def125c0eeaa..3058522f5aad 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -70,6 +70,8 @@ timer@20300 { compatible = "marvell,armada-xp-timer"; + clocks = <&coreclk 2>, <&refclk>; + clock-names = "nbclk", "fixed"; }; coreclk: mvebu-sar@18230 { @@ -169,4 +171,13 @@ }; }; }; + + clocks { + /* 25 MHz reference crystal */ + refclk: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000000>; + }; + }; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index cf78ac0b04b1..e74dc15efa9d 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -190,12 +190,12 @@ AT91_PIOA 8 AT91_PERIPH_A AT91_PINCTRL_NONE>; /* PA8 periph A */ }; - pinctrl_uart2_rts: uart2_rts-0 { + pinctrl_usart2_rts: usart2_rts-0 { atmel,pins = <AT91_PIOB 0 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PB0 periph B */ }; - pinctrl_uart2_cts: uart2_cts-0 { + pinctrl_usart2_cts: usart2_cts-0 { atmel,pins = <AT91_PIOB 1 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PB1 periph B */ }; @@ -556,6 +556,7 @@ interrupts = <12 IRQ_TYPE_LEVEL_HIGH 0>; dmas = <&dma0 1 AT91_DMA_CFG_PER_ID(0)>; dma-names = "rxtx"; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -567,6 +568,7 @@ interrupts = <26 IRQ_TYPE_LEVEL_HIGH 0>; dmas = <&dma1 1 AT91_DMA_CFG_PER_ID(0)>; dma-names = "rxtx"; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 8678e0c11119..6db4f81d4795 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -181,6 +181,8 @@ interrupts = <17>; fifosize = <128>; clocks = <&clks 13>; + sirf,uart-dma-rx-channel = <21>; + sirf,uart-dma-tx-channel = <2>; }; uart1: uart@b0060000 { @@ -199,6 +201,8 @@ interrupts = <19>; fifosize = <128>; clocks = <&clks 15>; + sirf,uart-dma-rx-channel = <6>; + sirf,uart-dma-tx-channel = <7>; }; usp0: usp@b0080000 { @@ -206,7 +210,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0080000 0x10000>; interrupts = <20>; + fifosize = <128>; clocks = <&clks 28>; + sirf,usp-dma-rx-channel = <17>; + sirf,usp-dma-tx-channel = <18>; }; usp1: usp@b0090000 { @@ -214,7 +221,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0090000 0x10000>; interrupts = <21>; + fifosize = <128>; clocks = <&clks 29>; + sirf,usp-dma-rx-channel = <14>; + sirf,usp-dma-tx-channel = <15>; }; dmac0: dma-controller@b00b0000 { @@ -237,6 +247,8 @@ compatible = "sirf,prima2-vip"; reg = <0xb00C0000 0x10000>; clocks = <&clks 31>; + interrupts = <14>; + sirf,vip-dma-rx-channel = <16>; }; spi0: spi@b00d0000 { diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 7d7cc777ff7b..bbac42a78ce5 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -96,6 +96,11 @@ <1 14 0xf08>, <1 11 0xf08>, <1 10 0xf08>; + /* Unfortunately we need this since some versions of U-Boot + * on Exynos don't set the CNTFRQ register, so we need the + * value from DT. + */ + clock-frequency = <24000000>; }; mct@101C0000 { diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index c037c223619a..b7a1c6d950b9 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -187,7 +187,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000e000 0x1000>; interrupts = <16>; - clocks = <&clks 53>, <&clks 53>; + clocks = <&clks 53>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -198,7 +198,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000f000 0x1000>; interrupts = <15>; - clocks = <&clks 52>, <&clks 52>; + clocks = <&clks 52>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -309,7 +309,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x10017000 0x1000>; interrupts = <6>; - clocks = <&clks 51>, <&clks 51>; + clocks = <&clks 51>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index a85abb424c34..54cee6517902 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -474,7 +474,7 @@ compatible = "fsl,imx51-pata", "fsl,imx27-pata"; reg = <0x83fe0000 0x4000>; interrupts = <70>; - clocks = <&clks 161>; + clocks = <&clks 172>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6q-pinfunc.h b/arch/arm/boot/dts/imx6q-pinfunc.h index c0e38a45e4bb..9bbe82bdee41 100644 --- a/arch/arm/boot/dts/imx6q-pinfunc.h +++ b/arch/arm/boot/dts/imx6q-pinfunc.h @@ -207,8 +207,8 @@ #define MX6QDL_PAD_EIM_D29__ECSPI4_SS0 0x0c8 0x3dc 0x824 0x2 0x1 #define MX6QDL_PAD_EIM_D29__UART2_RTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__UART2_CTS_B 0x0c8 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c4 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c4 0x3dc 0x924 0x4 0x1 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c8 0x3dc 0x000 0x4 0x0 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x0c8 0x3dc 0x000 0x5 0x0 #define MX6QDL_PAD_EIM_D29__IPU2_CSI1_VSYNC 0x0c8 0x3dc 0x8e4 0x6 0x0 #define MX6QDL_PAD_EIM_D29__IPU1_DI0_PIN14 0x0c8 0x3dc 0x000 0x7 0x0 diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts index ff1aea0ee043..72693a69f830 100644 --- a/arch/arm/boot/dts/integratorcp.dts +++ b/arch/arm/boot/dts/integratorcp.dts @@ -9,11 +9,6 @@ model = "ARM Integrator/CP"; compatible = "arm,integrator-cp"; - aliases { - arm,timer-primary = &timer2; - arm,timer-secondary = &timer1; - }; - chosen { bootargs = "root=/dev/ram0 console=ttyAMA0,38400n8 earlyprintk"; }; @@ -24,14 +19,18 @@ }; timer0: timer@13000000 { + /* TIMER0 runs @ 25MHz */ compatible = "arm,integrator-cp-timer"; + status = "disabled"; }; timer1: timer@13000100 { + /* TIMER1 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; timer2: timer@13000200 { + /* TIMER2 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; diff --git a/arch/arm/boot/dts/kirkwood.dtsi b/arch/arm/boot/dts/kirkwood.dtsi index cf7aeaf89e9c..1335b2e1bed4 100644 --- a/arch/arm/boot/dts/kirkwood.dtsi +++ b/arch/arm/boot/dts/kirkwood.dtsi @@ -13,6 +13,7 @@ cpu@0 { device_type = "cpu"; compatible = "marvell,feroceon"; + reg = <0>; clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>; clock-names = "cpu_clk", "ddrclk", "powersave"; }; @@ -167,7 +168,7 @@ xor@60900 { compatible = "marvell,orion-xor"; reg = <0x60900 0x100 - 0xd0B00 0x100>; + 0x60B00 0x100>; status = "okay"; clocks = <&gate_clk 16>; diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index afdb16417d4e..2816bf612672 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -11,7 +11,7 @@ / { model = "TI OMAP3 BeagleBoard xM"; - compatible = "ti,omap3-beagle-xm, ti,omap3-beagle", "ti,omap3"; + compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index bc48b114eae6..2326d11462a5 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -48,6 +48,15 @@ >; }; + mcbsp2_pins: pinmux_mcbsp2_pins { + pinctrl-single,pins = < + 0x10c (PIN_INPUT | MUX_MODE0) /* mcbsp2_fsx.mcbsp2_fsx */ + 0x10e (PIN_INPUT | MUX_MODE0) /* mcbsp2_clkx.mcbsp2_clkx */ + 0x110 (PIN_INPUT | MUX_MODE0) /* mcbsp2_dr.mcbsp2.dr */ + 0x112 (PIN_OUTPUT | MUX_MODE0) /* mcbsp2_dx.mcbsp2_dx */ + >; + }; + mmc1_pins: pinmux_mmc1_pins { pinctrl-single,pins = < 0x114 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc1_clk.sdmmc1_clk */ @@ -93,6 +102,11 @@ clock-frequency = <400000>; }; +&mcbsp2 { + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; +}; + &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 7d95cda1fae4..b41bd57f4328 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -108,7 +108,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; omap3_pmx_wkup: pinmux@0x48002a00 { @@ -117,7 +117,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; gpio1: gpio@48310000 { diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index faa95b5b242e..814ab67c8c29 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -107,6 +107,19 @@ */ clock-frequency = <19200000>; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 11 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -235,6 +248,33 @@ 0x1c (PIN_OUTPUT | MUX_MODE3) /* gpio_wk8 */ >; }; + + /* + * wl12xx GPIO outputs for WLAN_EN, BT_EN, FM_EN, BT_WAKEUP + * REVISIT: Are the pull-ups needed for GPIO 48 and 49? + */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x26 (PIN_OUTPUT | MUX_MODE3) /* gpmc_a19.gpio_43 */ + 0x2c (PIN_OUTPUT | MUX_MODE3) /* gpmc_a22.gpio_46 */ + 0x30 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a24.gpio_48 */ + 0x32 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a25.gpio_49 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x38 (PIN_INPUT | MUX_MODE3) /* gpmc_ncs2.gpio_52 */ + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE0) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -314,8 +354,12 @@ }; &mmc5 { - ti,non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; + cap-power-off-card; }; &emif1 { diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 7951b4ea500a..4f78380ecdb8 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -140,6 +140,19 @@ "DMic", "Digital Mic", "Digital Mic", "Digital Mic1 Bias"; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 22 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -295,6 +308,26 @@ 0xf0 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c4_sda */ >; }; + + /* wl12xx GPIO output for WLAN_EN */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x3c (PIN_OUTPUT | MUX_MODE3) /* gpmc_nwp.gpio_54 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE3) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -420,8 +453,12 @@ }; &mmc5 { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; - ti,non-removable; + cap-power-off-card; }; &emif1 { diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 07be2cd7b318..7cdea1bfea09 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -637,7 +637,7 @@ omap_dwc3@4a020000 { compatible = "ti,dwc3"; ti,hwmods = "usb_otg_ss"; - reg = <0x4a020000 0x1000>; + reg = <0x4a020000 0x10000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <1>; @@ -645,17 +645,18 @@ ranges; dwc3@4a030000 { compatible = "snps,dwc3"; - reg = <0x4a030000 0x1000>; + reg = <0x4a030000 0x10000>; interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>; usb-phy = <&usb2_phy>, <&usb3_phy>; tx-fifo-resize; }; }; - ocp2scp { + ocp2scp@4a080000 { compatible = "ti,omap-ocp2scp"; #address-cells = <1>; #size-cells = <1>; + reg = <0x4a080000 0x20>; ranges; ti,hwmods = "ocp2scp1"; usb2_phy: usb2phy@4a084000 { diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi index bbeb623fc2c6..27ed9f5144bc 100644 --- a/arch/arm/boot/dts/prima2.dtsi +++ b/arch/arm/boot/dts/prima2.dtsi @@ -171,7 +171,8 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges = <0xb0000000 0xb0000000 0x180000>; + ranges = <0xb0000000 0xb0000000 0x180000>, + <0x56000000 0x56000000 0x1b00000>; timer@b0020000 { compatible = "sirf,prima2-tick"; @@ -196,25 +197,32 @@ uart0: uart@b0050000 { cell-index = <0>; compatible = "sirf,prima2-uart"; - reg = <0xb0050000 0x10000>; + reg = <0xb0050000 0x1000>; interrupts = <17>; + fifosize = <128>; clocks = <&clks 13>; + sirf,uart-dma-rx-channel = <21>; + sirf,uart-dma-tx-channel = <2>; }; uart1: uart@b0060000 { cell-index = <1>; compatible = "sirf,prima2-uart"; - reg = <0xb0060000 0x10000>; + reg = <0xb0060000 0x1000>; interrupts = <18>; + fifosize = <32>; clocks = <&clks 14>; }; uart2: uart@b0070000 { cell-index = <2>; compatible = "sirf,prima2-uart"; - reg = <0xb0070000 0x10000>; + reg = <0xb0070000 0x1000>; interrupts = <19>; + fifosize = <128>; clocks = <&clks 15>; + sirf,uart-dma-rx-channel = <6>; + sirf,uart-dma-tx-channel = <7>; }; usp0: usp@b0080000 { @@ -222,7 +230,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0080000 0x10000>; interrupts = <20>; + fifosize = <128>; clocks = <&clks 28>; + sirf,usp-dma-rx-channel = <17>; + sirf,usp-dma-tx-channel = <18>; }; usp1: usp@b0090000 { @@ -230,7 +241,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb0090000 0x10000>; interrupts = <21>; + fifosize = <128>; clocks = <&clks 29>; + sirf,usp-dma-rx-channel = <14>; + sirf,usp-dma-tx-channel = <15>; }; usp2: usp@b00a0000 { @@ -238,7 +252,10 @@ compatible = "sirf,prima2-usp"; reg = <0xb00a0000 0x10000>; interrupts = <22>; + fifosize = <128>; clocks = <&clks 30>; + sirf,usp-dma-rx-channel = <10>; + sirf,usp-dma-tx-channel = <11>; }; dmac0: dma-controller@b00b0000 { @@ -261,6 +278,8 @@ compatible = "sirf,prima2-vip"; reg = <0xb00C0000 0x10000>; clocks = <&clks 31>; + interrupts = <14>; + sirf,vip-dma-rx-channel = <16>; }; spi0: spi@b00d0000 { diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index 6c26caa880f2..658fcc537576 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -193,7 +193,7 @@ }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee100000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 165 4>; @@ -202,7 +202,7 @@ }; sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee120000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 166 4>; @@ -211,7 +211,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a73a4-sdhi"; + compatible = "renesas,sdhi-r8a73a4"; reg = <0 0xee140000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 167 4>; diff --git a/arch/arm/boot/dts/r8a7778.dtsi b/arch/arm/boot/dts/r8a7778.dtsi index 45ac404ab6d8..3577aba82583 100644 --- a/arch/arm/boot/dts/r8a7778.dtsi +++ b/arch/arm/boot/dts/r8a7778.dtsi @@ -96,6 +96,5 @@ pfc: pfc@fffc0000 { compatible = "renesas,pfc-r8a7778"; reg = <0xfffc000 0x118>; - #gpio-range-cells = <3>; }; }; diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 23a62447359c..ebbe507fcbfa 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -188,7 +188,6 @@ pfc: pfc@fffc0000 { compatible = "renesas,pfc-r8a7779"; reg = <0xfffc0000 0x23c>; - #gpio-range-cells = <3>; }; thermal@ffc48000 { diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 3b879e7c697c..413b4c29e782 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -148,11 +148,10 @@ pfc: pfc@e6060000 { compatible = "renesas,pfc-r8a7790"; reg = <0 0xe6060000 0 0x250>; - #gpio-range-cells = <3>; }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee100000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 165 4>; @@ -161,7 +160,7 @@ }; sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee120000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 166 4>; @@ -170,7 +169,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee140000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 167 4>; @@ -179,7 +178,7 @@ }; sdhi3: sdhi@ee160000 { - compatible = "renesas,r8a7790-sdhi"; + compatible = "renesas,sdhi-r8a7790"; reg = <0 0xee160000 0 0x100>; interrupt-parent = <&gic>; interrupts = <0 168 4>; diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi index ba59a5875a10..3955c7606a6f 100644 --- a/arch/arm/boot/dts/sh73a0.dtsi +++ b/arch/arm/boot/dts/sh73a0.dtsi @@ -196,7 +196,7 @@ }; sdhi0: sdhi@ee100000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee100000 0x100>; interrupt-parent = <&gic>; interrupts = <0 83 4 @@ -208,7 +208,7 @@ /* SDHI1 and SDHI2 have no CD pins, no need for CD IRQ */ sdhi1: sdhi@ee120000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee120000 0x100>; interrupt-parent = <&gic>; interrupts = <0 88 4 @@ -219,7 +219,7 @@ }; sdhi2: sdhi@ee140000 { - compatible = "renesas,r8a7740-sdhi"; + compatible = "renesas,sdhi-r8a7740"; reg = <0xee140000 0x100>; interrupt-parent = <&gic>; interrupts = <0 104 4 diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 06ea7d42ce8e..2a45092a40e3 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -20,6 +20,20 @@ # $4 - default install path (blank if root directory) # +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 8c60f473e976..5c8584c4944d 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -17,3 +17,5 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_TI_PRIV_EDMA) += edma.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 000000000000..5774b6ea7ad5 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,822 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't take any interrupts anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* Swap the physical CPUs in the logical map for this logical CPU. */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct *bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. + */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + spin_lock(&t->lock); + if (t->completer) { + spin_unlock(&t->lock); + return -EBUSY; + } + t->completer = completer; + t->completer_cookie = completer_cookie; + t->wanted_cluster = new_cluster_id; + spin_unlock(&t->lock); + wake_up(&t->wq); + return 0; +} +EXPORT_SYMBOL_GPL(bL_switch_request_cb); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) + cpu_up(i); +} + +static int bL_switcher_halve_cpus(void) +{ + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + + /* First pass to validate what we have */ + mask = 0; + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); + } + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); + return -EINVAL; + } + + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); + /* + * Let's remember the last match to create "odd" + * pairings on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. + */ + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; + } + + ret = cpu_down(i); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +/* Determine the logical CPU a given physical CPU is grouped on. */ +int bL_switcher_get_logical_index(u32 mpidr) +{ + int cpu; + + if (!bL_switcher_active) + return -EUNATCH; + + mpidr &= MPIDR_HWID_BITMASK; + for_each_online_cpu(cpu) { + int pairing = bL_switcher_cpu_pairing[cpu]; + if (pairing == -1) + continue; + if ((mpidr == cpu_logical_map(cpu)) || + (mpidr == cpu_logical_map(pairing))) + return cpu; + } + return -EINVAL; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr()); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + lock_device_hotplug(); + if (bL_switcher_active) { + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + spin_lock_init(&t->lock); + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warn("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + lock_device_hotplug(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation on those CPUs we've removed + * while the switcher is active. + * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (bL_switcher_active) { + int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu]; + switch (action & 0xf) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (pairing == -1) + return NOTIFY_BAD; + } + } + return NOTIFY_DONE; +} + +static bool no_bL_switcher; +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (MAX_NR_CLUSTERS != 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + + cpu_notifier(bL_switcher_hotplug_callback, 0); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 000000000000..3f47f1203c6b --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '9' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 117f955a2a06..8e1a0245907f 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -269,6 +269,11 @@ static const struct edmacc_param dummy_paramset = { .ccnt = 1, }; +static const struct of_device_id edma_of_ids[] = { + { .compatible = "ti,edma3", }, + {} +}; + /*****************************************************************************/ static void map_dmach_queue(unsigned ctlr, unsigned ch_no, @@ -560,14 +565,38 @@ static int reserve_contiguous_slots(int ctlr, unsigned int id, static int prepare_unused_channel_list(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); - int i, ctlr; + int i, count, ctlr; + struct of_phandle_args dma_spec; + if (dev->of_node) { + count = of_property_count_strings(dev->of_node, "dma-names"); + if (count < 0) + return 0; + for (i = 0; i < count; i++) { + if (of_parse_phandle_with_args(dev->of_node, "dmas", + "#dma-cells", i, + &dma_spec)) + continue; + + if (!of_match_node(edma_of_ids, dma_spec.np)) { + of_node_put(dma_spec.np); + continue; + } + + clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), + edma_cc[0]->edma_unused); + of_node_put(dma_spec.np); + } + return 0; + } + + /* For non-OF case */ for (i = 0; i < pdev->num_resources; i++) { if ((pdev->resource[i].flags & IORESOURCE_DMA) && (int)pdev->resource[i].start >= 0) { ctlr = EDMA_CTLR(pdev->resource[i].start); clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), - edma_cc[ctlr]->edma_unused); + edma_cc[ctlr]->edma_unused); } } @@ -1762,11 +1791,6 @@ static int edma_probe(struct platform_device *pdev) return 0; } -static const struct of_device_id edma_of_ids[] = { - { .compatible = "ti,edma3", }, - {} -}; - static struct platform_driver edma_driver = { .driver = { .name = "edma", diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 370236dd1a03..26020a03f659 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __cpuc_flush_dcache_area((void *)poke, 8); + outer_clean_range(__pa(poke), __pa(poke + 2)); +} + static const struct mcpm_platform_ops *platform_ops; int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) @@ -51,7 +63,8 @@ void mcpm_cpu_power_down(void) { phys_reset_t phys_reset; - BUG_ON(!platform_ops); + if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down)) + return; BUG_ON(!irqs_disabled()); /* @@ -89,11 +102,27 @@ void mcpm_cpu_power_down(void) BUG(); } +int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster) +{ + int ret; + + if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down_finish)) + return -EUNATCH; + + ret = platform_ops->power_down_finish(cpu, cluster); + if (ret) + pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n", + __func__, cpu, cluster, ret); + + return ret; +} + void mcpm_cpu_suspend(u64 expected_residency) { phys_reset_t phys_reset; - BUG_ON(!platform_ops); + if (WARN_ON_ONCE(!platform_ops || !platform_ops->suspend)) + return; BUG_ON(!irqs_disabled()); /* Very similar to mcpm_cpu_power_down() */ diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 39c96df3477a..e02db4b81a66 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -15,6 +15,7 @@ #include <linux/linkage.h> #include <asm/mcpm.h> +#include <asm/assembler.h> #include "vlock.h" @@ -47,6 +48,7 @@ ENTRY(mcpm_entry_point) + ARM_BE8(setend be) THUMB( adr r12, BSYM(1f) ) THUMB( bx r12 ) THUMB( .thumb ) @@ -71,12 +73,19 @@ ENTRY(mcpm_entry_point) * position independent way. */ adr r5, 3f - ldmia r5, {r6, r7, r8, r11} + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes add r6, r5, r6 @ r6 = mcpm_entry_vectors ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys add r8, r5, r8 @ r8 = mcpm_sync add r11, r5, r11 @ r11 = first_man_locks + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + mov r0, #MCPM_SYNC_CLUSTER_SIZE mla r8, r0, r10, r8 @ r8 = sync cluster base @@ -195,7 +204,8 @@ mcpm_entry_gated: .align 2 -3: .word mcpm_entry_vectors - . +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b .word mcpm_power_up_setup_phys - 3b .word mcpm_sync - 3b .word first_man_locks - 3b @@ -214,6 +224,10 @@ first_man_locks: ENTRY(mcpm_entry_vectors) .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + .type mcpm_power_up_setup_phys, #object ENTRY(mcpm_power_up_setup_phys) .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 1bc34c7567fd..177251a4dd9a 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -19,14 +19,23 @@ #include <asm/smp.h> #include <asm/smp_plat.h> +static void cpu_to_pcpu(unsigned int cpu, + unsigned int *pcpu, unsigned int *pcluster) +{ + unsigned int mpidr; + + mpidr = cpu_logical_map(cpu); + *pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + *pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); +} + static int mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned int mpidr, pcpu, pcluster, ret; + unsigned int pcpu, pcluster, ret; extern void secondary_startup(void); - mpidr = cpu_logical_map(cpu); - pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); - pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpu_to_pcpu(cpu, &pcpu, &pcluster); + pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n", __func__, cpu, pcpu, pcluster); @@ -47,6 +56,15 @@ static void mcpm_secondary_init(unsigned int cpu) #ifdef CONFIG_HOTPLUG_CPU +static int mcpm_cpu_kill(unsigned int cpu) +{ + unsigned int pcpu, pcluster; + + cpu_to_pcpu(cpu, &pcpu, &pcluster); + + return !mcpm_cpu_power_down_finish(pcpu, pcluster); +} + static int mcpm_cpu_disable(unsigned int cpu) { /* @@ -73,6 +91,7 @@ static struct smp_operations __initdata mcpm_smp_ops = { .smp_boot_secondary = mcpm_boot_secondary, .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU + .cpu_kill = mcpm_cpu_kill, .cpu_disable = mcpm_cpu_disable, .cpu_die = mcpm_cpu_die, #endif diff --git a/arch/arm/common/sharpsl_param.c b/arch/arm/common/sharpsl_param.c index d56c932580eb..025f6ce38596 100644 --- a/arch/arm/common/sharpsl_param.c +++ b/arch/arm/common/sharpsl_param.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/string.h> #include <asm/mach/sharpsl_param.h> +#include <asm/memory.h> /* * Certain hardware parameters determined at the time of device manufacture, @@ -25,8 +26,10 @@ */ #ifdef CONFIG_ARCH_SA1100 #define PARAM_BASE 0xe8ffc000 +#define param_start(x) (void *)(x) #else #define PARAM_BASE 0xa0000a00 +#define param_start(x) __va(x) #endif #define MAGIC_CHG(a,b,c,d) ( ( d << 24 ) | ( c << 16 ) | ( b << 8 ) | a ) @@ -41,7 +44,7 @@ EXPORT_SYMBOL(sharpsl_param); void sharpsl_save_param(void) { - memcpy(&sharpsl_param, (void *)PARAM_BASE, sizeof(struct sharpsl_param_info)); + memcpy(&sharpsl_param, param_start(PARAM_BASE), sizeof(struct sharpsl_param_info)); if (sharpsl_param.comadj_keyword != COMADJ_MAGIC) sharpsl_param.comadj=-1; diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index e901d0f3e0bb..ce922d0ea7aa 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -175,7 +175,7 @@ static struct clock_event_device sp804_clockevent = { static struct irqaction sp804_timer_irq = { .name = "timer", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER | IRQF_IRQPOLL, .handler = sp804_timer_interrupt, .dev_id = &sp804_clockevent, }; diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig index 317960f12488..0142ec37e0be 100644 --- a/arch/arm/configs/h3600_defconfig +++ b/arch/arm/configs/h3600_defconfig @@ -1,5 +1,6 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_MODULES=y @@ -11,11 +12,11 @@ CONFIG_ARCH_SA1100=y CONFIG_SA1100_H3600=y CONFIG_PCCARD=y CONFIG_PCMCIA_SA1100=y +CONFIG_PREEMPT=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 # CONFIG_CPU_FREQ_STAT is not set CONFIG_FPE_NWFPE=y -CONFIG_PM=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y @@ -24,13 +25,10 @@ CONFIG_IRDA=m CONFIG_IRLAN=m CONFIG_IRNET=m CONFIG_IRCOMM=m -CONFIG_SA1100_FIR=m # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -41,19 +39,15 @@ CONFIG_MTD_SA1100=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_MISC_DEVICES is not set CONFIG_IDE=y CONFIG_BLK_DEV_IDECS=y CONFIG_NETDEVICES=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set -# CONFIG_WLAN is not set -CONFIG_NET_PCMCIA=y CONFIG_PCMCIA_PCNET=y CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m +# CONFIG_WLAN is not set # CONFIG_KEYBOARD_ATKBD is not set CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set @@ -64,8 +58,6 @@ CONFIG_SERIAL_SA1100_CONSOLE=y # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FB_SA1100=y -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_MSDOS_FS=m @@ -74,6 +66,4 @@ CONFIG_JFFS2_FS=y CONFIG_CRAMFS=m CONFIG_NFS_FS=y CONFIG_NFSD=m -CONFIG_SMB_FS=m CONFIG_NLS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6e572c64cf5a..119fc378fc52 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -36,6 +36,7 @@ CONFIG_ARCH_TEGRA_114_SOC=y CONFIG_TEGRA_PCI=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_ARCH_U8500=y +CONFIG_MACH_HREFV60=y CONFIG_MACH_SNOWBALL=y CONFIG_MACH_UX500_DT=y CONFIG_ARCH_VEXPRESS=y @@ -46,6 +47,7 @@ CONFIG_ARCH_ZYNQ=y CONFIG_SMP=y CONFIG_HIGHPTE=y CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y @@ -133,6 +135,7 @@ CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_ESDHC_IMX=y CONFIG_MMC_SDHCI_TEGRA=y CONFIG_MMC_SDHCI_SPEAR=y CONFIG_MMC_OMAP=y diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore new file mode 100644 index 000000000000..6231d36b3635 --- /dev/null +++ b/arch/arm/crypto/.gitignore @@ -0,0 +1 @@ +aesbs-core.S diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index a2c83851bc90..81cda39860c5 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -3,7 +3,17 @@ # obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o +obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o -aes-arm-y := aes-armv4.o aes_glue.o -sha1-arm-y := sha1-armv4-large.o sha1_glue.o +aes-arm-y := aes-armv4.o aes_glue.o +aes-arm-bs-y := aesbs-core.o aesbs-glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o + +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $(<) > $(@) + +$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 19d6cd6f29f9..3a14ea8fe97e 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -148,7 +148,7 @@ AES_Te: @ const AES_KEY *key) { .align 5 ENTRY(AES_encrypt) - sub r3,pc,#8 @ AES_encrypt + adr r3,AES_encrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -381,7 +381,7 @@ _armv4_AES_encrypt: .align 5 ENTRY(private_AES_set_encrypt_key) _armv4_AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key + adr r3,_armv4_AES_set_encrypt_key teq r0,#0 moveq r0,#-1 beq .Labrt @@ -843,7 +843,7 @@ AES_Td: @ const AES_KEY *key) { .align 5 ENTRY(AES_decrypt) - sub r3,pc,#8 @ AES_decrypt + adr r3,AES_decrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877ead6a..3003fa1f6fb4 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ -6,22 +6,12 @@ #include <linux/crypto.h> #include <crypto/aes.h> -#define AES_MAXNR 14 +#include "aes_glue.h" -typedef struct { - unsigned int rd_key[4 *(AES_MAXNR + 1)]; - int rounds; -} AES_KEY; - -struct AES_CTX { - AES_KEY enc_key; - AES_KEY dec_key; -}; - -asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); -asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +EXPORT_SYMBOL(AES_encrypt); +EXPORT_SYMBOL(AES_decrypt); +EXPORT_SYMBOL(private_AES_set_encrypt_key); +EXPORT_SYMBOL(private_AES_set_decrypt_key); static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { @@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, + .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt } diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h new file mode 100644 index 000000000000..cca3e51eb606 --- /dev/null +++ b/arch/arm/crypto/aes_glue.h @@ -0,0 +1,19 @@ + +#define AES_MAXNR 14 + +struct AES_KEY { + unsigned int rd_key[4 * (AES_MAXNR + 1)]; + int rounds; +}; + +struct AES_CTX { + struct AES_KEY enc_key; + struct AES_KEY dec_key; +}; + +asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); +asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped new file mode 100644 index 000000000000..64205d453260 --- /dev/null +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -0,0 +1,2544 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ <appro@openssl.org> + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. +@ +@ <ard.biesheuvel@linaro.org> + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>" +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! + mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c new file mode 100644 index 000000000000..4522366da759 --- /dev/null +++ b/arch/arm/crypto/aesbs-glue.c @@ -0,0 +1,434 @@ +/* + * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +#include "aes_glue.h" + +#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) + +struct BS_KEY { + struct AES_KEY rk; + int converted; + u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; +} __aligned(8); + +asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); +asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); + +asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 iv[]); + +asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, + struct BS_KEY *key, u8 const iv[]); + +asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +struct aesbs_cbc_ctx { + struct AES_KEY enc; + struct BS_KEY dec; +}; + +struct aesbs_ctr_ctx { + struct BS_KEY enc; +}; + +struct aesbs_xts_ctx { + struct BS_KEY enc; + struct BS_KEY dec; + struct AES_KEY twkey; +}; + +static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + ctx->dec.converted = 0; + return 0; +} + +static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->enc.converted = 0; + return 0; +} + +static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 4; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc.rk; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); + ctx->enc.converted = ctx->dec.converted = 0; + return 0; +} + +static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr; + + if (walk.dst.virt.addr == walk.src.virt.addr) { + u8 *iv = walk.iv; + + do { + crypto_xor(src, iv, AES_BLOCK_SIZE); + AES_encrypt(src, src, &ctx->enc); + iv = src; + src += AES_BLOCK_SIZE; + } while (--blocks); + memcpy(walk.iv, iv, AES_BLOCK_SIZE); + } else { + u8 *dst = walk.dst.virt.addr; + + do { + crypto_xor(walk.iv, src, AES_BLOCK_SIZE); + AES_encrypt(walk.iv, dst, &ctx->enc); + memcpy(walk.iv, dst, AES_BLOCK_SIZE); + src += AES_BLOCK_SIZE; + dst += AES_BLOCK_SIZE; + } while (--blocks); + } + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) { + kernel_neon_begin(); + bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + u8 bk[2][AES_BLOCK_SIZE]; + u8 *iv = walk.iv; + + do { + if (walk.dst.virt.addr == walk.src.virt.addr) + memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE); + + AES_decrypt(src, dst, &ctx->dec.rk); + crypto_xor(dst, iv, AES_BLOCK_SIZE); + + if (walk.dst.virt.addr == walk.src.virt.addr) + iv = bk[blocks & 1]; + else + iv = src; + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + } while (--blocks); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static void inc_be128_ctr(__be32 ctr[], u32 addend) +{ + int i; + + for (i = 3; i >= 0; i--, addend = 1) { + u32 n = be32_to_cpu(ctr[i]) + addend; + + ctr[i] = cpu_to_be32(n); + if (n >= addend) + break; + } +} + +static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u32 blocks; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + __be32 *ctr = (__be32 *)walk.iv; + u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); + + /* avoid 32 bit counter overflow in the NEON code */ + if (unlikely(headroom < blocks)) { + blocks = headroom + 1; + tail = walk.nbytes - blocks * AES_BLOCK_SIZE; + } + kernel_neon_begin(); + bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, + walk.dst.virt.addr, blocks, + &ctx->enc, walk.iv); + kernel_neon_end(); + inc_be128_ctr(ctr, blocks); + + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE) + break; + + err = blkcipher_walk_done(desc, &walk, tail); + } + if (walk.nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 ks[AES_BLOCK_SIZE]; + + AES_encrypt(walk.iv, ks, &ctx->enc.rk); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, ks, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->enc, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static struct crypto_alg aesbs_algs[] = { { + .cra_name = "__cbc-aes-neonbs", + .cra_driver_name = "__driver-cbc-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_set_key, + .encrypt = aesbs_cbc_encrypt, + .decrypt = aesbs_cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-neonbs", + .cra_driver_name = "__driver-ctr-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_ctr_set_key, + .encrypt = aesbs_ctr_encrypt, + .decrypt = aesbs_ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-neonbs", + .cra_driver_name = "__driver-xts-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_set_key, + .encrypt = aesbs_xts_encrypt, + .decrypt = aesbs_xts_decrypt, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aesbs_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +static void __exit aesbs_mod_exit(void) +{ + crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +module_init(aesbs_mod_init); +module_exit(aesbs_mod_exit); + +MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl new file mode 100644 index 000000000000..f3d96d932573 --- /dev/null +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -0,0 +1,2467 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Specific modes and adaptation for Linux kernel by Ard Biesheuvel +# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is +# granted. +# ==================================================================== + +# Bit-sliced AES for ARM NEON +# +# February 2012. +# +# This implementation is direct adaptation of bsaes-x86_64 module for +# ARM NEON. Except that this module is endian-neutral [in sense that +# it can be compiled for either endianness] by courtesy of vld1.8's +# neutrality. Initial version doesn't implement interface to OpenSSL, +# only low-level primitives and unsupported entry points, just enough +# to collect performance results, which for Cortex-A8 core are: +# +# encrypt 19.5 cycles per byte processed with 128-bit key +# decrypt 22.1 cycles per byte processed with 128-bit key +# key conv. 440 cycles per 128-bit key/0.18 of 8x block +# +# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +# which is [much] worse than anticipated (for further details see +# http://www.openssl.org/~appro/Snapdragon-S4.html). +# +# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +# manages in 20.0 cycles]. +# +# When comparing to x86_64 results keep in mind that NEON unit is +# [mostly] single-issue and thus can't [fully] benefit from +# instruction-level parallelism. And when comparing to aes-armv4 +# results keep in mind key schedule conversion overhead (see +# bsaes-x86_64.pl for further details)... +# +# <appro@openssl.org> + +# April-August 2013 +# +# Add CBC, CTR and XTS subroutines, adapt for kernel use. +# +# <ard.biesheuvel@linaro.org> + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); +my @XMM=map("q$_",(0..15)); + +{ +my ($key,$rounds,$const)=("r4","r5","r6"); + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[2], @b[2], @b[1] + veor @b[5], @b[5], @b[6] + veor @b[3], @b[3], @b[0] + veor @b[6], @b[6], @b[2] + veor @b[5], @b[5], @b[0] + + veor @b[6], @b[6], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[4], @b[4], @b[5] + + veor @b[2], @b[2], @b[7] + veor @b[3], @b[3], @b[1] + veor @b[1], @b[1], @b[5] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] + veor @b[4], @b[4], @b[6] + veor @b[2], @b[2], @b[0] + veor @b[6], @b[6], @b[1] + + veor @b[1], @b[1], @b[5] + veor @b[5], @b[5], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[2], @b[2], @b[5] + + veor @b[4], @b[4], @b[7] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse (with twist) +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + veor @b[1], @b[1], @b[7] + veor @b[4], @b[4], @b[7] + + veor @b[7], @b[7], @b[5] + veor @b[1], @b[1], @b[3] + veor @b[2], @b[2], @b[5] + veor @b[3], @b[3], @b[7] + + veor @b[6], @b[6], @b[1] + veor @b[2], @b[2], @b[0] + veor @b[5], @b[5], @b[3] + veor @b[4], @b[4], @b[6] + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + veor @b[1], @b[1], @b[5] + veor @b[2], @b[2], @b[7] + + veor @b[3], @b[3], @b[1] + veor @b[4], @b[4], @b[5] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[5], @b[5], @b[0] + veor @b[3], @b[3], @b[7] + veor @b[6], @b[6], @b[2] + veor @b[2], @b[2], @b[1] + veor @b[6], @b[6], @b[3] + + veor @b[3], @b[3], @b[0] + veor @b[5], @b[5], @b[6] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $t1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $t1, $t0 + veor $x0, $x0, $t1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $x1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $x1, $x0 + veor $x0, $x0, $t0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + veor $t1, $y2, $y3 + vand $t0, $t0, $x0 + vand $t1, $t1, $x2 + veor $x0, $x0, $x1 + veor $x2, $x2, $x3 + vand $x1, $x1, $y0 + vand $x3, $x3, $y2 + vand $x0, $x0, $y1 + vand $x2, $x2, $y3 + veor $x1, $x1, $x0 + veor $x2, $x2, $x3 + veor $x0, $x0, $t0 + veor $x3, $x3, $t1 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + veor @t[0], @x[0], @x[2] + veor @t[1], @x[1], @x[3] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @x[0], @x[0], @t[0] + veor @x[2], @x[2], @t[0] + veor @x[1], @x[1], @t[1] + veor @x[3], @x[3], @t[1] + + veor @t[0], @x[4], @x[6] + veor @t[1], @x[5], @x[7] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @x[4], @x[4], @t[0] + veor @x[6], @x[6], @t[0] + veor @x[5], @x[5], @t[1] + veor @x[7], @x[7], @t[1] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + veor @t[3], @x[4], @x[6] + veor @t[2], @x[5], @x[7] + veor @t[1], @x[1], @x[3] + veor @s[1], @x[7], @x[6] + vmov @t[0], @t[2] + veor @s[0], @x[0], @x[2] + + vorr @t[2], @t[2], @t[1] + veor @s[3], @t[3], @t[0] + vand @s[2], @t[3], @s[0] + vorr @t[3], @t[3], @s[0] + veor @s[0], @s[0], @t[1] + vand @t[0], @t[0], @t[1] + veor @t[1], @x[3], @x[2] + vand @s[3], @s[3], @s[0] + vand @s[1], @s[1], @t[1] + veor @t[1], @x[4], @x[5] + veor @s[0], @x[1], @x[0] + veor @t[3], @t[3], @s[1] + veor @t[2], @t[2], @s[1] + vand @s[1], @t[1], @s[0] + vorr @t[1], @t[1], @s[0] + veor @t[3], @t[3], @s[3] + veor @t[0], @t[0], @s[1] + veor @t[2], @t[2], @s[2] + veor @t[1], @t[1], @s[3] + veor @t[0], @t[0], @s[2] + vand @s[0], @x[7], @x[3] + veor @t[1], @t[1], @s[2] + vand @s[1], @x[6], @x[2] + vand @s[2], @x[5], @x[1] + vorr @s[3], @x[4], @x[0] + veor @t[3], @t[3], @s[0] + veor @t[1], @t[1], @s[2] + veor @t[0], @t[0], @s[3] + veor @t[2], @t[2], @s[1] + + @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + @ new smaller inversion + + vand @s[2], @t[3], @t[1] + vmov @s[0], @t[0] + + veor @s[1], @t[2], @s[2] + veor @s[3], @t[0], @s[2] + veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] + + vbsl @s[1], @t[1], @t[0] + vbsl @s[3], @t[3], @t[2] + veor @t[3], @t[3], @t[2] + + vbsl @s[0], @s[1], @s[2] + vbsl @t[0], @s[2], @s[1] + + vand @s[2], @s[0], @s[3] + veor @t[1], @t[1], @t[0] + + veor @s[2], @s[2], @t[3] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my @t=@_[8..11]; +my $mask=pop; +$code.=<<___; + vldmia $key!, {@t[0]-@t[3]} + veor @t[0], @t[0], @x[0] + veor @t[1], @t[1], @x[1] + vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` + vldmia $key!, {@t[0]} + veor @t[2], @t[2], @x[2] + vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` + vldmia $key!, {@t[1]} + veor @t[3], @t[3], @x[3] + vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` + vldmia $key!, {@t[2]} + vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` + vldmia $key!, {@t[3]} + veor @t[0], @t[0], @x[4] + veor @t[1], @t[1], @x[5] + vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` + veor @t[2], @t[2], @x[6] + vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` + veor @t[3], @t[3], @x[7] + vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` + vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) + vext.8 @t[2], @x[2], @x[2], #12 + veor @x[1], @x[1], @t[1] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[2], @x[2], @t[2] + vext.8 @t[4], @x[4], @x[4], #12 + veor @x[3], @x[3], @t[3] + vext.8 @t[5], @x[5], @x[5], #12 + veor @x[4], @x[4], @t[4] + vext.8 @t[6], @x[6], @x[6], #12 + veor @x[5], @x[5], @t[5] + vext.8 @t[7], @x[7], @x[7], #12 + veor @x[6], @x[6], @t[6] + + veor @t[1], @t[1], @x[0] + veor @x[7], @x[7], @t[7] + vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor @t[2], @t[2], @x[1] + veor @t[0], @t[0], @x[7] + veor @t[1], @t[1], @x[7] + vext.8 @x[1], @x[1], @x[1], #8 + veor @t[5], @t[5], @x[4] + veor @x[0], @x[0], @t[0] + veor @t[6], @t[6], @x[5] + veor @x[1], @x[1], @t[1] + vext.8 @t[0], @x[4], @x[4], #8 + veor @t[4], @t[4], @x[3] + vext.8 @t[1], @x[5], @x[5], #8 + veor @t[7], @t[7], @x[6] + vext.8 @x[4], @x[3], @x[3], #8 + veor @t[3], @t[3], @x[2] + vext.8 @x[5], @x[7], @x[7], #8 + veor @t[4], @t[4], @x[7] + vext.8 @x[3], @x[6], @x[6], #8 + veor @t[3], @t[3], @x[7] + vext.8 @x[6], @x[2], @x[2], #8 + veor @x[7], @t[1], @t[5] +___ +$code.=<<___ if (!$inv); + veor @x[2], @t[0], @t[4] + veor @x[4], @x[4], @t[3] + veor @x[5], @x[5], @t[7] + veor @x[3], @x[3], @t[6] + @ vmov @x[2], @t[0] + veor @x[6], @x[6], @t[2] + @ vmov @x[7], @t[1] +___ +$code.=<<___ if ($inv); + veor @t[3], @t[3], @x[4] + veor @x[5], @x[5], @t[7] + veor @x[2], @x[3], @t[6] + veor @x[3], @t[0], @t[4] + veor @x[4], @x[6], @t[2] + vmov @x[6], @t[3] + @ vmov @x[7], @t[1] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + @ multiplication by 0x0e + vext.8 @t[7], @x[7], @x[7], #12 + vmov @t[2], @x[2] + veor @x[2], @x[2], @x[5] @ 2 5 + veor @x[7], @x[7], @x[5] @ 7 5 + vext.8 @t[0], @x[0], @x[0], #12 + vmov @t[5], @x[5] + veor @x[5], @x[5], @x[0] @ 5 0 [1] + veor @x[0], @x[0], @x[1] @ 0 1 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[1], @x[1], @x[2] @ 1 25 + veor @x[0], @x[0], @x[6] @ 01 6 [2] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[1], @x[1], @x[3] @ 125 3 [4] + veor @x[2], @x[2], @x[0] @ 25 016 [3] + veor @x[3], @x[3], @x[7] @ 3 75 + veor @x[7], @x[7], @x[6] @ 75 6 [0] + vext.8 @t[6], @x[6], @x[6], #12 + vmov @t[4], @x[4] + veor @x[6], @x[6], @x[4] @ 6 4 + veor @x[4], @x[4], @x[3] @ 4 375 [6] + veor @x[3], @x[3], @x[7] @ 375 756=36 + veor @x[6], @x[6], @t[5] @ 64 5 [7] + veor @x[3], @x[3], @t[2] @ 36 2 + vext.8 @t[5], @t[5], @t[5], #12 + veor @x[3], @x[3], @t[4] @ 362 4 [5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + @ multiplication by 0x0b + veor @y[1], @y[1], @y[0] + veor @y[0], @y[0], @t[0] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[5] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[1], @y[1], @t[6] + veor @y[0], @y[0], @t[7] + veor @t[7], @t[7], @t[6] @ clobber t[7] + + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @y[0] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[2], @y[2], @t[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + veor @y[2], @y[2], @t[2] + veor @y[3], @y[3], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[7] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[3], @y[3], @t[3] + veor @y[6], @y[6], @t[3] + veor @y[4], @y[4], @t[3] + veor @y[7], @y[7], @t[4] + vext.8 @t[3], @t[3], @t[3], #12 + veor @y[5], @y[5], @t[4] + veor @y[7], @y[7], @t[7] + veor @t[7], @t[7], @t[5] @ clobber t[7] even more + veor @y[3], @y[3], @t[5] + veor @y[4], @y[4], @t[4] + + veor @y[5], @y[5], @t[7] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[6], @y[6], @t[7] + veor @y[4], @y[4], @t[7] + + veor @t[7], @t[7], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + + @ multiplication by 0x0d + veor @y[4], @y[4], @y[7] + veor @t[7], @t[7], @t[6] @ restore t[7] + veor @y[7], @y[7], @t[4] + vext.8 @t[6], @t[6], @t[6], #12 + veor @y[2], @y[2], @t[0] + veor @y[7], @y[7], @t[5] + vext.8 @t[7], @t[7], @t[7], #12 + veor @y[2], @y[2], @t[2] + + veor @y[3], @y[3], @y[1] + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[0] + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @t[5] + veor @y[0], @y[0], @t[5] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[1], @y[1], @t[7] + veor @y[0], @y[0], @t[6] + veor @y[3], @y[3], @y[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + + veor @y[7], @y[7], @t[7] + veor @y[4], @y[4], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[6] + veor @t[6], @t[6], @t[3] @ clobber t[6] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[4], @y[4], @y[7] + veor @y[3], @y[3], @t[6] + + veor @y[6], @y[6], @t[6] + veor @y[5], @y[5], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + veor @y[6], @y[6], @t[4] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[5], @y[5], @t[6] + veor @y[6], @y[6], @t[7] + vext.8 @t[7], @t[7], @t[7], #12 + veor @t[6], @t[6], @t[3] @ restore t[6] + vext.8 @t[3], @t[3], @t[3], #12 + + @ multiplication by 0x09 + veor @y[4], @y[4], @y[1] + veor @t[1], @t[1], @y[1] @ t[1]=y[1] + veor @t[0], @t[0], @t[5] @ clobber t[0] + vext.8 @t[6], @t[6], @t[6], #12 + veor @t[1], @t[1], @t[5] + veor @y[3], @y[3], @t[0] + veor @t[0], @t[0], @y[0] @ t[0]=y[0] + veor @t[1], @t[1], @t[6] + veor @t[6], @t[6], @t[7] @ clobber t[6] + veor @y[4], @y[4], @t[1] + veor @y[7], @y[7], @t[4] + veor @y[6], @y[6], @t[3] + veor @y[5], @y[5], @t[2] + veor @t[4], @t[4], @y[4] @ t[4]=y[4] + veor @t[3], @t[3], @y[3] @ t[3]=y[3] + veor @t[5], @t[5], @y[5] @ t[5]=y[5] + veor @t[2], @t[2], @y[2] @ t[2]=y[2] + veor @t[3], @t[3], @t[7] + veor @XMM[5], @t[5], @t[6] + veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] + veor @XMM[2], @t[2], @t[6] + veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] + + vmov @XMM[0], @t[0] + vmov @XMM[1], @t[1] + @ vmov @XMM[2], @t[2] + vmov @XMM[3], @t[3] + vmov @XMM[4], @t[4] + @ vmov @XMM[5], @t[5] + @ vmov @XMM[6], @t[6] + @ vmov @XMM[7], @t[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 @t[0], @x[0], @x[0], #8 + vext.8 @t[6], @x[6], @x[6], #8 + vext.8 @t[7], @x[7], @x[7], #8 + veor @t[0], @t[0], @x[0] + vext.8 @t[1], @x[1], @x[1], #8 + veor @t[6], @t[6], @x[6] + vext.8 @t[2], @x[2], @x[2], #8 + veor @t[7], @t[7], @x[7] + vext.8 @t[3], @x[3], @x[3], #8 + veor @t[1], @t[1], @x[1] + vext.8 @t[4], @x[4], @x[4], #8 + veor @t[2], @t[2], @x[2] + vext.8 @t[5], @x[5], @x[5], #8 + veor @t[3], @t[3], @x[3] + veor @t[4], @t[4], @x[4] + veor @t[5], @t[5], @x[5] + + veor @x[0], @x[0], @t[6] + veor @x[1], @x[1], @t[6] + veor @x[2], @x[2], @t[0] + veor @x[4], @x[4], @t[2] + veor @x[3], @x[3], @t[1] + veor @x[1], @x[1], @t[7] + veor @x[2], @x[2], @t[7] + veor @x[4], @x[4], @t[6] + veor @x[5], @x[5], @t[3] + veor @x[3], @x[3], @t[6] + veor @x[6], @x[6], @t[4] + veor @x[4], @x[4], @t[7] + veor @x[5], @x[5], @t[7] + veor @x[7], @x[7], @t[5] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + vshr.u64 $t, $b, #$n + veor $t, $t, $a + vand $t, $t, $mask + veor $a, $a, $t + vshl.u64 $t, $t, #$n + veor $b, $b, $t +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + vshr.u64 $t0, $b0, #$n + vshr.u64 $t1, $b1, #$n + veor $t0, $t0, $a0 + veor $t1, $t1, $a1 + vand $t0, $t0, $mask + vand $t1, $t1, $mask + veor $a0, $a0, $t0 + vshl.u64 $t0, $t0, #$n + veor $a1, $a1, $t1 + vshl.u64 $t1, $t1, #$n + veor $b0, $b0, $t0 + veor $b1, $b1, $t1 +___ +} + +sub bitslice { +my @x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + vmov.i8 $t0,#0x55 @ compose .LBS0 + vmov.i8 $t1,#0x33 @ compose .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + vmov.i8 $t0,#0x0f @ compose .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr $const,_bsaes_decrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + add $const,$const,#.LM0ISR-_bsaes_decrypt8 + + vldmia $const!, {@XMM[8]} @ .LM0ISR + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq $const,$const,#0x10 + bne .Ldec_loop + vldmia $const, {@XMM[12]} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr $const,_bsaes_encrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + sub $const,$const,#_bsaes_encrypt8-.LM0SR + + vldmia $const!, {@XMM[8]} @ .LM0SR +_bsaes_encrypt8_alt: + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq $const,$const,#0x10 + bne .Lenc_loop + vldmia $const, {@XMM[12]} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + @ &swapmove(@x[2,3],1,$t0,$t2,$t3); + vmov @x[2], @x[0] + vmov @x[3], @x[1] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + vmov @x[4], @x[0] + vmov @x[6], @x[2] + vmov @x[5], @x[1] + vmov @x[7], @x[3] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr $const,_bsaes_key_convert + vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key + sub $const,$const,#_bsaes_key_convert-.LM0 + vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key + + vmov.i8 @XMM[8], #0x01 @ bit masks + vmov.i8 @XMM[9], #0x02 + vmov.i8 @XMM[10], #0x04 + vmov.i8 @XMM[11], #0x08 + vmov.i8 @XMM[12], #0x10 + vmov.i8 @XMM[13], #0x20 + vldmia $const, {@XMM[14]} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 @XMM[7], @XMM[7] + vrev32.8 @XMM[15], @XMM[15] +#endif + sub $rounds,$rounds,#1 + vstmia $out!, {@XMM[7]} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` + vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` + vmov.i8 @XMM[6], #0x40 + vmov.i8 @XMM[15], #0x80 + + vtst.8 @XMM[0], @XMM[7], @XMM[8] + vtst.8 @XMM[1], @XMM[7], @XMM[9] + vtst.8 @XMM[2], @XMM[7], @XMM[10] + vtst.8 @XMM[3], @XMM[7], @XMM[11] + vtst.8 @XMM[4], @XMM[7], @XMM[12] + vtst.8 @XMM[5], @XMM[7], @XMM[13] + vtst.8 @XMM[6], @XMM[7], @XMM[6] + vtst.8 @XMM[7], @XMM[7], @XMM[15] + vld1.8 {@XMM[15]}, [$inp]! @ load next round key + vmvn @XMM[0], @XMM[0] @ "pnot" + vmvn @XMM[1], @XMM[1] + vmvn @XMM[5], @XMM[5] + vmvn @XMM[6], @XMM[6] +#ifdef __ARMEL__ + vrev32.8 @XMM[15], @XMM[15] +#endif + subs $rounds,$rounds,#1 + vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 @XMM[7],#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,%function +.align 4 +bsaes_enc_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,%function +.align 4 +bsaes_encrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Lenc128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_encrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Lenc128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,%function +.align 4 +bsaes_dec_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + vldmia $out, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $out, {@XMM[7]} + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,%function +.align 4 +bsaes_decrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Ldec128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_decrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Ldec128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); +my ($keysched)=("sp"); + +$code.=<<___; +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp $len, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ivp, [ip] @ IV is 1st arg on the stack + mov $len, $len, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + vldmia $keysched, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $keysched, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: +#endif + + vld1.8 {@XMM[15]}, [$ivp] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs $len, $len, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5, $rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp] + sub $inp, $inp, #0x60 + vstmia $fp, {@XMM[15]} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + veor @XMM[5], @XMM[5], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[5]}, [$out]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds $len, $len, #8 + beq .Lcbc_dec_done + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + mov r5, $rounds + vstmia $fp, {@XMM[15]} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {@XMM[2]}, [$inp]! + cmp $len, #4 + blo .Lcbc_dec_three + vld1.8 {@XMM[3]}, [$inp]! + beq .Lcbc_dec_four + vld1.8 {@XMM[4]}, [$inp]! + cmp $len, #6 + blo .Lcbc_dec_five + vld1.8 {@XMM[5]}, [$inp]! + beq .Lcbc_dec_six + vld1.8 {@XMM[6]}, [$inp]! + sub $inp, $inp, #0x70 + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub $inp, $inp, #0x60 + bl _bsaes_decrypt8 + vldmia $fp,{@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub $inp, $inp, #0x50 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[2], @XMM[2], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub $inp, $inp, #0x40 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub $inp, $inp, #0x30 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub $inp, $inp, #0x20 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[1], @XMM[1], @XMM[8] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub $inp, $inp, #0x10 + mov $rounds, $out @ save original out pointer + mov $out, $fp @ use the iv scratch space as out buffer + mov r2, $key + vmov @XMM[4],@XMM[15] @ just in case ensure that IV + vmov @XMM[5],@XMM[0] @ and input are preserved + bl AES_decrypt + vld1.8 {@XMM[0]}, [$fp,:64] @ load result + veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV + vmov @XMM[15], @XMM[5] @ @XMM[5] holds input + vst1.8 {@XMM[0]}, [$rounds] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lcbc_dec_bzero +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + vst1.8 {@XMM[15]}, [$ivp] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); +my $const = "r6"; # shared with _bsaes_encrypt8_alt +my $keysched = "sp"; + +$code.=<<___; +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp $len, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ctr, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vld1.8 {@XMM[0]}, [$ctr] @ load counter + add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr + vldmia $keysched, {@XMM[4]} @ load round0 key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + +.align 2 +0: add r12, $key, #248 + vld1.8 {@XMM[0]}, [$ctr] @ load counter + adrl $ctr, .LREVM0SR @ borrow $ctr + vldmia r12, {@XMM[4]} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 @XMM[8],#1 @ compose 1<<96 + veor @XMM[9],@XMM[9],@XMM[9] + vrev32.8 @XMM[0],@XMM[0] + vext.8 @XMM[8],@XMM[9],@XMM[8],#4 + vrev32.8 @XMM[4],@XMM[4] + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vstmia $keysched, {@XMM[4]} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 + vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 + vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 + vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 + vadd.u32 @XMM[4], @XMM[1], @XMM[10] + vadd.u32 @XMM[5], @XMM[2], @XMM[10] + vadd.u32 @XMM[6], @XMM[3], @XMM[10] + vadd.u32 @XMM[7], @XMM[4], @XMM[10] + vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia $keysched, {@XMM[9]} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, $keysched, #0x10 @ pass next round key +#else + add r4, $key, #`248+16` +#endif + vldmia $ctr, {@XMM[8]} @ .LREVM0SR + mov r5, $rounds @ pass rounds + vstmia $fp, {@XMM[10]} @ save next counter + sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs $len, $len, #8 + blo .Lctr_enc_loop_done + + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[0], @XMM[8] + veor @XMM[1], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[10] + veor @XMM[6], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[7], @XMM[13] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + veor @XMM[5], @XMM[15] + vst1.8 {@XMM[6]}, [$out]! + vmov.i32 @XMM[8], #1 @ compose 1<<96 + vst1.8 {@XMM[3]}, [$out]! + veor @XMM[9], @XMM[9], @XMM[9] + vst1.8 {@XMM[7]}, [$out]! + vext.8 @XMM[8], @XMM[9], @XMM[8], #4 + vst1.8 {@XMM[2]}, [$out]! + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vst1.8 {@XMM[5]}, [$out]! + vldmia $fp, {@XMM[0]} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add $len, $len, #8 + vld1.8 {@XMM[8]}, [$inp]! @ load input + veor @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! @ write output + cmp $len, #2 + blo .Lctr_enc_done + vld1.8 {@XMM[9]}, [$inp]! + veor @XMM[1], @XMM[9] + vst1.8 {@XMM[1]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[4], @XMM[10] + vst1.8 {@XMM[4]}, [$out]! + cmp $len, #4 + blo .Lctr_enc_done + vld1.8 {@XMM[11]}, [$inp]! + veor @XMM[6], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[3]}, [$out]! + cmp $len, #6 + blo .Lctr_enc_done + vld1.8 {@XMM[13]}, [$inp]! + veor @XMM[7], @XMM[13] + vst1.8 {@XMM[7]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[14]}, [$inp] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[2]}, [$out]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lctr_enc_bzero +#else + vstmia $keysched, {q0-q1} +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, $inp @ copy arguments + mov r5, $out + mov r6, $len + mov r7, $key + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {@XMM[1]}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [r4]! @ load input + vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor @XMM[0],@XMM[0],@XMM[1] + vst1.8 {@XMM[0]}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +} +{ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); +my $const="r6"; # returned by _bsaes_key_convert +my $twmask=@XMM[5]; +my @T=@XMM[6..7]; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + subs $len, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds $len, #0x70 + bmi .Lxts_enc_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_enc_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_enc_ret + sub r6, $out, #0x10 + +.Lxts_enc_steal: + ldrb r0, [$inp], #1 + ldrb r1, [$out, #-0x10] + strb r0, [$out, #-0x10] + strb r1, [$out], #1 + + subs $len, #1 + bhi .Lxts_enc_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_enc_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + tst $len, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne $len, #0x10 @ subtract another 16 bytes + subs $len, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds $len, #0x70 + bmi .Lxts_dec_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_dec_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + mov r5, $magic @ preserve magic + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + mov $magic, r5 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia $magic, {$twmask} + vshr.s64 @XMM[6], @XMM[8], #63 + vand @XMM[6], @XMM[6], $twmask + vadd.u64 @XMM[9], @XMM[8], @XMM[8] + vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` + veor @XMM[9], @XMM[9], @XMM[6] + + @ perform the final decryption with the last tweak value + vld1.8 {@XMM[0]}, [$inp]! + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[9] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[9] + vst1.8 {@XMM[0]}, [$out] + + mov r6, $out +.Lxts_dec_steal: + ldrb r1, [$out] + ldrb r0, [$inp], #1 + strb r1, [$out, #0x10] + strb r0, [$out], #1 + + subs $len, #1 + bhi .Lxts_dec_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_dec_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; + +close STDOUT; diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db39860b9c..a6395c027715 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += siginfo.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h @@ -31,5 +32,4 @@ generic-y += termbits.h generic-y += termios.h generic-y += timex.h generic-y += trace_clock.h -generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index fcc1b5bf6979..5c2285160575 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -53,6 +53,13 @@ #define put_byte_3 lsl #0 #endif +/* Select code for any configuration running in BE8 mode */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define ARM_BE8(code...) code +#else +#define ARM_BE8(code...) +#endif + /* * Data preload for architectures that support it */ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index da1c77d39327..62d2cb53b069 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -12,6 +12,7 @@ #define __ASM_ARM_ATOMIC_H #include <linux/compiler.h> +#include <linux/prefetch.h> #include <linux/types.h> #include <linux/irqflags.h> #include <asm/barrier.h> @@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v) unsigned long tmp; int result; + prefetchw(&v->counter); __asm__ __volatile__("@ atomic_add\n" "1: ldrex %0, [%3]\n" " add %0, %0, %4\n" @@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v) unsigned long tmp; int result; + prefetchw(&v->counter); __asm__ __volatile__("@ atomic_sub\n" "1: ldrex %0, [%3]\n" " sub %0, %0, %4\n" @@ -114,7 +117,8 @@ static inline int atomic_sub_return(int i, atomic_t *v) static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) { - unsigned long oldval, res; + int oldval; + unsigned long res; smp_mb(); @@ -134,21 +138,6 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) return oldval; } -static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) -{ - unsigned long tmp, tmp2; - - __asm__ __volatile__("@ atomic_clear_mask\n" -"1: ldrex %0, [%3]\n" -" bic %0, %0, %4\n" -" strex %1, %0, [%3]\n" -" teq %1, #0\n" -" bne 1b" - : "=&r" (tmp), "=&r" (tmp2), "+Qo" (*addr) - : "r" (addr), "Ir" (mask) - : "cc"); -} - #else /* ARM_ARCH_6 */ #ifdef CONFIG_SMP @@ -197,15 +186,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) return ret; } -static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) -{ - unsigned long flags; - - raw_local_irq_save(flags); - *addr &= ~mask; - raw_local_irq_restore(flags); -} - #endif /* __LINUX_ARM_ARCH__ */ #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -238,15 +218,15 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #ifndef CONFIG_GENERIC_ATOMIC64 typedef struct { - u64 __aligned(8) counter; + long long counter; } atomic64_t; #define ATOMIC64_INIT(i) { (i) } #ifdef CONFIG_ARM_LPAE -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { - u64 result; + long long result; __asm__ __volatile__("@ atomic64_read\n" " ldrd %0, %H0, [%1]" @@ -257,7 +237,7 @@ static inline u64 atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, u64 i) +static inline void atomic64_set(atomic64_t *v, long long i) { __asm__ __volatile__("@ atomic64_set\n" " strd %2, %H2, [%1]" @@ -266,9 +246,9 @@ static inline void atomic64_set(atomic64_t *v, u64 i) ); } #else -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { - u64 result; + long long result; __asm__ __volatile__("@ atomic64_read\n" " ldrexd %0, %H0, [%1]" @@ -279,10 +259,11 @@ static inline u64 atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, u64 i) +static inline void atomic64_set(atomic64_t *v, long long i) { - u64 tmp; + long long tmp; + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_set\n" "1: ldrexd %0, %H0, [%2]\n" " strexd %0, %3, %H3, [%2]\n" @@ -294,15 +275,16 @@ static inline void atomic64_set(atomic64_t *v, u64 i) } #endif -static inline void atomic64_add(u64 i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { - u64 result; + long long result; unsigned long tmp; + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_add\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -311,17 +293,17 @@ static inline void atomic64_add(u64 i, atomic64_t *v) : "cc"); } -static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { - u64 result; + long long result; unsigned long tmp; smp_mb(); __asm__ __volatile__("@ atomic64_add_return\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -334,15 +316,16 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) return result; } -static inline void atomic64_sub(u64 i, atomic64_t *v) +static inline void atomic64_sub(long long i, atomic64_t *v) { - u64 result; + long long result; unsigned long tmp; + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_sub\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -351,17 +334,17 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) : "cc"); } -static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) +static inline long long atomic64_sub_return(long long i, atomic64_t *v) { - u64 result; + long long result; unsigned long tmp; smp_mb(); __asm__ __volatile__("@ atomic64_sub_return\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -374,9 +357,10 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) return result; } -static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) +static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old, + long long new) { - u64 oldval; + long long oldval; unsigned long res; smp_mb(); @@ -398,9 +382,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) return oldval; } -static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) +static inline long long atomic64_xchg(atomic64_t *ptr, long long new) { - u64 result; + long long result; unsigned long tmp; smp_mb(); @@ -419,18 +403,18 @@ static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) return result; } -static inline u64 atomic64_dec_if_positive(atomic64_t *v) +static inline long long atomic64_dec_if_positive(atomic64_t *v) { - u64 result; + long long result; unsigned long tmp; smp_mb(); __asm__ __volatile__("@ atomic64_dec_if_positive\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, #1\n" -" sbc %H0, %H0, #0\n" -" teq %H0, #0\n" +" subs %Q0, %Q0, #1\n" +" sbc %R0, %R0, #0\n" +" teq %R0, #0\n" " bmi 2f\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" @@ -445,9 +429,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v) return result; } -static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) { - u64 val; + long long val; unsigned long tmp; int ret = 1; @@ -459,8 +443,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) " teqeq %H0, %H5\n" " moveq %1, #0\n" " beq 2f\n" -" adds %0, %0, %6\n" -" adc %H0, %H0, %H6\n" +" adds %Q0, %Q0, %Q6\n" +" adc %R0, %R0, %R6\n" " strexd %2, %0, %H0, [%4]\n" " teq %2, #0\n" " bne 1b\n" diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h new file mode 100644 index 000000000000..1714800fa113 --- /dev/null +++ b/arch/arm/include/asm/bL_switcher.h @@ -0,0 +1,77 @@ +/* + * arch/arm/include/asm/bL_switcher.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_BL_SWITCHER_H +#define ASM_BL_SWITCHER_H + +#include <linux/compiler.h> +#include <linux/types.h> + +typedef void (*bL_switch_completion_handler)(void *cookie); + +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie); +static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id) +{ + return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL); +} + +/* + * Register here to be notified about runtime enabling/disabling of + * the switcher. + * + * The notifier chain is called with the switcher activation lock held: + * the switcher will not be enabled or disabled during callbacks. + * Callbacks must not call bL_switcher_{get,put}_enabled(). + */ +#define BL_NOTIFY_PRE_ENABLE 0 +#define BL_NOTIFY_POST_ENABLE 1 +#define BL_NOTIFY_PRE_DISABLE 2 +#define BL_NOTIFY_POST_DISABLE 3 + +#ifdef CONFIG_BL_SWITCHER + +int bL_switcher_register_notifier(struct notifier_block *nb); +int bL_switcher_unregister_notifier(struct notifier_block *nb); + +/* + * Use these functions to temporarily prevent enabling/disabling of + * the switcher. + * bL_switcher_get_enabled() returns true if the switcher is currently + * enabled. Each call to bL_switcher_get_enabled() must be followed + * by a call to bL_switcher_put_enabled(). These functions are not + * recursive. + */ +bool bL_switcher_get_enabled(void); +void bL_switcher_put_enabled(void); + +int bL_switcher_trace_trigger(void); +int bL_switcher_get_logical_index(u32 mpidr); + +#else +static inline int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline bool bL_switcher_get_enabled(void) { return false; } +static inline void bL_switcher_put_enabled(void) { } +static inline int bL_switcher_trace_trigger(void) { return 0; } +static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; } +#endif /* CONFIG_BL_SWITCHER */ + +#endif diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 7af5c6c3653a..b274bde24905 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -2,6 +2,8 @@ #define _ASMARM_BUG_H #include <linux/linkage.h> +#include <linux/types.h> +#include <asm/opcodes.h> #ifdef CONFIG_BUG @@ -12,10 +14,10 @@ */ #ifdef CONFIG_THUMB2_KERNEL #define BUG_INSTR_VALUE 0xde02 -#define BUG_INSTR_TYPE ".hword " +#define BUG_INSTR(__value) __inst_thumb16(__value) #else #define BUG_INSTR_VALUE 0xe7f001f2 -#define BUG_INSTR_TYPE ".word " +#define BUG_INSTR(__value) __inst_arm(__value) #endif @@ -33,7 +35,7 @@ #define __BUG(__file, __line, __value) \ do { \ - asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \ + asm volatile("1:\t" BUG_INSTR(__value) "\n" \ ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ @@ -48,7 +50,7 @@ do { \ #define __BUG(__file, __line, __value) \ do { \ - asm volatile(BUG_INSTR_TYPE #__value); \ + asm volatile(BUG_INSTR(__value) "\n"); \ unreachable(); \ } while (0) #endif /* CONFIG_DEBUG_BUGVERBOSE */ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 15f2d5bf8875..ee753f1749cd 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -435,4 +435,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) +/* + * Disabling cache access for one CPU in an ARMv7 SMP system is tricky. + * To do so we must: + * + * - Clear the SCTLR.C bit to prevent further cache allocations + * - Flush the desired level of cache + * - Clear the ACTLR "SMP" bit to disable local coherency + * + * ... and so without any intervening memory access in between those steps, + * not even to the stack. + * + * WARNING -- After this has been called: + * + * - No ldrex/strex (and similar) instructions must be used. + * - The CPU is obviously no longer coherent with the other CPUs. + * - This is unlikely to work as expected if Linux is running non-secure. + * + * Note: + * + * - This is known to apply to several ARMv7 processor implementations, + * however some exceptions may exist. Caveat emptor. + * + * - The clobber list is dictated by the call to v7_flush_dcache_*. + * fp is preserved to the stack explicitly prior disabling the cache + * since adding it to the clobber list is incompatible with having + * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering + * trampoline are inserted by the linker and to keep sp 64-bit aligned. + */ +#define v7_exit_coherency_flush(level) \ + asm volatile( \ + "stmfd sp!, {fp, ip} \n\t" \ + "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ + "bic r0, r0, #"__stringify(CR_C)" \n\t" \ + "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \ + "isb \n\t" \ + "bl v7_flush_dcache_"__stringify(level)" \n\t" \ + "clrex \n\t" \ + "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \ + "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \ + "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \ + "isb \n\t" \ + "dsb \n\t" \ + "ldmfd sp!, {fp, ip}" \ + : : : "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r9","r10","lr","memory" ) + #endif diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 4f009c10540d..df2fbba7efc8 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -223,6 +223,42 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, return ret; } +static inline unsigned long long __cmpxchg64(unsigned long long *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long oldval; + unsigned long res; + + __asm__ __volatile__( +"1: ldrexd %1, %H1, [%3]\n" +" teq %1, %4\n" +" teqeq %H1, %H4\n" +" bne 2f\n" +" strexd %0, %5, %H5, [%3]\n" +" teq %0, #0\n" +" bne 1b\n" +"2:" + : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr) + : "r" (ptr), "r" (old), "r" (new) + : "cc"); + + return oldval; +} + +static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long ret; + + smp_mb(); + ret = __cmpxchg64(ptr, old, new); + smp_mb(); + + return ret; +} + #define cmpxchg_local(ptr,o,n) \ ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ (unsigned long)(o), \ @@ -230,18 +266,16 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))atomic64_cmpxchg(container_of((ptr), \ - atomic64_t, \ - counter), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) - -#define cmpxchg64_local(ptr, o, n) \ - ((__typeof__(*(ptr)))local64_cmpxchg(container_of((ptr), \ - local64_t, \ - a), \ - (unsigned long long)(o), \ - (unsigned long long)(n))) + ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#define cmpxchg64_relaxed(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n)) #endif /* __LINUX_ARM_ARCH__ >= 6 */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 9672e978d50d..acdde76b39bb 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -10,6 +10,7 @@ #define CPUID_TLBTYPE 3 #define CPUID_MPUIR 4 #define CPUID_MPIDR 5 +#define CPUID_REVIDR 6 #ifdef CONFIG_CPU_V7M #define CPUID_EXT_PFR0 0x40 diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index 2740c2a2df63..fe3ea776dc34 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 6 +#define NR_IPI 8 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 0cf7a6b842ff..ad774f37c47c 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -24,8 +24,8 @@ #define TRACER_TIMEOUT 10000 #define etm_writel(t, v, x) \ - (__raw_writel((v), (t)->etm_regs + (x))) -#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x))) + (writel_relaxed((v), (t)->etm_regs + (x))) +#define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x))) /* CoreSight Management Registers */ #define CSMR_LOCKACCESS 0xfb0 @@ -142,8 +142,8 @@ #define ETBFF_TRIGFL BIT(10) #define etb_writel(t, v, x) \ - (__raw_writel((v), (t)->etb_regs + (x))) -#define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x))) + (writel_relaxed((v), (t)->etb_regs + (x))) +#define etb_readl(t, x) (readl_relaxed((t)->etb_regs + (x))) #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etm_unlock(t) \ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index bfc198c75913..863c892b4aaa 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -16,7 +16,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" JUMP_LABEL_NOP "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h index 48066ce9ea34..0a9d5dd93294 100644 --- a/arch/arm/include/asm/kgdb.h +++ b/arch/arm/include/asm/kgdb.h @@ -11,6 +11,7 @@ #define __ARM_KGDB_H__ #include <linux/ptrace.h> +#include <asm/opcodes.h> /* * GDB assumes that we're a user process being debugged, so @@ -41,7 +42,7 @@ static inline void arch_kgdb_breakpoint(void) { - asm(".word 0xe7ffdeff"); + asm(__inst_arm(0xe7ffdeff)); } extern void kgdb_handle_bus_error(void); diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 402a2bc6aa68..17a3fa2979e8 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -49,6 +49,7 @@ struct machine_desc { bool (*smp_init)(void); void (*fixup)(struct tag *, char **, struct meminfo *); + void (*init_meminfo)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ void (*init_early)(void); diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 0f7b7620e9a5..608516ebabfe 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -42,6 +42,14 @@ extern void mcpm_entry_point(void); void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); /* + * This sets an early poke i.e a value to be poked into some address + * from very early assembly code before the CPU is ungated. The + * address must be physical, and if 0 then nothing will happen. + */ +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val); + +/* * CPU/cluster power operations API for higher subsystems to use. */ @@ -76,12 +84,45 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster); * * This must be called with interrupts disabled. * - * This does not return. Re-entry in the kernel is expected via - * mcpm_entry_point. + * On success this does not return. Re-entry in the kernel is expected + * via mcpm_entry_point. + * + * This will return if mcpm_platform_register() has not been called + * previously in which case the caller should take appropriate action. + * + * On success, the CPU is not guaranteed to be truly halted until + * mcpm_cpu_power_down_finish() subsequently returns non-zero for the + * specified cpu. Until then, other CPUs should make sure they do not + * trash memory the target CPU might be executing/accessing. */ void mcpm_cpu_power_down(void); /** + * mcpm_cpu_power_down_finish - wait for a specified CPU to halt, and + * make sure it is powered off + * + * @cpu: CPU number within given cluster + * @cluster: cluster number for the CPU + * + * Call this function to ensure that a pending powerdown has taken + * effect and the CPU is safely parked before performing non-mcpm + * operations that may affect the CPU (such as kexec trashing the + * kernel text). + * + * It is *not* necessary to call this function if you only need to + * serialise a pending powerdown with mcpm_cpu_power_up() or a wakeup + * event. + * + * Do not call this function unless the specified CPU has already + * called mcpm_cpu_power_down() or has committed to doing so. + * + * @return: + * - zero if the CPU is in a safely parked state + * - nonzero otherwise (e.g., timeout) + */ +int mcpm_cpu_power_down_finish(unsigned int cpu, unsigned int cluster); + +/** * mcpm_cpu_suspend - bring the calling CPU in a suspended state * * @expected_residency: duration in microseconds the CPU is expected @@ -98,8 +139,11 @@ void mcpm_cpu_power_down(void); * * This must be called with interrupts disabled. * - * This does not return. Re-entry in the kernel is expected via - * mcpm_entry_point. + * On success this does not return. Re-entry in the kernel is expected + * via mcpm_entry_point. + * + * This will return if mcpm_platform_register() has not been called + * previously in which case the caller should take appropriate action. */ void mcpm_cpu_suspend(u64 expected_residency); @@ -120,6 +164,7 @@ int mcpm_cpu_powered_up(void); struct mcpm_platform_ops { int (*power_up)(unsigned int cpu, unsigned int cluster); void (*power_down)(void); + int (*power_down_finish)(unsigned int cpu, unsigned int cluster); void (*suspend)(u64); void (*powered_up)(void); }; diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index e750a938fd3c..4dd21457ef9d 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -172,8 +172,13 @@ * so that all we need to do is modify the 8-bit constant field. */ #define __PV_BITS_31_24 0x81000000 +#define __PV_BITS_7_0 0x81 + +extern u64 __pv_phys_offset; +extern u64 __pv_offset; +extern void fixup_pv_table(const void *, unsigned long); +extern const void *__pv_table_begin, *__pv_table_end; -extern unsigned long __pv_phys_offset; #define PHYS_OFFSET __pv_phys_offset #define __pv_stub(from,to,instr,type) \ @@ -185,22 +190,58 @@ extern unsigned long __pv_phys_offset; : "=r" (to) \ : "r" (from), "I" (type)) -static inline unsigned long __virt_to_phys(unsigned long x) +#define __pv_stub_mov_hi(t) \ + __asm__ volatile("@ __pv_stub_mov\n" \ + "1: mov %R0, %1\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 1b\n" \ + " .popsection\n" \ + : "=r" (t) \ + : "I" (__PV_BITS_7_0)) + +#define __pv_add_carry_stub(x, y) \ + __asm__ volatile("@ __pv_add_carry_stub\n" \ + "1: adds %Q0, %1, %2\n" \ + " adc %R0, %R0, #0\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 1b\n" \ + " .popsection\n" \ + : "+r" (y) \ + : "r" (x), "I" (__PV_BITS_31_24) \ + : "cc") + +static inline phys_addr_t __virt_to_phys(unsigned long x) { - unsigned long t; - __pv_stub(x, t, "add", __PV_BITS_31_24); + phys_addr_t t; + + if (sizeof(phys_addr_t) == 4) { + __pv_stub(x, t, "add", __PV_BITS_31_24); + } else { + __pv_stub_mov_hi(t); + __pv_add_carry_stub(x, t); + } return t; } -static inline unsigned long __phys_to_virt(unsigned long x) +static inline unsigned long __phys_to_virt(phys_addr_t x) { unsigned long t; __pv_stub(x, t, "sub", __PV_BITS_31_24); return t; } + #else -#define __virt_to_phys(x) ((x) - PAGE_OFFSET + PHYS_OFFSET) -#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET) + +static inline phys_addr_t __virt_to_phys(unsigned long x) +{ + return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET; +} + +static inline unsigned long __phys_to_virt(phys_addr_t x) +{ + return x - PHYS_OFFSET + PAGE_OFFSET; +} + #endif #endif #endif /* __ASSEMBLY__ */ @@ -238,16 +279,33 @@ static inline phys_addr_t virt_to_phys(const volatile void *x) static inline void *phys_to_virt(phys_addr_t x) { - return (void *)(__phys_to_virt((unsigned long)(x))); + return (void *)__phys_to_virt(x); } /* * Drivers should NOT use these either. */ #define __pa(x) __virt_to_phys((unsigned long)(x)) -#define __va(x) ((void *)__phys_to_virt((unsigned long)(x))) +#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); + +/* + * These are for systems that have a hardware interconnect supported alias of + * physical memory for idmap purposes. Most cases should leave these + * untouched. + */ +static inline phys_addr_t __virt_to_idmap(unsigned long x) +{ + if (arch_virt_to_idmap) + return arch_virt_to_idmap(x); + else + return __virt_to_phys(x); +} + +#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x)) + /* * Virtual <-> DMA view memory address translations * Again, these are *only* valid on the kernel direct mapped RAM diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 6f18da09668b..64fd15159b7d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -16,7 +16,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID #define ASID_BITS 8 #define ASID_MASK ((~0ULL) << ASID_BITS) -#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) +#define ASID(mm) ((unsigned int)((mm)->context.id.counter & ~ASID_MASK)) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index f97ee02386ee..86a659a19526 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -181,6 +181,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) +/* + * We don't have huge page support for short descriptors, for the moment + * define empty stubs for use by pin_page_for_write. + */ +#define pmd_hugewillfault(pmd) (0) +#define pmd_thp_or_huge(pmd) (0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_2LEVEL_H */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5689c18c85f5..39c54cfa03e9 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -206,6 +206,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) +#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) +#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 413f3876341c..c3d5fc124a05 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -22,6 +22,7 @@ #include <asm/hw_breakpoint.h> #include <asm/ptrace.h> #include <asm/types.h> +#include <asm/unified.h> #ifdef __KERNEL__ #define STACK_TOP ((current->personality & ADDR_LIMIT_32BIT) ? \ @@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) task_pt_regs(tsk)->ARM_pc #define KSTK_ESP(tsk) task_pt_regs(tsk)->ARM_sp +#ifdef CONFIG_SMP +#define __ALT_SMP_ASM(smp, up) \ + "9998: " smp "\n" \ + " .pushsection \".alt.smp.init\", \"a\"\n" \ + " .long 9998b\n" \ + " " up "\n" \ + " .popsection\n" +#else +#define __ALT_SMP_ASM(smp, up) up +#endif + /* * Prefetching support - only ARMv5. */ @@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr) { __asm__ __volatile__( "pld\t%a0" - : - : "p" (ptr) - : "cc"); + :: "p" (ptr)); } +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) #define ARCH_HAS_PREFETCHW -#define prefetchw(ptr) prefetch(ptr) - -#define ARCH_HAS_SPINLOCK_PREFETCH -#define spin_lock_prefetch(x) do { } while (0) - +static inline void prefetchw(const void *ptr) +{ + __asm__ __volatile__( + ".arch_extension mp\n" + __ALT_SMP_ASM( + WASM(pldw) "\t%a0", + WASM(pld) "\t%a0" + ) + :: "p" (ptr)); +} +#endif #endif #define HAVE_ARCH_PICK_MMAP_LAYOUT diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index c50f05609501..8d6a089dfb76 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -49,7 +49,7 @@ extern struct meminfo meminfo; #define bank_phys_end(bank) ((bank)->start + (bank)->size) #define bank_phys_size(bank) (bank)->size -extern int arm_add_memory(phys_addr_t start, phys_addr_t size); +extern int arm_add_memory(u64 start, u64 size); extern void early_print(const char *str, ...); extern void dump_machine_table(void); diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index a8cae71caceb..22a3b9b5d4a1 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -84,6 +84,8 @@ extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +extern int register_ipi_completion(struct completion *completion, int cpu); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4f2c28060c9a..ef3c6072aa45 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -5,21 +5,13 @@ #error SMP not supported on pre-ARMv6 CPUs #endif -#include <asm/processor.h> +#include <linux/prefetch.h> /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K * extensions, so when running on UP, we have to patch these instructions away. */ -#define ALT_SMP(smp, up) \ - "9998: " smp "\n" \ - " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ - " " up "\n" \ - " .popsection\n" - #ifdef CONFIG_THUMB2_KERNEL -#define SEV ALT_SMP("sev.w", "nop.w") /* * For Thumb-2, special care is needed to ensure that the conditional WFE * instruction really does assemble to exactly 4 bytes (as required by @@ -31,17 +23,18 @@ * the assembler won't change IT instructions which are explicitly present * in the input. */ -#define WFE(cond) ALT_SMP( \ +#define WFE(cond) __ALT_SMP_ASM( \ "it " cond "\n\t" \ "wfe" cond ".n", \ \ "nop.w" \ ) #else -#define SEV ALT_SMP("sev", "nop") -#define WFE(cond) ALT_SMP("wfe" cond, "nop") +#define WFE(cond) __ALT_SMP_ASM("wfe" cond, "nop") #endif +#define SEV __ALT_SMP_ASM(WASM(sev), WASM(nop)) + static inline void dsb_sev(void) { #if __LINUX_ARM_ARCH__ >= 7 @@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) u32 newval; arch_spinlock_t lockval; + prefetchw(&lock->slock); __asm__ __volatile__( "1: ldrex %0, [%3]\n" " add %1, %0, %4\n" @@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned long contended, res; u32 slock; + prefetchw(&lock->slock); do { __asm__ __volatile__( " ldrex %0, [%3]\n" @@ -127,10 +122,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) dsb_sev(); } +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.tickets.owner == lock.tickets.next; +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); - return tickets.owner != tickets.next; + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) @@ -152,6 +151,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp; + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%1]\n" " teq %0, #0\n" @@ -170,6 +170,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned long contended, res; + prefetchw(&rw->lock); do { __asm__ __volatile__( " ldrex %0, [%2]\n" @@ -203,7 +204,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) } /* write_can_lock - would write_trylock() succeed? */ -#define arch_write_can_lock(x) ((x)->lock == 0) +#define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) /* * Read locks are a bit more hairy: @@ -221,6 +222,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" @@ -241,6 +243,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) smp_mb(); + prefetchw(&rw->lock); __asm__ __volatile__( "1: ldrex %0, [%2]\n" " sub %0, %0, #1\n" @@ -259,6 +262,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned long contended, res; + prefetchw(&rw->lock); do { __asm__ __volatile__( " ldrex %0, [%2]\n" @@ -280,7 +284,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } /* read_can_lock - would read_trylock() succeed? */ -#define arch_read_can_lock(x) ((x)->lock < 0x80000000) +#define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index b262d2f8b478..47663fcb10ad 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -25,7 +25,7 @@ typedef struct { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { - volatile unsigned int lock; + u32 lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index f1d96d4e8092..73ddd7239b33 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -57,6 +57,9 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; @@ -81,6 +84,9 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, const unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { pr_warning("%s called with max args %d, handling only %d\n", __func__, i + n, SYSCALL_MAX_ARGS); diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 38960264040c..def9e570199f 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -560,37 +560,6 @@ static inline void __flush_bp_all(void) asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero)); } -#include <asm/cputype.h> -#ifdef CONFIG_ARM_ERRATA_798181 -static inline int erratum_a15_798181(void) -{ - unsigned int midr = read_cpuid_id(); - - /* Cortex-A15 r0p0..r3p2 affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) - return 0; - return 1; -} - -static inline void dummy_flush_tlb_a15_erratum(void) -{ - /* - * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. - */ - asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); - dsb(ish); -} -#else -static inline int erratum_a15_798181(void) -{ - return 0; -} - -static inline void dummy_flush_tlb_a15_erratum(void) -{ -} -#endif - /* * flush_pmd_entry * @@ -697,4 +666,21 @@ extern void flush_bp_all(void); #endif +#ifndef __ASSEMBLY__ +#ifdef CONFIG_ARM_ERRATA_798181 +extern void erratum_a15_798181_init(void); +#else +static inline void erratum_a15_798181_init(void) {} +#endif +extern bool (*erratum_a15_798181_handler)(void); + +static inline bool erratum_a15_798181(void) +{ + if (unlikely(IS_ENABLED(CONFIG_ARM_ERRATA_798181) && + erratum_a15_798181_handler)) + return erratum_a15_798181_handler(); + return false; +} +#endif + #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7e1f76027f66..72abdc541f38 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -19,6 +19,13 @@ #include <asm/unified.h> #include <asm/compiler.h> +#if __LINUX_ARM_ARCH__ < 6 +#include <asm-generic/uaccess-unaligned.h> +#else +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user +#endif + #define VERIFY_READ 0 #define VERIFY_WRITE 1 diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index f5989f46b4d2..b88beaba6b4a 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -38,6 +38,8 @@ #ifdef __ASSEMBLY__ #define W(instr) instr.w #define BSYM(sym) sym + 1 +#else +#define WASM(instr) #instr ".w" #endif #else /* !CONFIG_THUMB2_KERNEL */ @@ -50,6 +52,8 @@ #ifdef __ASSEMBLY__ #define W(instr) instr #define BSYM(sym) sym +#else +#define WASM(instr) #instr #endif #endif /* CONFIG_THUMB2_KERNEL */ diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S new file mode 100644 index 000000000000..2265a199280c --- /dev/null +++ b/arch/arm/include/debug/efm32.S @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2013 Pengutronix + * Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define UARTn_CMD 0x000c +#define UARTn_CMD_TXEN 0x0004 + +#define UARTn_STATUS 0x0010 +#define UARTn_STATUS_TXC 0x0020 +#define UARTn_STATUS_TXBL 0x0040 + +#define UARTn_TXDATA 0x0034 + + .macro addruart, rx, tmp + ldr \rx, =(CONFIG_DEBUG_UART_PHYS) + + /* + * enable TX. The driver might disable it to save energy. We + * don't care about disabling at the end as during debug power + * consumption isn't that important. + */ + ldr \tmp, =(UARTn_CMD_TXEN) + str \tmp, [\rx, #UARTn_CMD] + .endm + + .macro senduart,rd,rx + strb \rd, [\rx, #UARTn_TXDATA] + .endm + + .macro waituart,rd,rx +1001: ldr \rd, [\rx, #UARTn_STATUS] + tst \rd, #UARTn_STATUS_TXBL + beq 1001b + .endm + + .macro busyuart,rd,rx +1001: ldr \rd, [\rx, UARTn_STATUS] + tst \rd, #UARTn_STATUS_TXC + bne 1001b + .endm diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S index 9166e1bc470e..9d653d475903 100644 --- a/arch/arm/include/debug/msm.S +++ b/arch/arm/include/debug/msm.S @@ -46,6 +46,11 @@ #define MSM_DEBUG_UART_PHYS 0x16440000 #endif +#ifdef CONFIG_DEBUG_MSM8974_UART +#define MSM_DEBUG_UART_BASE 0xFA71E000 +#define MSM_DEBUG_UART_PHYS 0xF991E000 +#endif + .macro addruart, rp, rv, tmp #ifdef MSM_DEBUG_UART_PHYS ldr \rp, =MSM_DEBUG_UART_PHYS diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S index 37c6895b87e6..92ef808a2337 100644 --- a/arch/arm/include/debug/pl01x.S +++ b/arch/arm/include/debug/pl01x.S @@ -25,12 +25,14 @@ .macro waituart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_TXFF bne 1001b .endm .macro busyuart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_BUSY bne 1001b .endm diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 18d76fd5a2af..70a1c9da30ca 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -7,6 +7,7 @@ header-y += hwcap.h header-y += ioctls.h header-y += kvm_para.h header-y += mman.h +header-y += perf_regs.h header-y += posix_types.h header-y += ptrace.h header-y += setup.h diff --git a/arch/arm/include/uapi/asm/perf_regs.h b/arch/arm/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..ce59448458b2 --- /dev/null +++ b/arch/arm/include/uapi/asm/perf_regs.h @@ -0,0 +1,23 @@ +#ifndef _ASM_ARM_PERF_REGS_H +#define _ASM_ARM_PERF_REGS_H + +enum perf_event_arm_regs { + PERF_REG_ARM_R0, + PERF_REG_ARM_R1, + PERF_REG_ARM_R2, + PERF_REG_ARM_R3, + PERF_REG_ARM_R4, + PERF_REG_ARM_R5, + PERF_REG_ARM_R6, + PERF_REG_ARM_R7, + PERF_REG_ARM_R8, + PERF_REG_ARM_R9, + PERF_REG_ARM_R10, + PERF_REG_ARM_FP, + PERF_REG_ARM_IP, + PERF_REG_ARM_SP, + PERF_REG_ARM_LR, + PERF_REG_ARM_PC, + PERF_REG_ARM_MAX, +}; +#endif /* _ASM_ARM_PERF_REGS_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5140df5f23aa..a30fc9be9e9e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,7 +17,8 @@ CFLAGS_REMOVE_return_address.o = -pg obj-y := elf.o entry-common.o irq.o opcodes.o \ process.o ptrace.o return_address.o \ - setup.o signal.o stacktrace.o sys_arm.o time.o traps.o + setup.o signal.o sigreturn_codes.o \ + stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o @@ -78,6 +79,7 @@ obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 60d3b738d420..1f031ddd0667 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc); #ifdef CONFIG_ARM_PATCH_PHYS_VIRT EXPORT_SYMBOL(__pv_phys_offset); +EXPORT_SYMBOL(__pv_offset); #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 9cbe70c8b0ef..b3fb8c9e1ff2 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -192,6 +192,7 @@ __dabt_svc: svc_entry mov r2, sp dabt_helper + THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR svc_exit r5 @ return from exception UNWIND(.fnend ) ENDPROC(__dabt_svc) @@ -416,9 +417,8 @@ __und_usr: bne __und_usr_thumb sub r4, r2, #4 @ ARM instr at LR - 4 1: ldrt r0, [r4] -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r0, r0 @ little endian instruction -#endif + ARM_BE8(rev r0, r0) @ little endian instruction + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 74ad15d1a065..a2dcafdf1bc8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -393,9 +393,7 @@ ENTRY(vector_swi) #else USER( ldr r10, [lr, #-4] ) @ get SWI instruction #endif -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r10, r10 @ little endian instruction -#endif + ARM_BE8(rev r10, r10) @ little endian instruction #elif defined(CONFIG_AEABI) @@ -442,10 +440,10 @@ local_restart: ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF - cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) +2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall -2: mov why, #0 @ no longer a real syscall + mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func #if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index de23a9beed13..39f89fbd5111 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -329,10 +329,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_exit + bl context_tracking_user_exit ldmia sp!, {r0-r3, ip, lr} .else - bl user_exit + bl context_tracking_user_exit .endif #endif .endm @@ -341,10 +341,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_enter + bl context_tracking_user_enter ldmia sp!, {r0-r3, ip, lr} .else - bl user_enter + bl context_tracking_user_enter .endif #endif .endm diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2c7cc1e03473..7801866e626a 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -77,6 +77,7 @@ __HEAD ENTRY(stext) + ARM_BE8(setend be ) @ ensure we are in BE8 mode THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, @@ -352,6 +353,9 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. */ + + ARM_BE8(setend be) @ ensure we are in BE8 mode + #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install_secondary #endif @@ -487,7 +491,26 @@ __fixup_smp: mrc p15, 0, r0, c0, c0, 5 @ read MPIDR and r0, r0, #0xc0000000 @ multiprocessing extensions and teq r0, #0x80000000 @ not part of a uniprocessor system? - moveq pc, lr @ yes, assume SMP + bne __fixup_smp_on_up @ no, assume UP + + @ Core indicates it is SMP. Check for Aegis SOC where a single + @ Cortex-A9 CPU is present but SMP operations fault. + mov r4, #0x41000000 + orr r4, r4, #0x0000c000 + orr r4, r4, #0x00000090 + teq r3, r4 @ Check for ARM Cortex-A9 + movne pc, lr @ Not ARM Cortex-A9, + + @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the + @ below address check will need to be #ifdef'd or equivalent + @ for the Aegis platform. + mrc p15, 4, r0, c15, c0 @ get SCU base address + teq r0, #0x0 @ '0' on actual UP A9 hardware + beq __fixup_smp_on_up @ So its an A9 UP + ldr r0, [r0, #4] @ read SCU Config + and r0, r0, #0x3 @ number of CPUs + teq r0, #0x0 @ is 1? + movne pc, lr __fixup_smp_on_up: adr r0, 1f @@ -536,6 +559,14 @@ ENTRY(fixup_smp) ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) +#ifdef __ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + #ifdef CONFIG_ARM_PATCH_PHYS_VIRT /* __fixup_pv_table - patch the stub instructions with the delta between @@ -546,17 +577,20 @@ ENDPROC(fixup_smp) __HEAD __fixup_pv_table: adr r0, 1f - ldmia r0, {r3-r5, r7} - sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET + ldmia r0, {r3-r7} + mvn ip, #0 + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET add r4, r4, r3 @ adjust table start address add r5, r5, r3 @ adjust table end address - add r7, r7, r3 @ adjust __pv_phys_offset address - str r8, [r7] @ save computed PHYS_OFFSET to __pv_phys_offset + add r6, r6, r3 @ adjust __pv_phys_offset address + add r7, r7, r3 @ adjust __pv_offset address + str r8, [r6, #LOW_OFFSET] @ save computed PHYS_OFFSET to __pv_phys_offset + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits mov r6, r3, lsr #24 @ constant for add/sub instructions teq r3, r6, lsl #24 @ must be 16MiB aligned THUMB( it ne @ cross section branch ) bne __error - str r6, [r7, #4] @ save to __pv_offset + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits b __fixup_a_pv_table ENDPROC(__fixup_pv_table) @@ -565,10 +599,19 @@ ENDPROC(__fixup_pv_table) .long __pv_table_begin .long __pv_table_end 2: .long __pv_phys_offset + .long __pv_offset .text __fixup_a_pv_table: + adr r0, 3f + ldr r6, [r0] + add r6, r6, r3 + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + mov r6, r6, lsr #24 + cmn r0, #1 #ifdef CONFIG_THUMB2_KERNEL + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction lsls r6, #24 beq 2f clz r7, r6 @@ -582,18 +625,42 @@ __fixup_a_pv_table: b 2f 1: add r7, r3 ldrh ip, [r7, #2] - and ip, 0x8f00 - orr ip, r6 @ mask in offset bits 31-24 +ARM_BE8(rev16 ip, ip) + tst ip, #0x4000 + and ip, #0x8f00 + orrne ip, r6 @ mask in offset bits 31-24 + orreq ip, r0 @ mask in offset bits 7-0 +ARM_BE8(rev16 ip, ip) strh ip, [r7, #2] + bne 2f + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + bic ip, #0x20 + orr ip, ip, r0, lsr #16 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr #else + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction b 2f 1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + tst ip, #0x000f0000 @ check the rotation field + orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 + biceq ip, ip, #0x00004000 @ clear bit 22 + orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0 +#else bic ip, ip, #0x000000ff - orr ip, ip, r6 @ mask in offset bits 31-24 + tst ip, #0xf00 @ check the rotation field + orrne ip, ip, r6 @ mask in offset bits 31-24 + biceq ip, ip, #0x400000 @ clear bit 22 + orreq ip, ip, r0 @ mask in offset bits 7-0 +#endif str ip, [r7, r3] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot @@ -602,28 +669,30 @@ __fixup_a_pv_table: #endif ENDPROC(__fixup_a_pv_table) + .align +3: .long __pv_offset + ENTRY(fixup_pv_table) stmfd sp!, {r4 - r7, lr} - ldr r2, 2f @ get address of __pv_phys_offset mov r3, #0 @ no offset mov r4, r0 @ r0 = table start add r5, r0, r1 @ r1 = table size - ldr r6, [r2, #4] @ get __pv_offset bl __fixup_a_pv_table ldmfd sp!, {r4 - r7, pc} ENDPROC(fixup_pv_table) - .align -2: .long __pv_phys_offset - .data .globl __pv_phys_offset .type __pv_phys_offset, %object __pv_phys_offset: - .long 0 - .size __pv_phys_offset, . - __pv_phys_offset + .quad 0 + .size __pv_phys_offset, . -__pv_phys_offset + + .globl __pv_offset + .type __pv_offset, %object __pv_offset: - .long 0 + .quad 0 + .size __pv_offset, . -__pv_offset #endif #include "head-common.S" diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7b95de601357..3d446605cbf8 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -344,13 +344,13 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ ctrl_base = ARM_BASE_WCR; val_base = ARM_BASE_WVR; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; } @@ -396,12 +396,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ base = ARM_BASE_WCR; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; } @@ -697,7 +697,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); @@ -768,7 +768,7 @@ static void watchpoint_single_step_handler(unsigned long pc) struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); @@ -802,7 +802,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 170e9f34003f..a7b621ece23d 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -171,13 +171,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; } static void __kprobes set_current_kprobe(struct kprobe *p) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static void __kprobes @@ -421,10 +421,10 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) continue; if (ri->rp && ri->rp->handler) { - __get_cpu_var(current_kprobe) = &ri->rp->kp; + __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->rp->handler(ri, regs); - __get_cpu_var(current_kprobe) = NULL; + __this_cpu_write(current_kprobe, NULL); } orig_ret_address = (unsigned long)ri->ret_addr; diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 084dc8896986..5fdb4038f969 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -24,6 +24,7 @@ #include <asm/sections.h> #include <asm/smp_plat.h> #include <asm/unwind.h> +#include <asm/opcodes.h> #ifdef CONFIG_XIP_KERNEL /* @@ -60,6 +61,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, Elf32_Sym *sym; const char *symname; s32 offset; + u32 tmp; #ifdef CONFIG_THUMB2_KERNEL u32 upper, lower, sign, j1, j2; #endif @@ -95,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: - offset = (*(u32 *)loc & 0x00ffffff) << 2; + offset = __mem_to_opcode_arm(*(u32 *)loc); + offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; @@ -111,9 +114,10 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, } offset >>= 2; + offset &= 0x00ffffff; - *(u32 *)loc &= 0xff000000; - *(u32 *)loc |= offset & 0x00ffffff; + *(u32 *)loc &= __opcode_to_mem_arm(0xff000000); + *(u32 *)loc |= __opcode_to_mem_arm(offset); break; case R_ARM_V4BX: @@ -121,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, * other bits to re-code instruction as * MOV PC,Rm. */ - *(u32 *)loc &= 0xf000000f; - *(u32 *)loc |= 0x01a0f000; + *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f); + *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000); break; case R_ARM_PREL31: @@ -132,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: - offset = *(u32 *)loc; + offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; @@ -140,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) offset >>= 16; - *(u32 *)loc &= 0xfff0f000; - *(u32 *)loc |= ((offset & 0xf000) << 4) | - (offset & 0x0fff); + tmp &= 0xfff0f000; + tmp |= ((offset & 0xf000) << 4) | + (offset & 0x0fff); + + *(u32 *)loc = __opcode_to_mem_arm(tmp); break; #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * 25 bit signed address range (Thumb-2 BL and B.W @@ -198,17 +204,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, sign = (offset >> 24) & 1; j1 = sign ^ (~(offset >> 23) & 1); j2 = sign ^ (~(offset >> 22) & 1); - *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) | + upper = (u16)((upper & 0xf800) | (sign << 10) | ((offset >> 12) & 0x03ff)); - *(u16 *)(loc + 2) = (u16)((lower & 0xd000) | - (j1 << 13) | (j2 << 11) | - ((offset >> 1) & 0x07ff)); + lower = (u16)((lower & 0xd000) | + (j1 << 13) | (j2 << 11) | + ((offset >> 1) & 0x07ff)); + + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * MOVT/MOVW instructions encoding in Thumb-2: @@ -229,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) offset >>= 16; - *(u16 *)loc = (u16)((upper & 0xfbf0) | - ((offset & 0xf000) >> 12) | - ((offset & 0x0800) >> 1)); - *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) | - ((offset & 0x0700) << 4) | - (offset & 0x00ff)); + upper = (u16)((upper & 0xfbf0) | + ((offset & 0xf000) >> 12) | + ((offset & 0x0800) >> 1)); + lower = (u16)((lower & 0x8f00) | + ((offset & 0x0700) << 4) | + (offset & 0x00ff)); + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; #endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e186ee1e63f6..bc3f2efa0d86 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -256,12 +256,11 @@ validate_event(struct pmu_hw_events *hw_events, struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu *leader_pmu = event->group_leader->pmu; if (is_software_event(event)) return 1; - if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + if (event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 8d6147b2001f..d85055cd24ba 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(perf_num_counters); static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) { - return &__get_cpu_var(cpu_hw_events); + return this_cpu_ptr(&cpu_hw_events); } static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c new file mode 100644 index 000000000000..6e4379c67cbc --- /dev/null +++ b/arch/arm/kernel/perf_regs.c @@ -0,0 +1,30 @@ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM_MAX)) + return 0; + + return regs->uregs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 0e1e2b3afa45..f52150d2ec00 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -73,6 +73,8 @@ __setup("fpe=", fpe_setup); #endif extern void paging_init(const struct machine_desc *desc); +extern void early_paging_init(const struct machine_desc *, + struct proc_info_list *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -599,6 +601,8 @@ static void __init setup_processor(void) elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif + erratum_a15_798181_init(); + feat_v6_fixup(); cacheid_init(); @@ -619,9 +623,10 @@ void __init dump_machine_table(void) /* can't use cpu_relax() here as it may require MMU setup */; } -int __init arm_add_memory(phys_addr_t start, phys_addr_t size) +int __init arm_add_memory(u64 start, u64 size) { struct membank *bank = &meminfo.bank[meminfo.nr_banks]; + u64 aligned_start; if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " @@ -634,10 +639,16 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) * Size is appropriately rounded down, start is rounded up. */ size -= start & ~PAGE_MASK; - bank->start = PAGE_ALIGN(start); + aligned_start = PAGE_ALIGN(start); -#ifndef CONFIG_ARM_LPAE - if (bank->start + size < bank->start) { +#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT + if (aligned_start > ULONG_MAX) { + printk(KERN_CRIT "Ignoring memory at 0x%08llx outside " + "32-bit physical address space\n", (long long)start); + return -EINVAL; + } + + if (aligned_start + size > ULONG_MAX) { printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " "32-bit physical address space\n", (long long)start); /* @@ -645,10 +656,11 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB. * This means we lose a page after masking. */ - size = ULONG_MAX - bank->start; + size = ULONG_MAX - aligned_start; } #endif + bank->start = aligned_start; bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); /* @@ -669,8 +681,8 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) static int __init early_mem(char *p) { static int usermem __initdata = 0; - phys_addr_t size; - phys_addr_t start; + u64 size; + u64 start; char *endp; /* @@ -878,6 +890,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); + + early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); sanity_check_meminfo(); arm_memblock_init(&meminfo, mdesc); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab3304225272..04d63880037f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -21,29 +21,7 @@ #include <asm/unistd.h> #include <asm/vfp.h> -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) - -/* - * With EABI, the syscall number has to be loaded into r7. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -/* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -static const unsigned long sigreturn_codes[7] = { - MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, -}; +extern const unsigned long sigreturn_codes[7]; static unsigned long signal_return_offset; @@ -375,12 +353,18 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; - if (thumb) { - cpsr |= PSR_T_BIT; #if __LINUX_ARM_ARCH__ >= 7 - /* clear the If-Then Thumb-2 execution state */ - cpsr &= ~PSR_IT_MASK; + /* + * Clear the If-Then Thumb-2 execution state + * ARM spec requires this to be all 000s in ARM mode + * Snapdragon S4/Krait misbehaves on a Thumb=>ARM + * signal transition without this. + */ + cpsr &= ~PSR_IT_MASK; #endif + + if (thumb) { + cpsr |= PSR_T_BIT; } else cpsr &= ~PSR_T_BIT; } diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S new file mode 100644 index 000000000000..3c5d0f2170fd --- /dev/null +++ b/arch/arm/kernel/sigreturn_codes.S @@ -0,0 +1,80 @@ +/* + * sigreturn_codes.S - code sinpets for sigreturn syscalls + * + * Created by: Victor Kamensky, 2013-08-13 + * Copyright: (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <asm/unistd.h> + +/* + * For ARM syscalls, we encode the syscall number into the instruction. + * With EABI, the syscall number has to be loaded into r7. As result + * ARM syscall sequence snippet will have move and svc in .arm encoding + * + * For Thumb syscalls, we pass the syscall number via r7. We therefore + * need two 16-bit instructions in .thumb encoding + * + * Please note sigreturn_codes code are not executed in place. Instead + * they just copied by kernel into appropriate places. Code inside of + * arch/arm/kernel/signal.c is very sensitive to layout of these code + * snippets. + */ + +#if __LINUX_ARM_ARCH__ <= 4 + /* + * Note we manually set minimally required arch that supports + * required thumb opcodes for early arch versions. It is OK + * for this file to be used in combination with other + * lower arch variants, since these code snippets are only + * used as input data. + */ + .arch armv4t +#endif + + .section .rodata + .global sigreturn_codes + .type sigreturn_codes, #object + + .arm + +sigreturn_codes: + + /* ARM sigreturn syscall code snippet */ + mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn syscall code snippet */ + .thumb + movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* ARM sigreturn_rt syscall code snippet */ + .arm + mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn_rt syscall code snippet */ + .thumb + movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* + * Note on addtional space: setup_return in signal.c + * algorithm uses two words copy regardless whether + * it is thumb case or not, so we need additional + * word after real last entry. + */ + .arm + .space 4 + + .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index db1536b8b30b..b907d9b790ab 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -55,6 +55,7 @@ * specific registers and some other data for resume. * r0 = suspend function arg0 * r1 = suspend function + * r2 = MPIDR value the resuming CPU will use */ ENTRY(__cpu_suspend) stmfd sp!, {r4 - r11, lr} @@ -67,23 +68,18 @@ ENTRY(__cpu_suspend) mov r5, sp @ current virtual SP add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn sub sp, sp, r4 @ allocate CPU state on stack - stmfd sp!, {r0, r1} @ save suspend func arg and pointer - add r0, sp, #8 @ save pointer to save block - mov r1, r4 @ size of save block - mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp + stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(mrc p15, 0, r9, c0, c0, 5) - ALT_UP_B(1f) - ldr r8, =mpidr_hash - /* - * This ldmia relies on the memory layout of the mpidr_hash - * struct mpidr_hash. - */ - ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts - compute_mpidr_hash lr, r5, r6, r7, r9, r4 - add r3, r3, lr, lsl #2 -1: + ALT_SMP(ldr r0, =mpidr_hash) + ALT_UP_B(1f) + /* This ldmia relies on the memory layout of the mpidr_hash struct */ + ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts + compute_mpidr_hash r0, r6, r7, r8, r2, r1 + add r3, r3, r0, lsl #2 +1: mov r2, r5 @ virtual SP + mov r1, r4 @ size of save block + add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save adr lr, BSYM(cpu_suspend_abort) ldmfd sp!, {r0, pc} @ call suspend fn @@ -130,6 +126,7 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) +ARM_BE8(setend be) @ ensure we are in BE mode mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 72024ea8a3a6..dc894ab3622b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -25,6 +25,7 @@ #include <linux/clockchips.h> #include <linux/completion.h> #include <linux/cpufreq.h> +#include <linux/irq_work.h> #include <linux/atomic.h> #include <asm/smp.h> @@ -66,6 +67,8 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_IRQ_WORK, + IPI_COMPLETION, }; static DECLARE_COMPLETION(cpu_running); @@ -80,7 +83,7 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_phys(pgd); + phys_addr_t pgdir = virt_to_idmap(pgd); BUG_ON(pgdir & ARCH_PGD_MASK); return pgdir >> ARCH_PGD_SHIFT; } @@ -448,6 +451,14 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + if (is_smp()) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x] = s S(IPI_WAKEUP, "CPU wakeup interrupts"), @@ -456,6 +467,8 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), + S(IPI_COMPLETION, "completion interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -515,6 +528,19 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +static DEFINE_PER_CPU(struct completion *, cpu_completion); + +int register_ipi_completion(struct completion *completion, int cpu) +{ + per_cpu(cpu_completion, cpu) = completion; + return IPI_COMPLETION; +} + +static void ipi_complete(unsigned int cpu) +{ + complete(per_cpu(cpu_completion, cpu)); +} + /* * Main handler for inter-processor interrupts */ @@ -565,6 +591,20 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + + case IPI_COMPLETION: + irq_enter(); + ipi_complete(cpu); + irq_exit(); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 5bc1a63284e3..1aafa0d785eb 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -28,7 +28,7 @@ */ unsigned int __init scu_get_core_count(void __iomem *scu_base) { - unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG); + unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG); return (ncores & 0x03) + 1; } @@ -42,19 +42,19 @@ void scu_enable(void __iomem *scu_base) #ifdef CONFIG_ARM_ERRATA_764369 /* Cortex-A9 only */ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { - scu_ctrl = __raw_readl(scu_base + 0x30); + scu_ctrl = readl_relaxed(scu_base + 0x30); if (!(scu_ctrl & 1)) - __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30); } #endif - scu_ctrl = __raw_readl(scu_base + SCU_CTRL); + scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) return; scu_ctrl |= 1; - __raw_writel(scu_ctrl, scu_base + SCU_CTRL); + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* * Ensure that the data accessed by CPU0 before the SCU was @@ -80,9 +80,9 @@ int scu_power_mode(void __iomem *scu_base, unsigned int mode) if (mode > 3 || mode == 1 || cpu > 3) return -EINVAL; - val = __raw_readb(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; + val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; val |= mode; - __raw_writeb(val, scu_base + SCU_CPU_STATUS + cpu); + writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu); return 0; } diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 83ccca303df8..95d063620b76 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -70,6 +70,40 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } +#ifdef CONFIG_ARM_ERRATA_798181 +bool (*erratum_a15_798181_handler)(void); + +static bool erratum_a15_798181_partial(void) +{ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(ish); + return false; +} + +static bool erratum_a15_798181_broadcast(void) +{ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(ish); + return true; +} + +void erratum_a15_798181_init(void) +{ + unsigned int midr = read_cpuid_id(); + unsigned int revidr = read_cpuid(CPUID_REVIDR); + + /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 || + (revidr & 0x210) == 0x210) { + return; + } + if (revidr & 0x10) + erratum_a15_798181_handler = erratum_a15_798181_partial; + else + erratum_a15_798181_handler = erratum_a15_798181_broadcast; +} +#endif + static void ipi_flush_tlb_a15_erratum(void *arg) { dmb(); @@ -80,7 +114,6 @@ static void broadcast_tlb_a15_erratum(void) if (!erratum_a15_798181()) return; - dummy_flush_tlb_a15_erratum(); smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1); } @@ -92,7 +125,6 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) if (!erratum_a15_798181()) return; - dummy_flush_tlb_a15_erratum(); this_cpu = get_cpu(); a15_erratum_get_cpumask(this_cpu, mm, &mask); smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2985c9f0905d..6591e26fc13f 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -45,7 +45,7 @@ static void twd_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_PERIODIC; - __raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), + writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), twd_base + TWD_TIMER_LOAD); break; case CLOCK_EVT_MODE_ONESHOT: @@ -58,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode, ctrl = 0; } - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); } static int twd_set_next_event(unsigned long evt, struct clock_event_device *unused) { - unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); + unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL); ctrl |= TWD_TIMER_CONTROL_ENABLE; - __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); return 0; } @@ -82,8 +82,8 @@ static int twd_set_next_event(unsigned long evt, */ static int twd_timer_ack(void) { - if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); + if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) { + writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); return 1; } @@ -211,15 +211,15 @@ static void twd_calibrate_rate(void) waitjiffies += 5; /* enable, no interrupt or reload */ - __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(twd_base + TWD_TIMER_COUNTER); + count = readl_relaxed(twd_base + TWD_TIMER_COUNTER); twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); @@ -277,7 +277,7 @@ static void twd_timer_setup(void) * bother with the below. */ if (per_cpu(percpu_setup_called, cpu)) { - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clockevents_register_device(clk); enable_percpu_irq(clk->irq, 0); return; @@ -290,7 +290,7 @@ static void twd_timer_setup(void) * The following is done once per CPU the first time .setup() is * called. */ - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 41cf3cbf756d..2835d35234ca 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -10,7 +10,7 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); extern void cpu_resume_mmu(void); #ifdef CONFIG_MMU @@ -21,6 +21,7 @@ extern void cpu_resume_mmu(void); int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { struct mm_struct *mm = current->active_mm; + u32 __mpidr = cpu_logical_map(smp_processor_id()); int ret; if (!idmap_pgd) @@ -32,7 +33,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * resume (indicated by a zero return code), we need to switch * back to the correct page tables. */ - ret = __cpu_suspend(arg, fn); + ret = __cpu_suspend(arg, fn, __mpidr); if (ret == 0) { cpu_switch_mm(mm->pgd, mm); local_flush_bp_all(); @@ -44,7 +45,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) #else int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - return __cpu_suspend(arg, fn); + u32 __mpidr = cpu_logical_map(smp_processor_id()); + return __cpu_suspend(arg, fn, __mpidr); } #define idmap_pgd NULL #endif diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 8fcda140358d..6125f259b7b5 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/unwind.h> #include <asm/tls.h> #include <asm/system_misc.h> +#include <asm/opcodes.h> static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; @@ -341,15 +342,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs, int is_valid_bugaddr(unsigned long pc) { #ifdef CONFIG_THUMB2_KERNEL - unsigned short bkpt; + u16 bkpt; + u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); #else - unsigned long bkpt; + u32 bkpt; + u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif if (probe_kernel_address((unsigned *)pc, bkpt)) return 0; - return bkpt == BUG_INSTR_VALUE; + return bkpt == insn; } #endif @@ -402,25 +405,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (processor_mode(regs) == SVC_MODE) { #ifdef CONFIG_THUMB2_KERNEL if (thumb_mode(regs)) { - instr = ((u16 *)pc)[0]; + instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]); if (is_wide_instruction(instr)) { - instr <<= 16; - instr |= ((u16 *)pc)[1]; + u16 inst2; + inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]); + instr = __opcode_thumb32_compose(instr, inst2); } } else #endif - instr = *(u32 *) pc; + instr = __mem_to_opcode_arm(*(u32 *) pc); } else if (thumb_mode(regs)) { if (get_user(instr, (u16 __user *)pc)) goto die_sig; + instr = __mem_to_opcode_thumb16(instr); if (is_wide_instruction(instr)) { unsigned int instr2; if (get_user(instr2, (u16 __user *)pc+1)) goto die_sig; - instr <<= 16; - instr |= instr2; + instr2 = __mem_to_opcode_thumb16(instr2); + instr = __opcode_thumb32_compose(instr, instr2); } } else if (get_user(instr, (u32 __user *)pc)) { + instr = __mem_to_opcode_arm(instr); goto die_sig; } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..aea7ccb8d397 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -65,7 +65,7 @@ static bool vgic_present; static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); - __get_cpu_var(kvm_arm_running_vcpu) = vcpu; + __this_cpu_write(kvm_arm_running_vcpu, vcpu); } /** @@ -75,7 +75,7 @@ static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arm_get_running_vcpu(void) { BUG_ON(preemptible()); - return __get_cpu_var(kvm_arm_running_vcpu); + return __this_cpu_read(kvm_arm_running_vcpu); } /** @@ -815,7 +815,7 @@ static void cpu_init_hyp_mode(void *dummy) boot_pgd_ptr = kvm_mmu_get_boot_httbr(); pgd_ptr = kvm_mmu_get_httbr(); - stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); + stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 71e08baee209..c02ba4af599f 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -58,14 +58,14 @@ static const struct kvm_irq_level a15_vtimer_irq = { */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - struct kvm_regs *cpu_reset; + struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A15: if (vcpu->vcpu_id > a15_max_cpu_idx) return -EINVAL; - cpu_reset = &a15_regs_reset; + reset_regs = &a15_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &a15_vtimer_irq; break; @@ -74,7 +74,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Reset core registers */ - memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs)); /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index d6408d1ee543..e0c68d5bb7dc 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -10,6 +10,11 @@ UNWIND( .fnstart ) and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 add r1, r1, r0, lsl #2 @ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif mov r3, r2, lsl r3 1: ldrex r2, [r1] \instr r2, r2, r3 diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 025f742dd4df..3e58d710013c 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -18,6 +18,7 @@ #include <linux/hardirq.h> /* for in_atomic() */ #include <linux/gfp.h> #include <linux/highmem.h> +#include <linux/hugetlb.h> #include <asm/current.h> #include <asm/page.h> @@ -40,7 +41,35 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) return 0; pmd = pmd_offset(pud, addr); - if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) + if (unlikely(pmd_none(*pmd))) + return 0; + + /* + * A pmd can be bad if it refers to a HugeTLB or THP page. + * + * Both THP and HugeTLB pages have the same pmd layout + * and should not be manipulated by the pte functions. + * + * Lock the page table for the destination and check + * to see that it's still huge and whether or not we will + * need to fault on write, or if we have a splitting THP. + */ + if (unlikely(pmd_thp_or_huge(*pmd))) { + ptl = ¤t->mm->page_table_lock; + spin_lock(ptl); + if (unlikely(!pmd_thp_or_huge(*pmd) + || pmd_hugewillfault(*pmd) + || pmd_trans_splitting(*pmd))) { + spin_unlock(ptl); + return 0; + } + + *ptep = NULL; + *ptlp = ptl; + return 1; + } + + if (unlikely(pmd_bad(*pmd))) return 0; pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); @@ -94,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) from += tocopy; n -= tocopy; - pte_unmap_unlock(pte, ptl); + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); } if (!atomic) up_read(¤t->mm->mmap_sem); @@ -147,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n) addr += tocopy; n -= tocopy; - pte_unmap_unlock(pte, ptl); + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); } up_read(¤t->mm->mmap_sem); diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 180b3024bec3..f607deb40f4d 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -93,7 +93,7 @@ static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id) static struct irqaction at91rm9200_timer_irq = { .name = "at91_tick", - .flags = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, .handler = at91rm9200_timer_interrupt, .irq = NR_IRQS_LEGACY + AT91_ID_SYS, }; diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 3a4bc2e1a65e..bb392320a0dd 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -171,7 +171,7 @@ static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id) static struct irqaction at91sam926x_pit_irq = { .name = "at91_tick", - .flags = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, .handler = at91sam926x_pit_interrupt, .irq = NR_IRQS_LEGACY + AT91_ID_SYS, }; diff --git a/arch/arm/mach-at91/at91sam9g45_reset.S b/arch/arm/mach-at91/at91sam9g45_reset.S index 721a1a34dd1d..c40c1e2ef80f 100644 --- a/arch/arm/mach-at91/at91sam9g45_reset.S +++ b/arch/arm/mach-at91/at91sam9g45_reset.S @@ -16,11 +16,17 @@ #include "at91_rstc.h" .arm +/* + * at91_ramc_base is an array void* + * init at NULL if only one DDR controler is present in or DT + */ .globl at91sam9g45_restart at91sam9g45_restart: ldr r5, =at91_ramc_base @ preload constants ldr r0, [r5] + ldr r5, [r5, #4] @ ddr1 + cmp r5, #0 ldr r4, =at91_rstc_base ldr r1, [r4] @@ -30,6 +36,8 @@ at91sam9g45_restart: .balign 32 @ align to cache line + strne r2, [r5, #AT91_DDRSDRC_RTR] @ disable DDR1 access + strne r3, [r5, #AT91_DDRSDRC_LPR] @ power down DDR1 str r2, [r0, #AT91_DDRSDRC_RTR] @ disable DDR0 access str r3, [r0, #AT91_DDRSDRC_LPR] @ power down DDR0 str r4, [r1, #AT91_RSTC_CR] @ reset processor diff --git a/arch/arm/mach-at91/at91x40_time.c b/arch/arm/mach-at91/at91x40_time.c index 2919eba41ff4..c0e637adf65d 100644 --- a/arch/arm/mach-at91/at91x40_time.c +++ b/arch/arm/mach-at91/at91x40_time.c @@ -57,7 +57,7 @@ static irqreturn_t at91x40_timer_interrupt(int irq, void *dev_id) static struct irqaction at91x40_timer_irq = { .name = "at91_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, + .flags = IRQF_TIMER, .handler = at91x40_timer_interrupt }; diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index 92b7f770615a..4078ba93776b 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -176,7 +176,7 @@ static struct at24_platform_data eeprom_info = { .context = (void *)0x7f00, }; -static struct snd_platform_data dm365_evm_snd_data = { +static struct snd_platform_data dm365_evm_snd_data __maybe_unused = { .asp_chan_q = EVENTQ_3, }; diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h index 52b8571b2e70..ce402cd21fa0 100644 --- a/arch/arm/mach-davinci/include/mach/serial.h +++ b/arch/arm/mach-davinci/include/mach/serial.h @@ -15,8 +15,6 @@ #include <mach/hardware.h> -#include <linux/platform_device.h> - #define DAVINCI_UART0_BASE (IO_PHYS + 0x20000) #define DAVINCI_UART1_BASE (IO_PHYS + 0x20400) #define DAVINCI_UART2_BASE (IO_PHYS + 0x20800) @@ -39,6 +37,8 @@ #define UART_DM646X_SCR_TX_WATERMARK 0x08 #ifndef __ASSEMBLY__ +#include <linux/platform_device.h> + extern int davinci_serial_init(struct platform_device *); #endif diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 1fd2cf097e30..eb1fa5c84723 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -692,14 +692,14 @@ static void netwinder_led_set(struct led_classdev *cdev, unsigned long flags; u32 reg; - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); reg = nw_gpio_read(); if (b != LED_OFF) reg &= ~led->mask; else reg |= led->mask; nw_gpio_modify_op(led->mask, reg); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); } static enum led_brightness netwinder_led_get(struct led_classdev *cdev) @@ -709,9 +709,9 @@ static enum led_brightness netwinder_led_get(struct led_classdev *cdev) unsigned long flags; u32 reg; - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); reg = nw_gpio_read(); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); return (reg & led->mask) ? LED_OFF : LED_FULL; } diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 8e8437dea3ce..0ca7377746c8 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -4,11 +4,12 @@ config ARCH_HIGHBANK select ARCH_HAS_CPUFREQ select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_HAS_OPP + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_ERRATA_764369 select ARM_ERRATA_775420 - select ARM_ERRATA_798181 + select ARM_ERRATA_798181 if SMP select ARM_GIC select ARM_TIMER_SP804 select CACHE_L2X0 diff --git a/arch/arm/mach-imx/clk-fixup-mux.c b/arch/arm/mach-imx/clk-fixup-mux.c index deb4b8093b30..0d40b35c557c 100644 --- a/arch/arm/mach-imx/clk-fixup-mux.c +++ b/arch/arm/mach-imx/clk-fixup-mux.c @@ -90,6 +90,7 @@ struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, init.ops = &clk_fixup_mux_ops; init.parent_names = parents; init.num_parents = num_parents; + init.flags = 0; fixup_mux->mux.reg = reg; fixup_mux->mux.shift = shift; diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c index c3cfa4116dc0..c6b40f386786 100644 --- a/arch/arm/mach-imx/clk-imx27.c +++ b/arch/arm/mach-imx/clk-imx27.c @@ -285,7 +285,7 @@ int __init mx27_clocks_init(unsigned long fref) clk_register_clkdev(clk[ata_ahb_gate], "ata", NULL); clk_register_clkdev(clk[rtc_ipg_gate], NULL, "imx21-rtc"); clk_register_clkdev(clk[scc_ipg_gate], "scc", NULL); - clk_register_clkdev(clk[cpu_div], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_div], NULL, "cpu0"); clk_register_clkdev(clk[emi_ahb_gate], "emi_ahb" , NULL); mxc_timer_init(MX27_IO_ADDRESS(MX27_GPT1_BASE_ADDR), MX27_INT_GPT1); diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index 1a56a3319997..7c0dc4540aa4 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -328,7 +328,7 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil, clk_register_clkdev(clk[ssi2_ipg_gate], NULL, "imx-ssi.1"); clk_register_clkdev(clk[ssi3_ipg_gate], NULL, "imx-ssi.2"); clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma"); - clk_register_clkdev(clk[cpu_podf], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_podf], NULL, "cpu0"); clk_register_clkdev(clk[iim_gate], "iim", NULL); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.1"); @@ -397,7 +397,7 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc, mx51_spdif_xtal_sel, ARRAY_SIZE(mx51_spdif_xtal_sel)); clk[spdif1_sel] = imx_clk_mux("spdif1_sel", MXC_CCM_CSCMR2, 2, 2, spdif_sel, ARRAY_SIZE(spdif_sel)); - clk[spdif1_pred] = imx_clk_divider("spdif1_podf", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); + clk[spdif1_pred] = imx_clk_divider("spdif1_pred", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); clk[spdif1_podf] = imx_clk_divider("spdif1_podf", "spdif1_pred", MXC_CCM_CDCDR, 9, 6); clk[spdif1_com_sel] = imx_clk_mux("spdif1_com_sel", MXC_CCM_CSCMR2, 5, 1, mx51_spdif1_com_sel, ARRAY_SIZE(mx51_spdif1_com_sel)); diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 85a1b51346c8..90372a21087f 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -233,10 +233,15 @@ put_node: of_node_put(np); } -static void __init imx6q_opp_init(struct device *cpu_dev) +static void __init imx6q_opp_init(void) { struct device_node *np; + struct device *cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_warn("failed to get cpu0 device\n"); + return; + } np = of_node_get(cpu_dev->of_node); if (!np) { pr_warn("failed to find cpu0 node\n"); @@ -268,7 +273,7 @@ static void __init imx6q_init_late(void) imx6q_cpuidle_init(); if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) { - imx6q_opp_init(&imx6q_cpufreq_pdev.dev); + imx6q_opp_init(); platform_device_register(&imx6q_cpufreq_pdev); } } diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c index 64ff37ea72b1..80c177c36c5f 100644 --- a/arch/arm/mach-imx/system.c +++ b/arch/arm/mach-imx/system.c @@ -117,6 +117,17 @@ void __init imx_init_l2cache(void) /* Configure the L2 PREFETCH and POWER registers */ val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); val |= 0x70800000; + /* + * The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0 + * The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2 + * But according to ARM PL310 errata: 752271 + * ID: 752271: Double linefill feature can cause data corruption + * Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2 + * Workaround: The only workaround to this erratum is to disable the + * double linefill feature. This is the default behavior. + */ + if (cpu_is_imx6q()) + val &= ~(1 << 30 | 1 << 23); writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL); val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN; writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL); diff --git a/arch/arm/mach-integrator/pci_v3.h b/arch/arm/mach-integrator/pci_v3.h index 755fd29fed4a..06a9e2e7d007 100644 --- a/arch/arm/mach-integrator/pci_v3.h +++ b/arch/arm/mach-integrator/pci_v3.h @@ -1,2 +1,9 @@ /* Simple oneliner include to the PCIv3 early init */ +#ifdef CONFIG_PCI extern int pci_v3_early_init(void); +#else +static inline int pci_v3_early_init(void) +{ + return 0; +} +#endif diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index 30e1ebe3a891..c342dc4e8a45 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -1,9 +1,5 @@ if ARCH_IXP4XX -config ARCH_SUPPORTS_BIG_ENDIAN - bool - default y - menu "Intel IXP4xx Implementation Options" comment "IXP4xx Platforms" diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 9eb63d724602..5e269d7263ce 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -1,5 +1,6 @@ config ARCH_MVEBU bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7 + select ARCH_SUPPORTS_BIG_ENDIAN select CLKSRC_MMIO select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 4c24303ec481..58adf2fd9cfc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -140,6 +140,7 @@ int __init coherency_init(void) coherency_base = of_iomap(np, 0); coherency_cpu_base = of_iomap(np, 1); set_cpu_coherent(cpu_logical_map(smp_processor_id()), 0); + of_node_put(np); } return 0; @@ -147,9 +148,14 @@ int __init coherency_init(void) static int __init coherency_late_init(void) { - if (of_find_matching_node(NULL, of_coherency_table)) + struct device_node *np; + + np = of_find_matching_node(NULL, of_coherency_table); + if (np) { bus_register_notifier(&platform_bus_type, &mvebu_hwcc_platform_nb); + of_node_put(np); + } return 0; } diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 5476669ba905..ee7598fe75db 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -20,6 +20,8 @@ #define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 #define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 +#include <asm/assembler.h> + .text /* * r0: Coherency fabric base register address @@ -29,6 +31,7 @@ ENTRY(ll_set_cpu_coherent) /* Create bit by cpu index */ mov r3, #(1 << 24) lsl r1, r3, r1 +ARM_BE8(rev r1, r1) /* Add CPU to SMP group - Atomic */ add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S index 8a1b0c96e9ec..3dd80df428f7 100644 --- a/arch/arm/mach-mvebu/headsmp.S +++ b/arch/arm/mach-mvebu/headsmp.S @@ -21,12 +21,16 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> + /* * Armada XP specific entry point for secondary CPUs. * We add the CPU to the coherency fabric and then jump to secondary * startup */ ENTRY(armada_xp_secondary_startup) + ARM_BE8(setend be ) @ go BE8 if entered LE + /* Get coherency fabric base physical address */ adr r0, 1f ldr r1, [r0] diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 3cc4bef6401c..27fc4f049474 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -67,6 +67,7 @@ int __init armada_370_xp_pmsu_init(void) pr_info("Initializing Power Management Service Unit\n"); pmsu_mp_base = of_iomap(np, 0); pmsu_reset_base = of_iomap(np, 1); + of_node_put(np); } return 0; diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index f875124ff4f9..5175083cdb34 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -98,6 +98,7 @@ static int __init mvebu_system_controller_init(void) BUG_ON(!match); system_controller_base = of_iomap(np, 0); mvebu_sc = (struct mvebu_system_controller *)match->data; + of_node_put(np); } return 0; diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 39c78387ddec..87162e1b94a5 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -129,6 +129,24 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)") .restart = omap3xxx_restart, MACHINE_END +static const char *omap36xx_boards_compat[] __initdata = { + "ti,omap36xx", + NULL, +}; + +DT_MACHINE_START(OMAP36XX_DT, "Generic OMAP36xx (Flattened Device Tree)") + .reserve = omap_reserve, + .map_io = omap3_map_io, + .init_early = omap3630_init_early, + .init_irq = omap_intc_of_init, + .handle_irq = omap3_intc_handle_irq, + .init_machine = omap_generic_init, + .init_late = omap3_init_late, + .init_time = omap3_sync32k_timer_init, + .dt_compat = omap36xx_boards_compat, + .restart = omap3xxx_restart, +MACHINE_END + static const char *omap3_gp_boards_compat[] __initdata = { "ti,omap3-beagle", "timll,omap3-devkit8000", diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index c3270c0f1fce..f6fe388af989 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -167,38 +167,47 @@ static struct lp55xx_led_config rx51_lp5523_led_config[] = { .name = "lp5523:kb1", .chan_nr = 0, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb2", .chan_nr = 1, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb3", .chan_nr = 2, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb4", .chan_nr = 3, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:b", .chan_nr = 4, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:g", .chan_nr = 5, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:r", .chan_nr = 6, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb5", .chan_nr = 7, .led_current = 50, + .max_current = 100, }, { .name = "lp5523:kb6", .chan_nr = 8, .led_current = 50, + .max_current = 100, } }; diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index 1d5b5290d2af..b237950eb8a3 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -1632,7 +1632,7 @@ static struct omap_clk omap44xx_clks[] = { CLK(NULL, "auxclk5_src_ck", &auxclk5_src_ck), CLK(NULL, "auxclk5_ck", &auxclk5_ck), CLK(NULL, "auxclkreq5_ck", &auxclkreq5_ck), - CLK("omap-gpmc", "fck", &dummy_ck), + CLK("50000000.gpmc", "fck", &dummy_ck), CLK("omap_i2c.1", "ick", &dummy_ck), CLK("omap_i2c.2", "ick", &dummy_ck), CLK("omap_i2c.3", "ick", &dummy_ck), diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c443f2e97e10..4c8982ae9529 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -143,7 +143,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, * Call idle CPU cluster PM exit notifier chain * to restore GIC and wakeupgen context. */ - if ((cx->mpu_state == PWRDM_POWER_RET) && + if (dev->cpu == 0 && (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF)) cpu_cluster_pm_exit(); diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 64b5a8346982..8b6876c98ce1 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -272,9 +272,19 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base) struct gpmc_timings t; int ret; - if (gpmc_onenand_data->of_node) + if (gpmc_onenand_data->of_node) { gpmc_read_settings_dt(gpmc_onenand_data->of_node, &onenand_async); + if (onenand_async.sync_read || onenand_async.sync_write) { + if (onenand_async.sync_write) + gpmc_onenand_data->flags |= + ONENAND_SYNC_READWRITE; + else + gpmc_onenand_data->flags |= ONENAND_SYNC_READ; + onenand_async.sync_read = false; + onenand_async.sync_write = false; + } + } omap2_onenand_set_async_mode(onenand_base); diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 9f4795aff48a..579697adaae7 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1491,8 +1491,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, */ ret = gpmc_cs_remap(cs, res.start); if (ret < 0) { - dev_err(&pdev->dev, "cannot remap GPMC CS %d to 0x%x\n", - cs, res.start); + dev_err(&pdev->dev, "cannot remap GPMC CS %d to %pa\n", + cs, &res.start); goto err; } diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 5d2080ef7923..16f78a990d04 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -28,7 +28,7 @@ #define OMAP_PULL_UP (1 << 4) #define OMAP_ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define OMAP_INPUT_EN (1 << 8) #define OMAP_OFF_EN (1 << 9) #define OMAP_OFFOUT_EN (1 << 10) @@ -36,8 +36,6 @@ #define OMAP_OFF_PULL_EN (1 << 12) #define OMAP_OFF_PULL_UP (1 << 13) #define OMAP_WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define OMAP_WAKEUP_EVENT (1 << 15) /* Active pin states */ diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c index c53609f46294..be271f1d585b 100644 --- a/arch/arm/mach-omap2/mux34xx.c +++ b/arch/arm/mach-omap2/mux34xx.c @@ -620,7 +620,7 @@ static struct omap_mux __initdata omap3_muxmodes[] = { "uart1_rts", "ssi1_flag_tx", NULL, NULL, "gpio_149", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_RX, 151, - "uart1_rx", "ss1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", + "uart1_rx", "ssi1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", "gpio_151", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_TX, 148, "uart1_tx", "ssi1_dat_tx", NULL, NULL, diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 8708b2a9da45..891211093295 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -1,5 +1,5 @@ /* - * OMAP4 SMP source file. It contains platform specific fucntions + * OMAP4 SMP source file. It contains platform specific functions * needed for the linux smp kernel. * * Copyright (C) 2009 Texas Instruments, Inc. diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index f99f68e1e85b..b69dd9abb50a 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -158,7 +158,7 @@ static int omap_device_build_from_dt(struct platform_device *pdev) } od = omap_device_alloc(pdev, hwmods, oh_cnt); - if (!od) { + if (IS_ERR(od)) { dev_err(&pdev->dev, "Cannot allocate omap_device for :%s\n", oh_name); ret = PTR_ERR(od); diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index fa74a0625da1..ead48fa5715e 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -628,7 +628,7 @@ void __init omap4_local_timer_init(void) #endif /* CONFIG_HAVE_ARM_TWD */ #endif /* CONFIG_ARCH_OMAP4 */ -#ifdef CONFIG_SOC_OMAP5 +#if defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) void __init omap5_realtime_timer_init(void) { omap4_sync32k_timer_init(); @@ -636,7 +636,7 @@ void __init omap5_realtime_timer_init(void) clocksource_of_init(); } -#endif /* CONFIG_SOC_OMAP5 */ +#endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */ /** * omap_timer_init - build and register timer device with an diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index e838ba27e443..c9808c684152 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -512,6 +512,9 @@ static void __init assabet_map_io(void) * Its called GPCLKR0 in my SA1110 manual. */ Ser1SDCR0 |= SDCR0_SUS; + MSC1 = (MSC1 & ~0xffff) | + MSC_NonBrst | MSC_32BitStMem | + MSC_RdAcc(2) | MSC_WrAcc(2) | MSC_Rec(0); if (!machine_has_neponset()) sa1100_register_uart_fns(&assabet_port_fns); diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 612a45689770..7fb96ebdc0fb 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -289,7 +289,7 @@ static void collie_flash_exit(void) } static struct flash_platform_data collie_flash_data = { - .map_name = "cfi_probe", + .map_name = "jedec_probe", .init = collie_flash_init, .set_vpp = collie_set_vpp, .exit = collie_flash_exit, diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h deleted file mode 100644 index 6a9eecf3137e..000000000000 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * arch/arm/mach-sa1100/include/mach/gpio.h - * - * SA1100 GPIO wrappers for arch-neutral GPIO calls - * - * Written by Philipp Zabel <philipp.zabel@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef __ASM_ARCH_SA1100_GPIO_H -#define __ASM_ARCH_SA1100_GPIO_H - -#include <linux/io.h> -#include <mach/hardware.h> -#include <asm/irq.h> -#include <asm-generic/gpio.h> - -#define __ARM_GPIOLIB_COMPLEX - -static inline int gpio_get_value(unsigned gpio) -{ - if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX)) - return GPLR & GPIO_GPIO(gpio); - else - return __gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned gpio, int value) -{ - if (__builtin_constant_p(gpio) && (gpio <= GPIO_MAX)) - if (value) - GPSR = GPIO_GPIO(gpio); - else - GPCR = GPIO_GPIO(gpio); - else - __gpio_set_value(gpio, value); -} - -#define gpio_cansleep __gpio_cansleep - -#endif diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h index 7d9df16f04a2..c810620db53d 100644 --- a/arch/arm/mach-sa1100/include/mach/h3xxx.h +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -13,6 +13,8 @@ #ifndef _INCLUDE_H3XXX_H_ #define _INCLUDE_H3XXX_H_ +#include "hardware.h" /* Gives GPIO_MAX */ + /* Physical memory regions corresponding to chip selects */ #define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) #define H3600_BANK_2_PHYS SA1100_CS2_PHYS diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index bcbc94540e45..41e476e571d7 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -19,6 +19,7 @@ #include <mach/hardware.h> #include <asm/setup.h> +#include <asm/irq.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 5bd1479d3deb..7f8f6076d360 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -1108,9 +1108,9 @@ static const struct pinctrl_map eva_pinctrl_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("asoc-simple-card.1", "pfc-r8a7740", "fsib_mclk_in", "fsib"), /* GETHER */ - PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740", + PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740", "gether_mii", "gether"), - PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740", + PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740", "gether_int", "gether"), /* HDMI */ PIN_MAP_MUX_GROUP_DEFAULT("sh-mobile-hdmi", "pfc-r8a7740", diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index ffb6f0ac7606..5930af8d434f 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -29,6 +29,7 @@ #include <linux/pinctrl/machine.h> #include <linux/platform_data/gpio-rcar.h> #include <linux/platform_device.h> +#include <linux/phy.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> #include <linux/sh_eth.h> @@ -155,6 +156,30 @@ static void __init lager_add_standard_devices(void) ðer_pdata, sizeof(ether_pdata)); } +/* + * Ether LEDs on the Lager board are named LINK and ACTIVE which corresponds + * to non-default 01 setting of the Micrel KSZ8041 PHY control register 1 bits + * 14-15. We have to set them back to 01 from the default 00 value each time + * the PHY is reset. It's also important because the PHY's LED0 signal is + * connected to SoC's ETH_LINK signal and in the PHY's default mode it will + * bounce on and off after each packet, which we apparently want to avoid. + */ +static int lager_ksz8041_fixup(struct phy_device *phydev) +{ + u16 phyctrl1 = phy_read(phydev, 0x1e); + + phyctrl1 &= ~0xc000; + phyctrl1 |= 0x4000; + return phy_write(phydev, 0x1e, phyctrl1); +} + +static void __init lager_init(void) +{ + lager_add_standard_devices(); + + phy_register_fixup_for_id("r8a7790-ether-ff:01", lager_ksz8041_fixup); +} + static const char *lager_boards_compat_dt[] __initdata = { "renesas,lager", NULL, @@ -163,6 +188,6 @@ static const char *lager_boards_compat_dt[] __initdata = { DT_MACHINE_START(LAGER_DT, "lager") .init_early = r8a7790_init_delay, .init_time = r8a7790_timer_init, - .init_machine = lager_add_standard_devices, + .init_machine = lager_init, .dt_compat = lager_boards_compat_dt, MACHINE_END diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c index 8ea5ef6c79cc..5bd2e851e3c7 100644 --- a/arch/arm/mach-shmobile/clock-r8a73a4.c +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -555,7 +555,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("pll2h", &pll2h_clk), /* CPU clock */ - CLKDEV_DEV_ID("cpufreq-cpu0", &z_clk), + CLKDEV_DEV_ID("cpu0", &z_clk), /* DIV6 */ CLKDEV_CON_ID("zb", &div6_clks[DIV6_ZB]), diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 1942eaef5181..c92c023f0d27 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -616,7 +616,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("smp_twd", &twd_clk), /* smp_twd */ /* DIV4 clocks */ - CLKDEV_DEV_ID("cpufreq-cpu0", &div4_clks[DIV4_Z]), + CLKDEV_DEV_ID("cpu0", &div4_clks[DIV4_Z]), /* DIV6 clocks */ CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]), diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 67a76f2dfb9f..f26428d8b62a 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -54,7 +54,7 @@ config ARCH_TEGRA_3x_SOC config ARCH_TEGRA_114_SOC bool "Enable support for Tegra114 family" select HAVE_ARM_ARCH_TIMER - select ARM_ERRATA_798181 + select ARM_ERRATA_798181 if SMP select ARM_L1_CACHE_SHIFT_6 select PINCTRL_TEGRA114 help diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index a85adcd00882..a1659863bfd5 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -1,7 +1,3 @@ -menu "ST-Ericsson AB U300/U335 Platform" - -comment "ST-Ericsson Mobile Platform Products" - config ARCH_U300 bool "ST-Ericsson U300 Series" if ARCH_MULTI_V5 depends on MMU @@ -25,7 +21,9 @@ config ARCH_U300 help Support for ST-Ericsson U300 series mobile platforms. -comment "ST-Ericsson U300/U335 Feature Selections" +if ARCH_U300 + +menu "ST-Ericsson AB U300/U335 Platform" config MACH_U300 depends on ARCH_U300 @@ -53,3 +51,5 @@ config MACH_U300_SPIDUMMY SPI framework and ARM PL022 support. endmenu + +endif diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 82ccf1d98735..264f894c0e3d 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -69,6 +69,7 @@ static int __init ux500_l2x0_init(void) * some SMI service available. */ outer_cache.disable = NULL; + outer_cache.set_debug = NULL; return 0; } diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 365795447804..4fe8ebe5b2d4 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -1,6 +1,7 @@ config ARCH_VEXPRESS bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 select ARCH_REQUIRE_GPIOLIB + select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 3a6384c6c435..14d499688736 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -133,38 +133,8 @@ static void dcscb_power_down(void) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&dcscb_lock); - /* - * Flush all cache levels for this cluster. - * - * To do so we do: - * - Clear the SCTLR.C bit to prevent further cache allocations - * - Flush the whole cache - * - Clear the ACTLR "SMP" bit to disable local coherency - * - * Let's do it in the safest possible way i.e. with - * no memory access within the following sequence - * including to the stack. - * - * Note: fp is preserved to the stack explicitly prior doing - * this since adding it to the clobber list is incompatible - * with having CONFIG_FRAME_POINTER=y. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_all \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + /* Flush all cache levels for this cluster. */ + v7_exit_coherency_flush(all); /* * This is a harmless no-op. On platforms with a real @@ -183,26 +153,8 @@ static void dcscb_power_down(void) } else { arch_spin_unlock(&dcscb_lock); - /* - * Flush the local CPU cache. - * Let's do it in the safest possible way as above. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_louis \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + /* Disable and flush the local CPU cache. */ + v7_exit_coherency_flush(louis); } __mcpm_cpu_down(cpu, cluster); diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 7aeb5d60e484..4eb92ebfd953 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -131,6 +131,16 @@ static void tc2_pm_down(u64 residency) } else BUG(); + /* + * If the CPU is committed to power down, make sure + * the power controller will be in charge of waking it + * up upon IRQ, ie IRQ lines are cut from GIC CPU IF + * to the CPU by disabling the GIC CPU IF to prevent wfi + * from completing execution behind power controller back + */ + if (!skip_wfi) + gic_cpu_if_down(); + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&tc2_pm_lock); @@ -146,32 +156,7 @@ static void tc2_pm_down(u64 residency) : : "r" (0x400) ); } - /* - * We need to disable and flush the whole (L1 and L2) cache. - * Let's do it in the safest possible way i.e. with - * no memory access within the following sequence - * including the stack. - * - * Note: fp is preserved to the stack explicitly prior doing - * this since adding it to the clobber list is incompatible - * with having CONFIG_FRAME_POINTER=y. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_all \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + v7_exit_coherency_flush(all); cci_disable_port_by_cpu(mpidr); @@ -187,26 +172,7 @@ static void tc2_pm_down(u64 residency) arch_spin_unlock(&tc2_pm_lock); - /* - * We need to disable and flush only the L1 cache. - * Let's do it in the safest possible way as above. - */ - asm volatile( - "str fp, [sp, #-4]! \n\t" - "mrc p15, 0, r0, c1, c0, 0 @ get CR \n\t" - "bic r0, r0, #"__stringify(CR_C)" \n\t" - "mcr p15, 0, r0, c1, c0, 0 @ set CR \n\t" - "isb \n\t" - "bl v7_flush_dcache_louis \n\t" - "clrex \n\t" - "mrc p15, 0, r0, c1, c0, 1 @ get AUXCR \n\t" - "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" - "mcr p15, 0, r0, c1, c0, 1 @ set AUXCR \n\t" - "isb \n\t" - "dsb \n\t" - "ldr fp, [sp], #4" - : : : "r0","r1","r2","r3","r4","r5","r6","r7", - "r9","r10","lr","memory"); + v7_exit_coherency_flush(louis); } __mcpm_cpu_down(cpu, cluster); @@ -231,7 +197,6 @@ static void tc2_pm_suspend(u64 residency) cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point)); - gic_cpu_if_down(); tc2_pm_down(residency); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cd2c88e7a8f7..1f8fed94c2a4 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -952,3 +952,9 @@ config ARCH_HAS_BARRIERS help This option allows the use of custom mandatory barriers included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN + bool + help + This option specifies the architecture can support big endian + operation. diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 80741992a9fc..3815a8262af0 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -38,9 +38,8 @@ ENTRY(v6_early_abort) bne do_DataAbort bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r3, r3 -#endif + ARM_BE8(rev r3, r3) + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b89078..924036473b16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -25,6 +25,7 @@ #include <asm/cp15.h> #include <asm/system_info.h> #include <asm/unaligned.h> +#include <asm/opcodes.h> #include "fault.h" @@ -762,21 +763,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); fault = probe_kernel_address(ptr, tinstr); + tinstr = __mem_to_opcode_thumb16(tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ u16 tinst2 = 0; fault = probe_kernel_address(ptr + 1, tinst2); - instr = (tinstr << 16) | tinst2; + tinst2 = __mem_to_opcode_thumb16(tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { isize = 2; instr = thumb2arm(tinstr); } } - } else + } else { fault = probe_kernel_address(instrptr, instr); + instr = __mem_to_opcode_arm(instr); + } if (fault) { type = TYPE_FAULT; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f5e1a8471714..b5a3f6031146 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -687,7 +687,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) @@ -700,7 +700,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) @@ -1232,7 +1232,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) break; len = (j - i) << PAGE_SHIFT; - ret = iommu_map(mapping->domain, iova, phys, len, 0); + ret = iommu_map(mapping->domain, iova, phys, len, + IOMMU_READ|IOMMU_WRITE); if (ret < 0) goto fail; iova += len; @@ -1431,6 +1432,27 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, GFP_KERNEL); } +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; +} + /* * Map a part of the scatter-gather list into contiguous io address space */ @@ -1444,6 +1466,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, int ret = 0; unsigned int count; struct scatterlist *s; + int prot; size = PAGE_ALIGN(size); *handle = DMA_ERROR_CODE; @@ -1460,7 +1483,9 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); - ret = iommu_map(mapping->domain, iova, phys, len, 0); + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, iova, phys, len, prot); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1665,19 +1690,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p if (dma_addr == DMA_ERROR_CODE) return dma_addr; - switch (dir) { - case DMA_BIDIRECTIONAL: - prot = IOMMU_READ | IOMMU_WRITE; - break; - case DMA_TO_DEVICE: - prot = IOMMU_READ; - break; - case DMA_FROM_DEVICE: - prot = IOMMU_WRITE; - break; - default: - prot = 0; - } + prot = __dma_direction_to_prot(dir); ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9d285626bc7d..312e15e6d00b 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c @@ -9,8 +9,13 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; fixup = search_exception_tables(instruction_pointer(regs)); - if (fixup) + if (fixup) { regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_THUMB2_KERNEL + /* Clear the IT state to avoid nasty surprises in the fixup */ + regs->ARM_cpsr &= ~PSR_IT_MASK; +#endif + } return fixup != NULL; } diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac27095..8e0e52eb76b5 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -10,6 +10,7 @@ #include <asm/system_info.h> pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, @@ -67,8 +68,9 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, unsigned long addr, end; unsigned long next; - addr = virt_to_phys(text_start); - end = virt_to_phys(text_end); + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); + pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end); prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; @@ -90,8 +92,6 @@ static int __init init_static_idmap(void) if (!idmap_pgd) return -ENOMEM; - pr_info("Setting up static identity map for 0x%p - 0x%p\n", - __idmap_text_start, __idmap_text_end); identity_mapping_add(idmap_pgd, __idmap_text_start, __idmap_text_end, 0); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index febaee7ca57b..18ec4c504abf 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -17,7 +17,6 @@ #include <linux/nodemask.h> #include <linux/initrd.h> #include <linux/of_fdt.h> -#include <linux/of_reserved_mem.h> #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/memblock.h> @@ -379,8 +378,6 @@ void __init arm_memblock_init(struct meminfo *mi, if (mdesc->reserve) mdesc->reserve(); - early_init_dt_scan_reserved_mem(); - /* * reserve memory for DMA contigouos allocations, * must come from DMA area inside low memory diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 0c6356255fe3..d27158c38eb0 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -202,13 +202,11 @@ int valid_phys_addr_range(phys_addr_t addr, size_t size) } /* - * We don't use supersection mappings for mmap() on /dev/mem, which - * means that we can't map the memory area above the 4G barrier into - * userspace. + * Do not allow /dev/mem mappings beyond the supported physical range. */ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { - return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); + return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT)); } #ifdef CONFIG_STRICT_DEVMEM diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b1d17eeb59b8..78eeeca78f5a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -28,6 +28,8 @@ #include <asm/highmem.h> #include <asm/system_info.h> #include <asm/traps.h> +#include <asm/procinfo.h> +#include <asm/memory.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -1315,6 +1317,86 @@ static void __init map_lowmem(void) } } +#ifdef CONFIG_ARM_LPAE +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; + unsigned long map_start, map_end; + pgd_t *pgd0, *pgdk; + pud_t *pud0, *pudk, *pud_start; + pmd_t *pmd0, *pmdk; + phys_addr_t phys; + int i; + + if (!(mdesc->init_meminfo)) + return; + + /* remap kernel code and data */ + map_start = init_mm.start_code; + map_end = init_mm.brk; + + /* get a handle on things... */ + pgd0 = pgd_offset_k(0); + pud_start = pud0 = pud_offset(pgd0, 0); + pmd0 = pmd_offset(pud0, 0); + + pgdk = pgd_offset_k(map_start); + pudk = pud_offset(pgdk, map_start); + pmdk = pmd_offset(pudk, map_start); + + mdesc->init_meminfo(); + + /* Run the patch stub to update the constants */ + fixup_pv_table(&__pv_table_begin, + (&__pv_table_end - &__pv_table_begin) << 2); + + /* + * Cache cleaning operations for self-modifying code + * We should clean the entries by MVA but running a + * for loop over every pv_table entry pointer would + * just complicate the code. + */ + flush_cache_louis(); + dsb(); + isb(); + + /* remap level 1 table */ + for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { + set_pud(pud0, + __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); + pmd0 += PTRS_PER_PMD; + } + + /* remap pmds for kernel mapping */ + phys = __pa(map_start) & PMD_MASK; + do { + *pmdk++ = __pmd(phys | pmdprot); + phys += PMD_SIZE; + } while (phys < map_end); + + flush_cache_all(); + cpu_switch_mm(pgd0, &init_mm); + cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + local_flush_bp_all(); + local_flush_tlb_all(); +} + +#else + +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + if (mdesc->init_meminfo) + mdesc->init_meminfo(); +} + +#endif + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 34d4ab217bab..5c668b7a31f9 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -296,6 +296,15 @@ void __init sanity_check_meminfo(void) } /* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ +} + +/* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 1128064fddcb..45dc29f85d56 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -220,9 +220,7 @@ __v6_setup: #endif /* CONFIG_MMU */ adr r5, v6_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c63d9bdee51e..60920f62fdf5 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -367,9 +367,7 @@ __v7_setup: #endif adr r5, v7_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE orr r5, r5, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f50d223a0bd3..9ed155ad0f97 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -19,6 +19,7 @@ #include <linux/if_vlan.h> #include <asm/cacheflush.h> #include <asm/hwcap.h> +#include <asm/opcodes.h> #include "bpf_jit_32.h" @@ -113,8 +114,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor) static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { + inst |= (cond << 28); + inst = __opcode_to_mem_arm(inst); + if (ctx->target != NULL) - ctx->target[ctx->idx] = inst | (cond << 28); + ctx->target[ctx->idx] = inst; ctx->idx++; } @@ -930,4 +934,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index 2677bc3762d7..40f27e52de75 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> /* * Realview/Versatile Express specific entry point for secondary CPUs. @@ -17,6 +18,7 @@ * until we're ready for them to initialise. */ ENTRY(versatile_secondary_startup) + ARM_BE8(setend be) mrc p15, 0, r0, c0, c0, 5 bic r0, #0xff000000 adr r4, 1f diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 52b8f40b1c73..2f37e1d6cb45 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -642,9 +642,9 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, static int vfp_hotplug(struct notifier_block *b, unsigned long action, void *hcpu) { - if (action == CPU_DYING || action == CPU_DYING_FROZEN) { - vfp_force_reload((long)hcpu, current_thread_info()); - } else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) + if (action == CPU_DYING || action == CPU_DYING_FROZEN) + vfp_current_hw_state[(long)hcpu] = NULL; + else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) vfp_enable(NULL); return NOTIFY_OK; } diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1a6bfe954d49..835c559786bd 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,13 +6,6 @@ config FRAME_POINTER bool default y -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T output. - config EARLY_PRINTK bool "Early printk support" default y diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5b3e83217b03..31c81e9b792e 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -42,7 +42,7 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -# CONFIG_BLK_DEV is not set +CONFIG_BLK_DEV=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y @@ -72,6 +72,7 @@ CONFIG_LOGO=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_FUSE_FS=y @@ -90,3 +91,5 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set CONFIG_ATOMIC64_SELFTEST=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_BLK=y diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 836364468571..01de5aaa3edc 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -126,20 +126,6 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) return oldval; } -static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) -{ - unsigned long tmp, tmp2; - - asm volatile("// atomic_clear_mask\n" -"1: ldxr %0, %2\n" -" bic %0, %0, %3\n" -" stxr %w1, %0, %2\n" -" cbnz %w1, 1b" - : "=&r" (tmp), "=&r" (tmp2), "+Q" (*addr) - : "Ir" (mask) - : "cc"); -} - #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) static inline int __atomic_add_unless(atomic_t *v, int a, int u) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..e2950b098e76 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -43,6 +43,6 @@ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index edb3d5c73a32..7ecc2b23882e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -166,9 +166,10 @@ do { \ #define get_user(x, ptr) \ ({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \ - __get_user((x), (ptr)) : \ + access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ + __get_user((x), __p) : \ ((x) = 0, -EFAULT); \ }) @@ -227,9 +228,10 @@ do { \ #define put_user(x, ptr) \ ({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ - __put_user((x), (ptr)) : \ + access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ + __put_user((x), __p) : \ -EFAULT; \ }) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cbfacf7fb438..6a0a9b132d7a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -27,7 +27,6 @@ #include <linux/uaccess.h> #include <asm/debug-monitors.h> -#include <asm/local.h> #include <asm/cputype.h> #include <asm/system_misc.h> @@ -89,8 +88,8 @@ early_param("nodebugmon", early_debug_disable); * Keep track of debug users on each core. * The ref counts are per-cpu so we use a local_t type. */ -static DEFINE_PER_CPU(local_t, mde_ref_count); -static DEFINE_PER_CPU(local_t, kde_ref_count); +static DEFINE_PER_CPU(int, mde_ref_count); +static DEFINE_PER_CPU(int, kde_ref_count); void enable_debug_monitors(enum debug_el el) { @@ -98,11 +97,11 @@ void enable_debug_monitors(enum debug_el el) WARN_ON(preemptible()); - if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) + if (this_cpu_inc_return(mde_ref_count) == 1) enable = DBG_MDSCR_MDE; if (el == DBG_ACTIVE_EL1 && - local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) + this_cpu_inc_return(kde_ref_count) == 1) enable |= DBG_MDSCR_KDE; if (enable && debug_enabled) { @@ -118,11 +117,11 @@ void disable_debug_monitors(enum debug_el el) WARN_ON(preemptible()); - if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) + if (this_cpu_dec_return(mde_ref_count) == 0) disable = ~DBG_MDSCR_MDE; if (el == DBG_ACTIVE_EL1 && - local_dec_and_test(&__get_cpu_var(kde_ref_count))) + this_cpu_dec_return(kde_ref_count) == 0) disable &= ~DBG_MDSCR_KDE; if (disable) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1f2e4d5a5c0f..bb785d23dbde 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -80,8 +80,10 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_load_state(¤t->thread.fpsimd_state); + preempt_enable(); } #ifdef CONFIG_KERNEL_MODE_NEON diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 329218ca9ffb..ff516f6691e4 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -184,14 +184,14 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Breakpoint */ ctrl_reg = AARCH64_DBG_REG_BCR; val_reg = AARCH64_DBG_REG_BVR; - slots = __get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; reg_enable = !debug_info->bps_disabled; } else { /* Watchpoint */ ctrl_reg = AARCH64_DBG_REG_WCR; val_reg = AARCH64_DBG_REG_WVR; - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; reg_enable = !debug_info->wps_disabled; } @@ -230,12 +230,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = AARCH64_DBG_REG_BCR; - slots = __get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; } else { /* Watchpoint */ base = AARCH64_DBG_REG_WCR; - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; } @@ -505,11 +505,11 @@ static void toggle_bp_registers(int reg, enum debug_el el, int enable) switch (reg) { case AARCH64_DBG_REG_BCR: - slots = __get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; break; case AARCH64_DBG_REG_WCR: - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; break; default: @@ -546,7 +546,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, struct debug_info *debug_info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); addr = instruction_pointer(regs); debug_info = ¤t->thread.debug; @@ -596,7 +596,7 @@ unlock: user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; @@ -623,7 +623,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); debug_info = ¤t->thread.debug; for (i = 0; i < core_num_wrps; ++i) { @@ -698,7 +698,7 @@ unlock: user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; @@ -722,7 +722,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) struct debug_info *debug_info = ¤t->thread.debug; int handled_exception = 0, *kernel_step; - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); /* * Called from single-step exception handler. diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594ff933..6983ed5a351a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1044,7 +1044,7 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1257,7 +1257,7 @@ device_initcall(register_pmu_driver); static struct pmu_hw_events *armpmu_get_cpu_events(void) { - return &__get_cpu_var(cpu_hw_events); + return this_cpu_ptr(&cpu_hw_events); } static void __init cpu_pmu_init(struct arm_pmu *armpmu) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 57fb55c44c90..7ae8a1f00c3c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 12ad8f3d0cfd..055cfb80e05c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); static const char *cpu_name; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6d6acf153bff..c23751b06120 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S index 8ae80a18e8ec..19da91e0cd27 100644 --- a/arch/arm64/mm/tlb.S +++ b/arch/arm64/mm/tlb.S @@ -35,7 +35,7 @@ */ ENTRY(__cpu_flush_user_tlb_range) vma_vm_mm x3, x2 // get vma->vm_mm - mmid x3, x3 // get vm_mm->context.id + mmid w3, x3 // get vm_mm->context.id dsb sy lsr x0, x0, #12 // align address lsr x1, x1, #12 diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index d22af851f3f6..fd7980743890 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,5 +1,19 @@ generic-y += clkdev.h +generic-y += cputime.h +generic-y += delay.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h -generic-y += trace_clock.h +generic-y += futex.h +generic-y += irq_regs.h generic-y += param.h +generic-y += local.h +generic-y += local64.h +generic-y += percpu.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/avr32/include/asm/cputime.h b/arch/avr32/include/asm/cputime.h deleted file mode 100644 index e87e0f81cbeb..000000000000 --- a/arch/avr32/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_CPUTIME_H -#define __ASM_AVR32_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __ASM_AVR32_CPUTIME_H */ diff --git a/arch/avr32/include/asm/delay.h b/arch/avr32/include/asm/delay.h deleted file mode 100644 index 9670e127b7b2..000000000000 --- a/arch/avr32/include/asm/delay.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/delay.h> diff --git a/arch/avr32/include/asm/device.h b/arch/avr32/include/asm/device.h deleted file mode 100644 index d8f9872b0e2d..000000000000 --- a/arch/avr32/include/asm/device.h +++ /dev/null @@ -1,7 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#include <asm-generic/device.h> - diff --git a/arch/avr32/include/asm/div64.h b/arch/avr32/include/asm/div64.h deleted file mode 100644 index d7ddd4fdeca6..000000000000 --- a/arch/avr32/include/asm/div64.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_DIV64_H -#define __ASM_AVR32_DIV64_H - -#include <asm-generic/div64.h> - -#endif /* __ASM_AVR32_DIV64_H */ diff --git a/arch/avr32/include/asm/emergency-restart.h b/arch/avr32/include/asm/emergency-restart.h deleted file mode 100644 index 3e7e014776ba..000000000000 --- a/arch/avr32/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_EMERGENCY_RESTART_H -#define __ASM_AVR32_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */ diff --git a/arch/avr32/include/asm/futex.h b/arch/avr32/include/asm/futex.h deleted file mode 100644 index 10419f14a68a..000000000000 --- a/arch/avr32/include/asm/futex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_FUTEX_H -#define __ASM_AVR32_FUTEX_H - -#include <asm-generic/futex.h> - -#endif /* __ASM_AVR32_FUTEX_H */ diff --git a/arch/avr32/include/asm/irq_regs.h b/arch/avr32/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/avr32/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/avr32/include/asm/local.h b/arch/avr32/include/asm/local.h deleted file mode 100644 index 1c1619694da3..000000000000 --- a/arch/avr32/include/asm/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_LOCAL_H -#define __ASM_AVR32_LOCAL_H - -#include <asm-generic/local.h> - -#endif /* __ASM_AVR32_LOCAL_H */ diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/avr32/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/avr32/include/asm/percpu.h b/arch/avr32/include/asm/percpu.h deleted file mode 100644 index 69227b4cd0d4..000000000000 --- a/arch/avr32/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_PERCPU_H -#define __ASM_AVR32_PERCPU_H - -#include <asm-generic/percpu.h> - -#endif /* __ASM_AVR32_PERCPU_H */ diff --git a/arch/avr32/include/asm/scatterlist.h b/arch/avr32/include/asm/scatterlist.h deleted file mode 100644 index a5902d9834e8..000000000000 --- a/arch/avr32/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_SCATTERLIST_H -#define __ASM_AVR32_SCATTERLIST_H - -#include <asm-generic/scatterlist.h> - -#endif /* __ASM_AVR32_SCATTERLIST_H */ diff --git a/arch/avr32/include/asm/sections.h b/arch/avr32/include/asm/sections.h deleted file mode 100644 index aa14252e4181..000000000000 --- a/arch/avr32/include/asm/sections.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_SECTIONS_H -#define __ASM_AVR32_SECTIONS_H - -#include <asm-generic/sections.h> - -#endif /* __ASM_AVR32_SECTIONS_H */ diff --git a/arch/avr32/include/asm/topology.h b/arch/avr32/include/asm/topology.h deleted file mode 100644 index 5b766cbb4806..000000000000 --- a/arch/avr32/include/asm/topology.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_TOPOLOGY_H -#define __ASM_AVR32_TOPOLOGY_H - -#include <asm-generic/topology.h> - -#endif /* __ASM_AVR32_TOPOLOGY_H */ diff --git a/arch/avr32/include/asm/xor.h b/arch/avr32/include/asm/xor.h deleted file mode 100644 index 99c87aa0af4f..000000000000 --- a/arch/avr32/include/asm/xor.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_XOR_H -#define _ASM_XOR_H - -#include <asm-generic/xor.h> - -#endif diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index c2731003edef..42a53e740a7e 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -289,7 +289,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, memset(childregs, 0, sizeof(struct pt_regs)); p->thread.cpu_context.r0 = arg; p->thread.cpu_context.r1 = usp; /* fn */ - p->thread.cpu_context.r2 = syscall_return; + p->thread.cpu_context.r2 = (unsigned long)syscall_return; p->thread.cpu_context.pc = (unsigned long)ret_from_kernel_thread; childregs->sr = MODE_SUPERVISOR; } else { diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 869a1c6ffeee..12f828ad5058 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -98,7 +98,14 @@ static void comparator_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_SHUTDOWN: sysreg_write(COMPARE, 0); pr_debug("%s: stop\n", evdev->name); - cpu_idle_poll_ctrl(false); + if (evdev->mode == CLOCK_EVT_MODE_ONESHOT || + evdev->mode == CLOCK_EVT_MODE_RESUME) { + /* + * Only disable idle poll if we have forced that + * in a previous call. + */ + cpu_idle_poll_ctrl(false); + } break; default: BUG(); diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 75a36ad11ff5..ca8f8340d75f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -288,9 +288,6 @@ endif vmlinux.32: vmlinux $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@ - -#obj-$(CONFIG_KPROBES) += kprobes.o - # # The 64-bit ELF tools are pretty broken so at this time we generate 64-bit # ELF files from 32-bit files by conversion. diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c index 4a9baa9f6330..9969dbab19e3 100644 --- a/arch/mips/alchemy/board-mtx1.c +++ b/arch/mips/alchemy/board-mtx1.c @@ -276,7 +276,7 @@ static struct platform_device mtx1_pci_host = { .resource = alchemy_pci_host_res, }; -static struct __initdata platform_device * mtx1_devs[] = { +static struct platform_device *mtx1_devs[] __initdata = { &mtx1_pci_host, &mtx1_gpio_leds, &mtx1_wdt, diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c index fcc695626117..2adc7edda49c 100644 --- a/arch/mips/alchemy/common/usb.c +++ b/arch/mips/alchemy/common/usb.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/syscore_ops.h> +#include <asm/cpu.h> #include <asm/mach-au1x00/au1000.h> /* control register offsets */ @@ -358,7 +359,7 @@ static inline int au1200_coherency_bug(void) { #if defined(CONFIG_DMA_COHERENT) /* Au1200 AB USB does not support coherent memory */ - if (!(read_c0_prid() & 0xff)) { + if (!(read_c0_prid() & PRID_REV_MASK)) { printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n"); printk(KERN_INFO "Au1200 USB: update your board or re-configure" " the kernel\n"); diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index 7e17374a9ae8..b713cd64b087 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -306,14 +306,14 @@ void __init bcm63xx_cpu_init(void) switch (c->cputype) { case CPU_BMIPS3300: - if ((read_c0_prid() & 0xff00) != PRID_IMP_BMIPS3300_ALT) + if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; /* fall-through */ case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; case CPU_BMIPS4350: - switch ((read_c0_prid() & 0xff)) { + switch ((read_c0_prid() & PRID_REV_MASK)) { case 0x04: chipid_reg = BCM_3368_PERF_BASE; break; diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings index 68ae3887b3e5..08c00e4972fa 120000 --- a/arch/mips/boot/dts/include/dt-bindings +++ b/arch/mips/boot/dts/include/dt-bindings @@ -1 +1 @@ -../../../../../include/dt-bindings +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c index 02193953eb9e..b752c4ed0b79 100644 --- a/arch/mips/cavium-octeon/csrc-octeon.c +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -12,6 +12,7 @@ #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/time.h> #include <asm/octeon/octeon.h> diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index ab169046e442..468f665de7bb 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -13,6 +13,7 @@ #include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/processor.h> #include <asm/dec/prom.h> diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index fa44f3ec5302..d445d060e346 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -13,12 +13,6 @@ #include <asm/cpu-info.h> #include <cpu-feature-overrides.h> -#ifndef current_cpu_type -#define current_cpu_type() current_cpu_data.cputype -#endif - -#define boot_cpu_type() cpu_data[0].cputype - /* * SMP assumption: Options of CPU 0 are a superset of all processors. * This is true for all known MIPS systems. @@ -193,7 +187,7 @@ /* * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other - * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and + * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and * has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ. */ diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 41401d8eb7d1..21c8e29c8f91 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -84,6 +84,7 @@ struct cpuinfo_mips { extern struct cpuinfo_mips cpu_data[]; #define current_cpu_data cpu_data[smp_processor_id()] #define raw_current_cpu_data cpu_data[raw_smp_processor_id()] +#define boot_cpu_data cpu_data[0] extern void cpu_probe(void); extern void cpu_report(void); diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h new file mode 100644 index 000000000000..4a402cc60c03 --- /dev/null +++ b/arch/mips/include/asm/cpu-type.h @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. Rozycki + */ +#ifndef __ASM_CPU_TYPE_H +#define __ASM_CPU_TYPE_H + +#include <linux/smp.h> +#include <linux/compiler.h> + +static inline int __pure __get_cpu_type(const int cpu_type) +{ + switch (cpu_type) { +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) + case CPU_LOONGSON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B + case CPU_LOONGSON1: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 + case CPU_4KC: + case CPU_ALCHEMY: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_PR4450: + case CPU_BMIPS32: + case CPU_JZRISC: +#endif + +#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) + case CPU_4KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2 + case CPU_4KSC: + case CPU_24K: + case CPU_34K: + case CPU_1004K: + case CPU_74K: + case CPU_M14KC: + case CPU_M14KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1 + case CPU_5KC: + case CPU_5KE: + case CPU_20KC: + case CPU_25KF: + case CPU_SB1: + case CPU_SB1A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R2 + /* + * All MIPS64 R2 processors have their own special symbols. That is, + * there currently is no pure R2 core + */ +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R3000 + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3041: + case CPU_R3051: + case CPU_R3052: + case CPU_R3081: + case CPU_R3081E: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX39XX + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_VR41XX + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + case CPU_VR4181: + case CPU_VR4181A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4300 + case CPU_R4300: + case CPU_R4310: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4X00 + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4200: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4600: + case CPU_R4700: + case CPU_R4640: + case CPU_R4650: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX49XX + case CPU_TX49XX: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5000 + case CPU_R5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5432 + case CPU_R5432: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5500 + case CPU_R5500: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R6000 + case CPU_R6000: + case CPU_R6000A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_NEVADA + case CPU_NEVADA: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R8000 + case CPU_R8000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R10000 + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM7000 + case CPU_RM7000: + case CPU_SR71000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM9000 + case CPU_RM9000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_SB1 + case CPU_SB1: + case CPU_SB1A: +#endif +#ifdef CONFIG_SYS_HAS_CPU_CAVIUM_OCTEON + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS4380 + case CPU_BMIPS4380: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS5000 + case CPU_BMIPS5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLP + case CPU_XLP: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLR + case CPU_XLR: +#endif + break; + default: + unreachable(); + } + + return cpu_type; +} + +static inline int __pure current_cpu_type(void) +{ + const int cpu_type = current_cpu_data.cputype; + + return __get_cpu_type(cpu_type); +} + +static inline int __pure boot_cpu_type(void) +{ + const int cpu_type = cpu_data[0].cputype; + + return __get_cpu_type(cpu_type); +} + +#endif /* __ASM_CPU_TYPE_H */ diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 71b9f1998be7..d2035e16502a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -3,15 +3,14 @@ * various MIPS cpu types. * * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 2004 Maciej W. Rozycki + * Copyright (C) 2004, 2013 Maciej W. Rozycki */ #ifndef _ASM_CPU_H #define _ASM_CPU_H -/* Assigned Company values for bits 23:16 of the PRId Register - (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from - MTI, the PRId register is defined in this (backwards compatible) - way: +/* + As of the MIPS32 and MIPS64 specs from MTI, the PRId register (CP0 + register 15, select 0) is defined in this (backwards compatible) way: +----------------+----------------+----------------+----------------+ | Company Options| Company ID | Processor ID | Revision | @@ -23,6 +22,14 @@ spec. */ +#define PRID_OPT_MASK 0xff000000 + +/* + * Assigned Company values for bits 23:16 of the PRId register. + */ + +#define PRID_COMP_MASK 0xff0000 + #define PRID_COMP_LEGACY 0x000000 #define PRID_COMP_MIPS 0x010000 #define PRID_COMP_BROADCOM 0x020000 @@ -38,10 +45,17 @@ #define PRID_COMP_INGENIC 0xd00000 /* - * Assigned values for the product ID register. In order to detect a - * certain CPU type exactly eventually additional registers may need to - * be examined. These are valid when 23:16 == PRID_COMP_LEGACY + * Assigned Processor ID (implementation) values for bits 15:8 of the PRId + * register. In order to detect a certain CPU type exactly eventually + * additional registers may need to be examined. */ + +#define PRID_IMP_MASK 0xff00 + +/* + * These are valid when 23:16 == PRID_COMP_LEGACY + */ + #define PRID_IMP_R2000 0x0100 #define PRID_IMP_AU1_REV1 0x0100 #define PRID_IMP_AU1_REV2 0x0200 @@ -182,11 +196,15 @@ #define PRID_IMP_NETLOGIC_XLP2XX 0x1200 /* - * Definitions for 7:0 on legacy processors + * Particular Revision values for bits 7:0 of the PRId register. */ #define PRID_REV_MASK 0x00ff +/* + * Definitions for 7:0 on legacy processors + */ + #define PRID_REV_TX4927 0x0022 #define PRID_REV_TX4937 0x0030 #define PRID_REV_R4400 0x0040 @@ -227,6 +245,8 @@ * 31 16 15 8 7 0 */ +#define FPIR_IMP_MASK 0xff00 + #define FPIR_IMP_NONE 0x0000 enum cpu_type_enum { diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 4d6d77ed9b9d..e194f957ca8c 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\tnop\n\t" + asm_volatile_goto("1:\tnop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index 3e11a468cdf8..54f9e84db8ac 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -43,6 +43,8 @@ #include <linux/io.h> #include <linux/irq.h> +#include <asm/cpu.h> + /* cpu pipeline flush */ void static inline au_sync(void) { @@ -140,7 +142,7 @@ static inline int au1xxx_cpu_needs_config_od(void) static inline int alchemy_get_cputype(void) { - switch (read_c0_prid() & 0xffff0000) { + switch (read_c0_prid() & (PRID_OPT_MASK | PRID_COMP_MASK)) { case 0x00030000: return ALCHEMY_CPU_AU1000; break; diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h index f4caacd25552..1bcb6421205e 100644 --- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP22 with a variety of processors so we can't use defaults for everything. */ diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 1d2b6ff60d33..d6111aa2e886 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP27 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h index 65e9c856390d..4cec06d133db 100644 --- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP28 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index fed1c3e9b486..e0331414c7d6 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -603,6 +603,13 @@ #define MIPS_CONF4_MMUEXTDEF (_ULCAST_(3) << 14) #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14) +#define MIPS_CONF5_NF (_ULCAST_(1) << 0) +#define MIPS_CONF5_UFR (_ULCAST_(1) << 2) +#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) +#define MIPS_CONF5_EVA (_ULCAST_(1) << 28) +#define MIPS_CONF5_CV (_ULCAST_(1) << 29) +#define MIPS_CONF5_K (_ULCAST_(1) << 30) + #define MIPS_CONF6_SYND (_ULCAST_(1) << 13) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index f194c08bd057..12d6842962be 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -83,6 +83,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +#define HAVE_ARCH_PCI_RESOURCE_TO_USER + +static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) +{ + phys_t size = resource_size(rsrc); + + *start = fixup_bigphys_addr(rsrc->start, size); + *end = rsrc->start + size; +} + /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 6529704aa73a..c5424757da65 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -10,7 +10,9 @@ #ifdef __KERNEL__ +#include <asm/cpu-features.h> #include <asm/mipsregs.h> +#include <asm/cpu-type.h> /* * This is the clock rate of the i8253 PIT. A MIPS system may not have @@ -33,9 +35,38 @@ typedef unsigned int cycles_t; +/* + * On R4000/R4400 before version 5.0 an erratum exists such that if the + * cycle counter is read in the exact moment that it is matching the + * compare register, no interrupt will be generated. + * + * There is a suggested workaround and also the erratum can't strike if + * the compare interrupt isn't being used as the clock source device. + * However for now the implementaton of this function doesn't get these + * fine details right. + */ static inline cycles_t get_cycles(void) { - return 0; + switch (boot_cpu_type()) { + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + if ((read_c0_prid() & 0xff) >= 0x0050) + return read_c0_count(); + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + break; + + default: + if (cpu_has_counter) + return read_c0_count(); + break; + } + + return 0; /* no usable counter */ } #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h index f4cff7e4fa8a..f82c83749a08 100644 --- a/arch/mips/include/asm/vga.h +++ b/arch/mips/include/asm/vga.h @@ -6,6 +6,7 @@ #ifndef _ASM_VGA_H #define _ASM_VGA_H +#include <asm/addrspace.h> #include <asm/byteorder.h> /* @@ -13,7 +14,7 @@ * access the videoram directly without any black magic. */ -#define VGA_MAP_MEM(x, s) (0xb0000000L + (unsigned long)(x)) +#define VGA_MAP_MEM(x, s) CKSEG1ADDR(0x10000000L + (unsigned long)(x)) #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 37663c7862a5..5465dc183e5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -20,6 +20,7 @@ #include <asm/bugs.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/fpu.h> #include <asm/mipsregs.h> #include <asm/watch.h> @@ -55,7 +56,7 @@ static inline void check_errata(void) { struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: /* * Erratum "RPS May Cause Incorrect Instruction Execution" @@ -122,7 +123,7 @@ static inline unsigned long cpu_get_fpu_id(void) */ static inline int __cpu_has_fpu(void) { - return ((cpu_get_fpu_id() & 0xff00) != FPIR_IMP_NONE); + return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE); } static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) @@ -290,6 +291,17 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) return config4 & MIPS_CONF_M; } +static inline unsigned int decode_config5(struct cpuinfo_mips *c) +{ + unsigned int config5; + + config5 = read_c0_config5(); + config5 &= ~MIPS_CONF5_UFR; + write_c0_config5(config5); + + return config5 & MIPS_CONF_M; +} + static void decode_configs(struct cpuinfo_mips *c) { int ok; @@ -310,6 +322,8 @@ static void decode_configs(struct cpuinfo_mips *c) ok = decode_config3(c); if (ok) ok = decode_config4(c); + if (ok) + ok = decode_config5(c); mips_probe_watch_registers(c); @@ -322,7 +336,7 @@ static void decode_configs(struct cpuinfo_mips *c) static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) { - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_R2000: c->cputype = CPU_R2000; __cpu_name[cpu] = "R2000"; @@ -333,7 +347,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 64; break; case PRID_IMP_R3000: - if ((c->processor_id & 0xff) == PRID_REV_R3000A) { + if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) { if (cpu_has_confreg()) { c->cputype = CPU_R3081E; __cpu_name[cpu] = "R3081"; @@ -353,7 +367,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_IMP_R4000: if (read_c0_config() & CONF_SC) { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400PC; __cpu_name[cpu] = "R4400PC"; } else { @@ -361,7 +376,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "R4000PC"; } } else { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400SC; __cpu_name[cpu] = "R4400SC"; } else { @@ -454,7 +470,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "TX3927"; c->tlbsize = 64; } else { - switch (c->processor_id & 0xff) { + switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_TX3912: c->cputype = CPU_TX3912; __cpu_name[cpu] = "TX3912"; @@ -640,7 +656,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_4KC: c->cputype = CPU_4KC; __cpu_name[cpu] = "MIPS 4Kc"; @@ -711,7 +727,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_AU1_REV1: case PRID_IMP_AU1_REV2: c->cputype = CPU_ALCHEMY; @@ -730,7 +746,7 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) break; case 4: __cpu_name[cpu] = "Au1200"; - if ((c->processor_id & 0xff) == 2) + if ((c->processor_id & PRID_REV_MASK) == 2) __cpu_name[cpu] = "Au1250"; break; case 5: @@ -748,12 +764,12 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SB1: c->cputype = CPU_SB1; __cpu_name[cpu] = "SiByte SB1"; /* FPU in pass1 is known to have issues. */ - if ((c->processor_id & 0xff) < 0x02) + if ((c->processor_id & PRID_REV_MASK) < 0x02) c->options &= ~(MIPS_CPU_FPU | MIPS_CPU_32FPR); break; case PRID_IMP_SB1A: @@ -766,7 +782,7 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SR71000: c->cputype = CPU_SR71000; __cpu_name[cpu] = "Sandcraft SR71000"; @@ -779,7 +795,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_PR4450: c->cputype = CPU_PR4450; __cpu_name[cpu] = "Philips PR4450"; @@ -791,7 +807,7 @@ static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_BMIPS32_REV4: case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; @@ -806,7 +822,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) set_elf_platform(cpu, "bmips3300"); break; case PRID_IMP_BMIPS43XX: { - int rev = c->processor_id & 0xff; + int rev = c->processor_id & PRID_REV_MASK; if (rev >= PRID_REV_BMIPS4380_LO && rev <= PRID_REV_BMIPS4380_HI) { @@ -832,7 +848,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_CAVIUM_CN38XX: case PRID_IMP_CAVIUM_CN31XX: case PRID_IMP_CAVIUM_CN30XX: @@ -875,7 +891,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) decode_configs(c); /* JZRISC does not implement the CP0 counter. */ c->options &= ~MIPS_CPU_COUNTER; - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_JZRISC: c->cputype = CPU_JZRISC; __cpu_name[cpu] = "Ingenic JZRISC"; @@ -890,7 +906,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) { decode_configs(c); - if ((c->processor_id & 0xff00) == PRID_IMP_NETLOGIC_AU13XX) { + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_NETLOGIC_AU13XX) { c->cputype = CPU_ALCHEMY; __cpu_name[cpu] = "Au1300"; /* following stuff is not for Alchemy */ @@ -905,7 +921,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) MIPS_CPU_EJTAG | MIPS_CPU_LLSC); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_NETLOGIC_XLP2XX: c->cputype = CPU_XLP; __cpu_name[cpu] = "Broadcom XLPII"; @@ -984,7 +1000,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_c0_prid(); - switch (c->processor_id & 0xff0000) { + switch (c->processor_id & PRID_COMP_MASK) { case PRID_COMP_LEGACY: cpu_probe_legacy(c, cpu); break; diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 42f8875d2444..f7991d95bff9 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <asm/cpu.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/idle.h> #include <asm/mipsregs.h> @@ -136,7 +137,7 @@ void __init check_wait(void) return; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R3081: case CPU_R3081E: cpu_wait = r3081_wait; diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index 4204d76af854..029e002a4ea0 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -73,7 +73,7 @@ 3: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 38af83f84c4a..20b7b040e76f 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -67,7 +67,7 @@ LEAF(resume) 1: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index 921238a6bd26..078de5eaca8f 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -69,7 +69,7 @@ 1: #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - PTR_L t8, __stack_chk_guard + PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) LONG_S t9, 0(t8) #endif diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 364d26ae4215..dcb8e5d3bb8a 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/smtc_ipi.h> #include <asm/time.h> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index aec3408edd4b..524841f02803 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -39,6 +39,7 @@ #include <asm/break.h> #include <asm/cop2.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/fpu_emulator.h> @@ -622,7 +623,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) regs->regs[rt] = read_c0_count(); return 0; case 3: /* Count register resolution */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: regs->regs[rt] = 1; diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 729e7702b1de..c8efdb5b6ee0 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -19,6 +19,7 @@ #include <asm/bootinfo.h> #include <asm/cacheops.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/r4kcache.h> @@ -186,9 +187,10 @@ static void probe_octeon(void) unsigned long dcache_size; unsigned int config1; struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); config1 = read_c0_config1(); - switch (c->cputype) { + switch (cputype) { case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -199,7 +201,7 @@ static void probe_octeon(void) c->icache.sets * c->icache.ways * c->icache.linesz; c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; c->dcache.linesz = 128; - if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) + if (cputype == CPU_CAVIUM_OCTEON_PLUS) c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ else c->dcache.sets = 1; /* CN3XXX has one Dcache set */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index f749f687ee87..bc6f96fcb529 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -12,6 +12,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/linkage.h> +#include <linux/preempt.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> @@ -24,6 +25,7 @@ #include <asm/cacheops.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -601,11 +603,13 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); else blast_scache_range(addr, addr + size); + preempt_enable(); __sync(); return; } @@ -621,6 +625,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); @@ -631,6 +636,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -645,6 +651,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) */ blast_inv_scache_range(addr, addr + size); } + preempt_enable(); __sync(); return; } @@ -655,6 +662,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); @@ -780,20 +788,30 @@ static inline void rm7k_erratum31(void) static inline void alias_74k_erratum(struct cpuinfo_mips *c) { + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + /* * Early versions of the 74K do not update the cache tags on a * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG * aliases. In this case it is better to treat the cache as always * having aliases. */ - if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0)) - c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if (((c->processor_id & 0xff00) == PRID_IMP_1074K) && - ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) { - c->dcache.flags |= MIPS_CACHE_VTAG; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + c->dcache.flags |= MIPS_CACHE_VTAG; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + c->dcache.flags |= MIPS_CACHE_VTAG; + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + } + break; + default: + BUG(); } } @@ -809,7 +827,7 @@ static void probe_pcache(void) unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -1025,7 +1043,8 @@ static void probe_pcache(void) * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); @@ -1045,7 +1064,7 @@ static void probe_pcache(void) * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: case CPU_SB1: @@ -1065,7 +1084,7 @@ static void probe_pcache(void) case CPU_34K: case CPU_74K: case CPU_1004K: - if (c->cputype == CPU_74K) + if (current_cpu_type() == CPU_74K) alias_74k_erratum(c); if ((read_c0_config7() & (1 << 16))) { /* effectively physically indexed dcache, @@ -1078,7 +1097,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1207,7 +1226,7 @@ static void setup_scache(void) * processors don't have a S-cache that would be relevant to the * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1384,9 +1403,8 @@ static void r4k_cache_error_setup(void) { extern char __weak except_vec2_generic; extern char __weak except_vec2_sb1; - struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_SB1: case CPU_SB1A: set_uncached_handler(0x100, &except_vec2_sb1, 0x80); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 664e523653d0..5f8b95512580 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <asm/cache.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <dma-coherence.h> @@ -307,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (cpu_needs_post_dma_flush(dev)) + if (cpu_needs_post_dma_flush(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } static void mips_dma_sync_sg_for_device(struct device *dev, @@ -320,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (!plat_device_is_coherent(dev)) + if (!plat_device_is_coherent(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 218c2109a55d..cbd81d17793a 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -18,6 +18,7 @@ #include <asm/bugs.h> #include <asm/cacheops.h> +#include <asm/cpu-type.h> #include <asm/inst.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 5d01392e3518..08d05aee8788 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <asm/cpu-type.h> #include <asm/mipsregs.h> #include <asm/bcache.h> #include <asm/cacheops.h> @@ -71,7 +72,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) unsigned int tmp; /* Check the bypass bit (L2B) */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: case CPU_74K: case CPU_1004K: diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 00b26a67a06d..bb3a5f643e97 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 821b45175dc1..9bb3a9363b06 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -30,6 +30,7 @@ #include <linux/cache.h> #include <asm/cacheflush.h> +#include <asm/cpu-type.h> #include <asm/pgtable.h> #include <asm/war.h> #include <asm/uasm.h> diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 53aad4a35375..a18af5fce67e 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -27,6 +27,7 @@ #include <linux/timex.h> #include <linux/mc146818rtc.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/mipsmtregs.h> #include <asm/hardirq.h> @@ -76,7 +77,7 @@ static void __init estimate_frequencies(void) #endif #if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); /* * XXXKYMA: hardwire the CPU frequency to Host Freq/4 @@ -169,7 +170,7 @@ unsigned int get_c0_compare_int(void) void __init plat_time_init(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int freq; estimate_frequencies(); diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index a43ea3cc0a3b..552d26c34386 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -7,6 +7,7 @@ */ #include <linux/init.h> +#include <asm/cpu.h> #include <asm/setup.h> #include <asm/time.h> #include <asm/irq.h> @@ -34,7 +35,7 @@ static void __iomem *status_reg = (void __iomem *)0xbf000410; */ static unsigned int __init estimate_cpu_frequency(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int tick = 0; unsigned int freq; unsigned int orig; diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index ed3bf0e3f309..c7622c6e5f67 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -36,6 +36,7 @@ #include <linux/irq.h> #include <linux/interrupt.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/netlogic/xlr/fmn.h> #include <asm/netlogic/xlr/xlr.h> @@ -187,7 +188,7 @@ void xlr_board_info_setup(void) int processor_id, num_core; num_core = hweight32(nlm_current_node()->coremask); - processor_id = read_c0_prid() & 0xff00; + processor_id = read_c0_prid() & PRID_IMP_MASK; setup_cpu_fmninfo(cpu, num_core); switch (processor_id) { diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 5e5424753b56..4d1736fc1955 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -12,6 +12,7 @@ #include <linux/oprofile.h> #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include "op_impl.h" diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c index 44dd5aa2e36f..5ec2a7bae02c 100644 --- a/arch/mips/pci/pci-bcm1480.c +++ b/arch/mips/pci/pci-bcm1480.c @@ -39,6 +39,7 @@ #include <linux/mm.h> #include <linux/console.h> #include <linux/tty.h> +#include <linux/vt.h> #include <asm/sibyte/bcm1480_regs.h> #include <asm/sibyte/bcm1480_scd.h> diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c index 05ed92c92b69..8e2e04f77870 100644 --- a/arch/mips/sibyte/bcm1480/setup.c +++ b/arch/mips/sibyte/bcm1480/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -119,7 +120,7 @@ void __init bcm1480_setup(void) uint64_t sys_rev; int plldiv; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); part_type = G_SYS_PART(sys_rev); diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index a14bd4cb0bc0..3c02b2a77ae9 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -182,7 +183,7 @@ void __init sb1250_setup(void) int plldiv; int bad_config = 0; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); soc_pass = G_SYS_REVISION(sys_rev); diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 5b09b3544edd..efad85c8c823 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -25,6 +25,7 @@ #endif #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/io.h> #include <asm/reboot.h> #include <asm/sni.h> @@ -173,7 +174,7 @@ void __init plat_mem_setup(void) system_type = "RM300-Cxx"; break; case SNI_BRD_PCI_DESKTOP: - switch (read_c0_prid() & 0xff00) { + switch (read_c0_prid() & PRID_IMP_MASK) { case PRID_IMP_R4600: case PRID_IMP_R4700: system_type = "RM200-C20"; diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h index eb59bfe23e85..93c9980e1b6b 100644 --- a/arch/openrisc/include/asm/prom.h +++ b/arch/openrisc/include/asm/prom.h @@ -14,53 +14,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - -#include <linux/of.h> /* linux/of.h gets to determine #include ordering */ - #ifndef _ASM_OPENRISC_PROM_H #define _ASM_OPENRISC_PROM_H -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <linux/types.h> -#include <asm/irq.h> -#include <linux/irqdomain.h> -#include <linux/atomic.h> -#include <linux/of_irq.h> -#include <linux/of_fdt.h> -#include <linux/of_address.h> -#include <linux/proc_fs.h> -#include <linux/platform_device.h> #define HAVE_ARCH_DEVTREE_FIXUPS -/* Other Prototypes */ -extern int early_uartlite_console(void); - -/* Parse the ibm,dma-window property of an OF node into the busno, phys and - * size parameters. - */ -void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, - unsigned long *busno, unsigned long *phys, unsigned long *size); - -extern void kdump_move_device_tree(void); - -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - -/** - * of_irq_map_pci - Resolve the interrupt for a PCI device - * @pdev: the device whose interrupt is to be resolved - * @out_irq: structure of_irq filled by this function - * - * This function resolves the PCI interrupt for a given PCI device. If a - * device-node exists for a given pci_dev, it will use normal OF tree - * walking. If not, it will implement standard swizzling and walk up the - * PCI tree until an device-node is found, at which point it will finish - * resolving using the OF tree walking. - */ -struct pci_dev; -extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_OPENRISC_PROM_H */ diff --git a/arch/parisc/configs/712_defconfig b/arch/parisc/configs/712_defconfig index 0f90569b9d85..9387cc2693f6 100644 --- a/arch/parisc/configs/712_defconfig +++ b/arch/parisc/configs/712_defconfig @@ -40,6 +40,8 @@ CONFIG_IP_NF_QUEUE=m CONFIG_LLC2=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_PARPORT=y diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index b647b182dacc..90025322b75e 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -79,6 +79,8 @@ CONFIG_IP_DCCP=m CONFIG_LLC2=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_BLK_DEV_UMEM=m diff --git a/arch/parisc/configs/b180_defconfig b/arch/parisc/configs/b180_defconfig index e289f5bf3148..f1a0c25bef8d 100644 --- a/arch/parisc/configs/b180_defconfig +++ b/arch/parisc/configs/b180_defconfig @@ -4,6 +4,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_BLK_DEV_INITRD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODVERSIONS=y @@ -27,6 +28,8 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_LRO is not set CONFIG_IPV6=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_PARPORT=y CONFIG_PARPORT_PC=y diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 311ca367b622..ec1b014952b6 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -5,6 +5,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y @@ -39,6 +40,8 @@ CONFIG_NETFILTER_DEBUG=y CONFIG_IP_NF_QUEUE=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_BLK_DEV_UMEM=m diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index f11006361297..e1c8d2015c89 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -62,6 +62,8 @@ CONFIG_TIPC=m CONFIG_LLC2=m CONFIG_DNS_RESOLVER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set CONFIG_PARPORT=y CONFIG_PARPORT_PC=y diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig index dfe88f6c95c4..ba61495e1fa4 100644 --- a/arch/parisc/configs/default_defconfig +++ b/arch/parisc/configs/default_defconfig @@ -49,6 +49,8 @@ CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_LLC2=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_PARPORT=y diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 1945f995f2df..4736020ba5ea 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -6,7 +6,7 @@ struct pt_regs; /* traps.c */ void parisc_terminate(char *msg, struct pt_regs *regs, - int code, unsigned long offset); + int code, unsigned long offset) __noreturn __cold; /* mm/fault.c */ void do_page_fault(struct pt_regs *regs, unsigned long code, diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 37aabd772fbb..d2d58258aea6 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -195,6 +195,8 @@ common_stext: ldw MEM_PDC_HI(%r0),%r6 depd %r6, 31, 32, %r3 /* move to upper word */ + mfctl %cr30,%r6 /* PCX-W2 firmware bug */ + ldo PDC_PSW(%r0),%arg0 /* 21 */ ldo PDC_PSW_SET_DEFAULTS(%r0),%arg1 /* 2 */ ldo PDC_PSW_WIDE_BIT(%r0),%arg2 /* 2 */ @@ -203,6 +205,8 @@ common_stext: copy %r0,%arg3 stext_pdc_ret: + mtctl %r6,%cr30 /* restore task thread info */ + /* restore rfi target address*/ ldd TI_TASK-THREAD_SZ_ALGN(%sp), %r10 tophys_r1 %r10 diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 8a252f2d6c08..2b96602e812f 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -72,7 +72,6 @@ enum ipi_message_type { IPI_NOP=0, IPI_RESCHEDULE=1, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_START, IPI_CPU_STOP, IPI_CPU_TEST @@ -164,11 +163,6 @@ ipi_interrupt(int irq, void *dev_id) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC_SINGLE\n", this_cpu); - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_START: smp_debug(100, KERN_DEBUG "CPU%d IPI_CPU_START\n", this_cpu); break; @@ -260,7 +254,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); + send_IPI_single(cpu, IPI_CALL_FUNC); } /* diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 04e47c6a4562..1cd1d0c83b6d 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -291,11 +291,6 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) do_exit(SIGSEGV); } -int syscall_ipi(int (*syscall) (struct pt_regs *), struct pt_regs *regs) -{ - return syscall(regs); -} - /* gdb uses break 4,8 */ #define GDB_BREAK_INSN 0x10004 static void handle_gdb_break(struct pt_regs *regs, int wot) @@ -805,14 +800,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs) else { /* - * The kernel should never fault on its own address space. + * The kernel should never fault on its own address space, + * unless pagefault_disable() was called before. */ - if (fault_space == 0) + if (fault_space == 0 && !in_atomic()) { pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); - } } diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index ac4370b1ca40..b5507ec06b84 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -56,7 +56,7 @@ #ifdef __KERNEL__ #include <linux/module.h> #include <linux/compiler.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #define s_space "%%sr1" #define d_space "%%sr2" #else @@ -524,4 +524,17 @@ EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(copy_from_user); EXPORT_SYMBOL(copy_in_user); EXPORT_SYMBOL(memcpy); + +long probe_kernel_read(void *dst, const void *src, size_t size) +{ + unsigned long addr = (unsigned long)src; + + if (size < 0 || addr < PAGE_SIZE) + return -EFAULT; + + /* check for I/O space F_EXTEND(0xfff00000) access as well? */ + + return __probe_kernel_read(dst, src, size); +} + #endif diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index d10d27a720c0..0293588d5b8c 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -171,17 +171,25 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { struct vm_area_struct *vma, *prev_vma; - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; + struct task_struct *tsk; + struct mm_struct *mm; unsigned long acc_type; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + unsigned int flags; - if (in_atomic() || !mm) + if (in_atomic()) goto no_context; + tsk = current; + mm = tsk->mm; + if (!mm) + goto no_context; + + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; retry: @@ -196,8 +204,6 @@ retry: good_area: - acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) goto bad_area; diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6a15c968d214..15ca2255f438 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c -src-plat-y := of.c +src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ treeboot-walnut.c cuboot-acadia.c \ cuboot-kilauea.c simpleboot.c \ @@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \ prpmc2800.c src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c -src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c +src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c new file mode 100644 index 000000000000..c10191006673 --- /dev/null +++ b/arch/powerpc/boot/epapr-wrapper.c @@ -0,0 +1,9 @@ +extern void epapr_platform_init(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, + unsigned long r7); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_platform_init(r3, r4, r5, r6, r7); +} diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c index 06c1961bd124..02e91aa2194a 100644 --- a/arch/powerpc/boot/epapr.c +++ b/arch/powerpc/boot/epapr.c @@ -48,8 +48,8 @@ static void platform_fixups(void) fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); } -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) { epapr_magic = r6; ima_size = r7; diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index 61d9899aa0d0..62e2f43ec1df 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -26,6 +26,9 @@ static unsigned long claim_base; +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7); + static void *of_try_claim(unsigned long size) { unsigned long addr = 0; @@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr) } } -void platform_init(unsigned long a1, unsigned long a2, void *promptr) +static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr) { platform_ops.image_hdr = of_image_hdr; platform_ops.malloc = of_try_claim; @@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr) loader_info.initrd_size = a2; } } + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* Detect OF vs. ePAPR boot */ + if (r5) + of_platform_init(r3, r4, (void *)r5); + else + epapr_platform_init(r3, r4, r5, r6, r7); +} + diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6761c746048d..cd7af841ba05 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -148,18 +148,18 @@ make_space=y case "$platform" in pseries) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x4000000' ;; maple) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x400000' ;; pmac|chrp) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" ;; coff) - platformo="$object/crt0.o $object/of.o" + platformo="$object/crt0.o $object/of.o $object/epapr.o" lds=$object/zImage.coff.lds link_address='0x500000' pie= @@ -253,6 +253,7 @@ treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; epapr) + platformo="$object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie ;; diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 0e40843a1c6e..41f13cec8a8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call_handle_irq(int irq, void *p1, - struct thread_info *tp, void *func); +extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); extern void do_IRQ(struct pt_regs *regs); +extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index ae098c438f00..f016bb699b5f 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -19,7 +19,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e378cccfca55..ce4de5aed7b5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -149,8 +149,6 @@ typedef struct { struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ - #ifdef CONFIG_PPC64 unsigned long ksp_vsid; #endif @@ -162,6 +160,7 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #endif #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -321,7 +320,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {{0}}, \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8958be5f31a..502c7a4e73f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -80,10 +80,11 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0adab06ce5c0..572bb5b95f35 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -661,7 +661,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz)); + page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); if (!page) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); tbl->it_map = page_address(page); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c69440cef7af..c7cb8c232d2f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,50 +441,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -501,9 +457,9 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -519,18 +475,57 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else { + desc = irq_to_desc(irq); + if (likely(desc)) + desc->handle_irq(irq, desc); + } trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp, *sirqtp; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + sirqtp = softirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp || curtp == sirqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -592,28 +587,22 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 777d999f563b..2b0ad9845363 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,26 +36,41 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -_GLOBAL(call_handle_irq) +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) + bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 971d7e78aff2..e59caf874d05 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl .__do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f428da53e20..96d2fdf3aa9e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 12e656ffe60e..5fe2842e8bab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &val, sizeof(val)); + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) /* PowerVN takeover hack */ prom_rtas_data = base; @@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 27a90b99ef67..b4e667663d9b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/firmware.h> #include "cacheinfo.h" @@ -179,15 +180,25 @@ SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); SYSFS_PMCSETUP(pir, SPRN_PIR); +/* + Lets only enable read for phyp resources and + enable write when needed with a separate function. + Lets be conservative and default to pseries. +*/ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); -static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + static ssize_t show_dscr_default(struct device *dev, struct device_attribute *attr, char *buf) { @@ -394,8 +405,11 @@ static void register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + } if (cpu_has_feature(CPU_FTR_SPURR)) device_create_file(s, &dev_attr_spurr); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 7b60b9851469..cd809eaa8b5c 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -79,6 +79,11 @@ _GLOBAL(tm_abort) TABORT(R3) blr + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" /* void tm_reclaim(struct thread_struct *thread, * unsigned long orig_msr, @@ -123,6 +128,7 @@ _GLOBAL(tm_reclaim) mr r15, r14 ori r15, r15, MSR_FP li r16, MSR_RI + ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h #ifdef CONFIG_VSX @@ -187,11 +193,18 @@ dont_backup_fp: std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) + /* Store the PPR in r11 and reset to decent value */ + std r11, GPR11(r1) /* Temporary stash */ + mfspr r11, SPRN_PPR + HMT_MEDIUM + /* Now get some more GPRS free */ std r7, GPR7(r1) /* Temporary stash */ std r12, GPR12(r1) /* '' '' '' */ ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */ + addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ /* Make r7 look like an exception frame so that we @@ -203,15 +216,19 @@ dont_backup_fp: SAVE_GPR(0, r7) /* user r0 */ SAVE_GPR(2, r7) /* user r2 */ SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_4GPRS(8, r7) /* user r8-r11 */ + SAVE_GPR(8, r7) /* user r8 */ + SAVE_GPR(9, r7) /* user r9 */ + SAVE_GPR(10, r7) /* user r10 */ ld r3, PACATMSCRATCH(r13) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ - ld r5, GPR12(r1) /* user r12 */ - GET_SCRATCH0(6) /* user r13 */ + ld r5, GPR11(r1) /* user r11 */ + ld r6, GPR12(r1) /* user r12 */ + GET_SCRATCH0(8) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) - std r5, GPR12(r7) - std r6, GPR13(r7) + std r5, GPR11(r7) + std r6, GPR12(r7) + std r8, GPR13(r7) SAVE_NVGPRS(r7) /* user r14-r31 */ @@ -234,14 +251,12 @@ dont_backup_fp: std r6, _XER(r7) - /* ******************** TAR, PPR, DSCR ********** */ + /* ******************** TAR, DSCR ********** */ mfspr r3, SPRN_TAR - mfspr r4, SPRN_PPR - mfspr r5, SPRN_DSCR + mfspr r4, SPRN_DSCR std r3, THREAD_TM_TAR(r12) - std r4, THREAD_TM_PPR(r12) - std r5, THREAD_TM_DSCR(r12) + std r4, THREAD_TM_DSCR(r12) /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. @@ -258,7 +273,7 @@ dont_backup_fp: std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + /* AMR is checkpointed too, but is unsupported by Linux. */ /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -274,6 +289,12 @@ dont_backup_fp: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr @@ -358,25 +379,24 @@ dont_restore_fp: restore_gprs: - /* ******************** TAR, PPR, DSCR ********** */ - ld r4, THREAD_TM_TAR(r3) - ld r5, THREAD_TM_PPR(r3) - ld r6, THREAD_TM_DSCR(r3) + /* ******************** CR,LR,CCR,MSR ********** */ + ld r4, _CTR(r7) + ld r5, _LINK(r7) + ld r6, _CCR(r7) + ld r8, _XER(r7) - mtspr SPRN_TAR, r4 - mtspr SPRN_PPR, r5 - mtspr SPRN_DSCR, r6 + mtctr r4 + mtlr r5 + mtcr r6 + mtxer r8 - /* ******************** CR,LR,CCR,MSR ********** */ - ld r3, _CTR(r7) - ld r4, _LINK(r7) - ld r5, _CCR(r7) - ld r6, _XER(r7) + /* ******************** TAR ******************** */ + ld r4, THREAD_TM_TAR(r3) + mtspr SPRN_TAR, r4 - mtctr r3 - mtlr r4 - mtcr r5 - mtxer r6 + /* Load up the PPR and DSCR in GPRs only at this stage */ + ld r5, THREAD_TM_DSCR(r3) + ld r6, THREAD_TM_PPR(r3) /* Clear the MSR RI since we are about to change R1. EE is already off */ @@ -384,19 +404,26 @@ restore_gprs: mtmsrd r4, 1 REST_4GPRS(0, r7) /* GPR0-3 */ - REST_GPR(4, r7) /* GPR4-6 */ - REST_GPR(5, r7) - REST_GPR(6, r7) + REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ REST_NVGPRS(r7) /* GPR14-31 */ - ld r7, GPR7(r7) /* GPR7 */ + /* Load up PPR and DSCR here so we don't run with user values for long + */ + mtspr SPRN_DSCR, r5 + mtspr SPRN_PPR, r6 + + REST_GPR(5, r7) /* GPR5-7 */ + REST_GPR(6, r7) + ld r7, GPR7(r7) /* Commit register state as checkpointed state: */ TRECHKPT + HMT_MEDIUM + /* Our transactional state has now changed. * * Now just get out of here. Transactional (current) state will be @@ -419,6 +446,12 @@ restore_gprs: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 78a350670de3..d38cc08b16c7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1530,11 +1530,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; + if (!dn) { + strcat(buf, "\n"); + return strlen(buf); + } cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (!cp) { + strcat(buf, "\n"); + return strlen(buf); + } return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 294b7af28cdd..c71103b8a748 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1066,7 +1066,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) - std r8, VCPU_DSCR(r7) + std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 1c6a9d729df4..c65593abae8e 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -332,6 +332,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long hva; int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; + int ret = 0; + unsigned long mmu_seq; + struct kvm *kvm = vcpu_e500->vcpu.kvm; + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); /* * Translate guest physical to true physical, acquiring @@ -449,6 +456,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) { + ret = -EAGAIN; + goto out; + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -457,10 +470,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); +out: + spin_unlock(&kvm->mmu_lock); + /* Drop refcount on page, so that mmu notifiers can clear it */ kvm_release_pfn_clean(pfn); - return 0; + return ret; } /* XXX only map the one-one case, for now use TLB0 */ diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 167f72555d60..57a072065057 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -226,19 +226,35 @@ _GLOBAL(csum_partial) blr - .macro source + .macro srcnr 100: .section __ex_table,"a" .align 3 - .llong 100b,.Lsrc_error + .llong 100b,.Lsrc_error_nr .previous .endm - .macro dest + .macro source +150: + .section __ex_table,"a" + .align 3 + .llong 150b,.Lsrc_error + .previous + .endm + + .macro dstnr 200: .section __ex_table,"a" .align 3 - .llong 200b,.Ldest_error + .llong 200b,.Ldest_error_nr + .previous + .endm + + .macro dest +250: + .section __ex_table,"a" + .align 3 + .llong 250b,.Ldest_error .previous .endm @@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic) rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ beq .Lcopy_aligned - li r7,4 - sub r6,r7,r6 + li r9,4 + sub r6,r9,r6 mtctr r6 1: -source; lhz r6,0(r3) /* align to doubleword */ +srcnr; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 bdnz 1b @@ -392,10 +408,10 @@ dest; std r16,56(r4) mtctr r6 3: -source; ld r6,0(r3) +srcnr; ld r6,0(r3) addi r3,r3,8 adde r0,r0,r6 -dest; std r6,0(r4) +dstnr; std r6,0(r4) addi r4,r4,8 bdnz 3b @@ -405,10 +421,10 @@ dest; std r6,0(r4) srdi. r6,r5,2 beq .Lcopy_tail_halfword -source; lwz r6,0(r3) +srcnr; lwz r6,0(r3) addi r3,r3,4 adde r0,r0,r6 -dest; stw r6,0(r4) +dstnr; stw r6,0(r4) addi r4,r4,4 subi r5,r5,4 @@ -416,10 +432,10 @@ dest; stw r6,0(r4) srdi. r6,r5,1 beq .Lcopy_tail_byte -source; lhz r6,0(r3) +srcnr; lhz r6,0(r3) addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 subi r5,r5,2 @@ -427,10 +443,10 @@ dest; sth r6,0(r4) andi. r6,r5,1 beq .Lcopy_finish -source; lbz r6,0(r3) +srcnr; lbz r6,0(r3) sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 -dest; stb r6,0(r4) +dstnr; stb r6,0(r4) .Lcopy_finish: addze r0,r0 /* add in final carry */ @@ -440,6 +456,11 @@ dest; stb r6,0(r4) blr .Lsrc_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Lsrc_error_nr: cmpdi 0,r7,0 beqlr li r6,-EFAULT @@ -447,6 +468,11 @@ dest; stb r6,0(r4) blr .Ldest_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldest_error_nr: cmpdi 0,r8,0 beqlr li r6,-EFAULT diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a7ee978fb860..b1faa1593c90 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) */ if ((ra == 1) && !(regs->msr & MSR_PR) \ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32 /* * Check if we will touch kernel sack overflow */ @@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = -EINVAL; break; } - +#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d0cd9e4c6837..8ed035d2edb5 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -300,5 +300,9 @@ void vmemmap_free(unsigned long start, unsigned long end) { } +void register_page_bootmem_memmap(unsigned long section_nr, + struct page *start_page, unsigned long size) +{ +} #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1cf9c5b67f24..3fa93dc7fe75 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -297,12 +297,21 @@ void __init paging_init(void) } #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ +static void __init register_page_bootmem_info(void) +{ + int i; + + for_each_online_node(i) + register_page_bootmem_info_node(NODE_DATA(i)); +} + void __init mem_init(void) { #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif + register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33f8257..2345bdb4d917 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 2ee4a707f0df..a3f7abd2f13f 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -199,6 +199,7 @@ #define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) #define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) #define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) +#define MMCR1_FAB_SHIFT 36 #define MMCR1_DC_QUAL_SHIFT 47 #define MMCR1_IC_QUAL_SHIFT 46 @@ -388,8 +389,8 @@ static int power8_compute_mmcr(u64 event[], int n_ev, * the threshold bits are used for the match value. */ if (event_is_fab_match(event[i])) { - mmcr1 |= (event[i] >> EVENT_THR_CTL_SHIFT) & - EVENT_THR_CTL_MASK; + mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & + EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT; } else { val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; mmcra |= val << MMCRA_THR_CTL_SHIFT; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1c1771a40250..24f58cb0a543 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -233,18 +233,24 @@ static void __init smp_init_pseries(void) alloc_bootmem_cpumask_var(&of_spin_mask); - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (cpu_thread_in_core(i) == 0) - cpumask_set_cpu(i, of_spin_mask); - } - } else { - cpumask_copy(of_spin_mask, cpu_present_mask); + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - cpumask_clear_cpu(boot_cpuid, of_spin_mask); - /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index dcc6ac2d8026..7143793859fa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 @@ -102,7 +103,6 @@ config S390 select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 6c32190dc73e..346b1c85ffb4 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -15,7 +15,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include <asm-generic/mutex-dec.h> - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9b60a36c348d..2204400d0bd5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -748,7 +748,9 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) static inline void pgste_set_pte(pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) { + if (!MACHINE_HAS_ESOP && + (pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE)) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0eb37505cab1..ca7821f07260 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -198,6 +198,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 701fe8c59e1f..83e5d216105e 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); extern int arch_spin_trylock_retry(arch_spinlock_t *); extern void arch_spin_relax(arch_spinlock_t *lock); +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner_cpu == 0; +} + static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 8ad8af915032..819b94d22720 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -71,30 +71,30 @@ static inline void local_tick_enable(unsigned long long comp) typedef unsigned long long cycles_t; -static inline unsigned long long get_tod_clock(void) -{ - unsigned long long clk; - -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); -#else - asm volatile("stck %0" : "=Q" (clk) : : "cc"); -#endif - return clk; -} - static inline void get_tod_clock_ext(char *clk) { asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } -static inline unsigned long long get_tod_clock_xt(void) +static inline unsigned long long get_tod_clock(void) { unsigned char clk[16]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); } +static inline unsigned long long get_tod_clock_fast(void) +{ +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + unsigned long long clk; + + asm volatile("stckf %0" : "=Q" (clk) : : "cc"); + return clk; +#else + return get_tod_clock(); +#endif +} + static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; @@ -125,7 +125,7 @@ extern u64 sched_clock_base_cc; */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock_xt() - sched_clock_base_cc; + return get_tod_clock() - sched_clock_base_cc; } /** diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 1389b637dae5..adaa9e9478d8 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -99,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) break; } } - return err; + return err ? -EFAULT : 0; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) @@ -148,7 +148,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; } } - return err; + return err ? -EFAULT : 0; } static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c84f33d51f7b..7dd21720e5b0 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -40,28 +40,26 @@ static inline void *load_real_addr(void *addr) } /* - * Copy up to one page to vmalloc or real memory + * Copy real to virtual or real memory */ -static ssize_t copy_page_real(void *buf, void *src, size_t csize) +static int copy_from_realmem(void *dest, void *src, size_t count) { - size_t size; + unsigned long size; + int rc; - if (is_vmalloc_addr(buf)) { - BUG_ON(csize >= PAGE_SIZE); - /* If buf is not page aligned, copy first part */ - size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize); - if (size) { - if (memcpy_real(load_real_addr(buf), src, size)) - return -EFAULT; - buf += size; - src += size; - } - /* Copy second part */ - size = csize - size; - return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0; - } else { - return memcpy_real(buf, src, csize); - } + if (!count) + return 0; + if (!is_vmalloc_or_module_addr(dest)) + return memcpy_real(dest, src, count); + do { + size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); + if (memcpy_real(load_real_addr(dest), src, size)) + return -EFAULT; + count -= size; + dest += size; + src += size; + } while (count); + return 0; } /* @@ -114,7 +112,7 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, rc = copy_to_user_real((void __force __user *) buf, (void *) src, csize); else - rc = copy_page_real(buf, (void *) src, csize); + rc = copy_from_realmem(buf, (void *) src, csize); return (rc == 0) ? rc : csize; } @@ -210,7 +208,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count) if (OLDMEM_BASE) { if ((unsigned long) src < OLDMEM_SIZE) { copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); if (rc) return rc; } @@ -223,7 +221,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count) return rc; } } - return memcpy_real(dest + copied, src + copied, count - copied); + return copy_from_realmem(dest + copied, src + copied, count - copied); } /* diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index f1279dc2e1bc..17d62fe5d7b7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -867,7 +867,7 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_tod_clock(); + active->id.stck = get_tod_clock_fast(); active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cc30d1fb000c..0dc2b6d0a1ec 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -266,6 +266,7 @@ sysc_sigpending: tm __TI_flags+3(%r12),_TIF_SYSCALL jno sysc_return lm %r2,%r7,__PT_R2(%r11) # load svc arguments + l %r10,__TI_sysc_table(%r12) # 31 bit system call table xr %r8,%r8 # svc 0 returns -ENOSYS clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) jnl sysc_nr_ok # invalid svc number -> do svc 0 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 2b2188b97c6a..e5b43c97a834 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -297,6 +297,7 @@ sysc_sigpending: tm __TI_flags+7(%r12),_TIF_SYSCALL jno sysc_return lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lg %r10,__TI_sysc_table(%r12) # address of system call table lghi %r8,0 # svc 0 returns -ENOSYS llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number cghi %r1,NR_syscalls diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 0ce9fb245034..d86e64eddb42 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -67,6 +67,11 @@ static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) case 0xac: /* stnsm */ case 0xad: /* stosm */ return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } } switch (insn[0]) { case 0x0101: /* pr */ @@ -180,7 +185,6 @@ static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) break; case 0xc6: switch (insn[0] & 0x0f) { - case 0x00: /* exrl */ case 0x02: /* pfdrl */ case 0x04: /* cghrl */ case 0x05: /* chrl */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7f35cb33e510..7f1f7ac5cf7f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -385,7 +385,7 @@ static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) } if ((!rc) && (vcpu->arch.sie_block->ckc < - get_tod_clock() + vcpu->arch.sie_block->epoch)) { + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { if ((!psw_extint_disabled(vcpu)) && (vcpu->arch.sie_block->gcr[0] & 0x800ul)) rc = 1; @@ -425,7 +425,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) goto no_timer; } - now = get_tod_clock() + vcpu->arch.sie_block->epoch; + now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; if (vcpu->arch.sie_block->ckc < now) { __unset_cpu_idle(vcpu); return 0; @@ -515,7 +515,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if ((vcpu->arch.sie_block->ckc < - get_tod_clock() + vcpu->arch.sie_block->epoch)) + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) __try_deliver_ckc_interrupt(vcpu); if (atomic_read(&fi->active)) { diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 57c87d7d7ede..a9f3d0042d58 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -44,7 +44,7 @@ static void __udelay_disabled(unsigned long long usecs) do { set_clock_comparator(end); vtime_stop_cpu(); - } while (get_tod_clock() < end); + } while (get_tod_clock_fast() < end); lockdep_on(); __ctl_load(cr0, 0, 0); __ctl_load(cr6, 6, 6); @@ -55,7 +55,7 @@ static void __udelay_enabled(unsigned long long usecs) { u64 clock_saved, end; - end = get_tod_clock() + (usecs << 12); + end = get_tod_clock_fast() + (usecs << 12); do { clock_saved = 0; if (end < S390_lowcore.clock_comparator) { @@ -65,7 +65,7 @@ static void __udelay_enabled(unsigned long long usecs) vtime_stop_cpu(); if (clock_saved) local_tick_enable(clock_saved); - } while (get_tod_clock() < end); + } while (get_tod_clock_fast() < end); } /* @@ -109,8 +109,8 @@ void udelay_simple(unsigned long long usecs) { u64 end; - end = get_tod_clock() + (usecs << 12); - while (get_tod_clock() < end) + end = get_tod_clock_fast() + (usecs << 12); + while (get_tod_clock_fast() < end) cpu_relax(); } @@ -120,10 +120,10 @@ void __ndelay(unsigned long long nsecs) nsecs <<= 9; do_div(nsecs, 125); - end = get_tod_clock() + nsecs; + end = get_tod_clock_fast() + nsecs; if (nsecs & ~0xfffUL) __udelay(nsecs >> 12); - while (get_tod_clock() < end) + while (get_tod_clock_fast() < end) barrier(); } EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 709239285869..a5df511e27a2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp) struct bpf_binary_header *header = (void *)addr; if (fp->bpf_func == sk_run_filter) - return; + goto free_filter; set_memory_rw(addr, header->pages); module_free(NULL, header); +free_filter: + kfree(fp); } diff --git a/arch/score/Kconfig b/arch/score/Kconfig index a1be70db75fe..305f7ee1f382 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -2,6 +2,7 @@ menu "Machine selection" config SCORE def_bool y + select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_ATOMIC64 @@ -110,3 +111,6 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +config NO_IOMEM + def_bool y diff --git a/arch/score/Makefile b/arch/score/Makefile index 974aefe86123..9e3e060290e0 100644 --- a/arch/score/Makefile +++ b/arch/score/Makefile @@ -20,8 +20,8 @@ cflags-y += -G0 -pipe -mel -mnhwloop -D__SCOREEL__ \ # KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_AFLAGS_MODULE += -mlong-calls -KBUILD_CFLAGS_MODULE += -mlong-calls +KBUILD_AFLAGS_MODULE += +KBUILD_CFLAGS_MODULE += LDFLAGS += --oformat elf32-littlescore LDFLAGS_vmlinux += -G0 -static -nostdlib diff --git a/arch/score/include/asm/checksum.h b/arch/score/include/asm/checksum.h index f909ac3144a4..961bd64015a8 100644 --- a/arch/score/include/asm/checksum.h +++ b/arch/score/include/asm/checksum.h @@ -184,48 +184,57 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, __wsum sum) { __asm__ __volatile__( - ".set\tnoreorder\t\t\t# csum_ipv6_magic\n\t" - ".set\tnoat\n\t" - "addu\t%0, %5\t\t\t# proto (long in network byte order)\n\t" - "sltu\t$1, %0, %5\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %6\t\t\t# csum\n\t" - "sltu\t$1, %0, %6\n\t" - "lw\t%1, 0(%2)\t\t\t# four words source address\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 4(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 8(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 12(%2)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 0(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 4(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 8(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "lw\t%1, 12(%3)\n\t" - "addu\t%0, $1\n\t" - "addu\t%0, %1\n\t" - "sltu\t$1, %0, %1\n\t" - "addu\t%0, $1\t\t\t# Add final carry\n\t" - ".set\tnoat\n\t" - ".set\tnoreorder" + ".set\tvolatile\t\t\t# csum_ipv6_magic\n\t" + "add\t%0, %0, %5\t\t\t# proto (long in network byte order)\n\t" + "cmp.c\t%5, %0\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %6\t\t\t# csum\n\t" + "cmp.c\t%6, %0\n\t" + "lw\t%1, [%2, 0]\t\t\t# four words source address\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "1:lw\t%1, [%2, 4]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%2,8]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%2, 12]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0,%1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 0]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 4]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 8]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "lw\t%1, [%3, 12]\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:add\t%0, %0, %1\n\t" + "cmp.c\t%1, %0\n\t" + "bleu 1f\n\t" + "addi\t%0, 0x1\n\t" + "1:\n\t" + ".set\toptimize" : "=r" (sum), "=r" (proto) : "r" (saddr), "r" (daddr), "0" (htonl(len)), "1" (htonl(proto)), "r" (sum)); diff --git a/arch/score/include/asm/io.h b/arch/score/include/asm/io.h index fbbfd7132e3b..574c8827abe2 100644 --- a/arch/score/include/asm/io.h +++ b/arch/score/include/asm/io.h @@ -5,5 +5,4 @@ #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt - #endif /* _ASM_SCORE_IO_H */ diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 059a61b7071b..716b3fd1d863 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -2,7 +2,7 @@ #define _ASM_SCORE_PGALLOC_H #include <linux/mm.h> - +#include <linux/highmem.h> static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { diff --git a/arch/score/kernel/entry.S b/arch/score/kernel/entry.S index 7234ed09b7b7..befb87d30a89 100644 --- a/arch/score/kernel/entry.S +++ b/arch/score/kernel/entry.S @@ -264,7 +264,7 @@ resume_kernel: disable_irq lw r8, [r28, TI_PRE_COUNT] cmpz.c r8 - bne r8, restore_all + bne restore_all need_resched: lw r8, [r28, TI_FLAGS] andri.c r9, r8, _TIF_NEED_RESCHED @@ -415,7 +415,7 @@ ENTRY(handle_sys) sw r9, [r0, PT_EPC] cmpi.c r27, __NR_syscalls # check syscall number - bgeu illegal_syscall + bcs illegal_syscall slli r8, r27, 2 # get syscall routine la r11, sys_call_table diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index f4c6d02421d3..a1519ad3d49d 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -78,8 +78,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg0 = (unsigned long) childregs; if (unlikely(p->flags & PF_KTHREAD)) { memset(childregs, 0, sizeof(struct pt_regs)); - p->thread->reg12 = usp; - p->thread->reg13 = arg; + p->thread.reg12 = usp; + p->thread.reg13 = arg; p->thread.reg3 = (unsigned long) ret_from_kernel_thread; } else { *childregs = *current_pt_regs(); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2137ad667438..78c4fdb91bc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -506,12 +506,17 @@ config SUN_OPENPROMFS Only choose N if you know in advance that you will not need to modify OpenPROM settings on the running system. -# Makefile helper +# Makefile helpers config SPARC64_PCI bool default y depends on SPARC64 && PCI +config SPARC64_PCI_MSI + bool + default y + depends on SPARC64_PCI && PCI_MSI + endmenu menu "Executable file formats" diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index e204f902e6c9..7c90c50c200d 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -254,7 +254,7 @@ static int sun_fd_request_irq(void) once = 1; error = request_irq(FLOPPY_IRQ, sparc_floppy_irq, - IRQF_DISABLED, "floppy", NULL); + 0, "floppy", NULL); return ((error == 0) ? 0 : -1); } diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 5080d16a832f..ec2e2e2aba7d 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -9,7 +9,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index d432fb20358e..d15cc1794b0e 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -1,3 +1,4 @@ + # # Makefile for the linux kernel. # @@ -99,7 +100,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SPARC64_PCI) += pci.o pci_common.o psycho_common.o obj-$(CONFIG_SPARC64_PCI) += pci_psycho.o pci_sabre.o pci_schizo.o obj-$(CONFIG_SPARC64_PCI) += pci_sun4v.o pci_sun4v_asm.o pci_fire.o -obj-$(CONFIG_PCI_MSI) += pci_msi.o +obj-$(CONFIG_SPARC64_PCI_MSI) += pci_msi.o obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 62d6b153ffa2..dff60abbea01 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -849,9 +849,8 @@ void ldom_reboot(const char *boot_command) if (boot_command && strlen(boot_command)) { unsigned long len; - strcpy(full_boot_str, "boot "); - strlcpy(full_boot_str + strlen("boot "), boot_command, - sizeof(full_boot_str + strlen("boot "))); + snprintf(full_boot_str, sizeof(full_boot_str), "boot %s", + boot_command); len = strlen(full_boot_str); if (reboot_data_supported) { diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 54df554b82d9..e01d75d40329 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1249,12 +1249,12 @@ int ldc_bind(struct ldc_channel *lp, const char *name) snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - err = request_irq(lp->cfg.rx_irq, ldc_rx, IRQF_DISABLED, + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, lp->rx_irq_name, lp); if (err) return err; - err = request_irq(lp->cfg.tx_irq, ldc_tx, IRQF_DISABLED, + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, lp->tx_irq_name, lp); if (err) { free_irq(lp->cfg.rx_irq, lp); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 9c7be59e6f5a..218b6b23c378 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 8a7cc663b3f8..d45a2c48f185 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -361,7 +361,7 @@ config CMDLINE_OVERRIDE config VMALLOC_RESERVE hex - default 0x1000000 + default 0x2000000 config HARDWALL bool "Hardwall support to allow access to user dynamic network" diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c index 4f8f3d619c4a..e19325c4c431 100644 --- a/arch/tile/gxio/iorpc_mpipe.c +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param { unsigned int flags; }; -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param { unsigned int buffer_size_enum; }; -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum) @@ -80,7 +80,7 @@ struct alloc_notif_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -102,7 +102,7 @@ struct init_notif_ring_aux_param { unsigned int ring; }; -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring) { @@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring) @@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring) { struct enable_notif_ring_interrupt_param temp; @@ -179,7 +179,7 @@ struct alloc_notif_groups_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -201,7 +201,7 @@ struct init_notif_group_param { gxio_mpipe_notif_group_bits_t bits; }; -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits) { @@ -223,7 +223,7 @@ struct alloc_buckets_param { unsigned int flags; }; -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_buckets_param temp; @@ -244,7 +244,7 @@ struct init_bucket_param { MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info; }; -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info) { struct init_bucket_param temp; @@ -265,7 +265,7 @@ struct alloc_edma_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -288,7 +288,7 @@ struct init_edma_ring_aux_param { unsigned int channel; }; -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel) { @@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size) { const void *params = blob; @@ -332,7 +332,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags) { @@ -355,7 +355,7 @@ struct link_open_aux_param { unsigned int flags; }; -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags) { struct link_open_aux_param temp; @@ -374,7 +374,7 @@ struct link_close_aux_param { int mac; }; -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac) +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac) { struct link_close_aux_param temp; struct link_close_aux_param *params = &temp; @@ -393,7 +393,7 @@ struct link_set_attr_aux_param { int64_t val; }; -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val) { struct link_set_attr_aux_param temp; @@ -415,8 +415,8 @@ struct get_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles) +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles) { int __result; struct get_timestamp_aux_param temp; @@ -440,7 +440,7 @@ struct set_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles) { struct set_timestamp_aux_param temp; @@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param { int64_t nsec; }; -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, - int64_t nsec) +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec) { struct adjust_timestamp_aux_param temp; struct adjust_timestamp_aux_param *params = &temp; @@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux); -struct adjust_timestamp_freq_param { - int32_t ppb; -}; - -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, - int32_t ppb) -{ - struct adjust_timestamp_freq_param temp; - struct adjust_timestamp_freq_param *params = &temp; - - params->ppb = ppb; - - return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, - sizeof(*params), - GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); -} - -EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); - struct config_edma_ring_blks_param { unsigned int ering; unsigned int max_blks; @@ -501,7 +481,7 @@ struct config_edma_ring_blks_param { unsigned int db; }; -int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, +int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, unsigned int ering, unsigned int max_blks, unsigned int min_snf_blks, unsigned int db) { @@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks); +struct adjust_timestamp_freq_param { + int32_t ppb; +}; + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb) +{ + struct adjust_timestamp_freq_param temp; + struct adjust_timestamp_freq_param *params = &temp; + + params->ppb = ppb; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); + struct arm_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct arm_pollfd_param temp; struct arm_pollfd_param *params = &temp; @@ -541,7 +539,7 @@ struct close_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct close_pollfd_param temp; struct close_pollfd_param *params = &temp; @@ -558,7 +556,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base) +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -579,7 +577,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c index 64883aabeb9c..77019c6e9b4a 100644 --- a/arch/tile/gxio/iorpc_mpipe_info.c +++ b/arch/tile/gxio/iorpc_mpipe_info.c @@ -15,12 +15,11 @@ /* This file is machine-generated; DO NOT EDIT! */ #include "gxio/iorpc_mpipe_info.h" - struct instance_aux_param { _gxio_mpipe_link_name_t name; }; -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name) { struct instance_aux_param temp; @@ -39,10 +38,10 @@ struct enumerate_aux_param { _gxio_mpipe_link_mac_t mac; }; -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac) + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac) { int __result; struct enumerate_aux_param temp; @@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, __result = hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), - (((uint64_t) idx << 32) | + (((uint64_t)idx << 32) | GXIO_MPIPE_INFO_OP_ENUMERATE_AUX)); *name = params->name; *mac = params->mac; @@ -64,7 +63,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base) { int __result; @@ -86,7 +85,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c index da6e18e049c3..1d3cedb9aeb4 100644 --- a/arch/tile/gxio/iorpc_trio.c +++ b/arch/tile/gxio/iorpc_trio.c @@ -21,7 +21,7 @@ struct alloc_asids_param { unsigned int flags; }; -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_asids_param temp; @@ -44,7 +44,7 @@ struct alloc_memory_maps_param { unsigned int flags; }; -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -67,7 +67,7 @@ struct alloc_scatter_queues_param { unsigned int flags; }; -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -91,7 +91,7 @@ struct alloc_pio_regions_param { unsigned int flags; }; -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -115,7 +115,7 @@ struct init_pio_region_aux_param { unsigned int flags; }; -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags) { @@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param { unsigned int order_mode; }; -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, @@ -175,7 +175,7 @@ struct get_port_property_param { struct pcie_trio_ports_property trio_ports; }; -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports) { int __result; @@ -198,7 +198,7 @@ struct config_legacy_intr_param { unsigned int intx; }; -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx) { @@ -227,7 +227,7 @@ struct config_msi_intr_param { unsigned int asid; }; -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, @@ -259,7 +259,7 @@ struct set_mps_mrs_param { unsigned int mac; }; -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac) { struct set_mps_mrs_param temp; @@ -279,7 +279,7 @@ struct force_rc_link_up_param { unsigned int mac; }; -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_rc_link_up_param temp; struct force_rc_link_up_param *params = &temp; @@ -296,7 +296,7 @@ struct force_ep_link_up_param { unsigned int mac; }; -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_ep_link_up_param temp; struct force_ep_link_up_param *params = &temp; @@ -313,7 +313,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base) +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -334,7 +334,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c index cf3c3cc12204..9c820073bfc0 100644 --- a/arch/tile/gxio/iorpc_usb_host.c +++ b/arch/tile/gxio/iorpc_usb_host.c @@ -19,7 +19,7 @@ struct cfg_interrupt_param { union iorpc_interrupt interrupt; }; -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event) { struct cfg_interrupt_param temp; @@ -41,7 +41,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags) { struct register_client_memory_param temp; @@ -61,7 +61,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base) +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -82,7 +82,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c index 66b002f54ecc..785afad7922e 100644 --- a/arch/tile/gxio/usb_host.c +++ b/arch/tile/gxio/usb_host.c @@ -26,7 +26,7 @@ #include <gxio/kiorpc.h> #include <gxio/usb_host.h> -int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci) { char file[32]; @@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, EXPORT_SYMBOL_GPL(gxio_usb_host_init); -int gxio_usb_host_destroy(gxio_usb_host_context_t * context) +int gxio_usb_host_destroy(gxio_usb_host_context_t *context) { iounmap((void __force __iomem *)(context->mmio_base)); hv_dev_close(context->fd); @@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context) EXPORT_SYMBOL_GPL(gxio_usb_host_destroy); -void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context) +void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context) { return context->mmio_base; } EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start); -size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context) +size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context) { return HV_USB_HOST_MMIO_SIZE; } diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h index 8a33912fd6cc..904538e754d8 100644 --- a/arch/tile/include/arch/mpipe.h +++ b/arch/tile/include/arch/mpipe.h @@ -176,7 +176,18 @@ typedef union */ uint_reg_t stack_idx : 5; /* Reserved. */ - uint_reg_t __reserved_2 : 5; + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; /* * Reads as one to indicate that this is a hardware managed buffer. * Ignored on writes since all buffers on a given stack are the same size. @@ -205,7 +216,8 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 5; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_1 : 6; int_reg_t va : 35; @@ -231,9 +243,9 @@ typedef union /* Reserved. */ uint_reg_t __reserved_0 : 3; /* eDMA ring being accessed */ - uint_reg_t ring : 5; + uint_reg_t ring : 6; /* Reserved. */ - uint_reg_t __reserved_1 : 18; + uint_reg_t __reserved_1 : 17; /* * This field of the address selects the region (address space) to be * accessed. For the egress DMA post region, this field must be 5. @@ -250,8 +262,8 @@ typedef union uint_reg_t svc_dom : 5; uint_reg_t __reserved_2 : 6; uint_reg_t region : 3; - uint_reg_t __reserved_1 : 18; - uint_reg_t ring : 5; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; uint_reg_t __reserved_0 : 3; #endif }; diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h index 410a0400e055..84022ac5fe82 100644 --- a/arch/tile/include/arch/mpipe_constants.h +++ b/arch/tile/include/arch/mpipe_constants.h @@ -16,13 +16,13 @@ #ifndef __ARCH_MPIPE_CONSTANTS_H__ #define __ARCH_MPIPE_CONSTANTS_H__ -#define MPIPE_NUM_CLASSIFIERS 10 +#define MPIPE_NUM_CLASSIFIERS 16 #define MPIPE_CLS_MHZ 1200 -#define MPIPE_NUM_EDMA_RINGS 32 +#define MPIPE_NUM_EDMA_RINGS 64 #define MPIPE_NUM_SGMII_MACS 16 -#define MPIPE_NUM_XAUI_MACS 4 +#define MPIPE_NUM_XAUI_MACS 16 #define MPIPE_NUM_LOOPBACK_CHANNELS 4 #define MPIPE_NUM_NON_LB_CHANNELS 28 diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h index f2e9e122818d..13b3c4300e50 100644 --- a/arch/tile/include/arch/mpipe_shm.h +++ b/arch/tile/include/arch/mpipe_shm.h @@ -44,8 +44,14 @@ typedef union * descriptors toggles each time the ring tail pointer wraps. */ uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; /** Reserved. Must be zero. */ - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; /** Checksum generation enabled for this transfer. */ uint_reg_t csum : 1; /** @@ -110,7 +116,8 @@ typedef union uint_reg_t notif : 1; uint_reg_t ns : 1; uint_reg_t csum : 1; - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; uint_reg_t gen : 1; #endif @@ -126,14 +133,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_1 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_2 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -166,8 +175,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_1 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_0 : 6; @@ -408,7 +416,10 @@ typedef union /** * Sequence number applied when packet is distributed. Classifier * selects which sequence number is to be applied by writing the 13-bit - * SQN-selector into this field. + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the + * PACKET_SQN will be reduced to 32 bits. */ uint_reg_t gp_sqn : 16; /** @@ -451,14 +462,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_5 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_6 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -491,8 +504,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_6 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_5 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_4 : 6; diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h index 628b045436b8..85647e91a458 100644 --- a/arch/tile/include/arch/trio_constants.h +++ b/arch/tile/include/arch/trio_constants.h @@ -16,21 +16,21 @@ #ifndef __ARCH_TRIO_CONSTANTS_H__ #define __ARCH_TRIO_CONSTANTS_H__ -#define TRIO_NUM_ASIDS 16 +#define TRIO_NUM_ASIDS 32 #define TRIO_NUM_TLBS_PER_ASID 16 #define TRIO_NUM_TPIO_REGIONS 8 #define TRIO_LOG2_NUM_TPIO_REGIONS 3 -#define TRIO_NUM_MAP_MEM_REGIONS 16 -#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4 +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 #define TRIO_NUM_MAP_SQ_REGIONS 8 #define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 #define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 -#define TRIO_NUM_PUSH_DMA_RINGS 32 +#define TRIO_NUM_PUSH_DMA_RINGS 64 -#define TRIO_NUM_PULL_DMA_RINGS 32 +#define TRIO_NUM_PULL_DMA_RINGS 64 #endif /* __ARCH_TRIO_CONSTANTS_H__ */ diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index d385eaadece7..709798460763 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -166,7 +166,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n) * * Atomically sets @v to @i and returns old @v */ -static inline u64 atomic64_xchg(atomic64_t *v, u64 n) +static inline long long atomic64_xchg(atomic64_t *v, long long n) { return xchg64(&v->counter, n); } @@ -180,7 +180,8 @@ static inline u64 atomic64_xchg(atomic64_t *v, u64 n) * Atomically checks if @v holds @o and replaces it with @n if so. * Returns the old value at @v. */ -static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) { return cmpxchg64(&v->counter, o, n); } diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 0d0395b1b152..1ad4a1f7d42b 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -80,7 +80,7 @@ static inline void atomic_set(atomic_t *v, int n) /* A 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + long long counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } @@ -91,14 +91,14 @@ typedef struct { * * Atomically reads the value of @v. */ -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { /* * Requires an atomic op to read both 32-bit parts consistently. * Casting away const is safe since the atomic support routines * do not write to memory if the value has not been modified. */ - return _atomic64_xchg_add((u64 *)&v->counter, 0); + return _atomic64_xchg_add((long long *)&v->counter, 0); } /** @@ -108,7 +108,7 @@ static inline u64 atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v. */ -static inline void atomic64_add(u64 i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { _atomic64_xchg_add(&v->counter, i); } @@ -120,7 +120,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { smp_mb(); /* barrier for proper semantics */ return _atomic64_xchg_add(&v->counter, i) + i; @@ -135,7 +135,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns non-zero if @v was not @u, and zero otherwise. */ -static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +static inline long long atomic64_add_unless(atomic64_t *v, long long a, + long long u) { smp_mb(); /* barrier for proper semantics */ return _atomic64_xchg_add_unless(&v->counter, a, u) != u; @@ -151,7 +152,7 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) * atomic64_set() can't be just a raw store, since it would be lost if it * fell between the load and store of one of the other atomic ops. */ -static inline void atomic64_set(atomic64_t *v, u64 n) +static inline void atomic64_set(atomic64_t *v, long long n) { _atomic64_xchg(&v->counter, n); } @@ -236,11 +237,13 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p, extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); -extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n); -extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add_unless(volatile u64 *p, - int *lock, u64 o, u64 n); +extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, + long long o, long long n); +extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); +extern long long __atomic64_xchg_add(volatile long long *p, int *lock, + long long n); +extern long long __atomic64_xchg_add_unless(volatile long long *p, + int *lock, long long o, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h index 4001d5eab4bb..0ccda3c425be 100644 --- a/arch/tile/include/asm/cmpxchg.h +++ b/arch/tile/include/asm/cmpxchg.h @@ -35,10 +35,10 @@ int _atomic_xchg(int *ptr, int n); int _atomic_xchg_add(int *v, int i); int _atomic_xchg_add_unless(int *v, int a, int u); int _atomic_cmpxchg(int *ptr, int o, int n); -u64 _atomic64_xchg(u64 *v, u64 n); -u64 _atomic64_xchg_add(u64 *v, u64 i); -u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u); -u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); +long long _atomic64_xchg(long long *v, long long n); +long long _atomic64_xchg_add(long long *v, long long i); +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u); +long long _atomic64_cmpxchg(long long *v, long long o, long long n); #define xchg(ptr, n) \ ({ \ @@ -53,7 +53,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 4) \ __cmpxchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, (int)n); \ + (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \ + (int)n); \ }) #define xchg64(ptr, n) \ @@ -61,7 +62,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 8) \ __xchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic64_xchg((u64 *)(ptr), (u64)(n)); \ + (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \ + (long long)(n)); \ }) #define cmpxchg64(ptr, o, n) \ @@ -69,7 +71,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); if (sizeof(*(ptr)) != 8) \ __cmpxchg_called_with_bad_pointer(); \ smp_mb(); \ - (typeof(*(ptr)))_atomic64_cmpxchg((u64 *)ptr, (u64)o, (u64)n); \ + (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \ + (long long)o, (long long)n); \ }) #else @@ -81,10 +84,11 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); switch (sizeof(*(ptr))) { \ case 4: \ __x = (typeof(__x))(unsigned long) \ - __insn_exch4((ptr), (u32)(unsigned long)(n)); \ + __insn_exch4((ptr), \ + (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x)) \ + __x = (typeof(__x)) \ __insn_exch((ptr), (unsigned long)(n)); \ break; \ default: \ @@ -103,10 +107,12 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n); switch (sizeof(*(ptr))) { \ case 4: \ __x = (typeof(__x))(unsigned long) \ - __insn_cmpexch4((ptr), (u32)(unsigned long)(n)); \ + __insn_cmpexch4((ptr), \ + (u32)(unsigned long)(n)); \ break; \ case 8: \ - __x = (typeof(__x))__insn_cmpexch((ptr), (u64)(n)); \ + __x = (typeof(__x))__insn_cmpexch((ptr), \ + (long long)(n)); \ break; \ default: \ __cmpxchg_called_with_bad_pointer(); \ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 6346888f7bdc..672768008618 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size) #define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) #define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ -#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ -#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ #define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h index 63294f5a8efb..4f7ae39fa202 100644 --- a/arch/tile/include/asm/percpu.h +++ b/arch/tile/include/asm/percpu.h @@ -15,9 +15,37 @@ #ifndef _ASM_TILE_PERCPU_H #define _ASM_TILE_PERCPU_H -register unsigned long __my_cpu_offset __asm__("tp"); -#define __my_cpu_offset __my_cpu_offset -#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) +register unsigned long my_cpu_offset_reg asm("tp"); + +#ifdef CONFIG_PREEMPT +/* + * For full preemption, we can't just use the register variable + * directly, since we need barrier() to hazard against it, causing the + * compiler to reload anything computed from a previous "tp" value. + * But we also don't want to use volatile asm, since we'd like the + * compiler to be able to cache the value across multiple percpu reads. + * So we use a fake stack read as a hazard against barrier(). + * The 'U' constraint is like 'm' but disallows postincrement. + */ +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long tp; + register unsigned long *sp asm("sp"); + asm("move %0, tp" : "=r" (tp) : "U" (*sp)); + return tp; +} +#define __my_cpu_offset __my_cpu_offset() +#else +/* + * We don't need to hazard against barrier() since "tp" doesn't ever + * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only + * changes at function call points, at which we are already re-reading + * the value of "tp" due to "my_cpu_offset_reg" being a global variable. + */ +#define __my_cpu_offset my_cpu_offset_reg +#endif + +#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp)) #include <asm-generic/percpu.h> diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 63142ab3b3dd..d26a42279036 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -55,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 3421177f7370..2c8a9cd102d3 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -52,12 +52,10 @@ * memory allocation code). The vmalloc code puts in an internal * guard page between each allocation. */ -#define _VMALLOC_END HUGE_VMAP_BASE +#define _VMALLOC_END MEM_SV_START #define VMALLOC_END _VMALLOC_END #define VMALLOC_START _VMALLOC_START -#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) - #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h index fdd07f88cfd7..4cda03de734f 100644 --- a/arch/tile/include/gxio/iorpc_mpipe.h +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -56,89 +56,89 @@ #define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum); -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring); -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring); -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring); -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits); -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size); -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags); -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags); -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac); +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val); -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles); +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles); -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec); -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb); -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base); +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h index 476c5e5ca22c..f0b04284468b 100644 --- a/arch/tile/include/gxio/iorpc_mpipe_info.h +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -33,18 +33,18 @@ #define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name); -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac); + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base); -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index d95b96fd6c93..376a4f771167 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -46,59 +46,59 @@ #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags); -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, unsigned int node, unsigned int order_mode); -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports); -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx); -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, unsigned int asid); -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac); -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base); +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h index 8622e7d126ad..79962a97de8e 100644 --- a/arch/tile/include/gxio/iorpc_usb_host.h +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -31,16 +31,16 @@ #define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event); -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags); -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base); -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h index 5eedec0e988e..93c9636d2dd7 100644 --- a/arch/tile/include/gxio/usb_host.h +++ b/arch/tile/include/gxio/usb_host.h @@ -53,7 +53,7 @@ typedef struct { * @return Zero if the context was successfully initialized, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci); /* Destroy a USB context. @@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, * @return Zero if the context was successfully destroyed, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context); +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); /* Retrieve the address of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The address of the shim's MMIO registers. */ -extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context); +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); /* Retrieve the length of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The length of the shim's MMIO registers. */ -extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context); +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); #endif /* _GXIO_USB_H_ */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index ed378416b86a..49120843ff96 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, { return sys_llseek(fd, offset_high, offset_low, result, origin); } - + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S deleted file mode 100644 index f465d1eda20f..000000000000 --- a/arch/tile/kernel/futex_64.S +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Atomically access user memory, but use MMU to avoid propagating - * kernel exceptions. - */ - -#include <linux/linkage.h> -#include <asm/errno.h> -#include <asm/futex.h> -#include <asm/page.h> -#include <asm/processor.h> - -/* - * Provide a set of atomic memory operations supporting <asm/futex.h>. - * - * r0: user address to manipulate - * r1: new value to write, or for cmpxchg, old value to compare against - * r2: (cmpxchg only) new value to write - * - * Return __get_user struct, r0 with value, r1 with error. - */ -#define FUTEX_OP(name, ...) \ -STD_ENTRY(futex_##name) \ - __VA_ARGS__; \ - { \ - move r1, zero; \ - jrp lr \ - }; \ - STD_ENDPROC(futex_##name); \ - .pushsection __ex_table,"a"; \ - .quad 1b, get_user_fault; \ - .popsection - - .pushsection .fixup,"ax" -get_user_fault: - { movei r1, -EFAULT; jrp lr } - ENDPROC(get_user_fault) - .popsection - -FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) -FUTEX_OP(set, 1: exch4 r0, r0, r1) -FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) -FUTEX_OP(or, 1: fetchor4 r0, r0, r1) -FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index df27a1fd94a3..531f4c365351 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -66,7 +66,7 @@ static struct hardwall_type hardwall_types[] = { 0, "udn", LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_UDN].lock), NULL }, #ifndef __tilepro__ @@ -77,7 +77,7 @@ static struct hardwall_type hardwall_types[] = { 1, /* disabled pending hypervisor support */ "idn", LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IDN].lock), NULL }, { /* access to user-space IPI */ @@ -87,7 +87,7 @@ static struct hardwall_type hardwall_types[] = { 0, "ipi", LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list), - __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IPI].lock), NULL }, #endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 088d5c141e68..2cbe6d5dd6b0 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -815,6 +815,9 @@ STD_ENTRY(interrupt_return) } bzt r28, 1f bnz r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF jal preempt_schedule_irq FEEDBACK_REENTER(interrupt_return) 1: diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index ec755d3f3734..b8fc497f2437 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -841,6 +841,9 @@ STD_ENTRY(interrupt_return) } beqzt r28, 1f bnez r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF jal preempt_schedule_irq FEEDBACK_REENTER(interrupt_return) 1: diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4c34caea9dd3..74c91729a62a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1268,8 +1268,7 @@ static void __init validate_va(void) if ((long)VMALLOC_START >= 0) early_panic( "Linux VMALLOC region below the 2GB line (%#lx)!\n" - "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" - "or smaller VMALLOC_RESERVE.\n", + "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n", VMALLOC_START); #endif } diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 362284af3afd..c93977a62116 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -23,6 +23,7 @@ #include <linux/mmzone.h> #include <linux/dcache.h> #include <linux/fs.h> +#include <linux/string.h> #include <asm/backtrace.h> #include <asm/page.h> #include <asm/ucontext.h> @@ -332,21 +333,18 @@ static void describe_addr(struct KBacktraceIterator *kbt, } if (vma->vm_file) { - char *s; p = d_path(&vma->vm_file->f_path, buf, bufsize); if (IS_ERR(p)) p = "?"; - s = strrchr(p, '/'); - if (s) - p = s+1; + name = kbasename(p); } else { - p = "anon"; + name = "anon"; } /* Generate a string description of the vma info. */ - namelen = strlen(p); + namelen = strlen(name); remaining = (bufsize - 1) - namelen; - memmove(buf, p, namelen); + memmove(buf, name, namelen); snprintf(buf + namelen, remaining, "[%lx+%lx] ", vma->vm_start, vma->vm_end - vma->vm_start); } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index b425fb6a480d..b030b4e78845 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -551,8 +551,8 @@ static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) /* * This function generates unalign fixup JIT. * - * We fist find unalign load/store instruction's destination, source - * reguisters: ra, rb and rd. and 3 scratch registers by calling + * We first find unalign load/store instruction's destination, source + * registers: ra, rb and rd. and 3 scratch registers by calling * find_regs(...). 3 scratch clobbers should not alias with any register * used in the fault bundle. Then analyze the fault bundle to determine * if it's a load or store, operand width, branch or address increment etc. diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 759efa337be8..c89b211fd9e7 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -107,19 +107,19 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) EXPORT_SYMBOL(_atomic_xor); -u64 _atomic64_xchg(u64 *v, u64 n) +long long _atomic64_xchg(long long *v, long long n) { return __atomic64_xchg(v, __atomic_setup(v), n); } EXPORT_SYMBOL(_atomic64_xchg); -u64 _atomic64_xchg_add(u64 *v, u64 i) +long long _atomic64_xchg_add(long long *v, long long i) { return __atomic64_xchg_add(v, __atomic_setup(v), i); } EXPORT_SYMBOL(_atomic64_xchg_add); -u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u) +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u) { /* * Note: argument order is switched here since it is easier @@ -130,7 +130,7 @@ u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u) } EXPORT_SYMBOL(_atomic64_xchg_add_unless); -u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n) +long long _atomic64_cmpxchg(long long *v, long long o, long long n) { return __atomic64_cmpxchg(v, __atomic_setup(v), o, n); } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 4c288f199453..6c0571216a9d 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e316deb92fd..0fa1acfac79a 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -828,10 +828,6 @@ void __init mem_init(void) printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); -#endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 2deaddf3e01f..4fd9ec0b58ed 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr) } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e241a1930c98..f67e839f06c8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -481,11 +481,12 @@ config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on ACPI select COMMON_CLK + select PINCTRL ---help--- Select to build support for Intel Low Power Subsystem such as found on Intel Lynxpoint PCH. Selecting this option enables - things like clock tree (common clock framework) which are needed - by the LPSS peripheral drivers. + things like clock tree (common clock framework) and pincontrol + which are needed by the LPSS peripheral drivers. config X86_RDC321X bool "RDC R-321x SoC" @@ -859,7 +860,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD + depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -884,11 +885,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI config X86_IO_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI config X86_VISWS_APIC def_bool y @@ -1032,6 +1033,7 @@ config X86_REBOOTFIXUPS config MICROCODE tristate "CPU microcode loading support" + depends on CPU_SUP_AMD || CPU_SUP_INTEL select FW_LOADER ---help--- diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d3f5c63078d8..89270b4318db 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) * Catch too early usage of this before alternatives * have run. */ - asm goto("1: jmp %l[t_warn]\n" + asm_volatile_goto("1: jmp %l[t_warn]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif - asm goto("1: jmp %l[t_no]\n" + asm_volatile_goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) * have. Thus, we force the jump to the widest, 4-byte, signed relative * offset even though the last would often fit in less bytes. */ - asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 64507f35800c..6a2cefb4395a 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:" + asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index e7e6751648ed..07537a44216e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -20,7 +20,7 @@ static inline void __mutex_fastpath_lock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " decl %0\n" + asm_volatile_goto(LOCK_PREFIX " decl %0\n" " jns %l[exit]\n" : : "m" (v->counter) : "memory", "cc" @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) static inline void __mutex_fastpath_unlock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " incl %0\n" + asm_volatile_goto(LOCK_PREFIX " incl %0\n" " jg %l[exit]\n" : : "m" (v->counter) : "memory", "cc" diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b7..b913915e8e63 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } -static inline unsigned long mfn_to_pfn(unsigned long mfn) +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) { unsigned long pfn; - int ret = 0; + int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely(mfn >= machine_to_phys_nr)) { - pfn = ~0; - goto try_override; - } - pfn = 0; + if (unlikely(mfn >= machine_to_phys_nr)) + return ~0; + /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); -try_override: - /* ret might be < 0 if there are no entries in the m2p for mfn */ if (ret < 0) - pfn = ~0; - else if (get_phys_to_machine(pfn) != mfn) + return ~0; + + return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -111,6 +119,7 @@ try_override: * m2p_find_override_pfn returns ~0 if it doesn't find anything. */ pfn = m2p_find_override_pfn(mfn, ~0); + } /* * pfn is ~0 if there are no entries in the m2p for mfn or if the diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1191ac1c9d25..a419814cea57 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -113,7 +113,7 @@ static int __init early_get_pnodeid(void) break; case UV3_HUB_PART_NUMBER: case UV3_HUB_PART_NUMBER_X: - uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1; + uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; break; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8355c84b9729..9d8449158cf9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); @@ -1883,26 +1883,21 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_time_zero = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return; - - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + if (!sched_clock_stable) return; - userpg->cap_usr_time = 1; + userpg->cap_user_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; - if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_usr_time_zero = 1; - userpg->time_zero = this_cpu_read(cyc2ns_offset); - } + userpg->cap_user_time_zero = 1; + userpg->time_zero = this_cpu_read(cyc2ns_offset); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c62d88396ad5..f31a1655d1ff 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -899,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1), EVENT_EXTRA_END }; @@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void) break; case 55: /* Atom 22nm "Silvermont" */ + case 77: /* Avoton "Silvermont" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 63438aad177f..ab3ba1c1b7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -584,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8ed44589b0e4..4118f9f68315 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) { struct intel_uncore_box *box; int i, size; size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; @@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, 0); + box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(type, cpu); + box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e2..b077f4cc225a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -ENTRY(save_rest) - PARTIAL_FRAME 1 (REST_SKIP+8) - movq 5*8+16(%rsp), %r11 /* save return address */ - movq_cfi rbx, RBX+16 - movq_cfi rbp, RBP+16 - movq_cfi r12, R12+16 - movq_cfi r13, R13+16 - movq_cfi r14, R14+16 - movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ - FIXUP_TOP_OF_STACK %r11, 16 - ret - CFI_ENDPROC -END(save_rest) - /* save complete stack frame */ .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 697b93af02dd..a0e2a8a80c94 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); + pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); + pv_lock_ops.unlock_kick = kvm_unlock_kick; +} + +static __init int kvm_spinlock_init_jump(void) +{ + if (!kvm_para_available()) + return 0; + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) + return 0; static_key_slow_inc(¶virt_ticketlocks_enabled); + printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); - pv_lock_ops.unlock_kick = kvm_unlock_kick; + return 0; } +early_initcall(kvm_spinlock_init_jump); + #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7123b5df479d..af99f71aeb7f 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; + uci->cpu_sig.rev = rev; return 0; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 563ed91e6faa..7e920bff99a3 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -326,6 +326,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), }, }, + { /* Handle problems with rebooting on the Latitude E5410. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E5410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"), + }, + }, { /* Handle problems with rebooting on the Latitude E5420. */ .callback = set_pci_reboot, .ident = "Dell Latitude E5420", @@ -352,12 +360,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, { /* Handle problems with rebooting on the Precision M6600. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell Precision M6600", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), }, }, + { /* Handle problems with rebooting on the Dell PowerEdge C6100. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, + { /* Some C6100 machines were shipped with vendor being 'Dell'. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, { } }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aecc98a93d1b..6cacab671f9b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid) { static int current_node = -1; int node = early_cpu_to_node(cpu); + int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { @@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid) current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); + pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 22513e96b012..86179d409893 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -72,14 +72,14 @@ __init int create_simplefb(const struct screen_info *si, * the part that is occupied by the framebuffer */ len = mode->height * mode->stride; len = PAGE_ALIGN(len); - if (len > si->lfb_size << 16) { + if (len > (u64)si->lfb_size << 16) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); - res.flags = IORESOURCE_MEM; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; res.start = si->lfb_base; res.end = si->lfb_base + len - 1; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2bc1e81045b0..ddc3f3d2afdb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) +{ + int rc; + + rc = em_ret_far(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); + return X86EMUL_CONTINUE; +} + static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ @@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - N, I(ImplicitOps | Stack, em_ret_far), + I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), + I(ImplicitOps | Stack, em_ret_far), D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 043330159179..ad75d77999d0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -99,6 +99,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -309,7 +326,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f1da43ff2a2..2b2fce1b2009 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3255,25 +3255,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void ept_load_pdptrs(struct kvm_vcpu *vcpu) { + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + if (!test_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_dirty)) return; if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { - vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]); + vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); } } static void ept_save_pdptrs(struct kvm_vcpu *vcpu) { + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { - vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); + mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); } __set_bit(VCPU_EXREG_PDPTR, @@ -5339,6 +5343,17 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return 0; } + /* + * EPT violation happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + * There are errata that may cause this bit to not be set: + * AAK134, BY25. + */ + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + cpu_has_virtual_nmis() && + (exit_qualification & INTR_INFO_UNBLOCK_NMI)) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79c216aa0e2b..516593e1ce33 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -772,13 +772,21 @@ out: return; } +static void bpf_jit_free_deferred(struct work_struct *work) +{ + struct sk_filter *fp = container_of(work, struct sk_filter, work); + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + set_memory_rw(addr, header->pages); + module_free(NULL, header); + kfree(fp); +} + void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) { - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - set_memory_rw(addr, header->pages); - module_free(NULL, header); + INIT_WORK(&fp->work, bpf_jit_free_deferred); + schedule_work(&fp->work); } } diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5596c7bdd327..082e88129712 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) return -ENODEV; - if (start > end || !addr) + if (start > end) return -EINVAL; mutex_lock(&pci_mmcfg_lock); @@ -716,6 +716,11 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, return -EEXIST; } + if (!addr) { + mutex_unlock(&pci_mmcfg_lock); + return -EINVAL; + } + rc = -EBUSY; cfg = pci_mmconfig_alloc(seg, start, end, addr); if (cfg == NULL) { diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed127096..c7e22ab29a5a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; + } size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b901e8d782d..a61c7d5811be 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == mfn) + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; @@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page, * the original pfn causes mfn_to_pfn(mfn) to return the frontend * pfn again. */ mfn &= ~FOREIGN_FRAME_BIT; - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d1e4777b4e75..31d04758b76f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void) old memory can be recycled */ make_lowmem_page_readwrite(xen_initial_gdt); +#ifdef CONFIG_X86_32 + /* + * Xen starts us with XEN_FLAT_RING1_DS, but linux code + * expects __USER_DS + */ + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); +#endif + xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 253f63fceea1..be6b86078957 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu) } +/* + * Our init of PV spinlocks is split in two init functions due to us + * using paravirt patching and jump labels patching and having to do + * all of this before SMP code is invoked. + * + * The paravirt patching needs to be done _before_ the alternative asm code + * is started, otherwise we would not patch the core kernel code. + */ void __init xen_init_spinlocks(void) { @@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void) return; } - static_key_slow_inc(¶virt_ticketlocks_enabled); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } +/* + * While the jump_label init code needs to happend _after_ the jump labels are + * enabled and before SMP is started. Hence we use pre-SMP initcall level + * init. We cannot do it in xen_init_spinlocks as that is done before + * jump labels are activated. + */ +static __init int xen_init_spinlocks_jump(void) +{ + if (!xen_pvspin) + return 0; + + static_key_slow_inc(¶virt_ticketlocks_enabled); + return 0; +} +early_initcall(xen_init_spinlocks_jump); + static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; |