diff options
35 files changed, 765 insertions, 170 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..b37ac9b2a36b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI @@ -20,6 +21,7 @@ config ARM64 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -244,6 +246,9 @@ config PGTABLE_LEVELS config ARCH_SUPPORTS_UPROBES def_bool y +config ARCH_PROC_KCORE_TEXT + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -982,7 +987,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE + depends on RANDOMIZE_BASE default y help Randomizes the location of the module region without considering the diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f839ecd919f9..9b41f1e3b1a0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -52,17 +52,19 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian +CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian +CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL UTS_MACHINE := aarch64 endif -CHECKFLAGS += -D__aarch64__ +CHECKFLAGS += -D__aarch64__ -m64 ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large @@ -70,6 +72,9 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +ifeq ($(CONFIG_DYNAMIC_FTRACE),y) +KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o +endif endif # Default value diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 5392dbeffa45..f72779aad276 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev); /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { - if (!dev) - return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 5d1700425efe..ac3fb7441510 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; ({ \ clear_bit(TIF_32BIT, ¤t->mm->context.flags); \ clear_thread_flag(TIF_32BIT); \ + current->personality &= ~READ_IMPLIES_EXEC; \ }) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ @@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread +/* + * Unlike the native SET_PERSONALITY macro, the compat version inherits + * READ_IMPLIES_EXEC across a fork() since this is the behaviour on + * arch/arm/. + */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ set_bit(TIF_32BIT, ¤t->mm->context.flags); \ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index d57693f5d4ec..19bd97671bb8 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -30,6 +30,9 @@ struct mod_plt_sec { struct mod_arch_specific { struct mod_plt_sec core; struct mod_plt_sec init; + + /* for CONFIG_DYNAMIC_FTRACE */ + void *ftrace_trampoline; }; #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c213fdbd056c..6eae342ced6b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) -#define pud_present(pud) (pud_val(pud)) +#define pud_present(pud) pte_present(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9428b93fefb2..64c9e78f9882 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -104,6 +104,9 @@ struct thread_struct { #define task_user_tls(t) (&(t)->thread.tp_value) #endif +/* Sync TPIDR_EL0 back to thread_struct for current */ +void tls_preserve_current_state(void); + #define INIT_THREAD { } static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 801a16dbbdf6..5b6eafccc5d8 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -30,5 +30,6 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..d0beefeb6d25 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(struct mm_struct *mm, unsigned long addr); +extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index ee469be1ae1d..f0a76b9fcd6e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -34,6 +34,26 @@ struct sigcontext { }; /* + * Allocation of __reserved[]: + * (Note: records do not necessarily occur in the order shown here.) + * + * size description + * + * 0x210 fpsimd_context + * 0x10 esr_context + * 0x20 extra_context (optional) + * 0x10 terminator (null _aarch64_ctx) + * + * 0xdb0 (reserved for future allocation) + * + * New records that can exceed this space need to be opt-in for userspace, so + * that an expanded signal frame is not generated unexpectedly. The mechanism + * for opting in will depend on the extension that generates each new record. + * The above table documents the maximum set and sizes of records than can be + * generated when userspace does not opt in for any such extension. + */ + +/* * Header to be used at the beginning of structures extending the user * context. Such structures must be placed after the rt_sigframe on the stack * and be 16-byte aligned. The last structure must be a dummy one with the @@ -61,4 +81,39 @@ struct esr_context { __u64 esr; }; +/* + * extra_context: describes extra space in the signal frame for + * additional structures that don't fit in sigcontext.__reserved[]. + * + * Note: + * + * 1) fpsimd_context, esr_context and extra_context must be placed in + * sigcontext.__reserved[] if present. They cannot be placed in the + * extra space. Any other record can be placed either in the extra + * space or in sigcontext.__reserved[], unless otherwise specified in + * this file. + * + * 2) There must not be more than one extra_context. + * + * 3) If extra_context is present, it must be followed immediately in + * sigcontext.__reserved[] by the terminating null _aarch64_ctx. + * + * 4) The extra space to which datap points must start at the first + * 16-byte aligned address immediately after the terminating null + * _aarch64_ctx that follows the extra_context structure in + * __reserved[]. The extra space may overrun the end of __reserved[], + * as indicated by a sufficiently large value for the size field. + * + * 5) The extra space must itself be terminated with a null + * _aarch64_ctx. + */ +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1dcb69d3d0e5..f2b4e816b6de 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif + +# will be included by each individual module but not by the core kernel itself +extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 817ce3365e20..9f9e0064c8c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); +static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +{ + /* file-wide pr_fmt adds "CPU features: " prefix */ + pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + return 0; +} + +static struct notifier_block cpu_hwcaps_notifier = { + .notifier_call = dump_cpu_hwcaps +}; + +static int __init register_cpu_hwcaps_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &cpu_hwcaps_notifier); + return 0; +} +__initcall(register_cpu_hwcaps_dumper); + DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); @@ -639,8 +658,10 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); + if (taint) { + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } u64 read_sanitised_ftr_reg(u32 id) diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S new file mode 100644 index 000000000000..00c4025be4ff --- /dev/null +++ b/arch/arm64/kernel/ftrace-mod.S @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .section ".text.ftrace_trampoline", "ax" + .align 3 +0: .quad 0 +__ftrace_trampoline: + ldr x16, 0b + br x16 +ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 40ad08ac569a..c13b1fca0e5b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -10,10 +10,12 @@ */ #include <linux/ftrace.h> +#include <linux/module.h> #include <linux/swab.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/debug-monitors.h> #include <asm/ftrace.h> #include <asm/insn.h> @@ -70,6 +72,58 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS + unsigned long *trampoline; + struct module *mod; + + /* + * On kernels that support module PLTs, the offset between the + * branch instruction and its target may legally exceed the + * range of an ordinary relative 'bl' opcode. In this case, we + * need to branch via a trampoline in the module. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + trampoline = (unsigned long *)mod->arch.ftrace_trampoline; + if (trampoline[0] != addr) { + if (trampoline[0] != 0) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + + /* point the trampoline to our ftrace entry point */ + module_disable_ro(mod); + trampoline[0] = addr; + module_enable_ro(mod, true); + + /* update trampoline before patching in the branch */ + smp_wmb(); + } + addr = (unsigned long)&trampoline[1]; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ + } old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -84,12 +138,55 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - u32 old, new; + bool validate = true; + u32 old = 0, new; + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS + u32 replaced; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + } + + /* + * The instruction we are about to patch may be a branch and + * link instruction that was redirected via a PLT entry. In + * this case, the normal validation will fail, but we can at + * least check that we are dealing with a branch and link + * instruction that points into the right module. + */ + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (!aarch64_insn_is_bl(replaced) || + !within_module(pc + aarch64_get_branch_offset(replaced), + mod)) + return -EINVAL; + + validate = false; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ + } else { + old = aarch64_insn_gen_branch_imm(pc, addr, + AARCH64_INSN_BRANCH_LINK); + } - old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, true); + return ftrace_modify_code(pc, old, new, validate); } void arch_ftrace_update_code(int command) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f035ff6fb223..8c3a7264fb0f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { apply_alternatives((void *)s->sh_addr, s->sh_size); - return 0; } +#ifdef CONFIG_ARM64_MODULE_PLTS + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) + me->arch.ftrace_trampoline = (void *)s->sh_addr; +#endif } return 0; diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..108283443336 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) if (!acpi_disabled) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); } return 0; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c5c45942fb6e..d849d9804011 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) pr_err("current sp %lx does not match saved sp %lx\n", orig_sp, stack_addr); pr_err("Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); + __show_regs(saved_regs); pr_err("Current registers\n"); - show_regs(regs); + __show_regs(regs); BUG(); } unpause_graph_tracing(); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ae2a835898d7..659ae8094ed5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); + dump_backtrace(regs, NULL); } static void tls_thread_flush(void) @@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, return 0; } +void tls_preserve_current_state(void) +{ + *task_user_tls(current) = read_sysreg(tpidr_el0); +} + static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - tpidr = read_sysreg(tpidr_el0); - *task_user_tls(current) = tpidr; + tls_preserve_current_state(); tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c142459a88f3..35846f1550db 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, { struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + + if (target == current) + fpsimd_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); } @@ -648,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { unsigned long *tls = &target->thread.tp_value; + + if (target == current) + tls_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); } @@ -894,21 +902,27 @@ static int compat_vfp_get(struct task_struct *target, { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret; + int ret, vregs_end_pos; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (target == current) + fpsimd_preserve_current_state(); + /* * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - VFP_STATE_SIZE - sizeof(compat_ulong_t)); + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, + 0, vregs_end_pos); if (count && !ret) { fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - ret = put_user(fpscr, (compat_ulong_t *)ubuf); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, + vregs_end_pos, VFP_STATE_SIZE); } return ret; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2c822ef94f34..d4b740538ad5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } name = of_flat_dt_get_machine_name(); + if (!name) + return; + pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c7b6de62f9d3..089c3747995d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,10 +19,14 @@ #include <linux/compat.h> #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/signal.h> #include <linux/personality.h> #include <linux/freezer.h> +#include <linux/stddef.h> #include <linux/uaccess.h> +#include <linux/sizes.h> +#include <linux/string.h> #include <linux/tracehook.h> #include <linux/ratelimit.h> @@ -41,10 +45,133 @@ struct rt_sigframe { struct siginfo info; struct ucontext uc; +}; + +struct frame_record { u64 fp; u64 lr; }; +struct rt_sigframe_user_layout { + struct rt_sigframe __user *sigframe; + struct frame_record __user *next_frame; + + unsigned long size; /* size of allocated sigframe data */ + unsigned long limit; /* largest allowed size */ + + unsigned long fpsimd_offset; + unsigned long esr_offset; + unsigned long extra_offset; + unsigned long end_offset; +}; + +#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + +static void init_user_layout(struct rt_sigframe_user_layout *user) +{ + const size_t reserved_size = + sizeof(user->sigframe->uc.uc_mcontext.__reserved); + + memset(user, 0, sizeof(*user)); + user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); + + user->limit = user->size + reserved_size; + + user->limit -= TERMINATOR_SIZE; + user->limit -= EXTRA_CONTEXT_SIZE; + /* Reserve space for extension and terminator ^ */ +} + +static size_t sigframe_size(struct rt_sigframe_user_layout const *user) +{ + return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); +} + +/* + * Sanity limit on the approximate maximum size of signal frame we'll + * try to generate. Stack alignment padding and the frame record are + * not taken into account. This limit is not a guarantee and is + * NOT ABI. + */ +#define SIGFRAME_MAXSZ SZ_64K + +static int __sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size, bool extend) +{ + size_t padded_size = round_up(size, 16); + + if (padded_size > user->limit - user->size && + !user->extra_offset && + extend) { + int ret; + + user->limit += EXTRA_CONTEXT_SIZE; + ret = __sigframe_alloc(user, &user->extra_offset, + sizeof(struct extra_context), false); + if (ret) { + user->limit -= EXTRA_CONTEXT_SIZE; + return ret; + } + + /* Reserve space for the __reserved[] terminator */ + user->size += TERMINATOR_SIZE; + + /* + * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for + * the terminator: + */ + user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE; + } + + /* Still not enough space? Bad luck! */ + if (padded_size > user->limit - user->size) + return -ENOMEM; + + *offset = user->size; + user->size += padded_size; + + return 0; +} + +/* + * Allocate space for an optional record of <size> bytes in the user + * signal frame. The offset from the signal frame base address to the + * allocated block is assigned to *offset. + */ +static int sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size) +{ + return __sigframe_alloc(user, offset, size, true); +} + +/* Allocate the null terminator record and prevent further allocations */ +static int sigframe_alloc_end(struct rt_sigframe_user_layout *user) +{ + int ret; + + /* Un-reserve the space reserved for the terminator: */ + user->limit += TERMINATOR_SIZE; + + ret = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + if (ret) + return ret; + + /* Prevent further allocation: */ + user->limit = user->size; + return 0; +} + +static void __user *apply_user_offset( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char __user *base = (char __user *)user->sigframe; + + return base + offset; +} + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -92,12 +219,159 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) return err ? -EFAULT : 0; } +struct user_ctxs { + struct fpsimd_context __user *fpsimd; +}; + +static int parse_user_sigframe(struct user_ctxs *user, + struct rt_sigframe __user *sf) +{ + struct sigcontext __user *const sc = &sf->uc.uc_mcontext; + struct _aarch64_ctx __user *head; + char __user *base = (char __user *)&sc->__reserved; + size_t offset = 0; + size_t limit = sizeof(sc->__reserved); + bool have_extra_context = false; + char const __user *const sfp = (char const __user *)sf; + + user->fpsimd = NULL; + + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + while (1) { + int err = 0; + u32 magic, size; + char const __user *userp; + struct extra_context const __user *extra; + u64 extra_datap; + u32 extra_size; + struct _aarch64_ctx const __user *end; + u32 end_magic, end_size; + + if (limit - offset < sizeof(*head)) + goto invalid; + + if (!IS_ALIGNED(offset, 16)) + goto invalid; + + head = (struct _aarch64_ctx __user *)(base + offset); + __get_user_error(magic, &head->magic, err); + __get_user_error(size, &head->size, err); + if (err) + return err; + + if (limit - offset < size) + goto invalid; + + switch (magic) { + case 0: + if (size) + goto invalid; + + goto done; + + case FPSIMD_MAGIC: + if (user->fpsimd) + goto invalid; + + if (size < sizeof(*user->fpsimd)) + goto invalid; + + user->fpsimd = (struct fpsimd_context __user *)head; + break; + + case ESR_MAGIC: + /* ignore */ + break; + + case EXTRA_MAGIC: + if (have_extra_context) + goto invalid; + + if (size < sizeof(*extra)) + goto invalid; + + userp = (char const __user *)head; + + extra = (struct extra_context const __user *)userp; + userp += size; + + __get_user_error(extra_datap, &extra->datap, err); + __get_user_error(extra_size, &extra->size, err); + if (err) + return err; + + /* Check for the dummy terminator in __reserved[]: */ + + if (limit - offset - size < TERMINATOR_SIZE) + goto invalid; + + end = (struct _aarch64_ctx const __user *)userp; + userp += TERMINATOR_SIZE; + + __get_user_error(end_magic, &end->magic, err); + __get_user_error(end_size, &end->size, err); + if (err) + return err; + + if (end_magic || end_size) + goto invalid; + + /* Prevent looping/repeated parsing of extra_context */ + have_extra_context = true; + + base = (__force void __user *)extra_datap; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + if (!IS_ALIGNED(extra_size, 16)) + goto invalid; + + if (base != userp) + goto invalid; + + /* Reject "unreasonably large" frames: */ + if (extra_size > sfp + SIGFRAME_MAXSZ - userp) + goto invalid; + + /* + * Ignore trailing terminator in __reserved[] + * and start parsing extra data: + */ + offset = 0; + limit = extra_size; + continue; + + default: + goto invalid; + } + + if (size < sizeof(*head)) + goto invalid; + + if (limit - offset < size) + goto invalid; + + offset += size; + } + +done: + if (!user->fpsimd) + goto invalid; + + return 0; + +invalid: + return -EINVAL; +} + static int restore_sigframe(struct pt_regs *regs, struct rt_sigframe __user *sf) { sigset_t set; int i, err; - void *aux = sf->uc.uc_mcontext.__reserved; + struct user_ctxs user; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -116,12 +390,11 @@ static int restore_sigframe(struct pt_regs *regs, regs->syscallno = ~0UL; err |= !valid_user_regs(®s->user_regs, current); + if (err == 0) + err = parse_user_sigframe(&user, sf); - if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); - err |= restore_fpsimd_context(fpsimd_ctx); - } + if (err == 0) + err = restore_fpsimd_context(user.fpsimd); return err; } @@ -162,16 +435,37 @@ badframe: return 0; } -static int setup_sigframe(struct rt_sigframe __user *sf, +/* Determine the layout of optional records in the signal frame */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +{ + int err; + + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + + /* fault information, if valid */ + if (current->thread.fault_code) { + err = sigframe_alloc(user, &user->esr_offset, + sizeof(struct esr_context)); + if (err) + return err; + } + + return sigframe_alloc_end(user); +} + + +static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - void *aux = sf->uc.uc_mcontext.__reserved; - struct _aarch64_ctx *end; + struct rt_sigframe __user *sf = user->sigframe; /* set up the stack frame for unwinding */ - __put_user_error(regs->regs[29], &sf->fp, err); - __put_user_error(regs->regs[30], &sf->lr, err); + __put_user_error(regs->regs[29], &user->next_frame->fp, err); + __put_user_error(regs->regs[30], &user->next_frame->lr, err); for (i = 0; i < 31; i++) __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], @@ -185,58 +479,103 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); + struct fpsimd_context __user *fpsimd_ctx = + apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); - aux += sizeof(*fpsimd_ctx); } /* fault information, if valid */ - if (current->thread.fault_code) { - struct esr_context *esr_ctx = - container_of(aux, struct esr_context, head); + if (err == 0 && user->esr_offset) { + struct esr_context __user *esr_ctx = + apply_user_offset(user, user->esr_offset); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); - aux += sizeof(*esr_ctx); + } + + if (err == 0 && user->extra_offset) { + char __user *sfp = (char __user *)user->sigframe; + char __user *userp = + apply_user_offset(user, user->extra_offset); + + struct extra_context __user *extra; + struct _aarch64_ctx __user *end; + u64 extra_datap; + u32 extra_size; + + extra = (struct extra_context __user *)userp; + userp += EXTRA_CONTEXT_SIZE; + + end = (struct _aarch64_ctx __user *)userp; + userp += TERMINATOR_SIZE; + + /* + * extra_datap is just written to the signal frame. + * The value gets cast back to a void __user * + * during sigreturn. + */ + extra_datap = (__force u64)userp; + extra_size = sfp + round_up(user->size, 16) - userp; + + __put_user_error(EXTRA_MAGIC, &extra->head.magic, err); + __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err); + __put_user_error(extra_datap, &extra->datap, err); + __put_user_error(extra_size, &extra->size, err); + + /* Add the terminator */ + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); } /* set the "end" magic */ - end = aux; - __put_user_error(0, &end->magic, err); - __put_user_error(0, &end->size, err); + if (err == 0) { + struct _aarch64_ctx __user *end = + apply_user_offset(user, user->end_offset); + + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } return err; } -static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig, - struct pt_regs *regs) +static int get_sigframe(struct rt_sigframe_user_layout *user, + struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; - struct rt_sigframe __user *frame; + int err; + + init_user_layout(user); + err = setup_sigframe_layout(user); + if (err) + return err; sp = sp_top = sigsp(regs->sp, ksig); - sp = (sp - sizeof(struct rt_sigframe)) & ~15; - frame = (struct rt_sigframe __user *)sp; + sp = round_down(sp - sizeof(struct frame_record), 16); + user->next_frame = (struct frame_record __user *)sp; + + sp = round_down(sp, 16) - sigframe_size(user); + user->sigframe = (struct rt_sigframe __user *)sp; /* * Check that we can actually write to the signal frame. */ - if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) - frame = NULL; + if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp)) + return -EFAULT; - return frame; + return 0; } static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, - void __user *frame, int usig) + struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; regs->regs[0] = usig; - regs->sp = (unsigned long)frame; - regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); + regs->sp = (unsigned long)user->sigframe; + regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; if (ka->sa.sa_flags & SA_RESTORER) @@ -250,20 +589,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; int err = 0; - frame = get_sigframe(ksig, regs); - if (!frame) + if (get_sigframe(&user, ksig, regs)) return 1; + frame = user.sigframe; + __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(frame, regs, set); + err |= setup_sigframe(&user, regs, set); if (err == 0) { - setup_return(regs, &ksig->ka, frame, usig); + setup_return(regs, &ksig->ka, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, &ksig->info); regs->regs[1] = (unsigned long)&frame->info; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index feac80c22f61..09d37d66b630 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) put_task_stack(tsk); } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0805b44f986a..3ebfb1d00b53 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; unsigned long irq_stack_ptr; @@ -728,8 +728,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) break; case BUG_TRAP_TYPE_WARN: - /* Ideally, report_bug() should backtrace for us... but no. */ - dump_backtrace(regs, NULL); break; default: diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..ae35f1855e06 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -37,7 +37,7 @@ #include <asm/vdso.h> #include <asm/vdso_datapage.h> -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; /* @@ -125,14 +125,14 @@ static int __init vdso_init(void) struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -145,7 +145,7 @@ static int __init vdso_init(void) /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(&vdso_start); + pfn = sym_to_pfn(vdso_start); for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3216e098c058..3e340b625436 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return NULL; - } - if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; @@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size, bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return; - } freed = dma_release_from_contiguous(dev, phys_to_page(paddr), diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..ea2ea68d1bd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -31,6 +31,7 @@ #include <linux/highmem.h> #include <linux/perf_event.h> #include <linux/preempt.h> +#include <linux/hugetlb.h> #include <asm/bug.h> #include <asm/cpufeature.h> @@ -80,18 +81,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) #endif /* - * Dump out the page tables associated with 'addr' in mm 'mm'. + * Dump out the page tables associated with 'addr' in the currently active mm. */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(unsigned long addr) { + struct mm_struct *mm; pgd_t *pgd; - if (!mm) + if (addr < TASK_SIZE) { + /* TTBR0 */ + mm = current->active_mm; + if (mm == &init_mm) { + pr_alert("[%016lx] user address but active_mm is swapper\n", + addr); + return; + } + } else if (addr >= VA_START) { + /* TTBR1 */ mm = &init_mm; + } else { + pr_alert("[%016lx] address between user and kernel address ranges\n", + addr); + return; + } - pr_alert("pgd = %p\n", mm->pgd); + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", + mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, + VA_BITS, mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); do { pud_t *pud; @@ -196,8 +214,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, /* * The kernel tried to access some page that wasn't present. */ -static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, struct pt_regs *regs) +static void __do_kernel_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const char *msg; @@ -227,7 +245,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); - show_pte(mm, addr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); do_exit(SIGKILL); @@ -239,18 +257,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, */ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int esr, unsigned int sig, int code, - struct pt_regs *regs) + struct pt_regs *regs, int fault) { struct siginfo si; const struct fault_info *inf; + unsigned int lsb = 0; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); - pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", + pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(tsk->mm, addr); - show_regs(regs); + print_vma_addr(KERN_CONT ", in ", regs->pc); + pr_cont("\n"); + __show_regs(regs); } tsk->thread.fault_address = addr; @@ -259,13 +279,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, si.si_errno = 0; si.si_code = code; si.si_addr = (void __user *)addr; + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + si.si_addr_lsb = lsb; + force_sig_info(sig, &si, tsk); } static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->active_mm; const struct fault_info *inf; /* @@ -274,9 +304,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { inf = esr_to_fault_info(esr); - __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0); } else - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); } #define VM_FAULT_BADMAP 0x010000 @@ -329,7 +359,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { struct task_struct *tsk; struct mm_struct *mm; - int fault, sig, code; + int fault, sig, code, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -368,6 +398,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, die("Accessing user space memory outside uaccess.h routines", regs, esr); } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -391,24 +423,42 @@ retry: } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + major |= fault & VM_FAULT_MAJOR; - /* - * If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because it - * would already be released in __lock_page_or_retry in mm/filemap.c. - */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return 0; + if (fault & VM_FAULT_RETRY) { + /* + * If we need to retry but a fatal signal is pending, + * handle the signal first. We do not need to release + * the mmap_sem because it would already be released + * in __lock_page_or_retry in mm/filemap.c. + */ + if (fatal_signal_pending(current)) + return 0; + + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of + * starvation. + */ + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + up_read(&mm->mmap_sem); /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. + * Handle the "normal" (no error) case first. */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | + VM_FAULT_BADACCESS)))) { + /* + * Major/minor page fault accounting is only done + * once. If we go through a retry, it is extremely + * likely that the page will be found in page cache at + * that point. + */ + if (major) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); @@ -417,25 +467,9 @@ retry: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } - if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. - */ - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; - mm_flags |= FAULT_FLAG_TRIED; - goto retry; - } - } - - up_read(&mm->mmap_sem); - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) return 0; + } /* * If we are in kernel mode at this point, we have no context to @@ -461,6 +495,9 @@ retry: */ sig = SIGBUS; code = BUS_ADRERR; + } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { + sig = SIGBUS; + code = BUS_MCEERR_AR; } else { /* * Something tried to access memory that isn't in our memory @@ -471,11 +508,11 @@ retry: SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, esr, sig, code, regs); + __do_user_fault(tsk, addr, esr, sig, code, regs, fault); return 0; no_context: - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); return 0; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7514a000e361..69b8200b1cfd 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; - pte_t *pte = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; + pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) + if (pud_none(*pud)) return NULL; - - if (pud_huge(*pud)) + /* swap or huge page */ + if (!pud_present(*pud) || pud_huge(*pud)) return (pte_t *)pud; + /* table; check the next level */ + pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) return NULL; - - if (pte_cont(pmd_pte(*pmd))) { - pmd = pmd_offset( - pud, (addr & CONT_PMD_MASK)); - return (pte_t *)pmd; - } - if (pmd_huge(*pmd)) + if (!pmd_present(*pmd) || pmd_huge(*pmd)) return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, addr); - if (pte_present(*pte) && pte_cont(*pte)) { - pte = pte_offset_kernel( - pmd, (addr & CONT_PTE_MASK)); - return pte; - } + return NULL; } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7b0d55756eb1..adc208c2ae9c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -18,6 +18,7 @@ #include <linux/elf.h> #include <linux/fs.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/export.h> @@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ int valid_phys_addr_range(phys_addr_t addr, size_t size) { - if (addr < PHYS_OFFSET) - return 0; - if (addr + size > __pa(high_memory - 1) + 1) - return 0; - - return 1; + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able traverse + * such boundaries. + */ + return memblock_is_region_memory(addr, size) && + memblock_is_map_memory(addr); } /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..23c2d89a362e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -31,6 +31,7 @@ #include <linux/fs.h> #include <linux/io.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/barrier.h> #include <asm/cputype.h> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c5fecf97ee2f..8c77598dd6aa 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, return NULL; } -static acpi_status -iort_match_type_callback(struct acpi_iort_node *node, void *context) -{ - return AE_OK; -} - -bool iort_node_match(u8 type) -{ - struct acpi_iort_node *node; - - node = iort_scan_node(type, iort_match_type_callback, NULL); - - return node != NULL; -} - static acpi_status iort_match_node_callback(struct acpi_iort_node *node, void *context) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 31adbebf812e..8102ee7b3247 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -17,6 +17,8 @@ config DEVMEM config DEVKMEM bool "/dev/kmem virtual device support" + # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write + depends on !ARM64 help Say Y here if you want to support the /dev/kmem device. The /dev/kmem device is rarely used, but can be used for certain diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 478f8ace2664..4a57b8f21488 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -39,7 +39,6 @@ config ARM_GIC_V3_ITS bool depends on PCI depends on PCI_MSI - select ACPI_IORT if ACPI config ARM_NVIC bool diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4ee55274f155..45629f4b5402 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (&m->list == &kclist_head) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if (is_vmalloc_or_module_addr((void *)start)) { + } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ if (copy_to_user(buffer, buf, tsz)) diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 3ff9acea8616..8379d406ad2e 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); void acpi_configure_pmsi_domain(struct device *dev); @@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); #else static inline void acpi_iort_init(void) { } -static inline bool iort_node_match(u8 type) { return false; } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) { return req_id; } static inline struct irq_domain *iort_get_device_domain(struct device *dev, |