android_kernel_motorola_sm6225/mm/usercopy.c
Ivaylo Georgiev 63399e395a Merge android-4.19-q.79 (40321f2) into msm-4.19
* refs/heads/tmp-40321f2:
  Linux 4.19.79
  nl80211: validate beacon head
  cfg80211: Use const more consistently in for_each_element macros
  cfg80211: add and use strongly typed element iteration macros
  staging: erofs: detect potential multiref due to corrupted images
  staging: erofs: add two missing erofs_workgroup_put for corrupted images
  staging: erofs: some compressed cluster should be submitted for corrupted images
  staging: erofs: fix an error handling in erofs_readdir()
  coresight: etm4x: Use explicit barriers on enable/disable
  vfs: Fix EOVERFLOW testing in put_compat_statfs64
  arm64/speculation: Support 'mitigations=' cmdline option
  arm64: Use firmware to detect CPUs that are not affected by Spectre-v2
  arm64: Force SSBS on context switch
  arm64: ssbs: Don't treat CPUs with SSBS as unaffected by SSB
  arm64: add sysfs vulnerability show for speculative store bypass
  arm64: add sysfs vulnerability show for spectre-v2
  arm64: Always enable spectre-v2 vulnerability detection
  arm64: Advertise mitigation of Spectre-v2, or lack thereof
  arm64: Provide a command line to disable spectre_v2 mitigation
  arm64: Always enable ssb vulnerability detection
  arm64: enable generic CPU vulnerabilites support
  arm64: add sysfs vulnerability show for meltdown
  arm64: Add sysfs vulnerability show for spectre-v1
  arm64: fix SSBS sanitization
  arm64: docs: Document SSBS HWCAP
  KVM: arm64: Set SCTLR_EL2.DSSBS if SSBD is forcefully disabled and !vhe
  arm64: ssbd: Add support for PSTATE.SSBS rather than trapping to EL3
  riscv: Avoid interrupts being erroneously enabled in handle_exception()
  perf stat: Reset previous counts on repeat with interval
  perf tools: Fix segfault in cpu_cache_level__read()
  tick: broadcast-hrtimer: Fix a race in bc_set_next
  tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure
  powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag
  powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt()
  nbd: fix crash when the blksize is zero
  KVM: nVMX: Fix consistency check on injected exception error code
  KVM: PPC: Book3S HV: XIVE: Free escalation interrupts before disabling the VP
  drm/radeon: Bail earlier when radeon.cik_/si_support=0 is passed
  nfp: flower: fix memory leak in nfp_flower_spawn_vnic_reprs
  perf unwind: Fix libunwind build failure on i386 systems
  kernel/elfcore.c: include proper prototypes
  perf build: Add detection of java-11-openjdk-devel package
  sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr()
  sched/membarrier: Fix private expedited registration check
  sched/membarrier: Call sync_core only before usermode for same mm
  libnvdimm/nfit_test: Fix acpi_handle redefinition
  fuse: fix memleak in cuse_channel_open
  libnvdimm/region: Initialize bad block for volatile namespaces
  thermal_hwmon: Sanitize thermal_zone type
  thermal: Fix use-after-free when unregistering thermal zone device
  ntb: point to right memory window index
  x86/purgatory: Disable the stackleak GCC plugin for the purgatory
  pwm: stm32-lp: Add check in case requested period cannot be achieved
  pNFS: Ensure we do clear the return-on-close layout stateid on fatal errors
  drm/amdgpu: Check for valid number of registers to read
  drm/amdgpu: Fix KFD-related kernel oops on Hawaii
  netfilter: nf_tables: allow lookups in dynamic sets
  watchdog: aspeed: Add support for AST2600
  ceph: reconnect connection if session hang in opening state
  ceph: fix directories inode i_blkbits initialization
  xen/pci: reserve MCFG areas earlier
  9p: avoid attaching writeback_fid on mmap with type PRIVATE
  9p: Transport error uninitialized
  fs: nfs: Fix possible null-pointer dereferences in encode_attrs()
  ima: fix freeing ongoing ahash_request
  ima: always return negative code for error
  arm64: cpufeature: Detect SSBS and advertise to userspace
  cfg80211: initialize on-stack chandefs
  s390/cio: avoid calling strlen on null pointer
  ieee802154: atusb: fix use-after-free at disconnect
  xen/xenbus: fix self-deadlock after killing user process
  Revert "locking/pvqspinlock: Don't wait if vCPU is preempted"
  mmc: sdhci-of-esdhc: set DMA snooping based on DMA coherence
  mmc: sdhci: improve ADMA error reporting
  drm/i915/gvt: update vgpu workload head pointer correctly
  drm/nouveau/kms/nv50-: Don't create MSTMs for eDP connectors
  drm/msm/dsi: Fix return value check for clk_get_parent
  drm/omap: fix max fclk divider for omap36xx
  perf stat: Fix a segmentation fault when using repeat forever
  watchdog: imx2_wdt: fix min() calculation in imx2_wdt_set_timeout
  PCI: Restore Resizable BAR size bits correctly for 1MB BARs
  PCI: vmd: Fix shadow offsets to reflect spec changes
  timer: Read jiffies once when forwarding base clk
  usercopy: Avoid HIGHMEM pfn warning
  tracing: Make sure variable reference alias has correct var_ref_idx
  power: supply: sbs-battery: only return health when battery present
  power: supply: sbs-battery: use correct flags field
  MIPS: Treat Loongson Extensions as ASEs
  crypto: ccree - use the full crypt length value
  crypto: ccree - account for TEE not ready to report
  crypto: caam - fix concurrency issue in givencrypt descriptor
  crypto: cavium/zip - Add missing single_release()
  crypto: skcipher - Unmap pages after an external error
  crypto: qat - Silence smp_processor_id() warning
  tools lib traceevent: Fix "robust" test of do_generate_dynamic_list_file
  can: mcp251x: mcp251x_hw_reset(): allow more time after a reset
  powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions
  powerpc/powernv/ioda: Fix race in TCE level allocation
  powerpc/powernv: Restrict OPAL symbol map to only be readable by root
  powerpc/mce: Schedule work from irq_work
  powerpc/mce: Fix MCE handling for huge pages
  ASoC: sgtl5000: Improve VAG power and mute control
  ASoC: Define a set of DAPM pre/post-up events
  PM / devfreq: tegra: Fix kHz to Hz conversion
  nbd: fix max number of supported devs
  KVM: nVMX: handle page fault in vmread fix
  KVM: X86: Fix userspace set invalid CR4
  KVM: PPC: Book3S HV: Don't lose pending doorbell request on migration on P9
  KVM: PPC: Book3S HV: Check for MMU ready on piggybacked virtual cores
  KVM: PPC: Book3S HV: Fix race in re-enabling XIVE escalation interrupts
  s390/cio: exclude subchannels with no parent from pseudo check
  s390/topology: avoid firing events before kobjs are created
  KVM: s390: Test for bad access register and size at the start of S390_MEM_OP
  s390/process: avoid potential reading of freed stack
  ANDROID: cuttlefish_defconfig: Enable BPF_JIT and BPF_JIT_ALWAYS_ON
  ANDROID: arm64: bpf: implement arch_bpf_jit_check_func
  ANDROID: bpf: validate bpf_func when BPF_JIT is enabled with CFI
  UPSTREAM: kcm: use BPF_PROG_RUN

Conflicts:
	arch/arm64/include/asm/cpucaps.h
	arch/arm64/include/asm/sysreg.h
	arch/arm64/kernel/cpu_errata.c
	arch/arm64/kernel/cpufeature.c
	arch/arm64/kernel/process.c
	drivers/mmc/host/sdhci.c
	include/linux/ieee80211.h
	kernel/sched/core.c
	net/wireless/scan.c

Change-Id: I57a49ff57133edeaed48cb90e2b03fa453bf1451
Signed-off-by: Ivaylo Georgiev <irgeorgiev@codeaurora.org>
2019-10-21 05:07:30 -07:00

319 lines
9.9 KiB
C

/*
* This implements the various checks for CONFIG_HARDENED_USERCOPY*,
* which are designed to protect kernel memory from needless exposure
* and overwrite under many unintended conditions. This code is based
* on PAX_USERCOPY, which is:
*
* Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
* Security Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
/*
* Checks if a given pointer and length is contained by the current
* stack frame (if possible).
*
* Returns:
* NOT_STACK: not at all on the stack
* GOOD_FRAME: fully within a valid stack frame
* GOOD_STACK: fully on the stack (when can't do frame-checking)
* BAD_STACK: error condition (invalid stack position or bad stack frame)
*/
static noinline int check_stack_object(const void *obj, unsigned long len)
{
const void * const stack = task_stack_page(current);
const void * const stackend = stack + THREAD_SIZE;
int ret;
/* Object is not on the stack at all. */
if (obj + len <= stack || stackend <= obj)
return NOT_STACK;
/*
* Reject: object partially overlaps the stack (passing the
* the check above means at least one end is within the stack,
* so if this check fails, the other end is outside the stack).
*/
if (obj < stack || stackend < obj + len)
return BAD_STACK;
/* Check if object is safely within a valid frame. */
ret = arch_within_stack_frames(stack, stackend, obj, len);
if (ret)
return ret;
return GOOD_STACK;
}
/*
* If these functions are reached, then CONFIG_HARDENED_USERCOPY has found
* an unexpected state during a copy_from_user() or copy_to_user() call.
* There are several checks being performed on the buffer by the
* __check_object_size() function. Normal stack buffer usage should never
* trip the checks, and kernel text addressing will always trip the check.
* For cache objects, it is checking that only the whitelisted range of
* bytes for a given cache is being accessed (via the cache's usersize and
* useroffset fields). To adjust a cache whitelist, use the usercopy-aware
* kmem_cache_create_usercopy() function to create the cache (and
* carefully audit the whitelist range).
*/
void usercopy_warn(const char *name, const char *detail, bool to_user,
unsigned long offset, unsigned long len)
{
WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
to_user ? "exposure" : "overwrite",
to_user ? "from" : "to",
name ? : "unknown?!",
detail ? " '" : "", detail ? : "", detail ? "'" : "",
offset, len);
}
void __noreturn usercopy_abort(const char *name, const char *detail,
bool to_user, unsigned long offset,
unsigned long len)
{
pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
to_user ? "exposure" : "overwrite",
to_user ? "from" : "to",
name ? : "unknown?!",
detail ? " '" : "", detail ? : "", detail ? "'" : "",
offset, len);
/*
* For greater effect, it would be nice to do do_group_exit(),
* but BUG() actually hooks all the lock-breaking and per-arch
* Oops code, so that is used here instead.
*/
BUG();
}
/* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */
static bool overlaps(const unsigned long ptr, unsigned long n,
unsigned long low, unsigned long high)
{
const unsigned long check_low = ptr;
unsigned long check_high = check_low + n;
/* Does not overlap if entirely above or entirely below. */
if (check_low >= high || check_high <= low)
return false;
return true;
}
/* Is this address range in the kernel text area? */
static inline void check_kernel_text_object(const unsigned long ptr,
unsigned long n, bool to_user)
{
unsigned long textlow = (unsigned long)_stext;
unsigned long texthigh = (unsigned long)_etext;
unsigned long textlow_linear, texthigh_linear;
if (overlaps(ptr, n, textlow, texthigh))
usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n);
/*
* Some architectures have virtual memory mappings with a secondary
* mapping of the kernel text, i.e. there is more than one virtual
* kernel address that points to the kernel image. It is usually
* when there is a separate linear physical memory mapping, in that
* __pa() is not just the reverse of __va(). This can be detected
* and checked:
*/
textlow_linear = (unsigned long)lm_alias(textlow);
/* No different mapping: we're done. */
if (textlow_linear == textlow)
return;
/* Check the secondary mapping... */
texthigh_linear = (unsigned long)lm_alias(texthigh);
if (overlaps(ptr, n, textlow_linear, texthigh_linear))
usercopy_abort("linear kernel text", NULL, to_user,
ptr - textlow_linear, n);
}
static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
bool to_user)
{
/* Reject if object wraps past end of memory. */
if (ptr + (n - 1) < ptr)
usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n);
/* Reject if NULL or ZERO-allocation. */
if (ZERO_OR_NULL_PTR(ptr))
usercopy_abort("null address", NULL, to_user, ptr, n);
}
/* Checks for allocs that are marked in some way as spanning multiple pages. */
static inline void check_page_span(const void *ptr, unsigned long n,
struct page *page, bool to_user)
{
#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
const void *end = ptr + n - 1;
struct page *endpage;
bool is_reserved, is_cma;
const void * const stack = task_stack_page(current);
const void * const stackend = stack + THREAD_SIZE;
/*
* Sometimes the kernel data regions are not marked Reserved (see
* check below). And sometimes [_sdata,_edata) does not cover
* rodata and/or bss, so check each range explicitly.
*/
/* Allow reads of kernel rodata region (if not marked as Reserved). */
if (ptr >= (const void *)__start_rodata &&
end <= (const void *)__end_rodata) {
if (!to_user)
usercopy_abort("rodata", NULL, to_user, 0, n);
return;
}
/* Allow kernel data region (if not marked as Reserved). */
if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
return;
/* Allow kernel bss region (if not marked as Reserved). */
if (ptr >= (const void *)__bss_start &&
end <= (const void *)__bss_stop)
return;
/* Allow stack region to span multiple pages */
if (ptr >= stack && end <= stackend)
return;
/* Is the object wholly within one base page? */
if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
((unsigned long)end & (unsigned long)PAGE_MASK)))
return;
/* Allow if fully inside the same compound (__GFP_COMP) page. */
endpage = virt_to_head_page(end);
if (likely(endpage == page))
return;
/*
* Reject if range is entirely either Reserved (i.e. special or
* device memory), or CMA. Otherwise, reject since the object spans
* several independently allocated pages.
*/
is_reserved = PageReserved(page);
is_cma = is_migrate_cma_page(page);
if (!is_reserved && !is_cma)
usercopy_abort("spans multiple pages", NULL, to_user, 0, n);
for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
page = virt_to_head_page(ptr);
if (is_reserved && !PageReserved(page))
usercopy_abort("spans Reserved and non-Reserved pages",
NULL, to_user, 0, n);
if (is_cma && !is_migrate_cma_page(page))
usercopy_abort("spans CMA and non-CMA pages", NULL,
to_user, 0, n);
}
#endif
}
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
struct page *page;
if (!virt_addr_valid(ptr))
return;
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
* highmem page or fallback to virt_to_page(). The following
* is effectively a highmem-aware virt_to_head_page().
*/
page = compound_head(kmap_to_page((void *)ptr));
if (PageSlab(page)) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, page, to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
check_page_span(ptr, n, page, to_user);
}
}
static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks);
/*
* Validates that the given object is:
* - not bogus address
* - fully contained by stack (or stack frame, when available)
* - fully within SLAB object (or object whitelist area, when available)
* - not in kernel text
*/
void __check_object_size(const void *ptr, unsigned long n, bool to_user)
{
if (static_branch_unlikely(&bypass_usercopy_checks))
return;
/* Skip all tests if size is zero. */
if (!n)
return;
/* Check for invalid addresses. */
check_bogus_address((const unsigned long)ptr, n, to_user);
/* Check for bad stack object. */
switch (check_stack_object(ptr, n)) {
case NOT_STACK:
/* Object is not touching the current process stack. */
break;
case GOOD_FRAME:
case GOOD_STACK:
/*
* Object is either in the correct frame (when it
* is possible to check) or just generally on the
* process stack (when frame checking not available).
*/
return;
default:
usercopy_abort("process stack", NULL, to_user, 0, n);
}
/* Check for bad heap object. */
check_heap_object(ptr, n, to_user);
/* Check for object in kernel to avoid text exposure. */
check_kernel_text_object((const unsigned long)ptr, n, to_user);
}
EXPORT_SYMBOL(__check_object_size);
static bool enable_checks __initdata = true;
static int __init parse_hardened_usercopy(char *str)
{
return strtobool(str, &enable_checks);
}
__setup("hardened_usercopy=", parse_hardened_usercopy);
static int __init set_hardened_usercopy(void)
{
if (enable_checks == false)
static_branch_enable(&bypass_usercopy_checks);
return 1;
}
late_initcall(set_hardened_usercopy);