android_kernel_samsung_a7y1.../mm
Miles Chen 4a226305ac mm/memcontrol.c: fix use after free in mem_cgroup_iter()
commit 54a83d6bcbf8f4700013766b974bf9190d40b689 upstream.

This patch is sent to report a use after free in mem_cgroup_iter()
that still occurs after merging commit be2657752e9e ("mm: memcg: fix
use after free in mem_cgroup_iter()").

I work with the Android kernel trees (4.9 & 4.14), and commit
be2657752e9e ("mm: memcg: fix use after free in mem_cgroup_iter()") has
been merged into those trees.  However, I can still observe the use
after free issue that commit be2657752e9e was meant to address (on
low-end devices, a few times this month).

backtrace:
        css_tryget <- crash here
        mem_cgroup_iter
        shrink_node
        shrink_zones
        do_try_to_free_pages
        try_to_free_pages
        __perform_reclaim
        __alloc_pages_direct_reclaim
        __alloc_pages_slowpath
        __alloc_pages_nodemask

To debug, I poisoned mem_cgroup before freeing it:

  static void __mem_cgroup_free(struct mem_cgroup *memcg)
  {
        for_each_node(node)
                free_mem_cgroup_per_node_info(memcg, node);
        free_percpu(memcg->stat);
  +     /* poison memcg before freeing it */
  +     memset(memcg, 0x78, sizeof(struct mem_cgroup));
        kfree(memcg);
  }

The coredump shows that the memcg at position=0xdbbc2a00 has already
been freed (note the 0x78 poison pattern in the dump below).

  (gdb) p/x ((struct mem_cgroup_per_node *)0xe5009e00)->iter[8]
  $13 = {position = 0xdbbc2a00, generation = 0x2efd}

  0xdbbc2a00:     0xdbbc2e00      0x00000000      0xdbbc2800      0x00000100
  0xdbbc2a10:     0x00000200      0x78787878      0x00026218      0x00000000
  0xdbbc2a20:     0xdcad6000      0x00000001      0x78787800      0x00000000
  0xdbbc2a30:     0x78780000      0x00000000      0x0068fb84      0x78787878
  0xdbbc2a40:     0x78787878      0x78787878      0x78787878      0xe3fa5cc0
  0xdbbc2a50:     0x78787878      0x78787878      0x00000000      0x00000000
  0xdbbc2a60:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a70:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a80:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a90:     0x00000001      0x00000000      0x00000000      0x00100000
  0xdbbc2aa0:     0x00000001      0xdbbc2ac8      0x00000000      0x00000000
  0xdbbc2ab0:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2ac0:     0x00000000      0x00000000      0xe5b02618      0x00001000
  0xdbbc2ad0:     0x00000000      0x78787878      0x78787878      0x78787878
  0xdbbc2ae0:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2af0:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b00:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b10:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b20:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b30:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b40:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b50:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b60:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b70:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b80:     0x78787878      0x78787878      0x00000000      0x78787878
  0xdbbc2b90:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2ba0:     0x78787878      0x78787878      0x78787878      0x78787878

In the reclaim path, try_to_free_pages() does not set up
sc.target_mem_cgroup, and sc is passed on to do_try_to_free_pages(), ...,
shrink_node().

In mem_cgroup_iter(), root is set to root_mem_cgroup because
sc->target_mem_cgroup is NULL.  It is possible to assign a memcg to
root_mem_cgroup.nodeinfo.iter in mem_cgroup_iter().

        try_to_free_pages
        	struct scan_control sc = {...}, target_mem_cgroup is 0x0;
        do_try_to_free_pages
        shrink_zones
        shrink_node
        	 mem_cgroup *root = sc->target_mem_cgroup;
        	 memcg = mem_cgroup_iter(root, NULL, &reclaim);
        mem_cgroup_iter()
        	if (!root)
        		root = root_mem_cgroup;
        	...

        	css = css_next_descendant_pre(css, &root->css);
        	memcg = mem_cgroup_from_css(css);
        	cmpxchg(&iter->position, pos, memcg);

My device uses memcg non-hierarchical mode.  When we release a memcg,
invalidate_reclaim_iterators() reaches only dead_memcg and its parents.
If non-hierarchical mode is used, invalidate_reclaim_iterators() never
reaches root_mem_cgroup.

  static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  {
        struct mem_cgroup *memcg = dead_memcg;

        for (; memcg; memcg = parent_mem_cgroup(memcg))
        ...
  }

So the use after free scenario looks like:

  CPU1						CPU2

  try_to_free_pages
  do_try_to_free_pages
  shrink_zones
  shrink_node
  mem_cgroup_iter()
      if (!root)
      	root = root_mem_cgroup;
      ...
      css = css_next_descendant_pre(css, &root->css);
      memcg = mem_cgroup_from_css(css);
      cmpxchg(&iter->position, pos, memcg);

        				invalidate_reclaim_iterators(memcg);
        				...
        				__mem_cgroup_free()
        					kfree(memcg);

  try_to_free_pages
  do_try_to_free_pages
  shrink_zones
  shrink_node
  mem_cgroup_iter()
      if (!root)
      	root = root_mem_cgroup;
      ...
      mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
      iter = &mz->iter[reclaim->priority];
      pos = READ_ONCE(iter->position);
      css_tryget(&pos->css) <- use after free

To avoid this, we should also invalidate root_mem_cgroup.nodeinfo.iter
in invalidate_reclaim_iterators().

[cai@lca.pw: fix -Wparentheses compilation warning]
  Link: http://lkml.kernel.org/r/1564580753-17531-1-git-send-email-cai@lca.pw
Link: http://lkml.kernel.org/r/20190730015729.4406-1-miles.chen@mediatek.com
Fixes: 5ac8fb31ad2e ("mm: memcontrol: convert reclaim iterator to simple css refcounting")
Signed-off-by: Miles Chen <miles.chen@mediatek.com>
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-04-06 20:47:53 +02:00
..
kasan A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
backing-dev.c writeback: synchronize sync(2) against cgroup writeback membership switches 2020-04-06 18:12:57 +02:00
balloon_compaction.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
bootmem.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
cleancache.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
cma_debug.c mm/cma_debug.c: fix the break condition in cma_maxchunk_get() 2020-04-06 19:01:41 +02:00
cma.c mm/cma.c: fail if fixed declaration can't be honored 2020-04-06 20:27:19 +02:00
cma.h A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
compaction.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
debug-pagealloc.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
debug.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
dmapool.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
early_ioremap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
fadvise.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
failslab.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
filemap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
frame_vector.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
frontswap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
gup.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
highmem.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
hpa.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
huge_memory.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
hugetlb_cgroup.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
hugetlb.c hugetlbfs: on restore reserve error path retain subpool reservation 2020-04-06 19:01:37 +02:00
hwpoison-inject.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
init-mm.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
internal.h A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
interval_tree.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
io_record.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
Kconfig A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
Kconfig.debug A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
kmemcheck.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
kmemleak-test.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
kmemleak.c mm/kmemleak.c: fix check for softirq context 2020-04-06 20:16:21 +02:00
ksm.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
list_lru.c mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node 2020-04-06 19:02:54 +02:00
maccess.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
madvise.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
Makefile A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
memblock.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
memcontrol.c mm/memcontrol.c: fix use after free in mem_cgroup_iter() 2020-04-06 20:47:53 +02:00
memory_hotplug.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
memory-failure.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
memory.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mempolicy.c mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified 2020-04-06 13:43:53 +02:00
mempool.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
memtest.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
migrate.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mincore.c mm/mincore.c: make mincore() more conservative 2020-04-06 18:12:28 +02:00
mlock.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mm_init.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mmap.c Revert "coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping" 2020-04-06 22:47:46 +05:30
mmu_context.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mmu_notifier.c mm/mmu_notifier: use hlist_add_head_rcu() 2020-04-06 20:16:25 +02:00
mmzone.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mprotect.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
mremap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
msync.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
nobootmem.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
nommu.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
oom_kill.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page_alloc.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page_counter.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page_ext.c mm/page_ext.c: fix an imbalance with kmemleak 2020-04-06 14:22:59 +02:00
page_idle.c mm/page_idle.c: fix oops because end_pfn is larger than max_pfn 2020-04-06 19:14:16 +02:00
page_io.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page_isolation.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page_owner.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
page-writeback.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
pagewalk.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
percpu-km.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
percpu-vm.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
percpu.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
pgtable-generic.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
process_vm_access.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
quicklist.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
readahead.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
rmap.c mm/rmap: replace BUG_ON(anon_vma->degree) with VM_WARN_ON 2020-04-06 11:16:40 +02:00
shmem.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
showmem_extra.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
slab_common.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
slab.c mm/slab.c: kmemleak no scan alien caches 2020-04-06 14:23:23 +02:00
slab.h A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
slob.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
slub.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
sparse-vmemmap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
sparse.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
swap_cgroup.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
swap_state.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
swap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
swapfile.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
truncate.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
usercopy.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
userfaultfd.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
util.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
vmacache.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
vmalloc.c mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy() 2020-04-06 20:38:23 +02:00
vmpressure.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
vmscan.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
vmstat.c mm, vmstat: make quiet_vmstat lighter 2020-04-06 20:25:57 +02:00
workingset.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
zbud.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
zpool.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
zsmalloc.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30
zswap.c A750FXXU4CTBC 2020-03-27 21:51:54 +05:30