diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 194e67b1f..f7ee59d6a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1197,6 +1197,13 @@ static void binder_do_set_priority(struct task_struct *task, priority = MIN_NICE; } else if (priority > max_rtprio) { priority = max_rtprio; + + if (mm) { + down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) { + if (allocate == 0) + goto free_range; + goto err_no_vma; } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 67c4c7334..6968154a0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1042,6 +1042,8 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * mlx4_ib_vma_close(). */ down_write(&owning_mm->mmap_sem); + if (!mmget_still_valid(owning_mm)) + goto skip_mm; for (i = 0; i < HW_BAR_COUNT; i++) { vma = context->hw_bar_info[i].vma; if (!vma) @@ -1061,6 +1063,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) context->hw_bar_info[i].vma->vm_ops = NULL; } +skip_mm: up_write(&owning_mm->mmap_sem); mmput(owning_mm); put_task_struct(owning_process); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 04a3ff0c3..1f91e476d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1120,6 +1120,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, continue; up_read(&mm->mmap_sem); down_write(&mm->mmap_sem); + /* + * Avoid to modify vma->vm_flags + * without locked ops while the + * coredump reads the vm_flags. + */ + if (!mmget_still_valid(mm)) { + /* + * Silently return "count" + * like if get_task_mm() + * failed. FIXME: should this + * function have returned + * -ESRCH if get_task_mm() + * failed like if + * get_proc_task() fails? 
+ */ + up_write(&mm->mmap_sem); + goto out_mm; + } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 9de2b7a2e..08cc09b99 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -446,6 +446,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -469,6 +471,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } +skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: @@ -770,6 +773,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; @@ -929,6 +934,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b60de9e9..ac87debb7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1133,6 +1133,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); +/* + * This has to be called after a get_task_mm()/mmget_not_zero() + * followed by taking the mmap_sem for writing before modifying the + * vmas or anything the coredump pretends not to change from under it. 
+ * + * NOTE: find_extend_vma() called from GUP context is the only place + * that can modify the "mm" (notably the vm_start/end) under mmap_sem + * for reading and outside the context of the process, so it is also + * the only case that holds the mmap_sem for reading that must call + * this function. Generally if the mmap_sem is held for reading + * there's no need for this check after get_task_mm()/mmget_not_zero(). + * + * This function can be obsoleted and the check can be removed, once + * the coredump code holds the mmap_sem for writing before + * invoking the ->core_dump methods. + */ +static inline bool mmget_still_valid(struct mm_struct *mm) +{ + return likely(!mm->core_state); +} + /** * mm_walk - callbacks for walk_page_range * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry diff --git a/mm/mmap.c b/mm/mmap.c index 82c6bfc51..68b408720 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -2422,7 +2423,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + /* don't alter vm_end if the coredump is running */ + if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); @@ -2448,6 +2450,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; + /* don't alter vm_start if the coredump is running */ + if (!mmget_still_valid(mm)) + return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL;