munmap ends up calling tlb_flush(), which for ARC was flushing the entire TLB
unconditionally (by moving the MMU to a new ASID):

  do_munmap
    unmap_region
      unmap_vmas
        unmap_single_vma
          unmap_page_range
            tlb_start_vma
            zap_pud_range
            tlb_end_vma()
      tlb_finish_mmu
        tlb_flush() ---> unconditional flush_tlb_mm()

So even a single-page munmap, a frequent operation when the uClibc dynamic
linker (ldso) is loading the dependent shared libraries, would move the ASID
multiple times - needlessly invalidating the pre-faulted TLB entries (and
increasing the rate of ASID wraparound + full TLB flush).

This is now optimised so that flush_tlb_mm() is only called if tlb->fullmm is
set, i.e. for the exit/execve cases only. And for those cases, flush_tlb_mm()
is already optimised to be a no-op for mm->mm_users == 0.

So essentially there are no more full mm flushes - except for fork, which
anyhow needs it for properly COW'ing the parent address space.

munmap now needs to do a TLB range flush, which is implemented with
tlb_end_vma().

Results
-------
1. ASID now consistently moves by 4 during a simple ls (as opposed to 5 or 7
   before).

2. LMBench microbenchmark also shows improvements

Basic system parameters
------------------------------------------------------------------------------
Host                 OS Description              Mhz  tlb  cache  mem   scal
                                                     pages line   par   load
                                                           bytes
--------- ------------- ----------------------- ---- ----- ----- ------ ----
3.9-rc5-0 Linux 3.9.0-r 3.9-rc5-0404-gcc-4.4-ba   80     8    64 1.1000    1
3.9-rc5-0 Linux 3.9.0-r 3.9-rc5-0405-avoid-full   80     8    64 1.1200    1

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
3.9-rc5-0 Linux 3.9.0-r   80 4.81 8.69 68.6 118. 239. 8.53 31.6 4839 13.K 34.K
3.9-rc5-0 Linux 3.9.0-r   80 4.46 8.36 53.8 91.3 223. 8.12 24.2 4725 13.K 33.K

File & VM system latencies in microseconds - smaller is better
-------------------------------------------------------------------------------
Host                 OS   0K File      10K File     Mmap    Prot   Page   100fd
                        Create Delete Create Delete Latency Fault  Fault  selct
--------- ------------- ------ ------ ------ ------ ------- ----- ------- -----
3.9-rc5-0 Linux 3.9.0-r  314.7  223.2 1054.9  390.2  3615.0 1.590    20.1 126.6
3.9-rc5-0 Linux 3.9.0-r  265.8  183.8 1014.2  314.1  3193.0 6.910    18.8 110.4

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
67 lines
1.7 KiB
C
67 lines
1.7 KiB
C
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
|
|
|
|
#ifndef _ASM_ARC_TLB_H
|
|
#define _ASM_ARC_TLB_H
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
/*
 * Masks of the PTE bits that belong in each of the actual TLB "PD"s:
 *  - PD0: the global and present flags.
 *  - PD1: the PAGE_MASK bits plus the cacheable flag and the
 *    execute/write/read flags (including their _PAGE_K_* variants).
 */
#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
			 _PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
 * tlb_flush() - flush the whole mm, but only for a full-mm teardown.
 * @tlb: pointer whose ->fullmm and ->mm members are read.
 *
 * flush_tlb_mm() is called only when tlb->fullmm is set. Nothing is done
 * here otherwise; the range flush for a partial unmap lives in
 * tlb_end_vma().
 *
 * @tlb is parenthesised so that any pointer expression may be passed; note
 * that it can be evaluated twice.
 */
#define tlb_flush(tlb)				\
do {						\
	if ((tlb)->fullmm)			\
		flush_tlb_mm((tlb)->mm);	\
} while (0)
|
|
|
|
/*
 * This pair is called at time of munmap/exit to flush cache and TLB entries
 * for mappings being torn down.
 * 1) cache-flush part - implemented via tlb_start_vma() can be NOP (for now)
 *    as we don't support aliasing configs in our VIPT D$.
 * 2) tlb-flush part - implemented via tlb_end_vma() flushes the TLB range
 *
 * Note, read http://lkml.org/lkml/2004/1/15/6
 */
|
|
/* Cache-flush half of the pair: deliberately expands to nothing. */
#define tlb_start_vma(tlb, vma)
|
|
|
|
/*
 * tlb_end_vma() - TLB-flush half of the tlb_start_vma()/tlb_end_vma() pair.
 * @tlb: pointer whose ->fullmm member is read.
 * @vma: pointer whose ->vm_start and ->vm_end bound the range to flush.
 *
 * When tlb->fullmm is clear, flush only this VMA's range with
 * flush_tlb_range(). When it is set nothing is done here; that case is
 * left to tlb_flush().
 *
 * Both arguments are parenthesised so that any pointer expression may be
 * passed; note that @vma is evaluated more than once.
 */
#define tlb_end_vma(tlb, vma)						\
do {									\
	if (!(tlb)->fullmm)						\
		flush_tlb_range((vma), (vma)->vm_start, (vma)->vm_end);	\
} while (0)
|
|
|
|
/*
 * Expands to nothing; its arguments are ignored.
 * NOTE(review): presumably the per-entry hook expected by
 * <asm-generic/tlb.h>, which is included below - confirm.
 */
#define __tlb_remove_tlb_entry(tlb, ptep, address)
|
|
|
|
#include <linux/pagemap.h>
|
|
#include <asm-generic/tlb.h>
|
|
|
|
/*
 * tlb_paranoid_check() is declared as a real function only when
 * CONFIG_ARC_DBG_TLB_PARANOIA is defined. Otherwise it is a macro that
 * expands to nothing, so its arguments are not evaluated.
 */
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
void tlb_paranoid_check(unsigned int pid_sw, unsigned long address);
#else
#define tlb_paranoid_check(a, b)
#endif
|
|
|
|
void arc_mmu_init(void);
|
|
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
|
|
void __init read_decode_mmu_bcr(void);
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _ASM_ARC_TLB_H */
|