Peter Zijlstra a8b0ca17b8 perf: Remove the nmi parameter from the swevent and overflow interface
The nmi parameter indicated if we could do wakeups from the current
context, if not, we would set some state and self-IPI and let the
resulting interrupt do the wakeup.

For the various event classes:

  - hardware: nmi=0; PMI is in fact an NMI or we run irq_work_run from
    the PMI-tail (ARM etc.)
  - tracepoint: nmi=0; since tracepoint could be from NMI context.
  - software: nmi=[0,1]; some, like the schedule thing cannot
    perform wakeups, and hence need 0.

As one can see, there is very little nmi=1 usage, and the down-side of
not using it is that on some platforms some software events can have a
jiffy delay in wakeup (when arch_irq_work_raise isn't implemented).

The up-side however is that we can remove the nmi parameter and save a
bunch of conditionals in fast paths.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Michael Cree <mcree@orcon.net.nz>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Don Zickus <dzickus@redhat.com>
Link: http://lkml.kernel.org/n/tip-agjev8eu666tvknpb3iaj0fg@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-07-01 11:06:35 +02:00

589 lines
14 KiB
C

/*
* Linux performance counter support for MIPS.
*
* Copyright (C) 2010 MIPS Technologies, Inc.
* Author: Deng-Cheng Zhu
*
* This code is based on the implementation for ARM, which is in turn
* based on the sparc64 perf event code and the x86 code. Performance
* counter access is based on the MIPS Oprofile code. And the callchain
* support references the code of MIPS stacktrace.c.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/stacktrace.h>
#include <asm/time.h> /* For perf_irq */
/* These are for 32bit counters. For 64bit ones, define them accordingly. */
#define MAX_PERIOD ((1ULL << 32) - 1)
#define VALID_COUNT 0x7fffffff
#define TOTAL_BITS 32
#define HIGHEST_BIT 31
#define MIPS_MAX_HWEVENTS 4
struct cpu_hw_events {
/* Array of events on this cpu. */
struct perf_event *events[MIPS_MAX_HWEVENTS];
/*
* Set the bit (indexed by the counter number) when the counter
* is used for an event.
*/
unsigned long used_mask[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
/*
* The borrowed MSB for the performance counter. A MIPS performance
* counter uses its bit 31 (for 32bit counters) or bit 63 (for 64bit
* counters) as a factor of determining whether a counter overflow
* should be signaled. So here we use a separate MSB for each
* counter to make things easy.
*/
unsigned long msbs[BITS_TO_LONGS(MIPS_MAX_HWEVENTS)];
/*
* Software copy of the control register for each performance counter.
* MIPS CPUs vary in performance counters. They use this differently,
* and even may not use it.
*/
unsigned int saved_ctrl[MIPS_MAX_HWEVENTS];
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.saved_ctrl = {0},
};
/* The description of MIPS performance events. */
struct mips_perf_event {
unsigned int event_id;
/*
* MIPS performance counters are indexed starting from 0.
* CNTR_EVEN indicates the indexes of the counters to be used are
* even numbers.
*/
unsigned int cntr_mask;
#define CNTR_EVEN 0x55555555
#define CNTR_ODD 0xaaaaaaaa
#ifdef CONFIG_MIPS_MT_SMP
enum {
T = 0,
V = 1,
P = 2,
} range;
#else
#define T
#define V
#define P
#endif
};
static struct mips_perf_event raw_event;
static DEFINE_MUTEX(raw_event_mutex);
#define UNSUPPORTED_PERF_EVENT_ID 0xffffffff
#define C(x) PERF_COUNT_HW_CACHE_##x
struct mips_pmu {
const char *name;
int irq;
irqreturn_t (*handle_irq)(int irq, void *dev);
int (*handle_shared_irq)(void);
void (*start)(void);
void (*stop)(void);
int (*alloc_counter)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
u64 (*read_counter)(unsigned int idx);
void (*write_counter)(unsigned int idx, u64 val);
void (*enable_event)(struct hw_perf_event *evt, int idx);
void (*disable_event)(int idx);
const struct mips_perf_event *(*map_raw_event)(u64 config);
const struct mips_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
const struct mips_perf_event (*cache_event_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
unsigned int num_counters;
};
static const struct mips_pmu *mipspmu;
static int
mipspmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
u64 uleft;
unsigned long flags;
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (left > (s64)MAX_PERIOD)
left = MAX_PERIOD;
local64_set(&hwc->prev_count, (u64)-left);
local_irq_save(flags);
uleft = (u64)(-left) & MAX_PERIOD;
uleft > VALID_COUNT ?
set_bit(idx, cpuc->msbs) : clear_bit(idx, cpuc->msbs);
mipspmu->write_counter(idx, (u64)(-left) & VALID_COUNT);
local_irq_restore(flags);
perf_event_update_userpage(event);
return ret;
}
static void mipspmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
unsigned long flags;
int shift = 64 - TOTAL_BITS;
s64 prev_raw_count, new_raw_count;
u64 delta;
again:
prev_raw_count = local64_read(&hwc->prev_count);
local_irq_save(flags);
/* Make the counter value be a "real" one. */
new_raw_count = mipspmu->read_counter(idx);
if (new_raw_count & (test_bit(idx, cpuc->msbs) << HIGHEST_BIT)) {
new_raw_count &= VALID_COUNT;
clear_bit(idx, cpuc->msbs);
} else
new_raw_count |= (test_bit(idx, cpuc->msbs) << HIGHEST_BIT);
local_irq_restore(flags);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
return;
}
static void mipspmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (!mipspmu)
return;
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
/* Set the period for the event. */
mipspmu_event_set_period(event, hwc, hwc->idx);
/* Enable the event. */
mipspmu->enable_event(hwc, hwc->idx);
}
static void mipspmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (!mipspmu)
return;
if (!(hwc->state & PERF_HES_STOPPED)) {
/* We are working on a local event. */
mipspmu->disable_event(hwc->idx);
barrier();
mipspmu_event_update(event, hwc, hwc->idx);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
}
static int mipspmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
perf_pmu_disable(event->pmu);
/* To look for a free counter for this event. */
idx = mipspmu->alloc_counter(cpuc, hwc);
if (idx < 0) {
err = idx;
goto out;
}
/*
* If there is an event in the counter we are going to use then
* make sure it is disabled.
*/
event->hw.idx = idx;
mipspmu->disable_event(idx);
cpuc->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
mipspmu_start(event, PERF_EF_RELOAD);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
out:
perf_pmu_enable(event->pmu);
return err;
}
static void mipspmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
mipspmu_stop(event, PERF_EF_UPDATE);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
perf_event_update_userpage(event);
}
static void mipspmu_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
/* Don't read disabled counters! */
if (hwc->idx < 0)
return;
mipspmu_event_update(event, hwc, hwc->idx);
}
static void mipspmu_enable(struct pmu *pmu)
{
if (mipspmu)
mipspmu->start();
}
static void mipspmu_disable(struct pmu *pmu)
{
if (mipspmu)
mipspmu->stop();
}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);
static int (*save_perf_irq)(void);
static int mipspmu_get_irq(void)
{
int err;
if (mipspmu->irq >= 0) {
/* Request my own irq handler. */
err = request_irq(mipspmu->irq, mipspmu->handle_irq,
IRQF_DISABLED | IRQF_NOBALANCING,
"mips_perf_pmu", NULL);
if (err) {
pr_warning("Unable to request IRQ%d for MIPS "
"performance counters!\n", mipspmu->irq);
}
} else if (cp0_perfcount_irq < 0) {
/*
* We are sharing the irq number with the timer interrupt.
*/
save_perf_irq = perf_irq;
perf_irq = mipspmu->handle_shared_irq;
err = 0;
} else {
pr_warning("The platform hasn't properly defined its "
"interrupt controller.\n");
err = -ENOENT;
}
return err;
}
static void mipspmu_free_irq(void)
{
if (mipspmu->irq >= 0)
free_irq(mipspmu->irq, NULL);
else if (cp0_perfcount_irq < 0)
perf_irq = save_perf_irq;
}
/*
* mipsxx/rm9000/loongson2 have different performance counters, they have
* specific low-level init routines.
*/
static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);
static void hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events,
&pmu_reserve_mutex)) {
/*
* We must not call the destroy function with interrupts
* disabled.
*/
on_each_cpu(reset_counters,
(void *)(long)mipspmu->num_counters, 1);
mipspmu_free_irq();
mutex_unlock(&pmu_reserve_mutex);
}
}
static int mipspmu_event_init(struct perf_event *event)
{
int err = 0;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
if (!mipspmu || event->cpu >= nr_cpumask_bits ||
(event->cpu >= 0 && !cpu_online(event->cpu)))
return -ENODEV;
if (!atomic_inc_not_zero(&active_events)) {
if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
atomic_dec(&active_events);
return -ENOSPC;
}
mutex_lock(&pmu_reserve_mutex);
if (atomic_read(&active_events) == 0)
err = mipspmu_get_irq();
if (!err)
atomic_inc(&active_events);
mutex_unlock(&pmu_reserve_mutex);
}
if (err)
return err;
err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);
return err;
}
static struct pmu pmu = {
.pmu_enable = mipspmu_enable,
.pmu_disable = mipspmu_disable,
.event_init = mipspmu_event_init,
.add = mipspmu_add,
.del = mipspmu_del,
.start = mipspmu_start,
.stop = mipspmu_stop,
.read = mipspmu_read,
};
static inline unsigned int
mipspmu_perf_event_encode(const struct mips_perf_event *pev)
{
/*
* Top 8 bits for range, next 16 bits for cntr_mask, lowest 8 bits for
* event_id.
*/
#ifdef CONFIG_MIPS_MT_SMP
return ((unsigned int)pev->range << 24) |
(pev->cntr_mask & 0xffff00) |
(pev->event_id & 0xff);
#else
return (pev->cntr_mask & 0xffff00) |
(pev->event_id & 0xff);
#endif
}
static const struct mips_perf_event *
mipspmu_map_general_event(int idx)
{
const struct mips_perf_event *pev;
pev = ((*mipspmu->general_event_map)[idx].event_id ==
UNSUPPORTED_PERF_EVENT_ID ? ERR_PTR(-EOPNOTSUPP) :
&(*mipspmu->general_event_map)[idx]);
return pev;
}
static const struct mips_perf_event *
mipspmu_map_cache_event(u64 config)
{
unsigned int cache_type, cache_op, cache_result;
const struct mips_perf_event *pev;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return ERR_PTR(-EINVAL);
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return ERR_PTR(-EINVAL);
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return ERR_PTR(-EINVAL);
pev = &((*mipspmu->cache_event_map)
[cache_type]
[cache_op]
[cache_result]);
if (pev->event_id == UNSUPPORTED_PERF_EVENT_ID)
return ERR_PTR(-EOPNOTSUPP);
return pev;
}
static int validate_event(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct hw_perf_event fake_hwc = event->hw;
/* Allow mixed event group. So return 1 to pass validation. */
if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
return 1;
return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
}
static int validate_group(struct perf_event *event)
{
struct perf_event *sibling, *leader = event->group_leader;
struct cpu_hw_events fake_cpuc;
memset(&fake_cpuc, 0, sizeof(fake_cpuc));
if (!validate_event(&fake_cpuc, leader))
return -ENOSPC;
list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
if (!validate_event(&fake_cpuc, sibling))
return -ENOSPC;
}
if (!validate_event(&fake_cpuc, event))
return -ENOSPC;
return 0;
}
/* This is needed by specific irq handlers in perf_event_*.c */
static void
handle_associated_event(struct cpu_hw_events *cpuc,
int idx, struct perf_sample_data *data, struct pt_regs *regs)
{
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc = &event->hw;
mipspmu_event_update(event, hwc, idx);
data->period = event->hw.last_period;
if (!mipspmu_event_set_period(event, hwc, idx))
return;
if (perf_event_overflow(event, data, regs))
mipspmu->disable_event(idx);
}
#include "perf_event_mipsxx.c"
/* Callchain handling code. */
/*
* Leave userspace callchain empty for now. When we find a way to trace
* the user stack callchains, we add here.
*/
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
}
static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
unsigned long reg29)
{
unsigned long *sp = (unsigned long *)reg29;
unsigned long addr;
while (!kstack_end(sp)) {
addr = *sp++;
if (__kernel_text_address(addr)) {
perf_callchain_store(entry, addr);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
}
}
}
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
unsigned long sp = regs->regs[29];
#ifdef CONFIG_KALLSYMS
unsigned long ra = regs->regs[31];
unsigned long pc = regs->cp0_epc;
if (raw_show_trace || !__kernel_text_address(pc)) {
unsigned long stack_page =
(unsigned long)task_stack_page(current);
if (stack_page && sp >= stack_page &&
sp <= stack_page + THREAD_SIZE - 32)
save_raw_perf_callchain(entry, sp);
return;
}
do {
perf_callchain_store(entry, pc);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);
#else
save_raw_perf_callchain(entry, sp);
#endif
}