perf: Add support for exclude_idle attribute

Enable the perf exclude_idle event attribute to avoid waking possibly
sleeping CPUs. The counter values are updated when a CPU enters idle.
If the CPU is idle when perf reads the current value of an exclude_idle
event, the value recorded when the CPU went idle is returned.
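
For reference, a minimal user-space sketch (not part of this patch) of
how a profiler might request such a counter through perf_event_open().
The hardware event, CPU number and error handling are illustrative only:

  #include <linux/perf_event.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <stdint.h>
  #include <string.h>
  #include <stdio.h>

  int main(void)
  {
          struct perf_event_attr attr;
          uint64_t count;
          int fd;

          memset(&attr, 0, sizeof(attr));
          attr.size = sizeof(attr);
          attr.type = PERF_TYPE_HARDWARE;
          attr.config = PERF_COUNT_HW_CPU_CYCLES;
          attr.exclude_idle = 1;          /* don't count while the CPU is idle */

          /* pid = -1, cpu = 0: CPU-wide event on CPU 0, any task */
          fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
          if (fd < 0)
                  return 1;

          sleep(1);
          if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                  printf("cycles (idle excluded): %llu\n",
                         (unsigned long long)count);
          close(fd);
          return 0;
  }

Before this change the arm64 PMU rejected exclude_idle with -EPERM (the
check removed in the first hunk below). A CPU-wide event like the one
above normally also requires CAP_SYS_ADMIN; the find_get_context hunk
relaxes that check only for kernel-owned events.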
This commit supersedes the commits below. Their context has changed
too much to allow cherry-picking them.
commit 573979dee2a7 ("perf: Add support for exclude_idle attribute")
commit 54f6e4ae87be ("perf: Enable updating exclude_idle events at idle")
commit 960dbb1751f3 ("perf: Skip permission checks on kernel owned perf events")

Squashed commit:
Perf: arm64: enable idle_update at end of probe

Currently armv8pmu_idle_update is enabled before arm_pmu_device_probe
has completed. arm_pmu_device_probe initializes some structures needed
by armv8pmu_idle_update, so armv8pmu_idle_update can be called before
these structures are ready, leading to segmentation faults. Set a flag
to prevent armv8pmu_idle_update from accessing these structures until
arm_pmu_device_probe has finished. Also move the call to
perf_event_cpu_hp_init after arm_pmu_device_probe so that hotplug does
not overwrite the flag.
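
The guard could look roughly like the sketch below; the flag, the
wrapper and the probe function names are illustrative, not the
identifiers used in this patch:

  static bool armv8pmu_probe_done;        /* hypothetical flag name */

  static void armv8pmu_idle_update_guarded(struct arm_pmu *cpu_pmu)
  {
          /* Bail out until arm_pmu_device_probe() has set up hw_events. */
          if (!READ_ONCE(armv8pmu_probe_done))
                  return;

          armv8pmu_idle_update(cpu_pmu);
  }

  static int armv8_pmu_device_probe_sketch(struct platform_device *pdev)
  {
          int ret = arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);

          if (!ret)
                  WRITE_ONCE(armv8pmu_probe_done, true);

          /* perf_event_cpu_hp_init() would run only after this point,
           * so the hotplug path cannot clobber the flag.
           */
          return ret;
  }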

Change-Id: Ib554c9fe106963ec1b42e72aeaf84fc73201bbb7
Signed-off-by: Patrick Fay <pfay@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>

arch/arm64/kernel/perf_event.c

@@ -990,9 +990,6 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 {
         unsigned long config_base = 0;
 
-        if (attr->exclude_idle)
-                return -EPERM;
-
         /*
          * If we're running in hyp mode, then we *are* the hypervisor.
          * Therefore we ignore exclude_hv in this configuration, since
@@ -1140,6 +1137,52 @@ static void __armv8pmu_probe_pmu(void *info)
                             pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 }
 
+static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
+{
+        struct pmu_hw_events *hw_events;
+        struct perf_event *event;
+        int idx;
+
+        if (!cpu_pmu)
+                return;
+
+        hw_events = this_cpu_ptr(cpu_pmu->hw_events);
+        if (!hw_events)
+                return;
+
+        for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+                if (!test_bit(idx, hw_events->used_mask))
+                        continue;
+
+                event = hw_events->events[idx];
+
+                if (!event || !event->attr.exclude_idle ||
+                                event->state != PERF_EVENT_STATE_ACTIVE)
+                        continue;
+
+                cpu_pmu->pmu.read(event);
+        }
+}
+
+struct arm_pmu_and_idle_nb {
+        struct arm_pmu *cpu_pmu;
+        struct notifier_block perf_cpu_idle_nb;
+};
+
+static int perf_cpu_idle_notifier(struct notifier_block *nb,
+                                unsigned long action, void *data)
+{
+        struct arm_pmu_and_idle_nb *pmu_nb = container_of(nb,
+                                struct arm_pmu_and_idle_nb, perf_cpu_idle_nb);
+
+        if (action == IDLE_START)
+                armv8pmu_idle_update(pmu_nb->cpu_pmu);
+
+        return NOTIFY_OK;
+}
+
 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 {
         struct armv8pmu_probe_info probe = {
@@ -1147,6 +1190,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
                 .present = false,
         };
         int ret;
+        struct arm_pmu_and_idle_nb *pmu_idle_nb;
+
+        pmu_idle_nb = devm_kzalloc(&cpu_pmu->plat_device->dev,
+                                sizeof(*pmu_idle_nb), GFP_KERNEL);
+        if (!pmu_idle_nb)
+                return -ENOMEM;
+
+        pmu_idle_nb->cpu_pmu = cpu_pmu;
+        pmu_idle_nb->perf_cpu_idle_nb.notifier_call = perf_cpu_idle_notifier;
 
         ret = smp_call_function_any(&cpu_pmu->supported_cpus,
                                     __armv8pmu_probe_pmu,
@@ -1154,7 +1206,13 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
         if (ret)
                 return ret;
 
-        return probe.present ? 0 : -ENODEV;
+        if (!probe.present)
+                return -ENODEV;
+
+        idle_notifier_register(&pmu_idle_nb->perf_cpu_idle_nb);
+
+        return 0;
 }
 
 static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
@@ -1319,9 +1377,27 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
         {},
 };
 
+/*
+ * Non DT systems have their micro/arch events probed at run-time.
+ * A fairly complete list of generic events are provided and ones that
+ * aren't supported by the current PMU are disabled.
+ */
+static const struct pmu_probe_info armv8_pmu_probe_table[] = {
+        PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */
+        { /* sentinel value */ }
+};
+
 static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-        return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+        int ret;
+
+        /* set to true so armv8pmu_idle_update doesn't try to load
+         * hw_events before arm_pmu_device_probe has initialized it.
+         */
+        ret = arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
+                                (acpi_disabled ? NULL : armv8_pmu_probe_table));
+
+        return ret;
 }
 
 static struct platform_driver armv8_pmu_driver = {

kernel/events/core.c

@@ -378,6 +378,7 @@ static atomic_t perf_sched_count;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
+static DEFINE_PER_CPU(bool, is_idle);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -3975,6 +3976,7 @@ static int perf_event_read(struct perf_event *event, bool group)
 {
         enum perf_event_state state = READ_ONCE(event->state);
         int event_cpu, ret = 0;
+        bool active_event_skip_read = false;
 
         /*
          * If event is enabled and currently active on a CPU, update the
@@ -3982,7 +3984,6 @@ static int perf_event_read(struct perf_event *event, bool group)
          */
 again:
         if (state == PERF_EVENT_STATE_ACTIVE) {
-                struct perf_read_data data;
 
                 /*
                  * Orders the ->state and ->oncpu loads such that if we see
@@ -3995,8 +3996,14 @@ static int perf_event_read(struct perf_event *event, bool group)
                 event_cpu = READ_ONCE(event->oncpu);
                 if ((unsigned)event_cpu >= nr_cpu_ids)
                         return 0;
 
-                data = (struct perf_read_data){
+                if (cpu_isolated(event_cpu) ||
+                    (event->attr.exclude_idle &&
+                     per_cpu(is_idle, event_cpu)))
+                        active_event_skip_read = true;
+        }
+
+        if (state == PERF_EVENT_STATE_ACTIVE &&
+            !active_event_skip_read) {
+                struct perf_read_data data = {
                         .event = event,
                         .group = group,
                         .ret = 0,
@@ -4015,16 +4022,16 @@ static int perf_event_read(struct perf_event *event, bool group)
                  * Therefore, either way, we'll have an up-to-date event count
                  * after this.
                  */
-                (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
+                (void)smp_call_function_single(event_cpu,
+                                __perf_event_read, &data, 1);
                 preempt_enable();
                 ret = data.ret;
-        } else if (state == PERF_EVENT_STATE_INACTIVE) {
+        } else if (state == PERF_EVENT_STATE_INACTIVE ||
+                        active_event_skip_read) {
                 struct perf_event_context *ctx = event->ctx;
                 unsigned long flags;
 
                 raw_spin_lock_irqsave(&ctx->lock, flags);
                 state = event->state;
                 if (state != PERF_EVENT_STATE_INACTIVE) {
                         raw_spin_unlock_irqrestore(&ctx->lock, flags);
                         goto again;
@@ -4119,7 +4126,8 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
         if (!task) {
                 /* Must be root to operate on a CPU event: */
-                if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+                if (!is_kernel_event(event) && perf_paranoid_cpu() &&
+                                !capable(CAP_SYS_ADMIN))
                         return ERR_PTR(-EACCES);
 
                 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -11705,6 +11713,26 @@ static struct notifier_block perf_reboot_notifier = {
         .priority = INT_MIN,
 };
 
+static int event_idle_notif(struct notifier_block *nb, unsigned long action,
+                                                        void *data)
+{
+        switch (action) {
+        case IDLE_START:
+                __this_cpu_write(is_idle, true);
+                break;
+        case IDLE_END:
+                __this_cpu_write(is_idle, false);
+                break;
+        }
+
+        return NOTIFY_OK;
+}
+
+static struct notifier_block perf_event_idle_nb = {
+        .notifier_call = event_idle_notif,
+};
+
 void __init perf_event_init(void)
 {
         int ret;
@@ -11718,6 +11746,7 @@ void __init perf_event_init(void)
         perf_pmu_register(&perf_task_clock, NULL, -1);
         perf_tp_register();
         perf_event_init_cpu(smp_processor_id());
+        idle_notifier_register(&perf_event_idle_nb);
         register_reboot_notifier(&perf_reboot_notifier);
 
         ret = init_hw_breakpoint();