PM / devfreq: memlat: Look for min stall% in addition to ratio criteria
Some workloads doing memory access might appear memory latency bound even though they might not actually be memory latency bound. This error can happen when the core that's running the workload is very parallelized or can do out-of-order execution, etc., so not all memory accesses would actually stall the core. This can also happen when the memory access monitoring capabilities aren't ideal and end up counting more kinds of memory accesses than what would be ideal. In this case, the IPM ratio can be lower than what it would be if we had ideal monitoring capabilities. To account for these errors, if the core has stall cycle counting capabilities, check for a minimum stall% before the workload is considered memory latency bound. This would help reduce the inaccuracies, but is not a replacement for the IPM ratio scheme because the stall% method doesn't allow us to detect which level of memory the workload is latency bound on, but the IPM ratio does (based on which memory accesses we use for calculating the ratio). Change-Id: I4363d7848584e5562f6683b5ad6b0f99017ec71b Signed-off-by: Saravana Kannan <skannan@codeaurora.org> Signed-off-by: Rama Aparna Mallavarapu <aparnam@codeaurora.org>
This commit is contained in:
@@ -28,6 +28,7 @@ enum ev_index {
|
||||
INST_IDX,
|
||||
CM_IDX,
|
||||
CYC_IDX,
|
||||
STALL_CYC_IDX,
|
||||
NUM_EVENTS
|
||||
};
|
||||
#define INST_EV 0x08
|
||||
@@ -92,12 +93,19 @@ static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
|
||||
{
|
||||
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
|
||||
struct dev_stats *devstats = to_devstats(cpu_grp, cpu);
|
||||
unsigned long cyc_cnt;
|
||||
unsigned long cyc_cnt, stall_cnt;
|
||||
|
||||
devstats->inst_count = read_event(&cpustats->events[INST_IDX]);
|
||||
devstats->mem_count = read_event(&cpustats->events[CM_IDX]);
|
||||
cyc_cnt = read_event(&cpustats->events[CYC_IDX]);
|
||||
devstats->freq = compute_freq(cpustats, cyc_cnt);
|
||||
if (cpustats->events[STALL_CYC_IDX].pevent) {
|
||||
stall_cnt = read_event(&cpustats->events[STALL_CYC_IDX]);
|
||||
stall_cnt = min(stall_cnt, cyc_cnt);
|
||||
devstats->stall_pct = mult_frac(100, stall_cnt, cyc_cnt);
|
||||
} else {
|
||||
devstats->stall_pct = 100;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long get_cnt(struct memlat_hwmon *hw)
|
||||
@@ -117,7 +125,10 @@ static void delete_events(struct cpu_pmu_stats *cpustats)
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
|
||||
cpustats->events[i].prev_count = 0;
|
||||
perf_event_release_kernel(cpustats->events[i].pevent);
|
||||
if (cpustats->events[i].pevent) {
|
||||
perf_event_release_kernel(cpustats->events[i].pevent);
|
||||
cpustats->events[i].pevent = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,6 +146,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
|
||||
devstats->inst_count = 0;
|
||||
devstats->mem_count = 0;
|
||||
devstats->freq = 0;
|
||||
devstats->stall_pct = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,6 +171,7 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
|
||||
struct perf_event *pevent;
|
||||
struct perf_event_attr *attr;
|
||||
int err, i;
|
||||
unsigned int event_id;
|
||||
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
|
||||
|
||||
/* Allocate an attribute for event initialization */
|
||||
@@ -167,7 +180,11 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
|
||||
attr->config = cpu_grp->event_ids[i];
|
||||
event_id = cpu_grp->event_ids[i];
|
||||
if (!event_id)
|
||||
continue;
|
||||
|
||||
attr->config = event_id;
|
||||
pevent = perf_event_create_kernel_counter(attr, cpu, NULL,
|
||||
NULL, NULL);
|
||||
if (IS_ERR(pevent))
|
||||
@@ -282,6 +299,13 @@ static int arm_memlat_mon_driver_probe(struct platform_device *pdev)
|
||||
}
|
||||
cpu_grp->event_ids[INST_IDX] = event_id;
|
||||
|
||||
ret = of_property_read_u32(dev->of_node, "qcom,stall-cycle-ev",
|
||||
&event_id);
|
||||
if (ret)
|
||||
dev_dbg(dev, "Stall cycle event not specified. Event ignored.\n");
|
||||
else
|
||||
cpu_grp->event_ids[STALL_CYC_IDX] = event_id;
|
||||
|
||||
for_each_cpu(cpu, &cpu_grp->cpus)
|
||||
to_devstats(cpu_grp, cpu)->id = cpu;
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
struct memlat_node {
|
||||
unsigned int ratio_ceil;
|
||||
unsigned int stall_floor;
|
||||
bool mon_started;
|
||||
bool already_zero;
|
||||
struct list_head list;
|
||||
@@ -239,9 +240,11 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
|
||||
hw->core_stats[i].id,
|
||||
hw->core_stats[i].inst_count,
|
||||
hw->core_stats[i].mem_count,
|
||||
hw->core_stats[i].freq, ratio);
|
||||
hw->core_stats[i].freq,
|
||||
hw->core_stats[i].stall_pct, ratio);
|
||||
|
||||
if (ratio <= node->ratio_ceil
|
||||
&& hw->core_stats[i].stall_pct >= node->stall_floor
|
||||
&& hw->core_stats[i].freq > max_freq) {
|
||||
lat_dev = i;
|
||||
max_freq = hw->core_stats[i].freq;
|
||||
@@ -267,9 +270,11 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
|
||||
}
|
||||
|
||||
gov_attr(ratio_ceil, 1U, 10000U);
|
||||
gov_attr(stall_floor, 0U, 100U);
|
||||
|
||||
static struct attribute *dev_attr[] = {
|
||||
&dev_attr_ratio_ceil.attr,
|
||||
&dev_attr_stall_floor.attr,
|
||||
&dev_attr_freq_map.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -21,6 +21,7 @@ struct dev_stats {
|
||||
unsigned long inst_count;
|
||||
unsigned long mem_count;
|
||||
unsigned long freq;
|
||||
unsigned long stall_pct;
|
||||
};
|
||||
|
||||
struct core_dev_map {
|
||||
|
||||
@@ -730,9 +730,10 @@ TRACE_EVENT(cache_hwmon_update,
|
||||
TRACE_EVENT(memlat_dev_meas,
|
||||
|
||||
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
|
||||
unsigned long mem, unsigned long freq, unsigned int ratio),
|
||||
unsigned long mem, unsigned long freq, unsigned int stall,
|
||||
unsigned int ratio),
|
||||
|
||||
TP_ARGS(name, dev_id, inst, mem, freq, ratio),
|
||||
TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
@@ -740,6 +741,7 @@ TRACE_EVENT(memlat_dev_meas,
|
||||
__field(unsigned long, inst)
|
||||
__field(unsigned long, mem)
|
||||
__field(unsigned long, freq)
|
||||
__field(unsigned int, stall)
|
||||
__field(unsigned int, ratio)
|
||||
),
|
||||
|
||||
@@ -749,15 +751,17 @@ TRACE_EVENT(memlat_dev_meas,
|
||||
__entry->inst = inst;
|
||||
__entry->mem = mem;
|
||||
__entry->freq = freq;
|
||||
__entry->stall = stall;
|
||||
__entry->ratio = ratio;
|
||||
),
|
||||
|
||||
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, ratio=%u",
|
||||
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u",
|
||||
__get_str(name),
|
||||
__entry->dev_id,
|
||||
__entry->inst,
|
||||
__entry->mem,
|
||||
__entry->freq,
|
||||
__entry->stall,
|
||||
__entry->ratio)
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user