sched: Add snapshot of Window Assisted Load Tracking (WALT)
This snapshot is taken from msm-4.14 as of
commit 871eac76e6be567 ("sched: Improve the scheduler").
Change-Id: Ib4e0b39526d3009cedebb626ece5a767d8247846
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
@@ -206,6 +206,21 @@ struct task_group;
|
||||
/* Task command name length: */
|
||||
#define TASK_COMM_LEN 16
|
||||
|
||||
enum task_event {
|
||||
PUT_PREV_TASK = 0,
|
||||
PICK_NEXT_TASK = 1,
|
||||
TASK_WAKE = 2,
|
||||
TASK_MIGRATE = 3,
|
||||
TASK_UPDATE = 4,
|
||||
IRQ_UPDATE = 5,
|
||||
};
|
||||
|
||||
/* Note: this needs to be in sync with the migrate_type_names array */
|
||||
enum migrate_types {
|
||||
GROUP_TO_RQ,
|
||||
RQ_TO_GROUP,
|
||||
};
|
||||
|
||||
extern void scheduler_tick(void);
|
||||
|
||||
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
|
||||
@@ -480,6 +495,102 @@ struct sched_entity {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct sched_load {
|
||||
unsigned long prev_load;
|
||||
unsigned long new_task_load;
|
||||
unsigned long predicted_load;
|
||||
};
|
||||
|
||||
struct cpu_cycle_counter_cb {
|
||||
u64 (*get_cpu_cycle_counter)(int cpu);
|
||||
};
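
struct cpu_cycle_counter_cb is the hook through which a platform driver can hand WALT a per-CPU cycle counter, which WALT later uses to estimate the frequency a task actually ran at (see the sched_get_task_cpu_cycles tracepoint added further down in this patch). A minimal, hedged sketch of a registration follows; the driver and its counter-read helper are assumptions for illustration, only the struct and register_cpu_cycle_counter_cb() come from this patch.

/*
 * Hedged sketch, not part of this patch: a platform driver exposing its
 * per-CPU cycle counter to WALT.  my_read_cpu_cycles() is a made-up helper.
 */
static u64 my_read_cpu_cycles(int cpu)
{
        return 0;       /* e.g. read a per-CPU cycle-count register here */
}

static struct cpu_cycle_counter_cb my_cycle_counter_cb = {
        .get_cpu_cycle_counter = my_read_cpu_cycles,
};

static int my_driver_probe(void)
{
        /* WALT can then turn cycle deltas into an estimated task frequency. */
        return register_cpu_cycle_counter_cb(&my_cycle_counter_cb);
}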
|
||||
|
||||
#define MAX_NUM_CGROUP_COLOC_ID 20
|
||||
|
||||
extern DEFINE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern void sched_exit(struct task_struct *p);
|
||||
extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
|
||||
extern void sched_set_io_is_busy(int val);
|
||||
extern int sched_set_group_id(struct task_struct *p, unsigned int group_id);
|
||||
extern unsigned int sched_get_group_id(struct task_struct *p);
|
||||
extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct);
|
||||
extern u32 sched_get_init_task_load(struct task_struct *p);
|
||||
extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin,
|
||||
u32 fmax);
|
||||
extern int sched_set_boost(int enable);
|
||||
extern void free_task_load_ptrs(struct task_struct *p);
|
||||
|
||||
#define RAVG_HIST_SIZE_MAX 5
|
||||
#define NUM_BUSY_BUCKETS 10
|
||||
|
||||
/* ravg represents frequency scaled cpu-demand of tasks */
|
||||
struct ravg {
|
||||
/*
|
||||
* 'mark_start' marks the beginning of an event (task waking up, task
|
||||
* starting to execute, task being preempted) within a window
|
||||
*
|
||||
* 'sum' represents how runnable a task has been within current
|
||||
* window. It incorporates both running time and wait time and is
|
||||
* frequency scaled.
|
||||
*
|
||||
* 'sum_history' keeps track of history of 'sum' seen over previous
|
||||
* RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
|
||||
* ignored.
|
||||
*
|
||||
* 'demand' represents maximum sum seen over previous
|
||||
* sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
|
||||
* demand for tasks.
|
||||
*
|
||||
* 'curr_window_cpu' represents task's contribution to cpu busy time on
|
||||
* various CPUs in the current window
|
||||
*
|
||||
* 'prev_window_cpu' represents task's contribution to cpu busy time on
|
||||
* various CPUs in the previous window
|
||||
*
|
||||
* 'curr_window' represents the sum of all entries in curr_window_cpu
|
||||
*
|
||||
* 'prev_window' represents the sum of all entries in prev_window_cpu
|
||||
*
|
||||
* 'pred_demand' represents task's current predicted cpu busy time
|
||||
*
|
||||
* 'busy_buckets' groups historical busy time into different buckets
|
||||
* used for prediction
|
||||
*
|
||||
* 'demand_scaled' represents task's demand scaled to 1024
|
||||
*/
|
||||
u64 mark_start;
|
||||
u32 sum, demand;
|
||||
u32 coloc_demand;
|
||||
u32 sum_history[RAVG_HIST_SIZE_MAX];
|
||||
u32 *curr_window_cpu, *prev_window_cpu;
|
||||
u32 curr_window, prev_window;
|
||||
u16 active_windows;
|
||||
u32 pred_demand;
|
||||
u8 busy_buckets[NUM_BUSY_BUCKETS];
|
||||
u16 demand_scaled;
|
||||
u16 pred_demand_scaled;
|
||||
};
|
||||
#else
|
||||
static inline void sched_exit(struct task_struct *p) { }
|
||||
static inline int
|
||||
register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void sched_set_io_is_busy(int val) {};
|
||||
|
||||
static inline int sched_set_boost(int enable)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static inline void free_task_load_ptrs(struct task_struct *p) { }
|
||||
|
||||
static inline void sched_update_cpu_freq_min_max(const cpumask_t *cpus,
|
||||
u32 fmin, u32 fmax) { }
|
||||
#endif /* CONFIG_SCHED_WALT */
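
The comment in struct ravg above describes 'demand' as the maximum 'sum' seen over the recorded window history. The update path itself lives in walt.c and is not part of this excerpt (the policy there is configurable), so the following is only a hedged sketch of a max-over-history reading of that comment; example_demand_from_history() is an illustrative name, not a function from this patch.

/*
 * Hedged sketch, not from this snapshot: derive a demand value as the
 * maximum busy sum recorded over the last few windows.
 */
static inline u32 example_demand_from_history(const struct ravg *ra,
                                              unsigned int hist_size)
{
        u32 demand = 0;
        unsigned int i;

        if (hist_size > RAVG_HIST_SIZE_MAX)
                hist_size = RAVG_HIST_SIZE_MAX;

        for (i = 0; i < hist_size; i++)
                demand = max(demand, ra->sum_history[i]);

        return demand;
}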
|
||||
|
||||
struct sched_rt_entity {
|
||||
struct list_head run_list;
|
||||
unsigned long timeout;
|
||||
@@ -644,6 +755,22 @@ struct task_struct {
|
||||
const struct sched_class *sched_class;
|
||||
struct sched_entity se;
|
||||
struct sched_rt_entity rt;
|
||||
u64 last_sleep_ts;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
struct ravg ravg;
|
||||
/*
|
||||
* 'init_load_pct' represents the initial task load assigned to children
|
||||
* of this task
|
||||
*/
|
||||
u32 init_load_pct;
|
||||
u64 last_wake_ts;
|
||||
u64 last_enqueued_ts;
|
||||
struct related_thread_group *grp;
|
||||
struct list_head grp_list;
|
||||
u64 cpu_cycles;
|
||||
bool misfit;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
struct task_group *sched_task_group;
|
||||
#endif
|
||||
@@ -1394,6 +1521,7 @@ extern struct pid *cad_pid;
|
||||
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
|
||||
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
|
||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||
#define PF_WAKE_UP_IDLE 0x01000000 /* TTWU on an idle CPU */
|
||||
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
||||
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
||||
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
|
||||
@@ -1904,4 +2032,32 @@ static inline void rseq_syscall(struct pt_regs *regs)
|
||||
|
||||
#endif
|
||||
|
||||
static inline u32 sched_get_wake_up_idle(struct task_struct *p)
|
||||
{
|
||||
u32 enabled = p->flags & PF_WAKE_UP_IDLE;
|
||||
|
||||
return !!enabled;
|
||||
}
|
||||
|
||||
static inline int sched_set_wake_up_idle(struct task_struct *p,
|
||||
int wake_up_idle)
|
||||
{
|
||||
int enable = !!wake_up_idle;
|
||||
|
||||
if (enable)
|
||||
p->flags |= PF_WAKE_UP_IDLE;
|
||||
else
|
||||
p->flags &= ~PF_WAKE_UP_IDLE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void set_wake_up_idle(bool enabled)
|
||||
{
|
||||
if (enabled)
|
||||
current->flags |= PF_WAKE_UP_IDLE;
|
||||
else
|
||||
current->flags &= ~PF_WAKE_UP_IDLE;
|
||||
}
|
||||
|
||||
#endif
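
The three helpers above only toggle PF_WAKE_UP_IDLE, which biases wake-up placement toward idle CPUs. A hedged usage sketch follows; the calling context is an assumption, only the helpers defined above are used.

/*
 * Hedged sketch: temporarily prefer idle CPUs for a latency-sensitive
 * section of the current task.
 */
static void example_latency_sensitive_section(void)
{
        set_wake_up_idle(true);         /* sets PF_WAKE_UP_IDLE on current */

        /* ... wake-up heavy, latency-sensitive work ... */

        set_wake_up_idle(false);        /* restore normal wake-up placement */
}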
|
||||
|
||||
include/linux/sched/core_ctl.h (new file, 28 lines)
@@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2016, 2018, The Linux Foundation. All rights reserved.
 */

#ifndef __CORE_CTL_H
#define __CORE_CTL_H

struct core_ctl_notif_data {
        unsigned int nr_big;
        unsigned int coloc_load_pct;
};

#ifdef CONFIG_SCHED_CORE_CTL
void core_ctl_check(u64 wallclock);
int core_ctl_set_boost(bool boost);
void core_ctl_notifier_register(struct notifier_block *n);
void core_ctl_notifier_unregister(struct notifier_block *n);
#else
static inline void core_ctl_check(u64 wallclock) {}
static inline int core_ctl_set_boost(bool boost)
{
        return 0;
}
static inline void core_ctl_notifier_register(struct notifier_block *n) {}
static inline void core_ctl_notifier_unregister(struct notifier_block *n) {}
#endif
#endif
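
core_ctl exposes a notifier so that clients can react to core-control evaluations. A hedged sketch of registering one follows; core_ctl.c is not part of this excerpt, so the assumption that the notifier payload is the struct core_ctl_notif_data declared above is exactly that, an assumption, and the callback body is illustrative.

#include <linux/notifier.h>
#include <linux/sched/core_ctl.h>

/* Hedged sketch: a client reacting to core_ctl notifications. */
static int example_core_ctl_cb(struct notifier_block *nb, unsigned long val,
                               void *data)
{
        struct core_ctl_notif_data *d = data;   /* assumed payload type */

        pr_debug("core_ctl: nr_big=%u coloc_load_pct=%u\n",
                 d->nr_big, d->coloc_load_pct);
        return NOTIFY_OK;
}

static struct notifier_block example_core_ctl_nb = {
        .notifier_call = example_core_ctl_cb,
};

static void example_core_ctl_register(void)
{
        core_ctl_notifier_register(&example_core_ctl_nb);
}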
|
||||
@@ -11,6 +11,12 @@
|
||||
|
||||
#define SCHED_CPUFREQ_IOWAIT (1U << 0)
|
||||
#define SCHED_CPUFREQ_MIGRATION (1U << 1)
|
||||
#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
|
||||
#define SCHED_CPUFREQ_WALT (1U << 4)
|
||||
#define SCHED_CPUFREQ_PL (1U << 5)
|
||||
#define SCHED_CPUFREQ_EARLY_DET (1U << 6)
|
||||
#define SCHED_CPUFREQ_FORCE_UPDATE (1U << 7)
|
||||
#define SCHED_CPUFREQ_CONTINUE (1U << 8)
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ
|
||||
struct update_util_data {
|
||||
|
||||
@@ -22,6 +22,20 @@ extern unsigned long nr_iowait(void);
|
||||
extern unsigned long nr_iowait_cpu(int cpu);
|
||||
extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern unsigned int sched_get_cpu_util(int cpu);
|
||||
extern u64 sched_get_cpu_last_busy_time(int cpu);
|
||||
#else
|
||||
static inline unsigned int sched_get_cpu_util(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline u64 sched_get_cpu_last_busy_time(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
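
sched_get_cpu_util() and sched_get_cpu_last_busy_time() give non-scheduler code (for example a core-control or thermal driver) a view of per-CPU load without reaching into runqueue internals. A hedged sketch of a caller follows; the selection policy is purely illustrative and only sched_get_cpu_util() is assumed.

/*
 * Hedged sketch: pick the online CPU that currently reports the lowest
 * utilization.
 */
static int example_least_loaded_cpu(void)
{
        unsigned int util, best_util = UINT_MAX;
        int cpu, best_cpu = -1;

        for_each_online_cpu(cpu) {
                util = sched_get_cpu_util(cpu);
                if (util < best_util) {
                        best_util = util;
                        best_cpu = cpu;
                }
        }

        return best_cpu;
}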
|
||||
|
||||
static inline int sched_info_on(void)
|
||||
{
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
|
||||
@@ -26,6 +26,25 @@ extern unsigned int sysctl_sched_sync_hint_enable;
|
||||
extern unsigned int sysctl_sched_cstate_aware;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern unsigned int sysctl_sched_use_walt_cpu_util;
|
||||
extern unsigned int sysctl_sched_use_walt_task_util;
|
||||
extern unsigned int sysctl_sched_walt_init_task_load_pct;
|
||||
extern unsigned int sysctl_sched_cpu_high_irqload;
|
||||
extern unsigned int sysctl_sched_boost;
|
||||
extern unsigned int sysctl_sched_group_upmigrate_pct;
|
||||
extern unsigned int sysctl_sched_group_downmigrate_pct;
|
||||
extern unsigned int sysctl_sched_walt_rotate_big_tasks;
|
||||
extern unsigned int sysctl_sched_min_task_util_for_boost;
|
||||
extern unsigned int sysctl_sched_min_task_util_for_colocation;
|
||||
extern unsigned int sysctl_sched_little_cluster_coloc_fmin_khz;
|
||||
|
||||
extern int
|
||||
walt_proc_update_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
#endif
|
||||
|
||||
enum sched_tunable_scaling {
|
||||
SCHED_TUNABLESCALING_NONE,
|
||||
@@ -49,6 +68,8 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
|
||||
loff_t *ppos);
|
||||
#endif
|
||||
|
||||
extern int sched_boost_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
/*
|
||||
* control realtime throttling:
|
||||
*
|
||||
@@ -85,6 +106,12 @@ extern int sysctl_schedstats(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table,
|
||||
int write, void __user *buffer,
|
||||
size_t *lenp, loff_t *ppos);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
|
||||
extern unsigned int sysctl_sched_energy_aware;
|
||||
extern int sched_energy_aware_handler(struct ctl_table *table, int write,
|
||||
|
||||
@@ -66,6 +66,8 @@ struct sched_domain_attr {
|
||||
|
||||
extern int sched_domain_level_max;
|
||||
|
||||
unsigned long capacity_curr_of(int cpu);
|
||||
|
||||
struct sched_group;
|
||||
|
||||
struct sched_domain_shared {
|
||||
@@ -173,6 +175,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu);
|
||||
|
||||
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
|
||||
typedef int (*sched_domain_flags_f)(void);
|
||||
extern bool sched_is_energy_aware(void);
|
||||
|
||||
#define SDTL_OVERLAP 0x01
|
||||
|
||||
|
||||
@@ -10,6 +10,9 @@
|
||||
#define DECLARE_BITMAP(name,bits) \
|
||||
unsigned long name[BITS_TO_LONGS(bits)]
|
||||
|
||||
#define DECLARE_BITMAP_ARRAY(name, nr, bits) \
|
||||
unsigned long name[nr][BITS_TO_LONGS(bits)]
|
||||
|
||||
typedef u32 __kernel_dev_t;
|
||||
|
||||
typedef __kernel_fd_set fd_set;
|
||||
|
||||
@@ -705,6 +705,51 @@ TRACE_EVENT(sched_load_rt_rq,
|
||||
__entry->util)
|
||||
);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern unsigned int sysctl_sched_use_walt_cpu_util;
|
||||
extern unsigned int sysctl_sched_use_walt_task_util;
|
||||
extern unsigned int sched_ravg_window;
|
||||
extern unsigned int walt_disabled;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Tracepoint for accounting cpu root cfs_rq
|
||||
*/
|
||||
TRACE_EVENT(sched_load_avg_cpu,
|
||||
|
||||
TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
|
||||
|
||||
TP_ARGS(cpu, cfs_rq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, cpu)
|
||||
__field(unsigned long, load_avg)
|
||||
__field(unsigned long, util_avg)
|
||||
__field(unsigned long, util_avg_pelt)
|
||||
__field(unsigned long, util_avg_walt)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->cpu = cpu;
|
||||
__entry->load_avg = cfs_rq->avg.load_avg;
|
||||
__entry->util_avg = cfs_rq->avg.util_avg;
|
||||
__entry->util_avg_pelt = cfs_rq->avg.util_avg;
|
||||
__entry->util_avg_walt = 0;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
__entry->util_avg_walt =
|
||||
cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT;
|
||||
do_div(__entry->util_avg_walt, sched_ravg_window);
|
||||
if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
|
||||
__entry->util_avg = __entry->util_avg_walt;
|
||||
#endif
|
||||
),
|
||||
|
||||
TP_printk("cpu=%d load_avg=%lu util_avg=%lu util_avg_pelt=%lu util_avg_walt=%lu",
|
||||
__entry->cpu, __entry->load_avg, __entry->util_avg,
|
||||
__entry->util_avg_pelt, __entry->util_avg_walt)
|
||||
);
|
||||
|
||||
|
||||
/*
|
||||
* Tracepoint for sched_entity load tracking:
|
||||
*/
|
||||
@@ -1022,6 +1067,8 @@ TRACE_EVENT(sched_overutilized,
|
||||
__entry->overutilized)
|
||||
);
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* _TRACE_SCHED_H */
|
||||
|
||||
|
||||
include/trace/events/walt.h (new file, 604 lines)
@@ -0,0 +1,604 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
struct rq;
|
||||
struct group_cpu_time;
|
||||
extern const char *task_event_names[];
|
||||
|
||||
#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_WALT)
|
||||
static inline void __window_data(u32 *dst, u32 *src)
|
||||
{
|
||||
if (src)
|
||||
memcpy(dst, src, nr_cpu_ids * sizeof(u32));
|
||||
else
|
||||
memset(dst, 0, nr_cpu_ids * sizeof(u32));
|
||||
}
|
||||
|
||||
struct trace_seq;
|
||||
const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
|
||||
{
|
||||
int i;
|
||||
const char *ret = p->buffer + seq_buf_used(&p->seq);
|
||||
|
||||
for (i = 0; i < buf_len; i++)
|
||||
trace_seq_printf(p, "%u ", buf[i]);
|
||||
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new)
|
||||
{
|
||||
if (curr)
|
||||
if (new)
|
||||
return rq->nt_curr_runnable_sum;
|
||||
else
|
||||
return rq->curr_runnable_sum;
|
||||
else
|
||||
if (new)
|
||||
return rq->nt_prev_runnable_sum;
|
||||
else
|
||||
return rq->prev_runnable_sum;
|
||||
}
|
||||
|
||||
static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new)
|
||||
{
|
||||
if (curr)
|
||||
if (new)
|
||||
return rq->grp_time.nt_curr_runnable_sum;
|
||||
else
|
||||
return rq->grp_time.curr_runnable_sum;
|
||||
else
|
||||
if (new)
|
||||
return rq->grp_time.nt_prev_runnable_sum;
|
||||
else
|
||||
return rq->grp_time.prev_runnable_sum;
|
||||
}
|
||||
|
||||
static inline s64
|
||||
__get_update_sum(struct rq *rq, enum migrate_types migrate_type,
|
||||
bool src, bool new, bool curr)
|
||||
{
|
||||
switch (migrate_type) {
|
||||
case RQ_TO_GROUP:
|
||||
if (src)
|
||||
return __rq_update_sum(rq, curr, new);
|
||||
else
|
||||
return __grp_update_sum(rq, curr, new);
|
||||
case GROUP_TO_RQ:
|
||||
if (src)
|
||||
return __grp_update_sum(rq, curr, new);
|
||||
else
|
||||
return __rq_update_sum(rq, curr, new);
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACE_EVENT(sched_update_pred_demand,
|
||||
|
||||
TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int pct,
|
||||
unsigned int pred_demand),
|
||||
|
||||
TP_ARGS(rq, p, runtime, pct, pred_demand),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
__field(pid_t, pid)
|
||||
__field(unsigned int, runtime)
|
||||
__field(int, pct)
|
||||
__field(unsigned int, pred_demand)
|
||||
__array(u8, bucket, NUM_BUSY_BUCKETS)
|
||||
__field(int, cpu)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
||||
__entry->pid = p->pid;
|
||||
__entry->runtime = runtime;
|
||||
__entry->pct = pct;
|
||||
__entry->pred_demand = pred_demand;
|
||||
memcpy(__entry->bucket, p->ravg.busy_buckets,
|
||||
NUM_BUSY_BUCKETS * sizeof(u8));
|
||||
__entry->cpu = rq->cpu;
|
||||
),
|
||||
|
||||
TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)",
|
||||
__entry->pid, __entry->comm,
|
||||
__entry->runtime, __entry->pct, __entry->cpu,
|
||||
__entry->pred_demand, __entry->bucket[0], __entry->bucket[1],
|
||||
__entry->bucket[2], __entry->bucket[3], __entry->bucket[4],
|
||||
__entry->bucket[5], __entry->bucket[6], __entry->bucket[7],
|
||||
__entry->bucket[8], __entry->bucket[9])
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_update_history,
|
||||
|
||||
TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
|
||||
enum task_event evt),
|
||||
|
||||
TP_ARGS(rq, p, runtime, samples, evt),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
__field(pid_t, pid)
|
||||
__field(unsigned int, runtime)
|
||||
__field(int, samples)
|
||||
__field(enum task_event, evt)
|
||||
__field(unsigned int, demand)
|
||||
__field(unsigned int, coloc_demand)
|
||||
__field(unsigned int, pred_demand)
|
||||
__array(u32, hist, RAVG_HIST_SIZE_MAX)
|
||||
__field(unsigned int, nr_big_tasks)
|
||||
__field(int, cpu)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
||||
__entry->pid = p->pid;
|
||||
__entry->runtime = runtime;
|
||||
__entry->samples = samples;
|
||||
__entry->evt = evt;
|
||||
__entry->demand = p->ravg.demand;
|
||||
__entry->coloc_demand = p->ravg.coloc_demand;
|
||||
__entry->pred_demand = p->ravg.pred_demand;
|
||||
memcpy(__entry->hist, p->ravg.sum_history,
|
||||
RAVG_HIST_SIZE_MAX * sizeof(u32));
|
||||
__entry->nr_big_tasks = rq->walt_stats.nr_big_tasks;
|
||||
__entry->cpu = rq->cpu;
|
||||
),
|
||||
|
||||
TP_printk("%d (%s): runtime %u samples %d event %s demand %u coloc_demand %u pred_demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u",
|
||||
__entry->pid, __entry->comm,
|
||||
__entry->runtime, __entry->samples,
|
||||
task_event_names[__entry->evt],
|
||||
__entry->demand, __entry->coloc_demand, __entry->pred_demand,
|
||||
__entry->hist[0], __entry->hist[1],
|
||||
__entry->hist[2], __entry->hist[3],
|
||||
__entry->hist[4], __entry->cpu, __entry->nr_big_tasks)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_get_task_cpu_cycles,
|
||||
|
||||
TP_PROTO(int cpu, int event, u64 cycles,
|
||||
u64 exec_time, struct task_struct *p),
|
||||
|
||||
TP_ARGS(cpu, event, cycles, exec_time, p),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, cpu)
|
||||
__field(int, event)
|
||||
__field(u64, cycles)
|
||||
__field(u64, exec_time)
|
||||
__field(u32, freq)
|
||||
__field(u32, legacy_freq)
|
||||
__field(u32, max_freq)
|
||||
__field(pid_t, pid)
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->cpu = cpu;
|
||||
__entry->event = event;
|
||||
__entry->cycles = cycles;
|
||||
__entry->exec_time = exec_time;
|
||||
__entry->freq = cpu_cycles_to_freq(cycles, exec_time);
|
||||
__entry->legacy_freq = sched_cpu_legacy_freq(cpu);
|
||||
__entry->max_freq = cpu_max_freq(cpu);
|
||||
__entry->pid = p->pid;
|
||||
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
||||
),
|
||||
|
||||
TP_printk("cpu=%d event=%d cycles=%llu exec_time=%llu freq=%u legacy_freq=%u max_freq=%u task=%d (%s)",
|
||||
__entry->cpu, __entry->event, __entry->cycles,
|
||||
__entry->exec_time, __entry->freq, __entry->legacy_freq,
|
||||
__entry->max_freq, __entry->pid, __entry->comm)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_update_task_ravg,
|
||||
|
||||
TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
|
||||
u64 wallclock, u64 irqtime, u64 cycles, u64 exec_time,
|
||||
struct group_cpu_time *cpu_time),
|
||||
|
||||
TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time, cpu_time),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
__field(pid_t, pid)
|
||||
__field(pid_t, cur_pid)
|
||||
__field(unsigned int, cur_freq)
|
||||
__field(u64, wallclock)
|
||||
__field(u64, mark_start)
|
||||
__field(u64, delta_m)
|
||||
__field(u64, win_start)
|
||||
__field(u64, delta)
|
||||
__field(u64, irqtime)
|
||||
__field(enum task_event, evt)
|
||||
__field(unsigned int, demand)
|
||||
__field(unsigned int, coloc_demand)
|
||||
__field(unsigned int, sum)
|
||||
__field(int, cpu)
|
||||
__field(unsigned int, pred_demand)
|
||||
__field(u64, rq_cs)
|
||||
__field(u64, rq_ps)
|
||||
__field(u64, grp_cs)
|
||||
__field(u64, grp_ps)
|
||||
__field(u64, grp_nt_cs)
|
||||
__field(u64, grp_nt_ps)
|
||||
__field(u32, curr_window)
|
||||
__field(u32, prev_window)
|
||||
__dynamic_array(u32, curr_sum, nr_cpu_ids)
|
||||
__dynamic_array(u32, prev_sum, nr_cpu_ids)
|
||||
__field(u64, nt_cs)
|
||||
__field(u64, nt_ps)
|
||||
__field(u32, active_windows)
|
||||
__field(u8, curr_top)
|
||||
__field(u8, prev_top)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->wallclock = wallclock;
|
||||
__entry->win_start = rq->window_start;
|
||||
__entry->delta = (wallclock - rq->window_start);
|
||||
__entry->evt = evt;
|
||||
__entry->cpu = rq->cpu;
|
||||
__entry->cur_pid = rq->curr->pid;
|
||||
__entry->cur_freq = cpu_cycles_to_freq(cycles, exec_time);
|
||||
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
||||
__entry->pid = p->pid;
|
||||
__entry->mark_start = p->ravg.mark_start;
|
||||
__entry->delta_m = (wallclock - p->ravg.mark_start);
|
||||
__entry->demand = p->ravg.demand;
|
||||
__entry->coloc_demand = p->ravg.coloc_demand;
|
||||
__entry->sum = p->ravg.sum;
|
||||
__entry->irqtime = irqtime;
|
||||
__entry->pred_demand = p->ravg.pred_demand;
|
||||
__entry->rq_cs = rq->curr_runnable_sum;
|
||||
__entry->rq_ps = rq->prev_runnable_sum;
|
||||
__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
|
||||
__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
|
||||
__entry->grp_nt_cs = cpu_time ?
|
||||
cpu_time->nt_curr_runnable_sum : 0;
|
||||
__entry->grp_nt_ps = cpu_time ?
|
||||
cpu_time->nt_prev_runnable_sum : 0;
|
||||
__entry->curr_window = p->ravg.curr_window;
|
||||
__entry->prev_window = p->ravg.prev_window;
|
||||
__window_data(__get_dynamic_array(curr_sum),
|
||||
p->ravg.curr_window_cpu);
|
||||
__window_data(__get_dynamic_array(prev_sum),
|
||||
p->ravg.prev_window_cpu);
|
||||
__entry->nt_cs = rq->nt_curr_runnable_sum;
|
||||
__entry->nt_ps = rq->nt_prev_runnable_sum;
|
||||
__entry->active_windows = p->ravg.active_windows;
|
||||
__entry->curr_top = rq->curr_top;
|
||||
__entry->prev_top = rq->prev_top;
|
||||
),
|
||||
|
||||
TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u coloc_demand: %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu curr_top %u prev_top %u",
|
||||
__entry->wallclock, __entry->win_start, __entry->delta,
|
||||
task_event_names[__entry->evt], __entry->cpu,
|
||||
__entry->cur_freq, __entry->cur_pid,
|
||||
__entry->pid, __entry->comm, __entry->mark_start,
|
||||
__entry->delta_m, __entry->demand, __entry->coloc_demand,
|
||||
__entry->sum, __entry->irqtime, __entry->pred_demand,
|
||||
__entry->rq_cs, __entry->rq_ps, __entry->curr_window,
|
||||
__window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids),
|
||||
__entry->prev_window,
|
||||
__window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids),
|
||||
__entry->nt_cs, __entry->nt_ps,
|
||||
__entry->active_windows, __entry->grp_cs,
|
||||
__entry->grp_ps, __entry->grp_nt_cs, __entry->grp_nt_ps,
|
||||
__entry->curr_top, __entry->prev_top)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_update_task_ravg_mini,
|
||||
|
||||
TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
|
||||
u64 wallclock, u64 irqtime, u64 cycles, u64 exec_time,
|
||||
struct group_cpu_time *cpu_time),
|
||||
|
||||
TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time, cpu_time),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
__field(pid_t, pid)
|
||||
__field(u64, wallclock)
|
||||
__field(u64, mark_start)
|
||||
__field(u64, delta_m)
|
||||
__field(u64, win_start)
|
||||
__field(u64, delta)
|
||||
__field(enum task_event, evt)
|
||||
__field(unsigned int, demand)
|
||||
__field(int, cpu)
|
||||
__field(u64, rq_cs)
|
||||
__field(u64, rq_ps)
|
||||
__field(u64, grp_cs)
|
||||
__field(u64, grp_ps)
|
||||
__field(u32, curr_window)
|
||||
__field(u32, prev_window)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->wallclock = wallclock;
|
||||
__entry->win_start = rq->window_start;
|
||||
__entry->delta = (wallclock - rq->window_start);
|
||||
__entry->evt = evt;
|
||||
__entry->cpu = rq->cpu;
|
||||
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
||||
__entry->pid = p->pid;
|
||||
__entry->mark_start = p->ravg.mark_start;
|
||||
__entry->delta_m = (wallclock - p->ravg.mark_start);
|
||||
__entry->demand = p->ravg.demand;
|
||||
__entry->rq_cs = rq->curr_runnable_sum;
|
||||
__entry->rq_ps = rq->prev_runnable_sum;
|
||||
__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
|
||||
__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
|
||||
__entry->curr_window = p->ravg.curr_window;
|
||||
__entry->prev_window = p->ravg.prev_window;
|
||||
),
|
||||
|
||||
TP_printk("wc %llu ws %llu delta %llu event %s cpu %d task %d (%s) ms %llu delta %llu demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u grp_cs %lld grp_ps %lld",
|
||||
__entry->wallclock, __entry->win_start, __entry->delta,
|
||||
task_event_names[__entry->evt], __entry->cpu,
|
||||
__entry->pid, __entry->comm, __entry->mark_start,
|
||||
__entry->delta_m, __entry->demand,
|
||||
__entry->rq_cs, __entry->rq_ps, __entry->curr_window,
|
||||
__entry->prev_window, __entry->grp_cs, __entry->grp_ps)
|
||||
);
|
||||
|
||||
struct migration_sum_data;
|
||||
extern const char *migrate_type_names[];
|
||||
|
||||
TRACE_EVENT(sched_set_preferred_cluster,
|
||||
|
||||
TP_PROTO(struct related_thread_group *grp, u64 total_demand),
|
||||
|
||||
TP_ARGS(grp, total_demand),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, id)
|
||||
__field(u64, demand)
|
||||
__field(int, cluster_first_cpu)
|
||||
__array(char, comm, TASK_COMM_LEN)
|
||||
__field(pid_t, pid)
|
||||
__field(unsigned int, task_demand)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->id = grp->id;
|
||||
__entry->demand = total_demand;
|
||||
__entry->cluster_first_cpu = grp->preferred_cluster ?
|
||||
cluster_first_cpu(grp->preferred_cluster) : -1;
|
||||
),
|
||||
|
||||
TP_printk("group_id %d total_demand %llu preferred_cluster_first_cpu %d",
|
||||
__entry->id, __entry->demand,
|
||||
__entry->cluster_first_cpu)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_migration_update_sum,
|
||||
|
||||
TP_PROTO(struct task_struct *p, enum migrate_types migrate_type,
|
||||
struct rq *rq),
|
||||
|
||||
TP_ARGS(p, migrate_type, rq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, tcpu)
|
||||
__field(int, pid)
|
||||
__field(enum migrate_types, migrate_type)
|
||||
__field(s64, src_cs)
|
||||
__field(s64, src_ps)
|
||||
__field(s64, dst_cs)
|
||||
__field(s64, dst_ps)
|
||||
__field(s64, src_nt_cs)
|
||||
__field(s64, src_nt_ps)
|
||||
__field(s64, dst_nt_cs)
|
||||
__field(s64, dst_nt_ps)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->tcpu = task_cpu(p);
|
||||
__entry->pid = p->pid;
|
||||
__entry->migrate_type = migrate_type;
|
||||
__entry->src_cs = __get_update_sum(rq, migrate_type,
|
||||
true, false, true);
|
||||
__entry->src_ps = __get_update_sum(rq, migrate_type,
|
||||
true, false, false);
|
||||
__entry->dst_cs = __get_update_sum(rq, migrate_type,
|
||||
false, false, true);
|
||||
__entry->dst_ps = __get_update_sum(rq, migrate_type,
|
||||
false, false, false);
|
||||
__entry->src_nt_cs = __get_update_sum(rq, migrate_type,
|
||||
true, true, true);
|
||||
__entry->src_nt_ps = __get_update_sum(rq, migrate_type,
|
||||
true, true, false);
|
||||
__entry->dst_nt_cs = __get_update_sum(rq, migrate_type,
|
||||
false, true, true);
|
||||
__entry->dst_nt_ps = __get_update_sum(rq, migrate_type,
|
||||
false, true, false);
|
||||
),
|
||||
|
||||
TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld",
|
||||
__entry->pid, __entry->tcpu,
|
||||
migrate_type_names[__entry->migrate_type],
|
||||
__entry->src_cs, __entry->src_ps, __entry->dst_cs,
|
||||
__entry->dst_ps, __entry->src_nt_cs, __entry->src_nt_ps,
|
||||
__entry->dst_nt_cs, __entry->dst_nt_ps)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_set_boost,
|
||||
|
||||
TP_PROTO(int type),
|
||||
|
||||
TP_ARGS(type),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, type)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->type = type;
|
||||
),
|
||||
|
||||
TP_printk("type %d", __entry->type)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_load_balance_skip_tasks,
|
||||
|
||||
TP_PROTO(int scpu, int dcpu, int grp_type, int pid,
|
||||
unsigned long h_load, unsigned long task_util,
|
||||
unsigned long affinity),
|
||||
|
||||
TP_ARGS(scpu, dcpu, grp_type, pid, h_load, task_util, affinity),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, scpu)
|
||||
__field(unsigned long, src_util_cum)
|
||||
__field(int, grp_type)
|
||||
__field(int, dcpu)
|
||||
__field(unsigned long, dst_util_cum)
|
||||
__field(int, pid)
|
||||
__field(unsigned long, affinity)
|
||||
__field(unsigned long, task_util)
|
||||
__field(unsigned long, h_load)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->scpu = scpu;
|
||||
__entry->src_util_cum =
|
||||
cpu_rq(scpu)->cum_window_demand_scaled;
|
||||
__entry->grp_type = grp_type;
|
||||
__entry->dcpu = dcpu;
|
||||
__entry->dst_util_cum =
|
||||
cpu_rq(dcpu)->cum_window_demand_scaled;
|
||||
__entry->pid = pid;
|
||||
__entry->affinity = affinity;
|
||||
__entry->task_util = task_util;
|
||||
__entry->h_load = h_load;
|
||||
),
|
||||
|
||||
TP_printk("source_cpu=%d util_cum=%lu group_type=%d dest_cpu=%d util_cum=%lu pid=%d affinity=%#lx task_util=%lu task_h_load=%lu",
|
||||
__entry->scpu, __entry->src_util_cum, __entry->grp_type,
|
||||
__entry->dcpu, __entry->dst_util_cum, __entry->pid,
|
||||
__entry->affinity, __entry->task_util, __entry->h_load)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(sched_cpu_load,
|
||||
|
||||
TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost),
|
||||
|
||||
TP_ARGS(rq, idle, irqload, power_cost),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, cpu)
|
||||
__field(unsigned int, idle)
|
||||
__field(unsigned int, nr_running)
|
||||
__field(unsigned int, nr_big_tasks)
|
||||
__field(unsigned int, load_scale_factor)
|
||||
__field(unsigned int, capacity)
|
||||
__field(u64, cumulative_runnable_avg)
|
||||
__field(u64, irqload)
|
||||
__field(unsigned int, max_freq)
|
||||
__field(unsigned int, power_cost)
|
||||
__field(int, cstate)
|
||||
__field(int, dstate)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->cpu = rq->cpu;
|
||||
__entry->idle = idle;
|
||||
__entry->nr_running = rq->nr_running;
|
||||
__entry->nr_big_tasks = rq->walt_stats.nr_big_tasks;
|
||||
__entry->load_scale_factor =
|
||||
cpu_load_scale_factor(rq->cpu);
|
||||
__entry->capacity = cpu_capacity(rq->cpu);
|
||||
__entry->cumulative_runnable_avg =
|
||||
rq->walt_stats.cumulative_runnable_avg_scaled;
|
||||
__entry->irqload = irqload;
|
||||
__entry->max_freq = cpu_max_freq(rq->cpu);
|
||||
__entry->power_cost = power_cost;
|
||||
__entry->cstate = rq->cstate;
|
||||
__entry->dstate = rq->cluster->dstate;
|
||||
),
|
||||
|
||||
TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fmax %u power_cost %u cstate %d dstate %d",
|
||||
__entry->cpu, __entry->idle, __entry->nr_running,
|
||||
__entry->nr_big_tasks, __entry->load_scale_factor,
|
||||
__entry->capacity, __entry->cumulative_runnable_avg,
|
||||
__entry->irqload, __entry->max_freq, __entry->power_cost,
|
||||
__entry->cstate, __entry->dstate)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(sched_cpu_load, sched_cpu_load_lb,
|
||||
TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost),
|
||||
TP_ARGS(rq, idle, irqload, power_cost)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_load_to_gov,
|
||||
|
||||
TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load,
|
||||
u64 freq_aggr_thresh, u64 load, int policy,
|
||||
int big_task_rotation,
|
||||
unsigned int sysctl_sched_little_cluster_coloc_fmin_khz,
|
||||
u64 coloc_boost_load),
|
||||
TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy,
|
||||
big_task_rotation, sysctl_sched_little_cluster_coloc_fmin_khz,
|
||||
coloc_boost_load),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, cpu)
|
||||
__field(int, policy)
|
||||
__field(int, ed_task_pid)
|
||||
__field(u64, aggr_grp_load)
|
||||
__field(u64, freq_aggr_thresh)
|
||||
__field(u64, tt_load)
|
||||
__field(u64, rq_ps)
|
||||
__field(u64, grp_rq_ps)
|
||||
__field(u64, nt_ps)
|
||||
__field(u64, grp_nt_ps)
|
||||
__field(u64, pl)
|
||||
__field(u64, load)
|
||||
__field(int, big_task_rotation)
|
||||
__field(unsigned int,
|
||||
sysctl_sched_little_cluster_coloc_fmin_khz)
|
||||
__field(u64, coloc_boost_load)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->cpu = cpu_of(rq);
|
||||
__entry->policy = policy;
|
||||
__entry->ed_task_pid = rq->ed_task ? rq->ed_task->pid : -1;
|
||||
__entry->aggr_grp_load = aggr_grp_load;
|
||||
__entry->freq_aggr_thresh = freq_aggr_thresh;
|
||||
__entry->tt_load = tt_load;
|
||||
__entry->rq_ps = rq->prev_runnable_sum;
|
||||
__entry->grp_rq_ps = rq->grp_time.prev_runnable_sum;
|
||||
__entry->nt_ps = rq->nt_prev_runnable_sum;
|
||||
__entry->grp_nt_ps = rq->grp_time.nt_prev_runnable_sum;
|
||||
__entry->pl =
|
||||
rq->walt_stats.pred_demands_sum_scaled;
|
||||
__entry->load = load;
|
||||
__entry->big_task_rotation = big_task_rotation;
|
||||
__entry->sysctl_sched_little_cluster_coloc_fmin_khz =
|
||||
sysctl_sched_little_cluster_coloc_fmin_khz;
|
||||
__entry->coloc_boost_load = coloc_boost_load;
|
||||
),
|
||||
|
||||
TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d sysctl_sched_little_cluster_coloc_fmin_khz=%u coloc_boost_load=%llu",
|
||||
__entry->cpu, __entry->policy, __entry->ed_task_pid,
|
||||
__entry->aggr_grp_load, __entry->freq_aggr_thresh,
|
||||
__entry->tt_load, __entry->rq_ps, __entry->grp_rq_ps,
|
||||
__entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load,
|
||||
__entry->big_task_rotation,
|
||||
__entry->sysctl_sched_little_cluster_coloc_fmin_khz,
|
||||
__entry->coloc_boost_load)
|
||||
);
|
||||
#endif
|
||||
@@ -420,6 +420,15 @@ config IRQ_TIME_ACCOUNTING
|
||||
|
||||
If in doubt, say N here.
|
||||
|
||||
config SCHED_WALT
|
||||
bool "Support window based load tracking"
|
||||
depends on SMP
|
||||
help
|
||||
This feature will allow the scheduler to maintain a tunable window-based
set of metrics for tasks and runqueues. These metrics can be
|
||||
used to guide task placement as well as task frequency requirements
|
||||
for cpufreq governors.
|
||||
|
||||
config BSD_PROCESS_ACCT
|
||||
bool "BSD Process Accounting"
|
||||
depends on MULTIUSER
|
||||
|
||||
@@ -811,6 +811,7 @@ void __noreturn do_exit(long code)
|
||||
}
|
||||
|
||||
exit_signals(tsk); /* sets PF_EXITING */
|
||||
sched_exit(tsk);
|
||||
/*
|
||||
* Ensure that all new tsk->pi_lock acquisitions must observe
|
||||
* PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
|
||||
|
||||
@@ -2089,6 +2089,7 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
perf_event_free_task(p);
|
||||
bad_fork_cleanup_policy:
|
||||
lockdep_free_task(p);
|
||||
free_task_load_ptrs(p);
|
||||
#ifdef CONFIG_NUMA
|
||||
mpol_put(p->mempolicy);
|
||||
bad_fork_cleanup_threadgroup_lock:
|
||||
|
||||
@@ -21,6 +21,7 @@ obj-y += idle.o fair.o rt.o deadline.o
|
||||
obj-y += wait.o wait_bit.o swait.o completion.o
|
||||
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
|
||||
obj-$(CONFIG_SCHED_WALT) += walt.o boost.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
||||
|
||||
kernel/sched/boost.c (new file, 255 lines)
@@ -0,0 +1,255 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "sched.h"
|
||||
#include <linux/of.h>
|
||||
#include <linux/sched/core_ctl.h>
|
||||
#include <trace/events/sched.h>
|
||||
|
||||
/*
|
||||
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
|
||||
* with a higher capacity than the ones where their load characteristics
* would normally have placed them. Any entity enabling
|
||||
* boost is responsible for disabling it as well.
|
||||
*/
|
||||
|
||||
unsigned int sysctl_sched_boost;
|
||||
static enum sched_boost_policy boost_policy;
|
||||
static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE;
|
||||
static DEFINE_MUTEX(boost_mutex);
|
||||
static unsigned int freq_aggr_threshold_backup;
|
||||
static int boost_refcount[MAX_NUM_BOOST_TYPE];
|
||||
|
||||
static inline void boost_kick(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
if (!test_and_set_bit(BOOST_KICK, &rq->walt_flags))
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
|
||||
static void boost_kick_cpus(void)
|
||||
{
|
||||
int i;
|
||||
struct cpumask kick_mask;
|
||||
|
||||
if (boost_policy != SCHED_BOOST_ON_BIG)
|
||||
return;
|
||||
|
||||
cpumask_andnot(&kick_mask, cpu_online_mask, cpu_isolated_mask);
|
||||
|
||||
for_each_cpu(i, &kick_mask) {
|
||||
if (cpu_capacity(i) != max_capacity)
|
||||
boost_kick(i);
|
||||
}
|
||||
}
|
||||
|
||||
int got_boost_kick(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
return test_bit(BOOST_KICK, &rq->walt_flags);
|
||||
}
|
||||
|
||||
void clear_boost_kick(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
clear_bit(BOOST_KICK, &rq->walt_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Scheduler boost type and boost policy might at first seem unrelated,
|
||||
* however, there exists a connection between them that will allow us
|
||||
* to use them interchangeably during placement decisions. We'll explain
|
||||
* the connection here in one possible way so that the implications are
|
||||
* clear when looking at placement policies.
|
||||
*
|
||||
* When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
|
||||
* When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
|
||||
* neither be none nor RESTRAINED.
|
||||
*/
|
||||
static void set_boost_policy(int type)
|
||||
{
|
||||
if (type == NO_BOOST || type == RESTRAINED_BOOST) {
|
||||
boost_policy = SCHED_BOOST_NONE;
|
||||
return;
|
||||
}
|
||||
|
||||
if (boost_policy_dt) {
|
||||
boost_policy = boost_policy_dt;
|
||||
return;
|
||||
}
|
||||
|
||||
if (min_possible_efficiency != max_possible_efficiency) {
|
||||
boost_policy = SCHED_BOOST_ON_BIG;
|
||||
return;
|
||||
}
|
||||
|
||||
boost_policy = SCHED_BOOST_ON_ALL;
|
||||
}
|
||||
|
||||
enum sched_boost_policy sched_boost_policy(void)
|
||||
{
|
||||
return boost_policy;
|
||||
}
|
||||
|
||||
static bool verify_boost_params(int type)
|
||||
{
|
||||
return type >= RESTRAINED_BOOST_DISABLE && type <= RESTRAINED_BOOST;
|
||||
}
|
||||
|
||||
static void _sched_set_boost(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case NO_BOOST: /* All boost clear */
|
||||
if (boost_refcount[FULL_THROTTLE_BOOST] > 0) {
|
||||
core_ctl_set_boost(false);
|
||||
boost_refcount[FULL_THROTTLE_BOOST] = 0;
|
||||
}
|
||||
if (boost_refcount[CONSERVATIVE_BOOST] > 0) {
|
||||
restore_cgroup_boost_settings();
|
||||
boost_refcount[CONSERVATIVE_BOOST] = 0;
|
||||
}
|
||||
if (boost_refcount[RESTRAINED_BOOST] > 0) {
|
||||
update_freq_aggregate_threshold(
|
||||
freq_aggr_threshold_backup);
|
||||
boost_refcount[RESTRAINED_BOOST] = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case FULL_THROTTLE_BOOST:
|
||||
boost_refcount[FULL_THROTTLE_BOOST]++;
|
||||
if (boost_refcount[FULL_THROTTLE_BOOST] == 1) {
|
||||
core_ctl_set_boost(true);
|
||||
restore_cgroup_boost_settings();
|
||||
boost_kick_cpus();
|
||||
}
|
||||
break;
|
||||
|
||||
case CONSERVATIVE_BOOST:
|
||||
boost_refcount[CONSERVATIVE_BOOST]++;
|
||||
if ((boost_refcount[CONSERVATIVE_BOOST] == 1) &&
|
||||
!boost_refcount[FULL_THROTTLE_BOOST]) {
|
||||
update_cgroup_boost_settings();
|
||||
boost_kick_cpus();
|
||||
}
|
||||
break;
|
||||
|
||||
case RESTRAINED_BOOST:
|
||||
boost_refcount[RESTRAINED_BOOST]++;
|
||||
if (boost_refcount[RESTRAINED_BOOST] == 1) {
|
||||
freq_aggr_threshold_backup =
|
||||
update_freq_aggregate_threshold(1);
|
||||
}
|
||||
break;
|
||||
|
||||
case FULL_THROTTLE_BOOST_DISABLE:
|
||||
if (boost_refcount[FULL_THROTTLE_BOOST] >= 1) {
|
||||
boost_refcount[FULL_THROTTLE_BOOST]--;
|
||||
if (!boost_refcount[FULL_THROTTLE_BOOST]) {
|
||||
core_ctl_set_boost(false);
|
||||
if (boost_refcount[CONSERVATIVE_BOOST] >= 1)
|
||||
update_cgroup_boost_settings();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CONSERVATIVE_BOOST_DISABLE:
|
||||
if (boost_refcount[CONSERVATIVE_BOOST] >= 1) {
|
||||
boost_refcount[CONSERVATIVE_BOOST]--;
|
||||
if (!boost_refcount[CONSERVATIVE_BOOST])
|
||||
restore_cgroup_boost_settings();
|
||||
}
|
||||
break;
|
||||
|
||||
case RESTRAINED_BOOST_DISABLE:
|
||||
if (boost_refcount[RESTRAINED_BOOST] >= 1) {
|
||||
boost_refcount[RESTRAINED_BOOST]--;
|
||||
if (!boost_refcount[RESTRAINED_BOOST])
|
||||
update_freq_aggregate_threshold(
|
||||
freq_aggr_threshold_backup);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Aggregate final boost type */
|
||||
if (boost_refcount[FULL_THROTTLE_BOOST] >= 1)
|
||||
type = FULL_THROTTLE_BOOST;
|
||||
else if (boost_refcount[CONSERVATIVE_BOOST] >= 1)
|
||||
type = CONSERVATIVE_BOOST;
|
||||
else if (boost_refcount[RESTRAINED_BOOST] >= 1)
|
||||
type = RESTRAINED_BOOST;
|
||||
else
|
||||
type = NO_BOOST;
|
||||
|
||||
set_boost_policy(type);
|
||||
sysctl_sched_boost = type;
|
||||
trace_sched_set_boost(type);
|
||||
}
|
||||
|
||||
void sched_boost_parse_dt(void)
|
||||
{
|
||||
struct device_node *sn;
|
||||
const char *boost_policy;
|
||||
|
||||
sn = of_find_node_by_path("/sched-hmp");
|
||||
if (!sn)
|
||||
return;
|
||||
|
||||
if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
|
||||
if (!strcmp(boost_policy, "boost-on-big"))
|
||||
boost_policy_dt = SCHED_BOOST_ON_BIG;
|
||||
else if (!strcmp(boost_policy, "boost-on-all"))
|
||||
boost_policy_dt = SCHED_BOOST_ON_ALL;
|
||||
}
|
||||
}
|
||||
|
||||
int sched_set_boost(int type)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&boost_mutex);
|
||||
if (verify_boost_params(type))
|
||||
_sched_set_boost(type);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
mutex_unlock(&boost_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sched_boost_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
unsigned int *data = (unsigned int *)table->data;
|
||||
|
||||
mutex_lock(&boost_mutex);
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
|
||||
if (ret || !write)
|
||||
goto done;
|
||||
|
||||
if (verify_boost_params(*data))
|
||||
_sched_set_boost(*data);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
done:
|
||||
mutex_unlock(&boost_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sched_boost(void)
|
||||
{
|
||||
return sysctl_sched_boost;
|
||||
}
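
Because _sched_set_boost() keeps a per-type refcount, every successful enable request must eventually be paired with the matching *_DISABLE request or the boost stays pinned. A hedged sketch of a kernel client follows; the surrounding function is an assumption, only sched_set_boost() and the boost-type constants already referenced in boost.c come from this patch.

/*
 * Hedged sketch: take a RESTRAINED_BOOST reference around a burst of work
 * and drop it afterwards.  sched_set_boost() returns -EINVAL for an
 * out-of-range type.
 */
static int example_boosted_section(void)
{
        int ret;

        ret = sched_set_boost(RESTRAINED_BOOST);
        if (ret)
                return ret;

        /* ... work that benefits from aggregated frequency demand ... */

        return sched_set_boost(RESTRAINED_BOOST_DISABLE);
}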
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "../smpboot.h"
|
||||
|
||||
#include "pelt.h"
|
||||
#include "walt.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/sched.h>
|
||||
@@ -757,6 +758,9 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (task_contributes_to_load(p))
|
||||
rq->nr_uninterruptible++;
|
||||
|
||||
if (flags & DEQUEUE_SLEEP)
|
||||
clear_ed_task(p, rq);
|
||||
|
||||
dequeue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
@@ -918,8 +922,9 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
dequeue_task(rq, p, DEQUEUE_NOCLOCK);
|
||||
double_lock_balance(rq, cpu_rq(new_cpu));
|
||||
set_task_cpu(p, new_cpu);
|
||||
rq_unlock(rq, rf);
|
||||
double_rq_unlock(cpu_rq(new_cpu), rq);
|
||||
|
||||
rq = cpu_rq(new_cpu);
|
||||
|
||||
@@ -1177,12 +1182,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
p->se.nr_migrations++;
|
||||
rseq_migrate(p);
|
||||
perf_event_task_migrate(p);
|
||||
|
||||
fixup_busy_time(p, new_cpu);
|
||||
}
|
||||
|
||||
__set_task_cpu(p, new_cpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
static void __migrate_swap_task(struct task_struct *p, int cpu)
|
||||
{
|
||||
if (task_on_rq_queued(p)) {
|
||||
@@ -1299,7 +1305,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
||||
/*
|
||||
* wait_task_inactive - wait for a thread to unschedule.
|
||||
@@ -1752,6 +1757,7 @@ void sched_ttwu_pending(void)
|
||||
|
||||
void scheduler_ipi(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
/*
|
||||
* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
|
||||
* TIF_NEED_RESCHED remotely (for the first time) will also send
|
||||
@@ -1759,9 +1765,18 @@ void scheduler_ipi(void)
|
||||
*/
|
||||
preempt_fold_need_resched();
|
||||
|
||||
if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
|
||||
if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
|
||||
&& !got_boost_kick())
|
||||
return;
|
||||
|
||||
if (got_boost_kick()) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
if (rq->curr->sched_class == &fair_sched_class)
|
||||
check_for_migration(rq, rq->curr);
|
||||
clear_boost_kick(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Not all reschedule IPI handlers call irq_enter/irq_exit, since
|
||||
* traditionally all their work was done from the interrupt return
|
||||
@@ -1934,6 +1949,36 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||
* accesses to the task state; see try_to_wake_up() and set_current_state().
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
/* utility function to update walt signals at wakeup */
|
||||
static inline void walt_try_to_wake_up(struct task_struct *p)
|
||||
{
|
||||
struct rq *rq = cpu_rq(task_cpu(p));
|
||||
struct rq_flags rf;
|
||||
u64 wallclock;
|
||||
unsigned int old_load;
|
||||
struct related_thread_group *grp = NULL;
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
old_load = task_load(p);
|
||||
wallclock = sched_ktime_clock();
|
||||
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
|
||||
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
|
||||
note_task_waking(p, wallclock);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
rcu_read_lock();
|
||||
grp = task_related_thread_group(p);
|
||||
if (update_preferred_cluster(grp, p, old_load))
|
||||
set_preferred_cluster(grp);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#else
|
||||
#define walt_try_to_wake_up(a) {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* try_to_wake_up - wake up a thread
|
||||
* @p: the thread to be awakened
|
||||
@@ -2036,6 +2081,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
|
||||
*/
|
||||
smp_cond_load_acquire(&p->on_cpu, !VAL);
|
||||
|
||||
walt_try_to_wake_up(p);
|
||||
|
||||
p->sched_contributes_to_load = !!task_contributes_to_load(p);
|
||||
p->state = TASK_WAKING;
|
||||
|
||||
@@ -2066,6 +2113,14 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
if (success && sched_predl) {
|
||||
raw_spin_lock_irqsave(&cpu_rq(cpu)->lock, flags);
|
||||
if (do_pl_notif(cpu_rq(cpu)))
|
||||
cpufreq_update_util(cpu_rq(cpu),
|
||||
SCHED_CPUFREQ_WALT |
|
||||
SCHED_CPUFREQ_PL);
|
||||
raw_spin_unlock_irqrestore(&cpu_rq(cpu)->lock, flags);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
@@ -2106,11 +2161,17 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
|
||||
trace_sched_waking(p);
|
||||
|
||||
if (!task_on_rq_queued(p)) {
|
||||
u64 wallclock = sched_ktime_clock();
|
||||
|
||||
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
|
||||
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
|
||||
|
||||
if (p->in_iowait) {
|
||||
delayacct_blkio_end(p);
|
||||
atomic_dec(&rq->nr_iowait);
|
||||
}
|
||||
ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
|
||||
note_task_waking(p, wallclock);
|
||||
}
|
||||
|
||||
ttwu_do_wakeup(rq, p, 0, rf);
|
||||
@@ -2157,6 +2218,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
p->se.prev_sum_exec_runtime = 0;
|
||||
p->se.nr_migrations = 0;
|
||||
p->se.vruntime = 0;
|
||||
p->last_sleep_ts = 0;
|
||||
INIT_LIST_HEAD(&p->se.group_node);
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
@@ -2305,6 +2367,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
init_new_task_load(p);
|
||||
__sched_fork(clone_flags, p);
|
||||
/*
|
||||
* We mark the process as NEW here. This guarantees that
|
||||
@@ -2408,7 +2471,9 @@ void wake_up_new_task(struct task_struct *p)
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
add_new_task_to_grp(p);
|
||||
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
|
||||
|
||||
p->state = TASK_RUNNING;
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
@@ -2426,7 +2491,9 @@ void wake_up_new_task(struct task_struct *p)
|
||||
update_rq_clock(rq);
|
||||
post_init_entity_util_avg(&p->se);
|
||||
|
||||
mark_task_starting(p);
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK);
|
||||
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
trace_sched_wakeup_new(p);
|
||||
check_preempt_curr(rq, p, WF_FORK);
|
||||
@@ -3053,16 +3120,30 @@ void scheduler_tick(void)
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct rq_flags rf;
|
||||
u64 wallclock;
|
||||
bool early_notif;
|
||||
u32 old_load;
|
||||
struct related_thread_group *grp;
|
||||
unsigned int flag = 0;
|
||||
|
||||
sched_clock_tick();
|
||||
|
||||
rq_lock(rq, &rf);
|
||||
|
||||
old_load = task_load(curr);
|
||||
set_window_start(rq);
|
||||
wallclock = sched_ktime_clock();
|
||||
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
|
||||
update_rq_clock(rq);
|
||||
curr->sched_class->task_tick(rq, curr, 0);
|
||||
cpu_load_update_active(rq);
|
||||
calc_global_load_tick(rq);
|
||||
|
||||
early_notif = early_detection_notify(rq, wallclock);
|
||||
if (early_notif)
|
||||
flag = SCHED_CPUFREQ_WALT | SCHED_CPUFREQ_EARLY_DET;
|
||||
|
||||
cpufreq_update_util(rq, flag);
|
||||
rq_unlock(rq, &rf);
|
||||
|
||||
perf_event_task_tick();
|
||||
@@ -3071,6 +3152,15 @@ void scheduler_tick(void)
|
||||
rq->idle_balance = idle_cpu(cpu);
|
||||
trigger_load_balance(rq);
|
||||
#endif
|
||||
|
||||
rcu_read_lock();
|
||||
grp = task_related_thread_group(curr);
|
||||
if (update_preferred_cluster(grp, curr, old_load))
|
||||
set_preferred_cluster(grp);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (curr->sched_class == &fair_sched_class)
|
||||
check_for_migration(rq, curr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
@@ -3399,6 +3489,7 @@ static void __sched notrace __schedule(bool preempt)
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
int cpu;
|
||||
u64 wallclock;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
rq = cpu_rq(cpu);
|
||||
@@ -3460,7 +3551,13 @@ static void __sched notrace __schedule(bool preempt)
|
||||
clear_tsk_need_resched(prev);
|
||||
clear_preempt_need_resched();
|
||||
|
||||
wallclock = sched_ktime_clock();
|
||||
if (likely(prev != next)) {
|
||||
if (!prev->on_rq)
|
||||
prev->last_sleep_ts = wallclock;
|
||||
|
||||
update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
|
||||
update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
|
||||
rq->nr_switches++;
|
||||
rq->curr = next;
|
||||
/*
|
||||
@@ -3484,6 +3581,7 @@ static void __sched notrace __schedule(bool preempt)
|
||||
/* Also unlocks the rq: */
|
||||
rq = context_switch(rq, prev, next, &rf);
|
||||
} else {
|
||||
update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
|
||||
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
|
||||
rq_unlock_irq(rq, &rf);
|
||||
}
|
||||
@@ -5383,10 +5481,11 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
|
||||
__sched_fork(0, idle);
|
||||
|
||||
raw_spin_lock_irqsave(&idle->pi_lock, flags);
|
||||
raw_spin_lock(&rq->lock);
|
||||
|
||||
__sched_fork(0, idle);
|
||||
idle->state = TASK_RUNNING;
|
||||
idle->se.exec_start = sched_clock();
|
||||
idle->flags |= PF_IDLE;
|
||||
@@ -5823,6 +5922,11 @@ int sched_cpu_deactivate(unsigned int cpu)
|
||||
static void sched_rq_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
set_window_start(rq);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
|
||||
rq->calc_load_update = calc_load_update;
|
||||
update_max_interval();
|
||||
@@ -5846,6 +5950,7 @@ int sched_cpu_dying(unsigned int cpu)
|
||||
sched_tick_stop(cpu);
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
|
||||
if (rq->rd) {
|
||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||
set_rq_offline(rq);
|
||||
@@ -5854,6 +5959,8 @@ int sched_cpu_dying(unsigned int cpu)
|
||||
BUG_ON(rq->nr_running != 1);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
clear_walt_request(cpu);
|
||||
|
||||
calc_load_migrate(rq);
|
||||
update_max_interval();
|
||||
nohz_balance_exit_idle(rq);
|
||||
@@ -5878,6 +5985,8 @@ void __init sched_init_smp(void)
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
cpus_read_unlock();
|
||||
|
||||
update_cluster_topology();
|
||||
|
||||
/* Move init over to a non-isolated CPU */
|
||||
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
|
||||
BUG();
|
||||
@@ -5932,6 +6041,8 @@ void __init sched_init(void)
|
||||
|
||||
wait_bit_init();
|
||||
|
||||
init_clusters();
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
|
||||
#endif
|
||||
@@ -6047,6 +6158,8 @@ void __init sched_init(void)
|
||||
rq->idle_stamp = 0;
|
||||
rq->avg_idle = 2*sysctl_sched_migration_cost;
|
||||
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
|
||||
rq->push_task = NULL;
|
||||
walt_sched_init_rq(rq);
|
||||
|
||||
INIT_LIST_HEAD(&rq->cfs_tasks);
|
||||
|
||||
@@ -6061,6 +6174,8 @@ void __init sched_init(void)
|
||||
atomic_set(&rq->nr_iowait, 0);
|
||||
}
|
||||
|
||||
BUG_ON(alloc_related_thread_groups());
|
||||
|
||||
set_load_weight(&init_task, false);
|
||||
|
||||
/*
|
||||
@@ -6076,6 +6191,7 @@ void __init sched_init(void)
|
||||
* when this runqueue becomes "idle".
|
||||
*/
|
||||
init_idle(current, smp_processor_id());
|
||||
init_new_task_load(current);
|
||||
|
||||
calc_load_update = jiffies + LOAD_FREQ;
|
||||
|
||||
@@ -7071,3 +7187,49 @@ const u32 sched_prio_to_wmult[40] = {
|
||||
};
|
||||
|
||||
#undef CREATE_TRACE_POINTS
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
/*
 * sched_exit() - Set EXITING_TASK_MARKER in task's ravg.demand field
 *
 * Stop accounting (exiting) task's future cpu usage
 *
 * We need this so that reset_all_window_stats() can function correctly.
 * reset_all_window_stats() depends on do_each_thread/for_each_thread task
 * iterators to reset *all* task's statistics. Exiting tasks however become
 * invisible to those iterators. sched_exit() is called on an exiting task prior
 * to being removed from task_list, which will let reset_all_window_stats()
 * function correctly.
 */
|
||||
void sched_exit(struct task_struct *p)
{
        struct rq_flags rf;
        struct rq *rq;
        u64 wallclock;

        sched_set_group_id(p, 0);

        rq = task_rq_lock(p, &rf);

        /* rq->curr == p */
        wallclock = sched_ktime_clock();
        update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
        dequeue_task(rq, p, 0);
        /*
         * task's contribution is already removed from the
         * cumulative window demand in dequeue. As the
         * task's stats are reset, the next enqueue does
         * not change the cumulative window demand.
         */
        reset_task_stats(p);
        p->ravg.mark_start = wallclock;
        p->ravg.sum_history[0] = EXITING_TASK_MARKER;

        enqueue_task(rq, p, 0);
        clear_ed_task(p, rq);
        task_rq_unlock(rq, p, &rf);
        free_task_load_ptrs(p);
}
#endif /* CONFIG_SCHED_WALT */
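
The sentinel written into sum_history[0] above is what the window accounting code later uses to recognize a task that is on its way out. A minimal user-space sketch of that marker pattern, with hypothetical names (walt_task, reset_all) standing in for the kernel structures:

#include <stdio.h>
#include <stdint.h>

#define RAVG_HIST_SIZE_MAX      5
#define EXITING_TASK_MARKER     0xdeaddead

struct walt_task {                      /* hypothetical stand-in for task_struct + ravg */
        uint32_t sum_history[RAVG_HIST_SIZE_MAX];
        uint32_t demand;
};

/* Reset window statistics for every task that is not marked as exiting. */
static void reset_all(struct walt_task *t, int n)
{
        for (int i = 0; i < n; i++) {
                if (t[i].sum_history[0] == EXITING_TASK_MARKER)
                        continue;       /* exiting task: stop accounting its usage */
                for (int j = 0; j < RAVG_HIST_SIZE_MAX; j++)
                        t[i].sum_history[j] = 0;
                t[i].demand = 0;
        }
}

int main(void)
{
        struct walt_task t[2] = {
                { { 10, 20, 30, 40, 50 }, 30 },
                { { 10, 20, 30, 40, 50 }, 30 },
        };

        t[1].sum_history[0] = EXITING_TASK_MARKER;      /* what sched_exit() does */
        reset_all(t, 2);
        printf("t0.demand=%u t1.sum_history[0]=%#x\n", t[0].demand, t[1].sum_history[0]);
        return 0;
}
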
|
||||
|
||||
__read_mostly bool sched_predl = 1;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
*/
|
||||
#include <linux/cpufreq_times.h>
|
||||
#include "sched.h"
|
||||
#include "walt.h"
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
|
||||
@@ -52,11 +53,18 @@ void irqtime_account_irq(struct task_struct *curr)
|
||||
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
|
||||
s64 delta;
|
||||
int cpu;
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
u64 wallclock;
|
||||
bool account = true;
|
||||
#endif
|
||||
|
||||
if (!sched_clock_irqtime)
|
||||
return;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
wallclock = sched_clock_cpu(cpu);
|
||||
#endif
|
||||
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
|
||||
irqtime->irq_start_time += delta;
|
||||
|
||||
@@ -70,6 +78,15 @@ void irqtime_account_irq(struct task_struct *curr)
|
||||
irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
|
||||
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
|
||||
irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
else
|
||||
account = false;
|
||||
|
||||
if (account)
|
||||
sched_account_irqtime(cpu, curr, delta, wallclock);
|
||||
else if (curr != this_cpu_ksoftirqd())
|
||||
sched_account_irqstart(cpu, curr, wallclock);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irqtime_account_irq);
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
*/
|
||||
#include "sched.h"
|
||||
#include "pelt.h"
|
||||
#include "walt.h"
|
||||
|
||||
struct dl_bandwidth def_dl_bandwidth;
|
||||
|
||||
@@ -1348,6 +1349,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||
WARN_ON(!dl_prio(prio));
|
||||
dl_rq->dl_nr_running++;
|
||||
add_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
|
||||
|
||||
inc_dl_deadline(dl_rq, deadline);
|
||||
inc_dl_migration(dl_se, dl_rq);
|
||||
@@ -1362,6 +1364,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
|
||||
WARN_ON(!dl_rq->dl_nr_running);
|
||||
dl_rq->dl_nr_running--;
|
||||
sub_nr_running(rq_of_dl_rq(dl_rq), 1);
|
||||
walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
|
||||
|
||||
dec_dl_deadline(dl_rq, dl_se->deadline);
|
||||
dec_dl_migration(dl_se, dl_rq);
|
||||
@@ -2093,7 +2096,9 @@ static int push_dl_task(struct rq *rq)
|
||||
deactivate_task(rq, next_task, 0);
|
||||
sub_running_bw(&next_task->dl, &rq->dl);
|
||||
sub_rq_bw(&next_task->dl, &rq->dl);
|
||||
next_task->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
set_task_cpu(next_task, later_rq->cpu);
|
||||
next_task->on_rq = TASK_ON_RQ_QUEUED;
|
||||
add_rq_bw(&next_task->dl, &later_rq->dl);
|
||||
|
||||
/*
|
||||
@@ -2191,7 +2196,9 @@ static void pull_dl_task(struct rq *this_rq)
|
||||
deactivate_task(src_rq, p, 0);
|
||||
sub_running_bw(&p->dl, &src_rq->dl);
|
||||
sub_rq_bw(&p->dl, &src_rq->dl);
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
set_task_cpu(p, this_cpu);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
add_rq_bw(&p->dl, &this_rq->dl);
|
||||
add_running_bw(&p->dl, &this_rq->dl);
|
||||
activate_task(this_rq, p, 0);
|
||||
@@ -2425,6 +2432,9 @@ const struct sched_class dl_sched_class = {
|
||||
.switched_to = switched_to_dl,
|
||||
|
||||
.update_curr = update_curr_dl,
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
|
||||
#endif
|
||||
};
|
||||
|
||||
int sched_dl_global_validate(void)
|
||||
|
||||
@@ -24,6 +24,41 @@
|
||||
|
||||
#include <trace/events/sched.h>
|
||||
|
||||
#include "walt.h"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline bool task_fits_max(struct task_struct *p, int cpu);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
|
||||
u16 updated_demand_scaled,
|
||||
u16 updated_pred_demand_scaled);
|
||||
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_CFS_BANDWIDTH)
|
||||
|
||||
static void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq);
|
||||
static void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq,
|
||||
struct task_struct *p);
|
||||
static void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq,
|
||||
struct task_struct *p);
|
||||
static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
|
||||
struct cfs_rq *cfs_rq);
|
||||
static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
|
||||
struct cfs_rq *cfs_rq);
|
||||
#else
|
||||
static inline void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq) {}
|
||||
static inline void
|
||||
walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
|
||||
static inline void
|
||||
walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {}
|
||||
|
||||
#define walt_inc_throttled_cfs_rq_stats(...)
|
||||
#define walt_dec_throttled_cfs_rq_stats(...)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Targeted preemption latency for CPU-bound tasks:
|
||||
*
|
||||
@@ -95,6 +130,14 @@ unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
|
||||
unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
|
||||
|
||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
DEFINE_PER_CPU_READ_MOSTLY(int, sched_load_boost);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
unsigned int sysctl_sched_use_walt_cpu_util = 1;
|
||||
unsigned int sysctl_sched_use_walt_task_util = 1;
|
||||
__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
|
||||
(10 * NSEC_PER_MSEC);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
@@ -128,6 +171,13 @@ unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
|
||||
*/
|
||||
unsigned int capacity_margin = 1280;
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
/* 1ms default for 20ms window size scaled to 1024 */
|
||||
unsigned int sysctl_sched_min_task_util_for_boost = 51;
|
||||
/* 0.68ms default for 20ms window size scaled to 1024 */
|
||||
unsigned int sysctl_sched_min_task_util_for_colocation = 35;
|
||||
#endif
|
||||
|
||||
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
|
||||
{
|
||||
lw->weight += inc;
|
||||
@@ -3637,11 +3687,6 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
|
||||
|
||||
static int idle_balance(struct rq *this_rq, struct rq_flags *rf);
|
||||
|
||||
static inline unsigned long task_util(struct task_struct *p)
|
||||
{
|
||||
return READ_ONCE(p->se.avg.util_avg);
|
||||
}
|
||||
|
||||
static inline unsigned long _task_util_est(struct task_struct *p)
|
||||
{
|
||||
struct util_est ue = READ_ONCE(p->se.avg.util_est);
|
||||
@@ -3651,6 +3696,10 @@ static inline unsigned long _task_util_est(struct task_struct *p)
|
||||
|
||||
static inline unsigned long task_util_est(struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
if (likely(!walt_disabled && sysctl_sched_use_walt_task_util))
|
||||
return p->ravg.demand_scaled;
|
||||
#endif
|
||||
return max(task_util(p), _task_util_est(p));
|
||||
}
|
||||
|
||||
@@ -4515,13 +4564,16 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
if (dequeue)
|
||||
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
|
||||
qcfs_rq->h_nr_running -= task_delta;
|
||||
walt_dec_throttled_cfs_rq_stats(&qcfs_rq->walt_stats, cfs_rq);
|
||||
|
||||
if (qcfs_rq->load.weight)
|
||||
dequeue = 0;
|
||||
}
|
||||
|
||||
if (!se)
|
||||
if (!se) {
|
||||
sub_nr_running(rq, task_delta);
|
||||
walt_dec_throttled_cfs_rq_stats(&rq->walt_stats, cfs_rq);
|
||||
}
|
||||
|
||||
cfs_rq->throttled = 1;
|
||||
cfs_rq->throttled_clock = rq_clock(rq);
|
||||
@@ -4555,6 +4607,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
struct sched_entity *se;
|
||||
int enqueue = 1;
|
||||
long task_delta;
|
||||
struct cfs_rq *tcfs_rq __maybe_unused = cfs_rq;
|
||||
|
||||
se = cfs_rq->tg->se[cpu_of(rq)];
|
||||
|
||||
@@ -4582,13 +4635,16 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
if (enqueue)
|
||||
enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
|
||||
cfs_rq->h_nr_running += task_delta;
|
||||
walt_inc_throttled_cfs_rq_stats(&cfs_rq->walt_stats, tcfs_rq);
|
||||
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
}
|
||||
|
||||
if (!se)
|
||||
if (!se) {
|
||||
add_nr_running(rq, task_delta);
|
||||
walt_inc_throttled_cfs_rq_stats(&rq->walt_stats, tcfs_rq);
|
||||
}
|
||||
|
||||
/* Determine whether we need to wake up potentially idle CPU: */
|
||||
if (rq->curr == rq->idle && rq->cfs.nr_running)
|
||||
@@ -4943,6 +4999,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
cfs_rq->runtime_enabled = 0;
|
||||
INIT_LIST_HEAD(&cfs_rq->throttled_list);
|
||||
walt_init_cfs_rq_stats(cfs_rq);
|
||||
}
|
||||
|
||||
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
||||
@@ -4984,8 +5041,6 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
|
||||
struct task_group *tg;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(tg, &task_groups, list) {
|
||||
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
|
||||
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
|
||||
@@ -5125,7 +5180,6 @@ static inline void hrtick_update(struct rq *rq)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline unsigned long cpu_util(int cpu);
|
||||
static unsigned long capacity_of(int cpu);
|
||||
|
||||
static inline bool cpu_overutilized(int cpu)
|
||||
@@ -5204,6 +5258,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
cfs_rq->h_nr_running++;
|
||||
walt_inc_cfs_rq_stats(cfs_rq, p);
|
||||
|
||||
flags = ENQUEUE_WAKEUP;
|
||||
}
|
||||
@@ -5211,6 +5266,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
for_each_sched_entity(se) {
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
cfs_rq->h_nr_running++;
|
||||
walt_inc_cfs_rq_stats(cfs_rq, p);
|
||||
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
@@ -5221,6 +5277,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
|
||||
if (!se) {
|
||||
add_nr_running(rq, 1);
|
||||
inc_rq_walt_stats(rq, p);
|
||||
/*
|
||||
* Since new tasks are assigned an initial util_avg equal to
|
||||
* half of the spare capacity of their CPU, tiny tasks have the
|
||||
@@ -5237,7 +5294,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
*/
|
||||
if (flags & ENQUEUE_WAKEUP)
|
||||
update_overutilized_status(rq);
|
||||
|
||||
}
|
||||
|
||||
hrtick_update(rq);
|
||||
@@ -5277,6 +5333,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
cfs_rq->h_nr_running--;
|
||||
walt_dec_cfs_rq_stats(cfs_rq, p);
|
||||
|
||||
/* Don't dequeue parent if it has other entities besides us */
|
||||
if (cfs_rq->load.weight) {
|
||||
@@ -5296,6 +5353,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
for_each_sched_entity(se) {
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
cfs_rq->h_nr_running--;
|
||||
walt_dec_cfs_rq_stats(cfs_rq, p);
|
||||
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
@@ -5304,8 +5362,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
update_cfs_group(se);
|
||||
}
|
||||
|
||||
if (!se)
|
||||
if (!se) {
|
||||
sub_nr_running(rq, 1);
|
||||
dec_rq_walt_stats(rq, p);
|
||||
}
|
||||
|
||||
util_est_dequeue(&rq->cfs, p, task_sleep);
|
||||
hrtick_update(rq);
|
||||
@@ -5622,16 +5682,6 @@ static unsigned long target_load(int cpu, int type)
|
||||
return max(rq->cpu_load[type-1], total);
|
||||
}
|
||||
|
||||
static unsigned long capacity_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity;
|
||||
}
|
||||
|
||||
static unsigned long capacity_orig_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity_orig;
|
||||
}
|
||||
|
||||
static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
@@ -5661,6 +5711,15 @@ static void record_wakee(struct task_struct *p)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Externally visible function. Let's keep the one above
|
||||
* so that the check is inlined/optimized in the sched paths.
|
||||
*/
|
||||
bool sched_is_energy_aware(void)
|
||||
{
|
||||
return energy_aware();
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
|
||||
*
|
||||
@@ -6376,58 +6435,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
|
||||
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
|
||||
* @cpu: the CPU to get the utilization of
|
||||
*
|
||||
* The unit of the return value must be the one of capacity so we can compare
|
||||
* the utilization with the capacity of the CPU that is available for CFS task
|
||||
* (ie cpu_capacity).
|
||||
*
|
||||
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
|
||||
* recent utilization of currently non-runnable tasks on a CPU. It represents
|
||||
* the amount of utilization of a CPU in the range [0..capacity_orig] where
|
||||
* capacity_orig is the cpu_capacity available at the highest frequency
|
||||
* (arch_scale_freq_capacity()).
|
||||
* The utilization of a CPU converges towards a sum equal to or less than the
|
||||
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
|
||||
* the running time on this CPU scaled by capacity_curr.
|
||||
*
|
||||
* The estimated utilization of a CPU is defined to be the maximum between its
|
||||
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
|
||||
* currently RUNNABLE on that CPU.
|
||||
* This allows to properly represent the expected utilization of a CPU which
|
||||
* has just got a big task running since a long sleep period. At the same time
|
||||
* however it preserves the benefits of the "blocked utilization" in
|
||||
* describing the potential for other tasks waking up on the same CPU.
|
||||
*
|
||||
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
|
||||
* higher than capacity_orig because of unfortunate rounding in
|
||||
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
|
||||
* the average stabilizes with the new running time. We need to check that the
|
||||
* utilization stays within the range of [0..capacity_orig] and cap it if
|
||||
* necessary. Without utilization capping, a group could be seen as overloaded
|
||||
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
|
||||
* available capacity. We allow utilization to overshoot capacity_curr (but not
|
||||
* capacity_orig) as it is useful for predicting the capacity required after task
|
||||
* migrations (scheduler-driven DVFS).
|
||||
*
|
||||
* Return: the (estimated) utilization for the specified CPU
|
||||
*/
|
||||
static inline unsigned long cpu_util(int cpu)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
unsigned int util;
|
||||
|
||||
cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
util = READ_ONCE(cfs_rq->avg.util_avg);
|
||||
|
||||
if (sched_feat(UTIL_EST))
|
||||
util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
|
||||
|
||||
return min_t(unsigned long, util, capacity_orig_of(cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu_util_without: compute cpu utilization without any contributions from *p
|
||||
* @cpu: the CPU which utilization is requested
|
||||
@@ -6443,13 +6450,30 @@ static inline unsigned long cpu_util(int cpu)
|
||||
*/
|
||||
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
|
||||
{
|
||||
#ifndef CONFIG_SCHED_WALT
|
||||
struct cfs_rq *cfs_rq;
|
||||
#endif
|
||||
unsigned int util;
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
/*
|
||||
* WALT does not decay idle tasks in the same manner
|
||||
* as PELT, so it makes little sense to subtract task
|
||||
* utilization from cpu utilization. Instead just use
|
||||
* cpu_util for this case.
|
||||
*/
|
||||
if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util) &&
|
||||
p->state == TASK_WAKING)
|
||||
return cpu_util(cpu);
|
||||
#endif
|
||||
|
||||
/* Task has no contribution or is new */
|
||||
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
|
||||
return cpu_util(cpu);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
util = max_t(long, cpu_util(cpu) - task_util(p), 0);
|
||||
#else
|
||||
cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
util = READ_ONCE(cfs_rq->avg.util_avg);
|
||||
|
||||
@@ -6509,6 +6533,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
|
||||
}
|
||||
util = max(util, estimated);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Utilization (estimated) can exceed the CPU capacity, thus let's
|
||||
@@ -6574,6 +6599,9 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
|
||||
if (!cpu_online(i))
|
||||
continue;
|
||||
|
||||
if (sched_cpu_high_irqload(i))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* p's blocked utilization is still accounted for on prev_cpu
|
||||
* so prev_cpu will receive a negative bias due to the double
|
||||
@@ -7974,7 +8002,11 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
|
||||
double_lock_balance(env->src_rq, env->dst_rq);
|
||||
if (!(env->src_rq->clock_update_flags & RQCF_UPDATED))
|
||||
update_rq_clock(env->src_rq);
|
||||
set_task_cpu(p, env->dst_cpu);
|
||||
double_unlock_balance(env->src_rq, env->dst_rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -11164,6 +11196,10 @@ const struct sched_class fair_sched_class = {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
.task_change_group = task_change_group_fair,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
.fixup_walt_sched_stats = walt_fixup_sched_stats_fair,
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
@@ -11211,3 +11247,336 @@ __init void init_sched_fair_class(void)
|
||||
#endif /* SMP */
|
||||
|
||||
}
|
||||
|
||||
/* WALT sched implementation begins here */
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
|
||||
static void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq)
{
        cfs_rq->walt_stats.nr_big_tasks = 0;
        cfs_rq->walt_stats.cumulative_runnable_avg_scaled = 0;
        cfs_rq->walt_stats.pred_demands_sum_scaled = 0;
}

static void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p)
{
        inc_nr_big_task(&cfs_rq->walt_stats, p);
        fixup_cumulative_runnable_avg(&cfs_rq->walt_stats,
                                      p->ravg.demand_scaled,
                                      p->ravg.pred_demand_scaled);
}

static void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p)
{
        dec_nr_big_task(&cfs_rq->walt_stats, p);
        fixup_cumulative_runnable_avg(&cfs_rq->walt_stats,
                                      -(s64)p->ravg.demand_scaled,
                                      -(s64)p->ravg.pred_demand_scaled);
}
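
walt_inc_cfs_rq_stats() and walt_dec_cfs_rq_stats() above keep per-cfs_rq running sums of scaled demand and predicted demand by adding a task's values on enqueue and subtracting the same values on dequeue. A small stand-alone sketch of that signed fixup, with a hypothetical walt_stats struct mirroring the fields used here:

#include <stdio.h>
#include <stdint.h>

struct walt_stats {                     /* hypothetical mirror of walt_sched_stats */
        int64_t cumulative_runnable_avg_scaled;
        int64_t pred_demands_sum_scaled;
};

/* Apply a signed delta to both sums; negative deltas model a dequeue. */
static void fixup(struct walt_stats *s, int64_t demand_delta, int64_t pred_delta)
{
        s->cumulative_runnable_avg_scaled += demand_delta;
        s->pred_demands_sum_scaled += pred_delta;
}

int main(void)
{
        struct walt_stats s = { 0, 0 };
        uint16_t demand_scaled = 300, pred_demand_scaled = 350;

        fixup(&s, demand_scaled, pred_demand_scaled);                           /* enqueue */
        fixup(&s, -(int64_t)demand_scaled, -(int64_t)pred_demand_scaled);       /* dequeue */
        printf("cra=%lld pred=%lld\n",
               (long long)s.cumulative_runnable_avg_scaled,
               (long long)s.pred_demands_sum_scaled);
        return 0;
}
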
|
||||
|
||||
static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
|
||||
struct cfs_rq *tcfs_rq)
|
||||
{
|
||||
struct rq *rq = rq_of(tcfs_rq);
|
||||
|
||||
stats->nr_big_tasks += tcfs_rq->walt_stats.nr_big_tasks;
|
||||
fixup_cumulative_runnable_avg(stats,
|
||||
tcfs_rq->walt_stats.cumulative_runnable_avg_scaled,
|
||||
tcfs_rq->walt_stats.pred_demands_sum_scaled);
|
||||
|
||||
if (stats == &rq->walt_stats)
|
||||
walt_fixup_cum_window_demand(rq,
|
||||
tcfs_rq->walt_stats.cumulative_runnable_avg_scaled);
|
||||
|
||||
}
|
||||
|
||||
static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
|
||||
struct cfs_rq *tcfs_rq)
|
||||
{
|
||||
struct rq *rq = rq_of(tcfs_rq);
|
||||
|
||||
stats->nr_big_tasks -= tcfs_rq->walt_stats.nr_big_tasks;
|
||||
fixup_cumulative_runnable_avg(stats,
|
||||
-tcfs_rq->walt_stats.cumulative_runnable_avg_scaled,
|
||||
-tcfs_rq->walt_stats.pred_demands_sum_scaled);
|
||||
|
||||
/*
|
||||
* We remove the throttled cfs_rq's tasks' contribution from the
|
||||
* cumulative window demand so that the same can be added
|
||||
* unconditionally when the cfs_rq is unthrottled.
|
||||
*/
|
||||
if (stats == &rq->walt_stats)
|
||||
walt_fixup_cum_window_demand(rq,
|
||||
-tcfs_rq->walt_stats.cumulative_runnable_avg_scaled);
|
||||
}
|
||||
|
||||
static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
                                        u16 updated_demand_scaled,
                                        u16 updated_pred_demand_scaled)
{
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
        s64 task_load_delta = (s64)updated_demand_scaled -
                              p->ravg.demand_scaled;
        s64 pred_demand_delta = (s64)updated_pred_demand_scaled -
                                p->ravg.pred_demand_scaled;

        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);

                fixup_cumulative_runnable_avg(&cfs_rq->walt_stats,
                                              task_load_delta,
                                              pred_demand_delta);
                if (cfs_rq_throttled(cfs_rq))
                        break;
        }

        /* Fix up rq->walt_stats only if we didn't find any throttled cfs_rq */
        if (!se) {
                fixup_cumulative_runnable_avg(&rq->walt_stats,
                                              task_load_delta,
                                              pred_demand_delta);
                walt_fixup_cum_window_demand(rq, task_load_delta);
        }
}
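
walt_fixup_sched_stats_fair() above turns an updated demand into a single delta against the task's current values and applies that delta at every cfs_rq level, stopping once a throttled level is reached. The same delta idea on plain integers, with the hierarchy modelled as an array of per-level sums (names and values are illustrative):

#include <stdio.h>
#include <stdint.h>

#define NR_LEVELS 3

/* Per-level cumulative demand sums, innermost cfs_rq first. */
static int64_t level_sum[NR_LEVELS] = { 1000, 4000, 9000 };

/* Propagate a task's demand change as one delta up the hierarchy. */
static void fixup_levels(uint16_t old_demand, uint16_t new_demand, int throttled_level)
{
        int64_t delta = (int64_t)new_demand - old_demand;

        for (int i = 0; i < NR_LEVELS; i++) {
                level_sum[i] += delta;
                if (i == throttled_level)       /* like cfs_rq_throttled(): stop here */
                        break;
        }
}

int main(void)
{
        fixup_levels(200, 260, -1);             /* no throttled level: all sums move by +60 */
        for (int i = 0; i < NR_LEVELS; i++)
                printf("level %d sum %lld\n", i, (long long)level_sum[i]);
        return 0;
}
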
|
||||
|
||||
/*
 * Check if task is part of a hierarchy where some cfs_rq does not have any
 * runtime left.
 *
 * We can't rely on throttled_hierarchy() to do this test, as
 * cfs_rq->throttle_count will not be updated yet when this function is called
 * from scheduler_tick()
 */
static int task_will_be_throttled(struct task_struct *p)
{
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq;

        if (!cfs_bandwidth_used())
                return 0;

        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                if (!cfs_rq->runtime_enabled)
                        continue;
                if (cfs_rq->runtime_remaining <= 0)
                        return 1;
        }

        return 0;
}
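
task_will_be_throttled() walks the entity's cfs_rq chain and reports a pending throttle as soon as any bandwidth-enabled level is out of runtime. The same early-exit walk over a toy parent chain (struct and field names here are illustrative, not the kernel types):

#include <stdio.h>
#include <stdbool.h>

struct toy_cfs_rq {
        bool runtime_enabled;
        long long runtime_remaining;
        struct toy_cfs_rq *parent;
};

/* Return true if any bandwidth-enabled level in the chain is out of runtime. */
static bool will_be_throttled(struct toy_cfs_rq *cfs_rq)
{
        for (; cfs_rq; cfs_rq = cfs_rq->parent) {
                if (!cfs_rq->runtime_enabled)
                        continue;
                if (cfs_rq->runtime_remaining <= 0)
                        return true;
        }
        return false;
}

int main(void)
{
        struct toy_cfs_rq root = { true, 5000, NULL };
        struct toy_cfs_rq child = { true, 0, &root };   /* exhausted at this level */

        printf("throttled soon: %d\n", will_be_throttled(&child));
        return 0;
}
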
|
||||
|
||||
#else /* CONFIG_CFS_BANDWIDTH */
|
||||
|
||||
static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
|
||||
u16 updated_demand_scaled,
|
||||
u16 updated_pred_demand_scaled)
|
||||
{
|
||||
fixup_walt_sched_stats_common(rq, p, updated_demand_scaled,
|
||||
updated_pred_demand_scaled);
|
||||
}
|
||||
|
||||
static int task_will_be_throttled(struct task_struct *p)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CFS_BANDWIDTH */
|
||||
|
||||
static inline int
|
||||
kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
int rc = 0;
|
||||
|
||||
/* Invoke active balance to force migrate currently running task */
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
if (!rq->active_balance) {
|
||||
rq->active_balance = 1;
|
||||
rq->push_cpu = new_cpu;
|
||||
get_task_struct(p);
|
||||
rq->push_task = p;
|
||||
rc = 1;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
struct walt_rotate_work {
|
||||
struct work_struct w;
|
||||
struct task_struct *src_task;
|
||||
struct task_struct *dst_task;
|
||||
int src_cpu;
|
||||
int dst_cpu;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct walt_rotate_work, walt_rotate_works);
|
||||
|
||||
static void walt_rotate_work_func(struct work_struct *work)
|
||||
{
|
||||
struct walt_rotate_work *wr = container_of(work,
|
||||
struct walt_rotate_work, w);
|
||||
|
||||
migrate_swap(wr->src_task, wr->dst_task, wr->dst_cpu, wr->src_cpu);
|
||||
|
||||
put_task_struct(wr->src_task);
|
||||
put_task_struct(wr->dst_task);
|
||||
|
||||
clear_reserved(wr->src_cpu);
|
||||
clear_reserved(wr->dst_cpu);
|
||||
}
|
||||
|
||||
void walt_rotate_work_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct walt_rotate_work *wr = &per_cpu(walt_rotate_works, i);
|
||||
|
||||
INIT_WORK(&wr->w, walt_rotate_work_func);
|
||||
}
|
||||
}
|
||||
|
||||
#define WALT_ROTATION_THRESHOLD_NS 16000000
|
||||
static void walt_check_for_rotation(struct rq *src_rq)
|
||||
{
|
||||
u64 wc, wait, max_wait = 0, run, max_run = 0;
|
||||
int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
|
||||
int i, src_cpu = cpu_of(src_rq);
|
||||
struct rq *dst_rq;
|
||||
struct walt_rotate_work *wr = NULL;
|
||||
|
||||
if (!walt_rotation_enabled)
|
||||
return;
|
||||
|
||||
if (got_boost_kick())
|
||||
return;
|
||||
|
||||
if (!is_min_capacity_cpu(src_cpu))
|
||||
return;
|
||||
|
||||
wc = sched_ktime_clock();
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
|
||||
if (!is_min_capacity_cpu(i))
|
||||
break;
|
||||
|
||||
if (is_reserved(i))
|
||||
continue;
|
||||
|
||||
if (!rq->misfit_task_load || rq->curr->sched_class !=
|
||||
&fair_sched_class)
|
||||
continue;
|
||||
|
||||
wait = wc - rq->curr->last_enqueued_ts;
|
||||
if (wait > max_wait) {
|
||||
max_wait = wait;
|
||||
deserved_cpu = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (deserved_cpu != src_cpu)
|
||||
return;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
|
||||
if (is_min_capacity_cpu(i))
|
||||
continue;
|
||||
|
||||
if (is_reserved(i))
|
||||
continue;
|
||||
|
||||
if (rq->curr->sched_class != &fair_sched_class)
|
||||
continue;
|
||||
|
||||
if (rq->nr_running > 1)
|
||||
continue;
|
||||
|
||||
run = wc - rq->curr->last_enqueued_ts;
|
||||
|
||||
if (run < WALT_ROTATION_THRESHOLD_NS)
|
||||
continue;
|
||||
|
||||
if (run > max_run) {
|
||||
max_run = run;
|
||||
dst_cpu = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (dst_cpu == nr_cpu_ids)
|
||||
return;
|
||||
|
||||
dst_rq = cpu_rq(dst_cpu);
|
||||
|
||||
double_rq_lock(src_rq, dst_rq);
|
||||
if (dst_rq->curr->sched_class == &fair_sched_class) {
|
||||
get_task_struct(src_rq->curr);
|
||||
get_task_struct(dst_rq->curr);
|
||||
|
||||
mark_reserved(src_cpu);
|
||||
mark_reserved(dst_cpu);
|
||||
wr = &per_cpu(walt_rotate_works, src_cpu);
|
||||
|
||||
wr->src_task = src_rq->curr;
|
||||
wr->dst_task = dst_rq->curr;
|
||||
|
||||
wr->src_cpu = src_cpu;
|
||||
wr->dst_cpu = dst_cpu;
|
||||
}
|
||||
double_rq_unlock(src_rq, dst_rq);
|
||||
|
||||
if (wr)
|
||||
queue_work_on(src_cpu, system_highpri_wq, &wr->w);
|
||||
}
|
||||
#else
|
||||
static inline void walt_check_for_rotation(struct rq *rq)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static DEFINE_RAW_SPINLOCK(migration_lock);
|
||||
void check_for_migration(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
int active_balance;
|
||||
int new_cpu = -1;
|
||||
int cpu = smp_processor_id();
|
||||
int prev_cpu = task_cpu(p);
|
||||
|
||||
if (rq->misfit_task_load) {
|
||||
if (rq->curr->state != TASK_RUNNING ||
|
||||
rq->curr->nr_cpus_allowed == 1)
|
||||
return;
|
||||
|
||||
if (task_will_be_throttled(p))
|
||||
return;
|
||||
|
||||
raw_spin_lock(&migration_lock);
|
||||
rcu_read_lock();
|
||||
new_cpu = find_energy_efficient_cpu(p, prev_cpu, 0);
|
||||
rcu_read_unlock();
|
||||
if ((new_cpu != -1) &&
|
||||
(capacity_orig_of(new_cpu) > capacity_orig_of(cpu))) {
|
||||
active_balance = kick_active_balance(rq, p, new_cpu);
|
||||
if (active_balance) {
|
||||
mark_reserved(new_cpu);
|
||||
raw_spin_unlock(&migration_lock);
|
||||
stop_one_cpu_nowait(cpu,
|
||||
active_load_balance_cpu_stop, rq,
|
||||
&rq->active_balance_work);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
walt_check_for_rotation(rq);
|
||||
}
|
||||
raw_spin_unlock(&migration_lock);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "sched.h"
|
||||
|
||||
#include "pelt.h"
|
||||
#include "walt.h"
|
||||
|
||||
int sched_rr_timeslice = RR_TIMESLICE;
|
||||
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
||||
@@ -2409,6 +2410,9 @@ const struct sched_class rt_sched_class = {
|
||||
.switched_to = switched_to_rt,
|
||||
|
||||
.update_curr = update_curr_rt,
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
.fixup_walt_sched_stats = fixup_walt_sched_stats_common,
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
||||
@@ -86,6 +86,72 @@
|
||||
struct rq;
|
||||
struct cpuidle_state;
|
||||
|
||||
extern __read_mostly bool sched_predl;
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
extern unsigned int sched_ravg_window;
|
||||
extern unsigned int walt_cpu_util_freq_divisor;
|
||||
|
||||
struct walt_sched_stats {
|
||||
int nr_big_tasks;
|
||||
u64 cumulative_runnable_avg_scaled;
|
||||
u64 pred_demands_sum_scaled;
|
||||
};
|
||||
|
||||
struct cpu_cycle {
|
||||
u64 cycles;
|
||||
u64 time;
|
||||
};
|
||||
|
||||
struct group_cpu_time {
|
||||
u64 curr_runnable_sum;
|
||||
u64 prev_runnable_sum;
|
||||
u64 nt_curr_runnable_sum;
|
||||
u64 nt_prev_runnable_sum;
|
||||
};
|
||||
|
||||
struct load_subtractions {
|
||||
u64 window_start;
|
||||
u64 subs;
|
||||
u64 new_subs;
|
||||
};
|
||||
|
||||
#define NUM_TRACKED_WINDOWS 2
|
||||
#define NUM_LOAD_INDICES 1000
|
||||
|
||||
struct sched_cluster {
|
||||
raw_spinlock_t load_lock;
|
||||
struct list_head list;
|
||||
struct cpumask cpus;
|
||||
int id;
|
||||
int max_power_cost;
|
||||
int min_power_cost;
|
||||
int max_possible_capacity;
|
||||
int capacity;
|
||||
int efficiency; /* Differentiate cpus with different IPC capability */
|
||||
int load_scale_factor;
|
||||
unsigned int exec_scale_factor;
|
||||
/*
|
||||
* max_freq = user maximum
|
||||
* max_mitigated_freq = thermal defined maximum
|
||||
* max_possible_freq = maximum supported by hardware
|
||||
*/
|
||||
unsigned int cur_freq, max_freq, max_mitigated_freq, min_freq;
|
||||
unsigned int max_possible_freq;
|
||||
bool freq_init_done;
|
||||
int dstate, dstate_wakeup_latency, dstate_wakeup_energy;
|
||||
unsigned int static_cluster_pwr_cost;
|
||||
int notifier_sent;
|
||||
bool wake_up_idle;
|
||||
u64 aggr_grp_load;
|
||||
u64 coloc_boost_load;
|
||||
};
|
||||
|
||||
extern unsigned int sched_disable_window_stats;
|
||||
|
||||
extern struct timer_list sched_grp_timer;
|
||||
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
/* task_struct::on_rq states: */
|
||||
#define TASK_ON_RQ_QUEUED 1
|
||||
#define TASK_ON_RQ_MIGRATING 2
|
||||
@@ -556,6 +622,10 @@ struct cfs_rq {
|
||||
struct list_head leaf_cfs_rq_list;
|
||||
struct task_group *tg; /* group that "owns" this runqueue */
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
struct walt_sched_stats walt_stats;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
int runtime_enabled;
|
||||
int expires_seq;
|
||||
@@ -885,6 +955,7 @@ struct rq {
|
||||
/* For active balancing */
|
||||
int active_balance;
|
||||
int push_cpu;
|
||||
struct task_struct *push_task;
|
||||
struct cpu_stop_work active_balance_work;
|
||||
|
||||
/* CPU of this runqueue: */
|
||||
@@ -906,6 +977,42 @@ struct rq {
|
||||
u64 max_idle_balance_cost;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
struct sched_cluster *cluster;
|
||||
struct cpumask freq_domain_cpumask;
|
||||
struct walt_sched_stats walt_stats;
|
||||
|
||||
int cstate, wakeup_latency, wakeup_energy;
|
||||
u64 window_start;
|
||||
s64 cum_window_start;
|
||||
unsigned long walt_flags;
|
||||
|
||||
u64 cur_irqload;
|
||||
u64 avg_irqload;
|
||||
u64 irqload_ts;
|
||||
unsigned int static_cpu_pwr_cost;
|
||||
struct task_struct *ed_task;
|
||||
struct cpu_cycle cc;
|
||||
u64 old_busy_time, old_busy_time_group;
|
||||
u64 old_estimated_time;
|
||||
u64 curr_runnable_sum;
|
||||
u64 prev_runnable_sum;
|
||||
u64 nt_curr_runnable_sum;
|
||||
u64 nt_prev_runnable_sum;
|
||||
u64 cum_window_demand_scaled;
|
||||
struct group_cpu_time grp_time;
|
||||
struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
|
||||
DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
|
||||
NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
|
||||
u8 *top_tasks[NUM_TRACKED_WINDOWS];
|
||||
u8 curr_table;
|
||||
int prev_top;
|
||||
int curr_top;
|
||||
bool notif_pending;
|
||||
u64 last_cc_update;
|
||||
u64 cycles;
|
||||
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
u64 prev_irq_time;
|
||||
#endif
|
||||
@@ -1137,8 +1244,6 @@ enum numa_faults_stats {
|
||||
};
|
||||
extern void sched_setnuma(struct task_struct *p, int node);
|
||||
extern int migrate_task_to(struct task_struct *p, int cpu);
|
||||
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
|
||||
int cpu, int scpu);
|
||||
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
|
||||
#else
|
||||
static inline void
|
||||
@@ -1147,6 +1252,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
|
||||
}
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
||||
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
|
||||
int cpu, int scpu);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static inline void
|
||||
@@ -1602,8 +1710,15 @@ struct sched_class {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
void (*task_change_group)(struct task_struct *p, int type);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p,
|
||||
u16 updated_demand_scaled,
|
||||
u16 updated_pred_demand_scaled);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
prev->sched_class->put_prev_task(rq, prev);
|
||||
@@ -1661,6 +1776,10 @@ static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
|
||||
static inline int idle_get_state_idx(struct rq *rq)
|
||||
{
|
||||
WARN_ON(!rcu_read_lock_held());
|
||||
|
||||
if (rq->nr_running || cpu_of(rq) == raw_smp_processor_id())
|
||||
return -1;
|
||||
|
||||
return rq->idle_state_idx;
|
||||
}
|
||||
#else
|
||||
@@ -1807,6 +1926,15 @@ static inline int hrtick_enabled(struct rq *rq)
|
||||
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
u64 sched_ktime_clock(void);
|
||||
#else
|
||||
static inline u64 sched_ktime_clock(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_scale_freq_capacity
|
||||
static __always_inline
|
||||
unsigned long arch_scale_freq_capacity(int cpu)
|
||||
@@ -1824,6 +1952,193 @@ unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline unsigned long capacity_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity;
|
||||
}
|
||||
|
||||
static inline unsigned long capacity_orig_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity_orig;
|
||||
}
|
||||
|
||||
extern unsigned int sysctl_sched_use_walt_cpu_util;
|
||||
extern unsigned int walt_disabled;
|
||||
|
||||
static inline unsigned long task_util(struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
if (likely(!walt_disabled && sysctl_sched_use_walt_task_util))
|
||||
return p->ravg.demand_scaled;
|
||||
#endif
|
||||
return READ_ONCE(p->se.avg.util_avg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
|
||||
* @cpu: the CPU to get the utilization of
|
||||
*
|
||||
* The unit of the return value must be the one of capacity so we can compare
|
||||
* the utilization with the capacity of the CPU that is available for CFS task
|
||||
* (ie cpu_capacity).
|
||||
*
|
||||
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
|
||||
* recent utilization of currently non-runnable tasks on a CPU. It represents
|
||||
* the amount of utilization of a CPU in the range [0..capacity_orig] where
|
||||
* capacity_orig is the cpu_capacity available at the highest frequency
|
||||
* (arch_scale_freq_capacity()).
|
||||
* The utilization of a CPU converges towards a sum equal to or less than the
|
||||
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
|
||||
* the running time on this CPU scaled by capacity_curr.
|
||||
*
|
||||
* The estimated utilization of a CPU is defined to be the maximum between its
|
||||
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
|
||||
* currently RUNNABLE on that CPU.
|
||||
* This allows to properly represent the expected utilization of a CPU which
|
||||
* has just got a big task running since a long sleep period. At the same time
|
||||
* however it preserves the benefits of the "blocked utilization" in
|
||||
* describing the potential for other tasks waking up on the same CPU.
|
||||
*
|
||||
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
|
||||
* higher than capacity_orig because of unfortunate rounding in
|
||||
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
|
||||
* the average stabilizes with the new running time. We need to check that the
|
||||
* utilization stays within the range of [0..capacity_orig] and cap it if
|
||||
* necessary. Without utilization capping, a group could be seen as overloaded
|
||||
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
|
||||
* available capacity. We allow utilization to overshoot capacity_curr (but not
|
||||
* capacity_orig) as it is useful for predicting the capacity required after task
|
||||
* migrations (scheduler-driven DVFS).
|
||||
*
|
||||
* Return: the (estimated) utilization for the specified CPU
|
||||
*/
|
||||
static inline unsigned long cpu_util(int cpu)
{
        struct cfs_rq *cfs_rq;
        unsigned int util;

#ifdef CONFIG_SCHED_WALT
        if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util)) {
                u64 walt_cpu_util =
                        cpu_rq(cpu)->walt_stats.cumulative_runnable_avg_scaled;

                return min_t(unsigned long, walt_cpu_util,
                             capacity_orig_of(cpu));
        }
#endif

        cfs_rq = &cpu_rq(cpu)->cfs;
        util = READ_ONCE(cfs_rq->avg.util_avg);

        if (sched_feat(UTIL_EST))
                util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));

        return min_t(unsigned long, util, capacity_orig_of(cpu));
}
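
With WALT enabled, cpu_util() above reports the runqueue's cumulative scaled demand clamped to the CPU's original capacity, instead of the PELT util_avg/util_est pair. A small numeric sketch of that selection and clamping, with an assumed capacity of 1024 and made-up demand values:

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

/* Pick the utilization source, then cap it at capacity_orig. */
static unsigned long toy_cpu_util(int walt_enabled, unsigned long walt_cra_scaled,
                                  unsigned long util_avg, unsigned long util_est,
                                  unsigned long capacity_orig)
{
        if (walt_enabled)
                return min_ul(walt_cra_scaled, capacity_orig);
        return min_ul(max_ul(util_avg, util_est), capacity_orig);
}

int main(void)
{
        /* WALT demand above capacity gets clamped; PELT path takes max(avg, est). */
        printf("walt: %lu\n", toy_cpu_util(1, 1400, 0, 0, 1024));
        printf("pelt: %lu\n", toy_cpu_util(0, 0, 300, 450, 1024));
        return 0;
}
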
|
||||
|
||||
struct sched_walt_cpu_load {
|
||||
unsigned long prev_window_util;
|
||||
unsigned long nl;
|
||||
unsigned long pl;
|
||||
u64 ws;
|
||||
};
|
||||
|
||||
static inline unsigned long cpu_util_cum(int cpu, int delta)
|
||||
{
|
||||
u64 util = cpu_rq(cpu)->cfs.avg.util_avg;
|
||||
unsigned long capacity = capacity_orig_of(cpu);
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
|
||||
util = cpu_rq(cpu)->cum_window_demand_scaled;
|
||||
#endif
|
||||
delta += util;
|
||||
if (delta < 0)
|
||||
return 0;
|
||||
|
||||
return (delta >= capacity) ? capacity : delta;
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
u64 freq_policy_load(struct rq *rq);
|
||||
|
||||
extern u64 walt_load_reported_window;
|
||||
|
||||
static inline unsigned long
cpu_util_freq_walt(int cpu, struct sched_walt_cpu_load *walt_load)
{
        u64 util, util_unboosted;
        struct rq *rq = cpu_rq(cpu);
        unsigned long capacity = capacity_orig_of(cpu);
        int boost;

        if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
                return cpu_util(cpu);

        boost = per_cpu(sched_load_boost, cpu);
        util_unboosted = util = freq_policy_load(rq);
        util = div64_u64(util * (100 + boost),
                         walt_cpu_util_freq_divisor);

        if (walt_load) {
                u64 nl = cpu_rq(cpu)->nt_prev_runnable_sum +
                                rq->grp_time.nt_prev_runnable_sum;
                u64 pl = rq->walt_stats.pred_demands_sum_scaled;

                /* do_pl_notif() needs unboosted signals */
                rq->old_busy_time = div64_u64(util_unboosted,
                                              sched_ravg_window >>
                                              SCHED_CAPACITY_SHIFT);
                rq->old_estimated_time = pl;

                nl = div64_u64(nl * (100 + boost),
                               walt_cpu_util_freq_divisor);
                pl = div64_u64(pl * (100 + boost), 100);

                walt_load->prev_window_util = util;
                walt_load->nl = nl;
                walt_load->pl = pl;
                walt_load->ws = walt_load_reported_window;
        }

        return (util >= capacity) ? capacity : util;
}
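
The frequency-guidance value above is the window load scaled by a per-CPU boost percentage and divided by walt_cpu_util_freq_divisor. Assuming the divisor is set up elsewhere as (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100, a CPU that was busy for half of a 20 ms window comes out near 512 on the 1024 capacity scale; the numbers below are only illustrative:

#include <stdio.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT    10

int main(void)
{
        uint64_t sched_ravg_window = 20000000ULL;       /* 20 ms window, illustrative */
        uint64_t divisor = (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100;
        uint64_t freq_policy_load = 10000000ULL;        /* CPU busy for half the window */
        int boost;

        for (boost = 0; boost <= 10; boost += 10) {
                uint64_t util = freq_policy_load * (100 + boost) / divisor;

                /* boost=0 -> ~512/1024; boost=10 -> ~563/1024 */
                printf("boost=%d%% util=%llu\n", boost, (unsigned long long)util);
        }
        return 0;
}
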
|
||||
|
||||
static inline unsigned long
|
||||
cpu_util_freq(int cpu, struct sched_walt_cpu_load *walt_load)
|
||||
{
|
||||
return cpu_util_freq_walt(cpu, walt_load);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline unsigned long
|
||||
cpu_util_freq(int cpu, struct sched_walt_cpu_load *walt_load)
|
||||
{
|
||||
return cpu_util(cpu);
|
||||
}
|
||||
|
||||
#define sched_ravg_window TICK_NSEC
|
||||
#define sysctl_sched_use_walt_cpu_util 0
|
||||
|
||||
#endif /* CONFIG_SCHED_WALT */
|
||||
|
||||
extern unsigned int capacity_margin_freq;
|
||||
|
||||
static inline unsigned long
add_capacity_margin(unsigned long cpu_capacity, int cpu)
{
        cpu_capacity = cpu_capacity * capacity_margin_freq *
                        (100 + per_cpu(sched_load_boost, cpu));
        cpu_capacity /= 100;
        cpu_capacity /= SCHED_CAPACITY_SCALE;
        return cpu_capacity;
}
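
add_capacity_margin() inflates a capacity by the frequency margin and the per-CPU load boost before it is compared elsewhere. A worked example that assumes a capacity_margin_freq of 1280 (roughly a 25% margin on the 1024 scale) and treats sched_load_boost as a plain percentage; the inputs are illustrative:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE    1024UL

/* Same arithmetic as add_capacity_margin(), on plain parameters. */
static unsigned long with_margin(unsigned long cap, unsigned long margin_freq, int boost)
{
        cap = cap * margin_freq * (100 + boost);
        cap /= 100;
        cap /= SCHED_CAPACITY_SCALE;
        return cap;
}

int main(void)
{
        unsigned long capacity_margin_freq = 1280;      /* illustrative ~25% margin */

        printf("800 -> %lu (no boost)\n", with_margin(800, capacity_margin_freq, 0));
        printf("800 -> %lu (10%% boost)\n", with_margin(800, capacity_margin_freq, 10));
        return 0;
}
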
|
||||
|
||||
#endif
|
||||
|
||||
struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
|
||||
__acquires(rq->lock);
|
||||
|
||||
@@ -2218,6 +2533,11 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
|
||||
{
|
||||
struct update_util_data *data;
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
if (!(flags & SCHED_CPUFREQ_WALT))
|
||||
return;
|
||||
#endif
|
||||
|
||||
data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
|
||||
cpu_of(rq)));
|
||||
if (data)
|
||||
@@ -2333,3 +2653,601 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
|
||||
#ifdef CONFIG_SMP
|
||||
extern struct static_key_false sched_energy_present;
|
||||
#endif
|
||||
|
||||
enum sched_boost_policy {
|
||||
SCHED_BOOST_NONE,
|
||||
SCHED_BOOST_ON_BIG,
|
||||
SCHED_BOOST_ON_ALL,
|
||||
};
|
||||
|
||||
#define NO_BOOST 0
|
||||
#define FULL_THROTTLE_BOOST 1
|
||||
#define CONSERVATIVE_BOOST 2
|
||||
#define RESTRAINED_BOOST 3
|
||||
|
||||
/*
|
||||
* Returns the rq capacity of any rq in a group. This does not play
|
||||
* well with groups where rq capacity can change independently.
|
||||
*/
|
||||
#define group_rq_capacity(group) cpu_capacity(group_first_cpu(group))
|
||||
|
||||
#ifdef CONFIG_SCHED_WALT
|
||||
|
||||
static inline int cluster_first_cpu(struct sched_cluster *cluster)
|
||||
{
|
||||
return cpumask_first(&cluster->cpus);
|
||||
}
|
||||
|
||||
struct related_thread_group {
|
||||
int id;
|
||||
raw_spinlock_t lock;
|
||||
struct list_head tasks;
|
||||
struct list_head list;
|
||||
struct sched_cluster *preferred_cluster;
|
||||
struct rcu_head rcu;
|
||||
u64 last_update;
|
||||
};
|
||||
|
||||
extern struct list_head cluster_head;
|
||||
extern struct sched_cluster *sched_cluster[NR_CPUS];
|
||||
|
||||
#define for_each_sched_cluster(cluster) \
|
||||
list_for_each_entry_rcu(cluster, &cluster_head, list)
|
||||
|
||||
#define WINDOW_STATS_RECENT 0
|
||||
#define WINDOW_STATS_MAX 1
|
||||
#define WINDOW_STATS_MAX_RECENT_AVG 2
|
||||
#define WINDOW_STATS_AVG 3
|
||||
#define WINDOW_STATS_INVALID_POLICY 4
|
||||
|
||||
#define SCHED_UPMIGRATE_MIN_NICE 15
|
||||
#define EXITING_TASK_MARKER 0xdeaddead
|
||||
|
||||
#define UP_MIGRATION 1
|
||||
#define DOWN_MIGRATION 2
|
||||
#define IRQLOAD_MIGRATION 3
|
||||
|
||||
extern struct mutex policy_mutex;
|
||||
extern unsigned int sched_disable_window_stats;
|
||||
extern unsigned int max_possible_freq;
|
||||
extern unsigned int min_max_freq;
|
||||
extern unsigned int max_possible_efficiency;
|
||||
extern unsigned int min_possible_efficiency;
|
||||
extern unsigned int max_capacity;
|
||||
extern unsigned int min_capacity;
|
||||
extern unsigned int max_load_scale_factor;
|
||||
extern unsigned int max_possible_capacity;
|
||||
extern unsigned int min_max_possible_capacity;
|
||||
extern unsigned int max_power_cost;
|
||||
extern unsigned int __read_mostly sched_init_task_load_windows;
|
||||
extern unsigned int up_down_migrate_scale_factor;
|
||||
extern unsigned int sysctl_sched_restrict_cluster_spill;
|
||||
extern unsigned int sched_pred_alert_load;
|
||||
extern struct sched_cluster init_cluster;
|
||||
extern unsigned int __read_mostly sched_short_sleep_task_threshold;
|
||||
extern unsigned int __read_mostly sched_long_cpu_selection_threshold;
|
||||
extern unsigned int __read_mostly sched_big_waker_task_load;
|
||||
extern unsigned int __read_mostly sched_small_wakee_task_load;
|
||||
extern unsigned int __read_mostly sched_spill_load;
|
||||
extern unsigned int __read_mostly sched_upmigrate;
|
||||
extern unsigned int __read_mostly sched_downmigrate;
|
||||
extern unsigned int __read_mostly sysctl_sched_spill_nr_run;
|
||||
extern unsigned int __read_mostly sched_load_granule;
|
||||
|
||||
extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
|
||||
extern int update_preferred_cluster(struct related_thread_group *grp,
|
||||
struct task_struct *p, u32 old_load);
|
||||
extern void set_preferred_cluster(struct related_thread_group *grp);
|
||||
extern void add_new_task_to_grp(struct task_struct *new);
|
||||
extern unsigned int update_freq_aggregate_threshold(unsigned int threshold);
|
||||
|
||||
#define NO_BOOST 0
|
||||
#define FULL_THROTTLE_BOOST 1
|
||||
#define CONSERVATIVE_BOOST 2
|
||||
#define RESTRAINED_BOOST 3
|
||||
#define FULL_THROTTLE_BOOST_DISABLE -1
|
||||
#define CONSERVATIVE_BOOST_DISABLE -2
|
||||
#define RESTRAINED_BOOST_DISABLE -3
|
||||
#define MAX_NUM_BOOST_TYPE (RESTRAINED_BOOST+1)
|
||||
|
||||
static inline int cpu_capacity(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->capacity;
|
||||
}
|
||||
|
||||
static inline int cpu_max_possible_capacity(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->max_possible_capacity;
|
||||
}
|
||||
|
||||
static inline int cpu_load_scale_factor(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->load_scale_factor;
|
||||
}
|
||||
|
||||
static inline int cpu_efficiency(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->efficiency;
|
||||
}
|
||||
|
||||
static inline unsigned int cpu_min_freq(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->min_freq;
|
||||
}
|
||||
|
||||
static inline unsigned int cluster_max_freq(struct sched_cluster *cluster)
|
||||
{
|
||||
/*
|
||||
* Governor and thermal driver don't know the other party's mitigation
|
||||
* voting. So struct cluster saves both and return min() for current
|
||||
* cluster fmax.
|
||||
*/
|
||||
return min(cluster->max_mitigated_freq, cluster->max_freq);
|
||||
}
|
||||
|
||||
static inline unsigned int cpu_max_freq(int cpu)
|
||||
{
|
||||
return cluster_max_freq(cpu_rq(cpu)->cluster);
|
||||
}
|
||||
|
||||
static inline unsigned int cpu_max_possible_freq(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->max_possible_freq;
|
||||
}
|
||||
|
||||
/* Keep track of max/min capacity possible across CPUs "currently" */
|
||||
static inline void __update_min_max_capacity(void)
|
||||
{
|
||||
int i;
|
||||
int max_cap = 0, min_cap = INT_MAX;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
max_cap = max(max_cap, cpu_capacity(i));
|
||||
min_cap = min(min_cap, cpu_capacity(i));
|
||||
}
|
||||
|
||||
max_capacity = max_cap;
|
||||
min_capacity = min_cap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return load_scale_factor of a cpu in reference to "most" efficient cpu, so
|
||||
* that "most" efficient cpu gets a load_scale_factor of 1
|
||||
*/
|
||||
static inline unsigned long
|
||||
load_scale_cpu_efficiency(struct sched_cluster *cluster)
|
||||
{
|
||||
return DIV_ROUND_UP(1024 * max_possible_efficiency,
|
||||
cluster->efficiency);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return load_scale_factor of a cpu in reference to cpu with best max_freq
|
||||
* (max_possible_freq), so that one with best max_freq gets a load_scale_factor
|
||||
* of 1.
|
||||
*/
|
||||
static inline unsigned long load_scale_cpu_freq(struct sched_cluster *cluster)
|
||||
{
|
||||
return DIV_ROUND_UP(1024 * max_possible_freq,
|
||||
cluster_max_freq(cluster));
|
||||
}
|
||||
|
||||
static inline int compute_load_scale_factor(struct sched_cluster *cluster)
{
        int load_scale = 1024;

        /*
         * load_scale_factor accounts for the fact that task load
         * is in reference to "best" performing cpu. Task's load will need to be
         * scaled (up) by a factor to determine suitability to be placed on a
         * (little) cpu.
         */
        load_scale *= load_scale_cpu_efficiency(cluster);
        load_scale >>= 10;

        load_scale *= load_scale_cpu_freq(cluster);
        load_scale >>= 10;

        return load_scale;
}
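
compute_load_scale_factor() multiplies two 1024-based ratios: how much less efficient the cluster is than the most efficient one, and how much lower its ceiling frequency is than the best available max frequency. With an illustrative little cluster at half the efficiency and a 1804 MHz ceiling against a 2803 MHz best, the factor lands at about 3182, i.e. a task looks roughly 3.1x heavier there:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        /* Illustrative platform numbers, not taken from any real device tree. */
        long long max_possible_efficiency = 2048, cluster_efficiency = 1024;
        long long max_possible_freq = 2803200, cluster_max_freq = 1804800;      /* kHz */

        long long load_scale = 1024;

        load_scale *= DIV_ROUND_UP(1024 * max_possible_efficiency, cluster_efficiency);
        load_scale >>= 10;

        load_scale *= DIV_ROUND_UP(1024 * max_possible_freq, cluster_max_freq);
        load_scale >>= 10;

        /* ~3182: a task's windowed load is scaled up ~3.1x on this cluster */
        printf("load_scale_factor = %lld\n", load_scale);
        return 0;
}
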
|
||||
|
||||
static inline int cpu_max_power_cost(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->max_power_cost;
|
||||
}
|
||||
|
||||
static inline int cpu_min_power_cost(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cluster->min_power_cost;
|
||||
}
|
||||
|
||||
static inline bool hmp_capable(void)
|
||||
{
|
||||
return max_possible_capacity != min_max_possible_capacity;
|
||||
}
|
||||
|
||||
static inline bool is_max_capacity_cpu(int cpu)
|
||||
{
|
||||
return cpu_max_possible_capacity(cpu) == max_possible_capacity;
|
||||
}
|
||||
|
||||
static inline bool is_min_capacity_cpu(int cpu)
|
||||
{
|
||||
return cpu_max_possible_capacity(cpu) == min_max_possible_capacity;
|
||||
}
|
||||
|
||||
/*
 * 'load' is in reference to "best cpu" at its best frequency.
 * Scale that in reference to a given cpu, accounting for how bad it is
 * in reference to "best cpu".
 */
static inline u64 scale_load_to_cpu(u64 task_load, int cpu)
{
        u64 lsf = cpu_load_scale_factor(cpu);

        if (lsf != 1024) {
                task_load *= lsf;
                task_load /= 1024;
        }

        return task_load;
}
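
scale_load_to_cpu() is then a plain 1024-based multiply. Reusing the illustrative factor of 3182 from the sketch above, 10 ms of windowed demand is treated as roughly 31 ms of work on the slower CPU:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t task_load = 10000000ULL;       /* 10 ms of demand, illustrative */
        uint64_t lsf = 3182;                    /* load_scale_factor of a little cluster */

        if (lsf != 1024)
                task_load = task_load * lsf / 1024;

        printf("scaled load = %llu ns (~31 ms)\n", (unsigned long long)task_load);
        return 0;
}
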
|
||||
|
||||
/*
|
||||
* Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
|
||||
* least efficient cpu gets capacity of 1024
|
||||
*/
|
||||
static unsigned long
|
||||
capacity_scale_cpu_efficiency(struct sched_cluster *cluster)
|
||||
{
|
||||
return (1024 * cluster->efficiency) / min_possible_efficiency;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 'capacity' of a cpu in reference to cpu with lowest max_freq
|
||||
* (min_max_freq), such that one with lowest max_freq gets capacity of 1024.
|
||||
*/
|
||||
static unsigned long capacity_scale_cpu_freq(struct sched_cluster *cluster)
|
||||
{
|
||||
return (1024 * cluster_max_freq(cluster)) / min_max_freq;
|
||||
}
|
||||
|
||||
static inline int compute_capacity(struct sched_cluster *cluster)
{
        int capacity = 1024;

        capacity *= capacity_scale_cpu_efficiency(cluster);
        capacity >>= 10;

        capacity *= capacity_scale_cpu_freq(cluster);
        capacity >>= 10;

        return capacity;
}
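
compute_capacity() is the inverse view: the ratios are taken against the least efficient CPU and the lowest ceiling frequency, so the weakest cluster sits at 1024 and faster ones scale up. Using the same illustrative numbers (twice the efficiency, 2803 MHz against a 1804 MHz minimum ceiling), the big cluster comes out at about 3180:

#include <stdio.h>

int main(void)
{
        /* Illustrative numbers: big cluster vs. the smallest/least efficient one. */
        long long cluster_efficiency = 2048, min_possible_efficiency = 1024;
        long long cluster_max_freq = 2803200, min_max_freq = 1804800;   /* kHz */

        long long capacity = 1024;

        capacity *= (1024 * cluster_efficiency) / min_possible_efficiency;
        capacity >>= 10;

        capacity *= (1024 * cluster_max_freq) / min_max_freq;
        capacity >>= 10;

        /* ~3180: this cluster can absorb about 3.1x the load of the smallest one */
        printf("capacity = %lld\n", capacity);
        return 0;
}
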
|
||||
|
||||
static inline unsigned int task_load(struct task_struct *p)
|
||||
{
|
||||
return p->ravg.demand;
|
||||
}
|
||||
|
||||
static inline unsigned int task_pl(struct task_struct *p)
|
||||
{
|
||||
return p->ravg.pred_demand;
|
||||
}
|
||||
|
||||
#define pct_to_real(tunable) \
|
||||
(div64_u64((u64)tunable * (u64)max_task_load(), 100))
|
||||
|
||||
#define real_to_pct(tunable) \
|
||||
(div64_u64((u64)tunable * (u64)100, (u64)max_task_load()))
|
||||
|
||||
static inline bool task_in_related_thread_group(struct task_struct *p)
|
||||
{
|
||||
return !!(rcu_access_pointer(p->grp) != NULL);
|
||||
}
|
||||
|
||||
static inline
|
||||
struct related_thread_group *task_related_thread_group(struct task_struct *p)
|
||||
{
|
||||
return rcu_dereference(p->grp);
|
||||
}
|
||||
|
||||
/* Is frequency of two cpus synchronized with each other? */
|
||||
static inline int same_freq_domain(int src_cpu, int dst_cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(src_cpu);
|
||||
|
||||
if (src_cpu == dst_cpu)
|
||||
return 1;
|
||||
|
||||
return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
|
||||
}
|
||||
|
||||
#define BOOST_KICK 0
|
||||
#define CPU_RESERVED 1
|
||||
|
||||
extern int sched_boost(void);
|
||||
extern int preferred_cluster(struct sched_cluster *cluster,
|
||||
struct task_struct *p);
|
||||
extern struct sched_cluster *rq_cluster(struct rq *rq);
|
||||
extern void reset_task_stats(struct task_struct *p);
|
||||
extern void clear_top_tasks_bitmap(unsigned long *bitmap);
|
||||
|
||||
#if defined(CONFIG_SCHED_TUNE)
|
||||
extern bool task_sched_boost(struct task_struct *p);
|
||||
extern int sync_cgroup_colocation(struct task_struct *p, bool insert);
|
||||
extern bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2);
|
||||
extern void update_cgroup_boost_settings(void);
|
||||
extern void restore_cgroup_boost_settings(void);
|
||||
|
||||
#else
|
||||
static inline bool
|
||||
same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool task_sched_boost(struct task_struct *p)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void update_cgroup_boost_settings(void) { }
|
||||
static inline void restore_cgroup_boost_settings(void) { }
|
||||
#endif
|
||||
|
||||
extern int alloc_related_thread_groups(void);
|
||||
|
||||
extern unsigned long all_cluster_ids[];
|
||||
|
||||
extern void check_for_migration(struct rq *rq, struct task_struct *p);
|
||||
|
||||
static inline int is_reserved(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
return test_bit(CPU_RESERVED, &rq->walt_flags);
|
||||
}
|
||||
|
||||
static inline int mark_reserved(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
return test_and_set_bit(CPU_RESERVED, &rq->walt_flags);
|
||||
}
|
||||
|
||||
static inline void clear_reserved(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
clear_bit(CPU_RESERVED, &rq->walt_flags);
|
||||
}
|
||||
|
||||
static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
        return cpu_of(rq) == task_cpu(p) && (p->on_rq || p->last_sleep_ts >=
                                             rq->window_start);
}

static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta)
{
        rq->cum_window_demand_scaled += scaled_delta;
        if (unlikely((s64)rq->cum_window_demand_scaled < 0))
                rq->cum_window_demand_scaled = 0;
}
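
walt_fixup_cum_window_demand() accumulates signed demand deltas for the current window and clamps the total at zero, so a run of negative fixups (for example, after tasks whose demand was already subtracted migrate away) cannot drive the aggregate negative. The same clamp on a plain signed accumulator:

#include <stdio.h>
#include <stdint.h>

static int64_t cum_window_demand_scaled;

/* Add a signed delta, never letting the running total go below zero. */
static void fixup_cum_window_demand(int64_t scaled_delta)
{
        cum_window_demand_scaled += scaled_delta;
        if (cum_window_demand_scaled < 0)
                cum_window_demand_scaled = 0;
}

int main(void)
{
        fixup_cum_window_demand(300);
        fixup_cum_window_demand(-500);  /* would underflow; clamped to 0 instead */
        printf("cum demand = %lld\n", (long long)cum_window_demand_scaled);
        return 0;
}
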

extern void update_cpu_cluster_capacity(const cpumask_t *cpus);

extern unsigned long thermal_cap(int cpu);

extern void clear_walt_request(int cpu);

extern int got_boost_kick(void);
extern void clear_boost_kick(int cpu);
extern enum sched_boost_policy sched_boost_policy(void);
extern void sched_boost_parse_dt(void);
extern void clear_ed_task(struct task_struct *p, struct rq *rq);
extern bool early_detection_notify(struct rq *rq, u64 wallclock);

static inline unsigned int power_cost(int cpu, u64 demand)
{
	return cpu_max_possible_capacity(cpu);
}

void note_task_waking(struct task_struct *p, u64 wallclock);

static inline bool task_placement_boost_enabled(struct task_struct *p)
{
	if (task_sched_boost(p))
		return sched_boost_policy() != SCHED_BOOST_NONE;

	return false;
}

static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
	enum sched_boost_policy policy = task_sched_boost(p) ?
					 sched_boost_policy() :
					 SCHED_BOOST_NONE;
	if (policy == SCHED_BOOST_ON_BIG) {
		/*
		 * Filter out tasks less than min task util threshold
		 * under conservative boost.
		 */
		if (sysctl_sched_boost == CONSERVATIVE_BOOST &&
		    task_util(p) <=
		    sysctl_sched_min_task_util_for_boost)
			policy = SCHED_BOOST_NONE;
	}

	return policy;
}
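
Under conservative boost, small tasks are not treated as boosted even when the global policy is SCHED_BOOST_ON_BIG. A standalone sketch of that filtering decision; the names, constants and threshold value here are placeholders, not the kernel's definitions:

/* Illustrative userspace sketch of the conservative-boost filter. */
#include <stdio.h>

enum boost_policy { BOOST_NONE, BOOST_ON_BIG };

static enum boost_policy effective_policy(enum boost_policy global,
					  int conservative,
					  unsigned int task_util,
					  unsigned int min_util_for_boost)
{
	/* Small tasks fall back to "no boost" under conservative boost. */
	if (global == BOOST_ON_BIG && conservative &&
	    task_util <= min_util_for_boost)
		return BOOST_NONE;
	return global;
}

int main(void)
{
	/* A 50-unit task under conservative boost with a 100-unit threshold. */
	printf("%d\n", effective_policy(BOOST_ON_BIG, 1, 50, 100));	/* 0: none */
	printf("%d\n", effective_policy(BOOST_ON_BIG, 1, 150, 100));	/* 1: on big */
	return 0;
}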

extern void walt_map_freq_to_load(void);

static inline bool is_min_capacity_cluster(struct sched_cluster *cluster)
{
	return is_min_capacity_cpu(cluster_first_cpu(cluster));
}

#else /* CONFIG_SCHED_WALT */

struct walt_sched_stats;
struct related_thread_group;
struct sched_cluster;

static inline bool task_sched_boost(struct task_struct *p)
{
	return false;
}

static inline bool task_placement_boost_enabled(struct task_struct *p)
{
	return false;
}

static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }

static inline int sched_boost(void)
{
	return 0;
}

static inline enum sched_boost_policy task_boost_policy(struct task_struct *p)
{
	return SCHED_BOOST_NONE;
}

static inline bool
task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
{
	return false;
}

static inline bool hmp_capable(void) { return false; }
static inline bool is_max_capacity_cpu(int cpu) { return true; }
static inline bool is_min_capacity_cpu(int cpu) { return true; }

static inline int
preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)
{
	return 1;
}

static inline struct sched_cluster *rq_cluster(struct rq *rq)
{
	return NULL;
}

static inline u64 scale_load_to_cpu(u64 load, int cpu)
{
	return load;
}

#ifdef CONFIG_SMP
static inline int cpu_capacity(int cpu)
{
	return SCHED_CAPACITY_SCALE;
}
#endif

static inline void set_preferred_cluster(struct related_thread_group *grp) { }

static inline bool task_in_related_thread_group(struct task_struct *p)
{
	return false;
}

static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
	return NULL;
}

static inline u32 task_load(struct task_struct *p) { return 0; }
static inline u32 task_pl(struct task_struct *p) { return 0; }

static inline int update_preferred_cluster(struct related_thread_group *grp,
					   struct task_struct *p, u32 old_load)
{
	return 0;
}

static inline void add_new_task_to_grp(struct task_struct *new) {}

static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
	return 1;
}

static inline void clear_reserved(int cpu) { }
static inline int alloc_related_thread_groups(void) { return 0; }

#define trace_sched_cpu_load(...)
#define trace_sched_cpu_load_lb(...)
#define trace_sched_cpu_load_cgroup(...)
#define trace_sched_cpu_load_wakeup(...)

static inline void walt_fixup_cum_window_demand(struct rq *rq,
						s64 scaled_delta) { }

static inline void update_cpu_cluster_capacity(const cpumask_t *cpus) { }

#ifdef CONFIG_SMP
static inline unsigned long thermal_cap(int cpu)
{
	return cpu_rq(cpu)->cpu_capacity_orig;
}
#endif

static inline void clear_walt_request(int cpu) { }

static inline int is_reserved(int cpu)
{
	return 0;
}

static inline int got_boost_kick(void)
{
	return 0;
}

static inline void clear_boost_kick(int cpu) { }

static inline enum sched_boost_policy sched_boost_policy(void)
{
	return SCHED_BOOST_NONE;
}

static inline void sched_boost_parse_dt(void) { }

static inline void clear_ed_task(struct task_struct *p, struct rq *rq) { }

static inline bool early_detection_notify(struct rq *rq, u64 wallclock)
{
	return 0;
}

#ifdef CONFIG_SMP
static inline unsigned int power_cost(int cpu, u64 demand)
{
	return SCHED_CAPACITY_SCALE;
}
#endif

static inline void note_task_waking(struct task_struct *p, u64 wallclock) { }
static inline void walt_map_freq_to_load(void) { }
#endif /* CONFIG_SCHED_WALT */

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
static inline bool energy_aware(void)
{
	return sysctl_sched_energy_aware;
}
#else
static inline bool energy_aware(void)
{
	return 0;
}
#endif

@@ -8,6 +8,7 @@
 * See kernel/stop_machine.c
 */
#include "sched.h"
#include "walt.h"

#ifdef CONFIG_SMP
static int
@@ -43,12 +44,14 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	add_nr_running(rq, 1);
	walt_inc_cumulative_runnable_avg(rq, p);
}

static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	sub_nr_running(rq, 1);
	walt_dec_cumulative_runnable_avg(rq, p);
}

static void yield_task_stop(struct rq *rq)
@@ -143,4 +146,7 @@ const struct sched_class stop_sched_class = {
	.prio_changed		= prio_changed_stop,
	.switched_to		= switched_to_stop,
	.update_curr		= update_curr_stop,
#ifdef CONFIG_SCHED_WALT
	.fixup_walt_sched_stats	= fixup_walt_sched_stats_common,
#endif
};

@@ -89,6 +89,33 @@ struct schedtune {
	/* Boost value for tasks on that SchedTune CGroup */
	int boost;

#ifdef CONFIG_SCHED_WALT
	/* Toggle ability to override sched boost enabled */
	bool sched_boost_no_override;

	/*
	 * Controls whether a cgroup is eligible for sched boost or not. This
	 * can temporarily be disabled by the kernel based on the no_override
	 * flag above.
	 */
	bool sched_boost_enabled;

	/*
	 * This tracks the default value of sched_boost_enabled and is used
	 * to restore the value following any temporary changes to that flag.
	 */
	bool sched_boost_enabled_backup;

	/*
	 * Controls whether tasks of this cgroup should be colocated with each
	 * other and tasks of other cgroups that have the same flag turned on.
	 */
	bool colocate;

	/* Controls whether further updates are allowed to the colocate flag */
	bool colocate_update_disabled;
#endif /* CONFIG_SCHED_WALT */

	/* Hint to bias scheduling of tasks on that SchedTune CGroup
	 * towards idle CPUs */
	int prefer_idle;
@@ -121,6 +148,13 @@ static inline struct schedtune *parent_st(struct schedtune *st)
static struct schedtune
root_schedtune = {
	.boost = 0,
#ifdef CONFIG_SCHED_WALT
	.sched_boost_no_override = false,
	.sched_boost_enabled = true,
	.sched_boost_enabled_backup = true,
	.colocate = false,
	.colocate_update_disabled = false,
#endif
	.prefer_idle = 0,
};

@@ -172,6 +206,77 @@ struct boost_groups {
/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

#ifdef CONFIG_SCHED_WALT
static inline void init_sched_boost(struct schedtune *st)
{
	st->sched_boost_no_override = false;
	st->sched_boost_enabled = true;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;
	st->colocate = false;
	st->colocate_update_disabled = false;
}

bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
	return task_schedtune(tsk1) == task_schedtune(tsk2);
}

void update_cgroup_boost_settings(void)
{
	int i;

	for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
		if (!allocated_group[i])
			break;

		if (allocated_group[i]->sched_boost_no_override)
			continue;

		allocated_group[i]->sched_boost_enabled = false;
	}
}

void restore_cgroup_boost_settings(void)
{
	int i;

	for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
		if (!allocated_group[i])
			break;

		allocated_group[i]->sched_boost_enabled =
			allocated_group[i]->sched_boost_enabled_backup;
	}
}

bool task_sched_boost(struct task_struct *p)
{
	struct schedtune *st = task_schedtune(p);

	return st->sched_boost_enabled;
}

static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
			  struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->sched_boost_no_override;
}

static int sched_boost_override_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, u64 override)
{
	struct schedtune *st = css_st(css);

	st->sched_boost_no_override = !!override;

	return 0;
}

#endif /* CONFIG_SCHED_WALT */

static inline bool schedtune_boost_timeout(u64 now, u64 ts)
{
	return ((now - ts) > SCHEDTUNE_BOOST_HOLD_NS);
@@ -422,6 +527,53 @@ int schedtune_can_attach(struct cgroup_taskset *tset)
	return 0;
}

#ifdef CONFIG_SCHED_WALT
static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
				    struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->sched_boost_enabled;
}

static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
				     struct cftype *cft, u64 enable)
{
	struct schedtune *st = css_st(css);

	st->sched_boost_enabled = !!enable;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;

	return 0;
}

static u64 sched_colocate_read(struct cgroup_subsys_state *css,
			       struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->colocate;
}

static int sched_colocate_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 colocate)
{
	struct schedtune *st = css_st(css);

	if (st->colocate_update_disabled)
		return -EPERM;

	st->colocate = !!colocate;
	st->colocate_update_disabled = true;
	return 0;
}

#else /* CONFIG_SCHED_WALT */

static inline void init_sched_boost(struct schedtune *st) { }

#endif /* CONFIG_SCHED_WALT */

void schedtune_cancel_attach(struct cgroup_taskset *tset)
{
	/* This can happen only if SchedTune controller is mounted with
@@ -535,6 +687,28 @@ boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
	return st->boost;
}

#ifdef CONFIG_SCHED_WALT
static void schedtune_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;
	struct schedtune *st;
	bool colocate;

	cgroup_taskset_first(tset, &css);
	st = css_st(css);

	colocate = st->colocate;

	cgroup_taskset_for_each(task, css, tset)
		sync_cgroup_colocation(task, colocate);
}
#else
static void schedtune_attach(struct cgroup_taskset *tset)
{
}
#endif

static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
	    s64 boost)
@@ -553,6 +727,23 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
}

static struct cftype files[] = {
#ifdef CONFIG_SCHED_WALT
	{
		.name = "sched_boost_no_override",
		.read_u64 = sched_boost_override_read,
		.write_u64 = sched_boost_override_write,
	},
	{
		.name = "sched_boost_enabled",
		.read_u64 = sched_boost_enabled_read,
		.write_u64 = sched_boost_enabled_write,
	},
	{
		.name = "colocate",
		.read_u64 = sched_colocate_read,
		.write_u64 = sched_colocate_write,
	},
#endif
	{
		.name = "boost",
		.read_s64 = boost_read,
@@ -615,6 +806,7 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
		goto out;

	/* Initialize per-CPU boost group support */
	init_sched_boost(st);
	schedtune_boostgroup_init(st, idx);

	return &st->css;
@@ -653,6 +845,7 @@ schedtune_css_free(struct cgroup_subsys_state *css)
struct cgroup_subsys schedtune_cgrp_subsys = {
	.css_alloc	= schedtune_css_alloc,
	.css_free	= schedtune_css_free,
	.attach		= schedtune_attach,
	.can_attach	= schedtune_can_attach,
	.cancel_attach	= schedtune_cancel_attach,
	.legacy_cftypes	= files,

 3420	kernel/sched/walt.c	(new file; diff suppressed because it is too large)
  387	kernel/sched/walt.h	(new file)
@@ -0,0 +1,387 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
 */

#ifndef __WALT_H
#define __WALT_H

#ifdef CONFIG_SCHED_WALT

#include <linux/sched/sysctl.h>

#define WINDOW_STATS_RECENT		0
#define WINDOW_STATS_MAX		1
#define WINDOW_STATS_MAX_RECENT_AVG	2
#define WINDOW_STATS_AVG		3
#define WINDOW_STATS_INVALID_POLICY	4

#define EXITING_TASK_MARKER	0xdeaddead

#define FREQ_REPORT_MAX_CPU_LOAD_TOP_TASK	0
#define FREQ_REPORT_CPU_LOAD			1
#define FREQ_REPORT_TOP_TASK			2

#define for_each_related_thread_group(grp) \
	list_for_each_entry(grp, &active_related_thread_groups, list)

#define SCHED_NEW_TASK_WINDOWS 5

extern unsigned int sched_ravg_window;
extern unsigned int max_possible_efficiency;
extern unsigned int min_possible_efficiency;
extern unsigned int max_possible_freq;
extern unsigned int sched_major_task_runtime;
extern unsigned int __read_mostly sched_init_task_load_windows;
extern unsigned int __read_mostly sched_load_granule;

extern struct mutex cluster_lock;
extern rwlock_t related_thread_group_lock;
extern __read_mostly unsigned int sched_ravg_hist_size;
extern __read_mostly unsigned int sched_freq_aggregate;
extern __read_mostly int sched_freq_aggregate_threshold;
extern __read_mostly unsigned int sched_window_stats_policy;
extern __read_mostly unsigned int sched_group_upmigrate;
extern __read_mostly unsigned int sched_group_downmigrate;

extern struct sched_cluster init_cluster;

extern void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
			     u64 wallclock, u64 irqtime);

extern unsigned int walt_big_tasks(int cpu);

static inline void
inc_nr_big_task(struct walt_sched_stats *stats, struct task_struct *p)
{
	if (sched_disable_window_stats)
		return;

	if (p->misfit)
		stats->nr_big_tasks++;
}

static inline void
dec_nr_big_task(struct walt_sched_stats *stats, struct task_struct *p)
{
	if (sched_disable_window_stats)
		return;

	if (p->misfit)
		stats->nr_big_tasks--;

	BUG_ON(stats->nr_big_tasks < 0);
}

static inline void
walt_adjust_nr_big_tasks(struct rq *rq, int delta, bool inc)
{
	if (sched_disable_window_stats)
		return;

	//sched_update_nr_prod(cpu_of(rq), 0, true); SATYA: Cross check
	rq->walt_stats.nr_big_tasks += inc ? delta : -delta;

	BUG_ON(rq->walt_stats.nr_big_tasks < 0);
}

static inline void
fixup_cumulative_runnable_avg(struct walt_sched_stats *stats,
			      s64 demand_scaled_delta,
			      s64 pred_demand_scaled_delta)
{
	if (sched_disable_window_stats)
		return;

	stats->cumulative_runnable_avg_scaled += demand_scaled_delta;
	BUG_ON((s64)stats->cumulative_runnable_avg_scaled < 0);

	stats->pred_demands_sum_scaled += pred_demand_scaled_delta;
	BUG_ON((s64)stats->pred_demands_sum_scaled < 0);
}

static inline void
walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	if (sched_disable_window_stats)
		return;

	fixup_cumulative_runnable_avg(&rq->walt_stats, p->ravg.demand_scaled,
				      p->ravg.pred_demand_scaled);

	/*
	 * Add a task's contribution to the cumulative window demand when
	 *
	 * (1) the task is enqueued with on_rq = 1, i.e. migration or a
	 *     prio/cgroup/class change.
	 * (2) the task is waking for the first time in this window.
	 */
	if (p->on_rq || (p->last_sleep_ts < rq->window_start))
		walt_fixup_cum_window_demand(rq, p->ravg.demand_scaled);
}

static inline void
walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	if (sched_disable_window_stats)
		return;

	fixup_cumulative_runnable_avg(&rq->walt_stats,
				      -(s64)p->ravg.demand_scaled,
				      -(s64)p->ravg.pred_demand_scaled);

	/*
	 * on_rq will be 1 for sleeping tasks. So check if the task
	 * is migrating or dequeuing in RUNNING state to change the
	 * prio/cgroup/class.
	 */
	if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
		walt_fixup_cum_window_demand(rq, -(s64)p->ravg.demand_scaled);
}
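
walt_inc_cumulative_runnable_avg() and walt_dec_cumulative_runnable_avg() above add and remove a task's scaled demand and predicted demand from the per-rq WALT totals. A standalone sketch of that bookkeeping, with simplified stand-in types rather than the kernel structures:

/* Illustrative userspace sketch of the enqueue/dequeue stats accounting. */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

struct stats { int64_t cra_scaled; int64_t pred_sum_scaled; };
struct task  { uint16_t demand_scaled; uint16_t pred_demand_scaled; };

static void fixup(struct stats *s, int64_t d_demand, int64_t d_pred)
{
	s->cra_scaled += d_demand;
	s->pred_sum_scaled += d_pred;
	assert(s->cra_scaled >= 0 && s->pred_sum_scaled >= 0);	/* never negative */
}

static void enqueue(struct stats *s, const struct task *p)
{
	fixup(s, p->demand_scaled, p->pred_demand_scaled);
}

static void dequeue(struct stats *s, const struct task *p)
{
	fixup(s, -(int64_t)p->demand_scaled, -(int64_t)p->pred_demand_scaled);
}

int main(void)
{
	struct stats s = { 0, 0 };
	struct task p = { .demand_scaled = 120, .pred_demand_scaled = 140 };

	enqueue(&s, &p);
	dequeue(&s, &p);
	printf("%lld %lld\n", (long long)s.cra_scaled,
	       (long long)s.pred_sum_scaled);	/* prints "0 0" */
	return 0;
}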

extern void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
					  u16 updated_demand_scaled,
					  u16 updated_pred_demand_scaled);
extern void inc_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void dec_rq_walt_stats(struct rq *rq, struct task_struct *p);
extern void fixup_busy_time(struct task_struct *p, int new_cpu);
extern void init_new_task_load(struct task_struct *p);
extern void mark_task_starting(struct task_struct *p);
extern void set_window_start(struct rq *rq);
void account_irqtime(int cpu, struct task_struct *curr, u64 delta,
		     u64 wallclock);
extern bool do_pl_notif(struct rq *rq);

#define SCHED_HIGH_IRQ_TIMEOUT 3
static inline u64 sched_irqload(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	s64 delta;

	delta = get_jiffies_64() - rq->irqload_ts;
	/*
	 * The current context can be preempted by irq, and rq->irqload_ts can
	 * be updated by irq context, so delta can be negative.
	 * That is okay and we can safely return, as it means there was a
	 * recent irq occurrence.
	 */

	if (delta < SCHED_HIGH_IRQ_TIMEOUT)
		return rq->avg_irqload;
	else
		return 0;
}
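
sched_irqload() above only reports the averaged irq load when the last irq accounting was recent; a negative delta (the timestamp was updated concurrently from irq context) also counts as recent. A standalone sketch of that recency check, with plain integers standing in for jiffies:

/* Illustrative userspace sketch of the irqload recency check. */
#include <stdio.h>
#include <stdint.h>

#define HIGH_IRQ_TIMEOUT 3

static uint64_t recent_irqload(int64_t now, int64_t irqload_ts,
			       uint64_t avg_irqload)
{
	int64_t delta = now - irqload_ts;

	/* Negative delta also satisfies the check, i.e. counts as recent. */
	return (delta < HIGH_IRQ_TIMEOUT) ? avg_irqload : 0;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)recent_irqload(1002, 1000, 500)); /* 500 */
	printf("%llu\n", (unsigned long long)recent_irqload(1010, 1000, 500)); /* 0 */
	printf("%llu\n", (unsigned long long)recent_irqload(999, 1000, 500));  /* 500 */
	return 0;
}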

static inline int sched_cpu_high_irqload(int cpu)
{
	return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload;
}

static inline int exiting_task(struct task_struct *p)
{
	return (p->ravg.sum_history[0] == EXITING_TASK_MARKER);
}

static inline struct sched_cluster *cpu_cluster(int cpu)
{
	return cpu_rq(cpu)->cluster;
}

static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}
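
scale_load_to_freq() rescales a load sample observed at src_freq to what it would be relative to dst_freq. A short worked example of the arithmetic, with arbitrary numbers in plain userspace C:

/* Illustrative sketch of the load-to-frequency scaling arithmetic. */
#include <stdio.h>
#include <stdint.h>

static uint64_t scale_load(uint64_t load, unsigned int src, unsigned int dst)
{
	return load * src / dst;
}

int main(void)
{
	/* 500 units of load at 1.8 GHz is 375 units relative to 2.4 GHz. */
	printf("%llu\n", (unsigned long long)scale_load(500, 1800000, 2400000));
	return 0;
}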

static inline bool is_new_task(struct task_struct *p)
{
	return p->ravg.active_windows < SCHED_NEW_TASK_WINDOWS;
}

static inline void clear_top_tasks_table(u8 *table)
{
	memset(table, 0, NUM_LOAD_INDICES * sizeof(u8));
}

extern void update_cluster_load_subtractions(struct task_struct *p,
					     int cpu, u64 ws, bool new_task);
extern void sched_account_irqstart(int cpu, struct task_struct *curr,
				   u64 wallclock);

static inline unsigned int max_task_load(void)
{
	return sched_ravg_window;
}

static inline u32 cpu_cycles_to_freq(u64 cycles, u64 period)
{
	return div64_u64(cycles, period);
}

static inline unsigned int cpu_cur_freq(int cpu)
{
	return cpu_rq(cpu)->cluster->cur_freq;
}

static inline unsigned int sched_cpu_legacy_freq(int cpu)
{
	unsigned long curr_cap = arch_scale_freq_capacity(cpu);

	return (curr_cap * (u64) cpu_rq(cpu)->cluster->max_possible_freq) >>
		SCHED_CAPACITY_SHIFT;
}

static inline void
move_list(struct list_head *dst, struct list_head *src, bool sync_rcu)
{
	struct list_head *first, *last;

	first = src->next;
	last = src->prev;

	if (sync_rcu) {
		INIT_LIST_HEAD_RCU(src);
		synchronize_rcu();
	}

	first->prev = dst;
	dst->prev = last;
	last->next = dst;

	/* Ensure list sanity before making the head visible to all CPUs. */
	smp_mb();
	dst->next = first;
}

extern void reset_task_stats(struct task_struct *p);
extern void update_cluster_topology(void);

extern struct list_head cluster_head;
#define for_each_sched_cluster(cluster) \
	list_for_each_entry_rcu(cluster, &cluster_head, list)

extern void init_clusters(void);

extern void clear_top_tasks_bitmap(unsigned long *bitmap);

extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				  u64 delta, u64 wallclock);

static inline void assign_cluster_ids(struct list_head *head)
{
	struct sched_cluster *cluster;
	int pos = 0;

	list_for_each_entry(cluster, head, list) {
		cluster->id = pos;
		sched_cluster[pos++] = cluster;
	}
}

static inline int same_cluster(int src_cpu, int dst_cpu)
{
	return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster;
}

void sort_clusters(void);

void walt_irq_work(struct irq_work *irq_work);

void walt_sched_init_rq(struct rq *rq);

static inline void walt_update_last_enqueue(struct task_struct *p)
{
	p->last_enqueued_ts = sched_ktime_clock();
}
extern void walt_rotate_work_init(void);
extern void walt_rotation_checkpoint(int nr_big);
extern unsigned int walt_rotation_enabled;
extern unsigned int walt_get_default_coloc_group_load(void);

#else /* CONFIG_SCHED_WALT */

static inline void walt_sched_init_rq(struct rq *rq) { }

static inline void walt_rotate_work_init(void) { }
static inline void walt_rotation_checkpoint(int nr_big) { }
static inline void walt_update_last_enqueue(struct task_struct *p) { }
static inline unsigned int walt_get_default_coloc_group_load(void)
{
	return 0;
}

static inline void update_task_ravg(struct task_struct *p, struct rq *rq,
				    int event, u64 wallclock, u64 irqtime) { }
static inline void walt_inc_cumulative_runnable_avg(struct rq *rq,
						    struct task_struct *p)
{
}

static inline unsigned int walt_big_tasks(int cpu)
{
	return 0;
}

static inline void walt_adjust_nr_big_tasks(struct rq *rq,
					    int delta, bool inc)
{
}

static inline void inc_nr_big_task(struct walt_sched_stats *stats,
				   struct task_struct *p)
{
}

static inline void dec_nr_big_task(struct walt_sched_stats *stats,
				   struct task_struct *p)
{
}
static inline void walt_dec_cumulative_runnable_avg(struct rq *rq,
						    struct task_struct *p)
{
}

static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
static inline void init_new_task_load(struct task_struct *p)
{
}

static inline void mark_task_starting(struct task_struct *p) { }
static inline void set_window_start(struct rq *rq) { }
static inline int sched_cpu_high_irqload(int cpu) { return 0; }

static inline void sched_account_irqstart(int cpu, struct task_struct *curr,
					  u64 wallclock)
{
}

static inline void update_cluster_topology(void) { }
static inline void init_clusters(void) {}
static inline void sched_account_irqtime(int cpu, struct task_struct *curr,
					 u64 delta, u64 wallclock)
{
}

static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; }
static inline bool do_pl_notif(struct rq *rq) { return false; }

static inline void
inc_rq_walt_stats(struct rq *rq, struct task_struct *p) { }

static inline void
dec_rq_walt_stats(struct rq *rq, struct task_struct *p) { }

static inline void
fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
			      u16 updated_demand_scaled,
			      u16 updated_pred_demand_scaled)
{
}

static inline u64 sched_irqload(int cpu)
{
	return 0;
}
#endif /* CONFIG_SCHED_WALT */

#endif
@@ -121,14 +121,19 @@ static int sixty = 60;
#endif

static int __maybe_unused neg_one = -1;
static int __maybe_unused neg_three = -3;

static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused three = 3;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
static int one_hundred = 100;
static int one_thousand = 1000;
#ifdef CONFIG_SCHED_WALT
static int two_million = 2000000;
#endif
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
@@ -320,6 +325,77 @@ static struct ctl_table kern_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_SCHED_WALT
	{
		.procname	= "sched_cpu_high_irqload",
		.data		= &sysctl_sched_cpu_high_irqload,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sched_group_upmigrate",
		.data		= &sysctl_sched_group_upmigrate_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= walt_proc_update_handler,
		.extra1		= &sysctl_sched_group_downmigrate_pct,
	},
	{
		.procname	= "sched_group_downmigrate",
		.data		= &sysctl_sched_group_downmigrate_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= walt_proc_update_handler,
		.extra1		= &zero,
		.extra2		= &sysctl_sched_group_upmigrate_pct,
	},
	{
		.procname	= "sched_boost",
		.data		= &sysctl_sched_boost,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_boost_handler,
		.extra1		= &neg_three,
		.extra2		= &three,
	},
	{
		.procname	= "sched_walt_rotate_big_tasks",
		.data		= &sysctl_sched_walt_rotate_big_tasks,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "sched_min_task_util_for_boost",
		.data		= &sysctl_sched_min_task_util_for_boost,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one_thousand,
	},
	{
		.procname	= "sched_min_task_util_for_colocation",
		.data		= &sysctl_sched_min_task_util_for_colocation,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one_thousand,
	},
	{
		.procname	= "sched_little_cluster_coloc_fmin_khz",
		.data		= &sysctl_sched_little_cluster_coloc_fmin_khz,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_little_cluster_coloc_fmin_khz_handler,
		.extra1		= &zero,
		.extra2		= &two_million,
	},
#endif
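
The CONFIG_SCHED_WALT entries above are registered in kern_table, so on kernels carrying this patch they are expected to appear under /proc/sys/kernel/. A hedged usage sketch for the sched_boost entry; the path and written value are illustrative, and the accepted range (-3..3) comes from the extra1/extra2 bounds above:

/* Illustrative userspace sketch; exact value semantics are defined by sched_boost_handler. */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_boost";	/* assumed location */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");	/* entry absent without CONFIG_SCHED_WALT */
		return 1;
	}
	fprintf(f, "1\n");		/* request a boost mode within -3..3 */
	fclose(f);

	f = fopen(path, "r");
	if (f) {
		int val;

		if (fscanf(f, "%d", &val) == 1)
			printf("sched_boost = %d\n", val);
		fclose(f);
	}
	return 0;
}
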
#ifdef CONFIG_SCHED_DEBUG
	{
		.procname	= "sched_cstate_aware",