summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/sched.c28
1 files changed, 25 insertions, 3 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d985956bbfbd..f33788b77119 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3208,6 +3208,10 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
set_task_cpu(p, this_cpu);
activate_task(this_rq, p, 0);
check_preempt_curr(this_rq, p, 0);
+
+ /* re-arm NEWIDLE balancing when moving tasks */
+ src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
+ this_rq->idle_stamp = 0;
}
/*
@@ -3425,12 +3429,14 @@ struct sd_lb_stats {
unsigned long this_load;
unsigned long this_load_per_task;
unsigned long this_nr_running;
+ unsigned long this_has_capacity;
/* Statistics of the busiest group */
unsigned long max_load;
unsigned long busiest_load_per_task;
unsigned long busiest_nr_running;
unsigned long busiest_group_capacity;
+ unsigned long busiest_has_capacity;
int group_imb; /* Is there imbalance in this sd */
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -3453,6 +3459,7 @@ struct sg_lb_stats {
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long group_capacity;
int group_imb; /* Is there an imbalance in the group ? */
+ int group_has_capacity; /* Is there extra capacity in the group? */
};
/**
@@ -3822,6 +3829,9 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
sgs->group_imb = 1;
sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+
+ if (sgs->group_capacity > sgs->sum_nr_running)
+ sgs->group_has_capacity = 1;
}
/**
@@ -3878,6 +3888,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
sds->this = group;
sds->this_nr_running = sgs.sum_nr_running;
sds->this_load_per_task = sgs.sum_weighted_load;
+ sds->this_has_capacity = sgs.group_has_capacity;
} else if (sgs.avg_load > sds->max_load &&
(sgs.sum_nr_running > sgs.group_capacity ||
sgs.group_imb)) {
@@ -3886,6 +3897,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
sds->busiest_nr_running = sgs.sum_nr_running;
sds->busiest_group_capacity = sgs.group_capacity;
sds->busiest_load_per_task = sgs.sum_weighted_load;
+ sds->busiest_has_capacity = sgs.group_has_capacity;
sds->group_imb = sgs.group_imb;
}
@@ -4031,6 +4043,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
return fix_small_imbalance(sds, this_cpu, imbalance);
}
+
/******* find_busiest_group() helpers end here *********************/
/**
@@ -4082,6 +4095,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* 4) This group is more busy than the avg busieness at this
* sched_domain.
* 5) The imbalance is within the specified limit.
+ *
+ * Note: when doing newidle balance, if the local group has excess
+ * capacity (i.e. nr_running < group_capacity) and the busiest group
+ * does not have any capacity, we force a load balance to pull tasks
+ * to the local group. In this case, we skip past checks 3, 4 and 5.
*/
if (balance && !(*balance))
goto ret;
@@ -4089,6 +4107,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
+ /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+ if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
+ !sds.busiest_has_capacity)
+ goto force_balance;
+
if (sds.this_load >= sds.max_load)
goto out_balanced;
@@ -4100,6 +4123,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
goto out_balanced;
+force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(&sds, this_cpu, imbalance);
return sds.busiest;
@@ -4515,10 +4539,8 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
- if (pulled_task) {
- this_rq->idle_stamp = 0;
+ if (pulled_task)
break;
- }
}
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
/*