summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorStefan Agner <stefan.agner@toradex.com>2019-06-03 10:59:09 +0200
committerStefan Agner <stefan.agner@toradex.com>2019-06-03 10:59:09 +0200
commit2115c1bc6e396d5ffe9ecbe394d1c50a6e25c404 (patch)
tree20fff445991414f23c0a29237940b75cd756de86 /kernel
parentb794ea49ba3816c0d5cf05506964a8e69ce4efa3 (diff)
parent3f7c1cab1a61108821cf47dda8a32ed25cc3588b (diff)
Merge tag 'v5.0.19' into toradex_5.0.ytoradex_5.0.y
This is the 5.0.19 stable release
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/hashtab.c23
-rw-r--r--kernel/bpf/inode.c2
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/fork.c31
-rw-r--r--kernel/locking/rwsem-xadd.c44
-rw-r--r--kernel/sched/cpufreq_schedutil.c1
-rw-r--r--kernel/trace/trace_events.c3
-rw-r--r--kernel/trace/trace_probe.c13
8 files changed, 90 insertions, 32 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index f9274114c88d..be5747a5337a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
+ void *key, const bool mark)
{
struct htab_elem *l = __htab_map_lookup_elem(map, key);
if (l) {
- bpf_lru_node_set_ref(&l->lru_node);
+ if (mark)
+ bpf_lru_node_set_ref(&l->lru_node);
return l->key + round_up(map->key_size, 8);
}
return NULL;
}
+static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return __htab_lru_map_lookup_elem(map, key, true);
+}
+
+static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
+{
+ return __htab_lru_map_lookup_elem(map, key, false);
+}
+
static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
@@ -1215,6 +1227,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_lru_map_lookup_elem,
+ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
.map_update_elem = htab_lru_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
@@ -1246,7 +1259,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l;
void __percpu *pptr;
int ret = -ENOENT;
@@ -1262,8 +1274,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
l = __htab_map_lookup_elem(map, key);
if (!l)
goto out;
- if (htab_is_lru(htab))
- bpf_lru_node_set_ref(&l->lru_node);
+ /* We do not mark LRU map element here in order to not mess up
+ * eviction heuristics when user space does a map walk.
+ */
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off,
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4a8f390a2b82..dc9d7ac8228d 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -518,7 +518,7 @@ out:
static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
{
struct bpf_prog *prog;
- int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+ int ret = inode_permission(inode, MAY_READ);
if (ret)
return ERR_PTR(ret);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 84470d1480aa..07d9b76e90ce 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -738,7 +738,10 @@ static int map_lookup_elem(union bpf_attr *attr)
err = map->ops->map_peek_elem(map, value);
} else {
rcu_read_lock();
- ptr = map->ops->map_lookup_elem(map, key);
+ if (map->ops->map_lookup_elem_sys_only)
+ ptr = map->ops->map_lookup_elem_sys_only(map, key);
+ else
+ ptr = map->ops->map_lookup_elem(map, key);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
} else if (!ptr) {
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e6f0e0..95fd41e92031 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -953,6 +953,15 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}
+static __always_inline void mm_clear_owner(struct mm_struct *mm,
+ struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+ if (mm->owner == p)
+ WRITE_ONCE(mm->owner, NULL);
+#endif
+}
+
static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
#ifdef CONFIG_MEMCG
@@ -1332,6 +1341,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
free_pt:
/* don't put binfmt in mmput, we haven't got module yet */
mm->binfmt = NULL;
+ mm_init_owner(mm, NULL);
mmput(mm);
fail_nomem:
@@ -1663,6 +1673,21 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_RCU */
}
+static void __delayed_free_task(struct rcu_head *rhp)
+{
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ free_task(tsk);
+}
+
+static __always_inline void delayed_free_task(struct task_struct *tsk)
+{
+ if (IS_ENABLED(CONFIG_MEMCG))
+ call_rcu(&tsk->rcu, __delayed_free_task);
+ else
+ free_task(tsk);
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -2124,8 +2149,10 @@ bad_fork_cleanup_io:
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
- if (p->mm)
+ if (p->mm) {
+ mm_clear_owner(p->mm, p);
mmput(p->mm);
+ }
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
free_signal_struct(p->signal);
@@ -2156,7 +2183,7 @@ bad_fork_cleanup_count:
bad_fork_free:
p->state = TASK_DEAD;
put_task_stack(p);
- free_task(p);
+ delayed_free_task(p);
fork_out:
spin_lock_irq(&current->sighand->siglock);
hlist_del_init(&delayed.node);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 50d9af615dc4..115860164c36 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
{
struct rwsem_waiter *waiter, *tmp;
long oldcount, woken = 0, adjustment = 0;
+ struct list_head wlist;
/*
* Take a peek at the queue head waiter such that we can determine
@@ -188,18 +189,42 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* of the queue. We know that woken will be at least 1 as we accounted
* for above. Note we increment the 'active part' of the count by the
* number of readers before waking any processes up.
+ *
+ * We have to do wakeup in 2 passes to prevent the possibility that
+ * the reader count may be decremented before it is incremented. It
+ * is because the to-be-woken waiter may not have slept yet. So it
+ * may see waiter->task got cleared, finish its critical section and
+ * do an unlock before the reader count increment.
+ *
+ * 1) Collect the read-waiters in a separate list, count them and
+ * fully increment the reader count in rwsem.
+ * 2) For each waiters in the new list, clear waiter->task and
+ * put them into wake_q to be woken up later.
*/
- list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
- struct task_struct *tsk;
-
+ list_for_each_entry(waiter, &sem->wait_list, list) {
if (waiter->type == RWSEM_WAITING_FOR_WRITE)
break;
woken++;
- tsk = waiter->task;
+ }
+ list_cut_before(&wlist, &sem->wait_list, &waiter->list);
+
+ adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+ if (list_empty(&sem->wait_list)) {
+ /* hit end of list above */
+ adjustment -= RWSEM_WAITING_BIAS;
+ }
+
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
+
+ /* 2nd pass */
+ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+ struct task_struct *tsk;
+ tsk = waiter->task;
get_task_struct(tsk);
- list_del(&waiter->list);
+
/*
* Ensure calling get_task_struct() before setting the reader
* waiter to nil such that rwsem_down_read_failed() cannot
@@ -215,15 +240,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
/* wake_q_add() already take the task ref */
put_task_struct(tsk);
}
-
- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
- if (list_empty(&sem->wait_list)) {
- /* hit end of list above */
- adjustment -= RWSEM_WAITING_BIAS;
- }
-
- if (adjustment)
- atomic_long_add(adjustment, &sem->count);
}
/*
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1ccf77f6d346..d4ab9245e016 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -771,6 +771,7 @@ out:
return 0;
fail:
+ kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
sugov_tunables_free(tunables);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5b3b0c3c8a47..d910e36c34b5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1318,9 +1318,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
char buf[32];
int len;
- if (*ppos)
- return 0;
-
if (unlikely(!id))
return -ENODEV;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 9962cb5da8ac..44f078cda0ac 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -405,13 +405,14 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
return -E2BIG;
}
}
- /*
- * The default type of $comm should be "string", and it can't be
- * dereferenced.
- */
- if (!t && strcmp(arg, "$comm") == 0)
+
+ /* Since $comm can not be dereferred, we can find $comm by strcmp */
+ if (strcmp(arg, "$comm") == 0) {
+ /* The type of $comm must be "string", and not an array. */
+ if (parg->count || (t && strcmp(t, "string")))
+ return -EINVAL;
parg->type = find_fetch_type("string");
- else
+ } else
parg->type = find_fetch_type(t);
if (!parg->type) {
pr_info("Unsupported type: %s\n", t);