summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/Makefile50
-rw-r--r--kernel/audit.c153
-rw-r--r--kernel/audit.h3
-rw-r--r--kernel/auditfilter.c3
-rw-r--r--kernel/auditsc.c133
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/cgroup.c42
-rw-r--r--kernel/cpuset.c8
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/extable.c4
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/hung_task.c11
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/irq/settings.h7
-rw-r--r--kernel/irq/spurious.c12
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/modsign_certificate.S12
-rw-r--r--kernel/modsign_pubkey.c104
-rw-r--r--kernel/module-internal.h2
-rw-r--r--kernel/module.c66
-rw-r--r--kernel/module_signing.c11
-rw-r--r--kernel/padata.c9
-rw-r--r--kernel/power/snapshot.c3
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/rcu/tiny.c2
-rw-r--r--kernel/rcu/tree.c4
-rw-r--r--kernel/smp.c5
-rw-r--r--kernel/softirq.c131
-rw-r--r--kernel/system_certificates.S10
-rw-r--r--kernel/system_keyring.c105
-rw-r--r--kernel/taskstats.c38
-rw-r--r--kernel/trace/ftrace.c225
-rw-r--r--kernel/trace/trace.c82
-rw-r--r--kernel/trace/trace.h50
-rw-r--r--kernel/trace/trace_branch.c2
-rw-r--r--kernel/trace/trace_event_perf.c8
-rw-r--r--kernel/trace/trace_events.c32
-rw-r--r--kernel/trace/trace_events_filter.c218
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_functions_graph.c82
-rw-r--r--kernel/trace/trace_kprobe.c4
-rw-r--r--kernel/trace/trace_mmiotrace.c4
-rw-r--r--kernel/trace/trace_sched_switch.c4
-rw-r--r--kernel/trace/trace_stat.c41
-rw-r--r--kernel/trace/trace_syscalls.c42
-rw-r--r--kernel/trace/trace_uprobe.c3
-rw-r--r--kernel/up.c11
-rw-r--r--kernel/user.c4
-rw-r--r--kernel/user_namespace.c6
-rw-r--r--kernel/workqueue.c50
52 files changed, 1117 insertions, 706 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 94fabd534b03..2a202a846757 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
default 1000 if HZ_1000
config SCHED_HRTICK
- def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
+ def_bool HIGH_RES_TIMERS
diff --git a/kernel/Makefile b/kernel/Makefile
index 09a9c94f42bd..bbaf7d59c1bb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
@@ -122,19 +123,52 @@ targets += timeconst.h
$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
$(call if_changed,bc)
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
#
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509". Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
#
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+ $(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
-quiet_cmd_touch = TOUCH $@
- cmd_touch = touch $@
+quiet_cmd_x509certs = CERTS $@
+ cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)")
-extra_certificates:
- $(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+ $(call if_changed,x509certs)
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+ @echo $(X509_CERTIFICATES) >$@
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
###############################################################################
#
# If module signing is requested, say by allyesconfig, but a key has not been
diff --git a/kernel/audit.c b/kernel/audit.c
index 7b0e23a740ce..906ae5a0233a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -60,7 +60,6 @@
#ifdef CONFIG_SECURITY
#include <linux/security.h>
#endif
-#include <net/netlink.h>
#include <linux/freezer.h>
#include <linux/tty.h>
#include <linux/pid_namespace.h>
@@ -140,6 +139,17 @@ static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+ .mask = -1,
+ .features = 0,
+ .lock = 0,};
+
+static char *audit_feature_names[2] = {
+ "only_unset_loginuid",
+ "loginuid_immutable",
+};
+
+
/* Serialize requests from userspace. */
DEFINE_MUTEX(audit_cmd_mutex);
@@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return -EOPNOTSUPP;
case AUDIT_GET:
case AUDIT_SET:
+ case AUDIT_GET_FEATURE:
+ case AUDIT_SET_FEATURE:
case AUDIT_LIST_RULES:
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
@@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
int rc = 0;
uid_t uid = from_kuid(&init_user_ns, current_uid());
- if (!audit_enabled) {
+ if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
*ab = NULL;
return rc;
}
@@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return rc;
}
+int is_audit_feature_set(int i)
+{
+ return af.features & AUDIT_FEATURE_TO_MASK(i);
+}
+
+
+static int audit_get_feature(struct sk_buff *skb)
+{
+ u32 seq;
+
+ seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+ audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
+ &af, sizeof(af));
+
+ return 0;
+}
+
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+ u32 old_lock, u32 new_lock, int res)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+ audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+ audit_feature_names[which], !!old_feature, !!new_feature,
+ !!old_lock, !!new_lock, res);
+ audit_log_end(ab);
+}
+
+static int audit_set_feature(struct sk_buff *skb)
+{
+ struct audit_features *uaf;
+ int i;
+
+ BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+ uaf = nlmsg_data(nlmsg_hdr(skb));
+
+ /* if there is ever a version 2 we should handle that here */
+
+ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+ u32 feature = AUDIT_FEATURE_TO_MASK(i);
+ u32 old_feature, new_feature, old_lock, new_lock;
+
+ /* if we are not changing this feature, move along */
+ if (!(feature & uaf->mask))
+ continue;
+
+ old_feature = af.features & feature;
+ new_feature = uaf->features & feature;
+ new_lock = (uaf->lock | af.lock) & feature;
+ old_lock = af.lock & feature;
+
+ /* are we changing a locked feature? */
+ if ((af.lock & feature) && (new_feature != old_feature)) {
+ audit_log_feature_change(i, old_feature, new_feature,
+ old_lock, new_lock, 0);
+ return -EPERM;
+ }
+ }
+ /* nothing invalid, do the changes */
+ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+ u32 feature = AUDIT_FEATURE_TO_MASK(i);
+ u32 old_feature, new_feature, old_lock, new_lock;
+
+ /* if we are not changing this feature, move along */
+ if (!(feature & uaf->mask))
+ continue;
+
+ old_feature = af.features & feature;
+ new_feature = uaf->features & feature;
+ old_lock = af.lock & feature;
+ new_lock = (uaf->lock | af.lock) & feature;
+
+ if (new_feature != old_feature)
+ audit_log_feature_change(i, old_feature, new_feature,
+ old_lock, new_lock, 1);
+
+ if (new_feature)
+ af.features |= feature;
+ else
+ af.features &= ~feature;
+ af.lock |= new_lock;
+ }
+
+ return 0;
+}
+
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 seq;
@@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
switch (msg_type) {
case AUDIT_GET:
+ memset(&status_set, 0, sizeof(status_set));
status_set.enabled = audit_enabled;
status_set.failure = audit_failure;
status_set.pid = audit_pid;
@@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
&status_set, sizeof(status_set));
break;
case AUDIT_SET:
- if (nlh->nlmsg_len < sizeof(struct audit_status))
+ if (nlmsg_len(nlh) < sizeof(struct audit_status))
return -EINVAL;
status_get = (struct audit_status *)data;
if (status_get->mask & AUDIT_STATUS_ENABLED) {
@@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
err = audit_set_backlog_limit(status_get->backlog_limit);
break;
+ case AUDIT_GET_FEATURE:
+ err = audit_get_feature(skb);
+ if (err)
+ return err;
+ break;
+ case AUDIT_SET_FEATURE:
+ err = audit_set_feature(skb);
+ if (err)
+ return err;
+ break;
case AUDIT_USER:
case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
@@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
audit_log_common_recv_msg(&ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
- audit_log_format(ab, " msg='%.1024s'",
+ audit_log_format(ab, " msg='%.*s'",
+ AUDIT_MESSAGE_TEXT_MAX,
(char *)data);
else {
int size;
@@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct task_struct *tsk = current;
spin_lock(&tsk->sighand->siglock);
- s.enabled = tsk->signal->audit_tty != 0;
+ s.enabled = tsk->signal->audit_tty;
s.log_passwd = tsk->signal->audit_tty_log_passwd;
spin_unlock(&tsk->sighand->siglock);
@@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+ memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
if ((s.enabled != 0 && s.enabled != 1) ||
(s.log_passwd != 0 && s.log_passwd != 1))
return -EINVAL;
@@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time)
remove_wait_queue(&audit_backlog_wait, &wait);
}
-/* Obtain an audit buffer. This routine does locking to obtain the
- * audit buffer, but then no locking is required for calls to
- * audit_log_*format. If the tsk is a task that is currently in a
- * syscall, then the syscall is marked as auditable and an audit record
- * will be written at syscall exit. If there is no associated task, tsk
- * should be NULL. */
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
@@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab)
u32 sessionid = audit_get_sessionid(current);
uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
- audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+ audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
}
void audit_log_key(struct audit_buffer *ab, char *key)
@@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
}
}
+ /* log the audit_names record type */
+ audit_log_format(ab, " nametype=");
+ switch(n->type) {
+ case AUDIT_TYPE_NORMAL:
+ audit_log_format(ab, "NORMAL");
+ break;
+ case AUDIT_TYPE_PARENT:
+ audit_log_format(ab, "PARENT");
+ break;
+ case AUDIT_TYPE_CHILD_DELETE:
+ audit_log_format(ab, "DELETE");
+ break;
+ case AUDIT_TYPE_CHILD_CREATE:
+ audit_log_format(ab, "CREATE");
+ break;
+ default:
+ audit_log_format(ab, "UNKNOWN");
+ break;
+ }
+
audit_log_fcaps(ab, n);
audit_log_end(ab);
}
diff --git a/kernel/audit.h b/kernel/audit.h
index 123c9b7c3979..b779642b29af 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -197,6 +197,9 @@ struct audit_context {
int fd;
int flags;
} mmap;
+ struct {
+ int argc;
+ } execve;
};
int fds[2];
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f7aee8be7fb2..51f3fd4c1ed3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
case AUDIT_DEVMINOR:
case AUDIT_EXIT:
case AUDIT_SUCCESS:
+ case AUDIT_INODE:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
return -EINVAL;
@@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
f->lsm_rule = NULL;
/* Support legacy tests for a valid loginuid */
- if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
+ if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9845cb32b60a..90594c9f7552 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -95,13 +95,6 @@ struct audit_aux_data {
/* Number of target pids per aux struct. */
#define AUDIT_AUX_PIDS 16
-struct audit_aux_data_execve {
- struct audit_aux_data d;
- int argc;
- int envc;
- struct mm_struct *mm;
-};
-
struct audit_aux_data_pids {
struct audit_aux_data d;
pid_t target_pid[AUDIT_AUX_PIDS];
@@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps {
struct audit_cap_data new_pcap;
};
-struct audit_aux_data_capset {
- struct audit_aux_data d;
- pid_t pid;
- struct audit_cap_data cap;
-};
-
struct audit_tree_refs {
struct audit_tree_refs *next;
struct audit_chunk *c[31];
@@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
case AUDIT_INODE:
if (name)
- result = (name->ino == f->val);
+ result = audit_comparator(name->ino, f->op, f->val);
else if (ctx) {
list_for_each_entry(n, &ctx->names_list, list) {
if (audit_comparator(n->ino, f->op, f->val)) {
@@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk)
return 0; /* Return if not auditing. */
state = audit_filter_task(tsk, &key);
- if (state == AUDIT_DISABLED)
+ if (state == AUDIT_DISABLED) {
+ clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
return 0;
+ }
if (!(context = audit_alloc_context(state))) {
kfree(key);
@@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context,
}
static void audit_log_execve_info(struct audit_context *context,
- struct audit_buffer **ab,
- struct audit_aux_data_execve *axi)
+ struct audit_buffer **ab)
{
int i, len;
size_t len_sent = 0;
const char __user *p;
char *buf;
- if (axi->mm != current->mm)
- return; /* execve failed, no additional info */
-
- p = (const char __user *)axi->mm->arg_start;
+ p = (const char __user *)current->mm->arg_start;
- audit_log_format(*ab, "argc=%d", axi->argc);
+ audit_log_format(*ab, "argc=%d", context->execve.argc);
/*
* we need some kernel buffer to hold the userspace args. Just
@@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context,
return;
}
- for (i = 0; i < axi->argc; i++) {
+ for (i = 0; i < context->execve.argc; i++) {
len = audit_log_single_execve_arg(context, ab, i,
&len_sent, p, buf);
if (len <= 0)
@@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
context->mmap.flags);
break; }
+ case AUDIT_EXECVE: {
+ audit_log_execve_info(context, &ab);
+ break; }
}
audit_log_end(ab);
}
@@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
switch (aux->type) {
- case AUDIT_EXECVE: {
- struct audit_aux_data_execve *axi = (void *)aux;
- audit_log_execve_info(context, &ab, axi);
- break; }
-
case AUDIT_BPRM_FCAPS: {
struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx,
/* global counter which is incremented every time something logs in */
static atomic_t session_id = ATOMIC_INIT(0);
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+ /* if we are unset, we don't need privs */
+ if (!audit_loginuid_set(current))
+ return 0;
+ /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
+ if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+ return -EPERM;
+ /* it is set, you need permission */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* reject if this is not an unset and we don't allow that */
+ if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+ unsigned int oldsessionid, unsigned int sessionid,
+ int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid, ologinuid, nloginuid;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ ologinuid = from_kuid(&init_user_ns, koldloginuid);
+ nloginuid = from_kuid(&init_user_ns, kloginuid),
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+ if (!ab)
+ return;
+ audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old "
+ "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid,
+ nloginuid, oldsessionid, sessionid, !rc);
+ audit_log_end(ab);
+}
+
/**
* audit_set_loginuid - set current task's audit_context loginuid
* @loginuid: loginuid value
@@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0);
int audit_set_loginuid(kuid_t loginuid)
{
struct task_struct *task = current;
- struct audit_context *context = task->audit_context;
- unsigned int sessionid;
+ unsigned int oldsessionid, sessionid = (unsigned int)-1;
+ kuid_t oldloginuid;
+ int rc;
-#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
- if (audit_loginuid_set(task))
- return -EPERM;
-#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
- if (!capable(CAP_AUDIT_CONTROL))
- return -EPERM;
-#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+ oldloginuid = audit_get_loginuid(current);
+ oldsessionid = audit_get_sessionid(current);
- sessionid = atomic_inc_return(&session_id);
- if (context && context->in_syscall) {
- struct audit_buffer *ab;
+ rc = audit_set_loginuid_perm(loginuid);
+ if (rc)
+ goto out;
+
+ /* are we setting or clearing? */
+ if (uid_valid(loginuid))
+ sessionid = atomic_inc_return(&session_id);
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
- if (ab) {
- audit_log_format(ab, "login pid=%d uid=%u "
- "old auid=%u new auid=%u"
- " old ses=%u new ses=%u",
- task->pid,
- from_kuid(&init_user_ns, task_uid(task)),
- from_kuid(&init_user_ns, task->loginuid),
- from_kuid(&init_user_ns, loginuid),
- task->sessionid, sessionid);
- audit_log_end(ab);
- }
- }
task->sessionid = sessionid;
task->loginuid = loginuid;
- return 0;
+out:
+ audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+ return rc;
}
/**
@@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
context->ipc.has_perm = 1;
}
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
{
- struct audit_aux_data_execve *ax;
struct audit_context *context = current->audit_context;
- ax = kmalloc(sizeof(*ax), GFP_KERNEL);
- if (!ax)
- return -ENOMEM;
-
- ax->argc = bprm->argc;
- ax->envc = bprm->envc;
- ax->mm = bprm->mm;
- ax->d.type = AUDIT_EXECVE;
- ax->d.next = context->aux;
- context->aux = (void *)ax;
- return 0;
+ context->type = AUDIT_EXECVE;
+ context->execve.argc = bprm->argc;
}
diff --git a/kernel/bounds.c b/kernel/bounds.c
index e8ca97b5c386..5253204afdca 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -11,6 +11,7 @@
#include <linux/kbuild.h>
#include <linux/page_cgroup.h>
#include <linux/log2.h>
+#include <linux/spinlock_types.h>
void foo(void)
{
@@ -21,5 +22,6 @@ void foo(void)
#ifdef CONFIG_SMP
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
#endif
+ DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int));
/* End of constants */
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0839bcd48c8..8b729c278b64 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
/*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions. Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
* Generate an array of cgroup subsystem pointers. At boot time, this is
* populated with the built in subsystems, and modular subsystems are
* registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);
/**
* cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
- schedule_work(&cgrp->destroy_work);
+ queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
}
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -895,11 +904,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
iput(inode);
}
-static int cgroup_delete(const struct dentry *d)
-{
- return 1;
-}
-
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1490,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
{
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
- .d_delete = cgroup_delete,
+ .d_delete = always_delete_dentry,
};
struct inode *inode =
@@ -2426,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
.write = cgroup_file_write,
.llseek = seq_lseek,
- .release = single_release,
+ .release = cgroup_file_release,
};
static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2487,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
ret = cft->release(inode, file);
if (css->ss)
css_put(css);
+ if (file->f_op == &cgroup_seqfile_operations)
+ single_release(inode, file);
return ret;
}
@@ -4254,7 +4260,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
* css_put(). dput() requires process context which we don't have.
*/
INIT_WORK(&css->destroy_work, css_free_work_fn);
- schedule_work(&css->destroy_work);
+ queue_work(cgroup_destroy_wq, &css->destroy_work);
}
static void css_release(struct percpu_ref *ref)
@@ -4544,7 +4550,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_killed_work_fn);
- schedule_work(&css->destroy_work);
+ queue_work(cgroup_destroy_wq, &css->destroy_work);
}
/**
@@ -5068,6 +5074,22 @@ out:
return err;
}
+static int __init cgroup_wq_init(void)
+{
+ /*
+ * There isn't much point in executing destruction path in
+ * parallel. Good chunk is serialized with cgroup_mutex anyway.
+ * Use 1 for @max_active.
+ *
+ * We would prefer to do this in cgroup_init() above, but that
+ * is called before init_workqueues(): so leave this until after.
+ */
+ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+ BUG_ON(!cgroup_destroy_wq);
+ return 0;
+}
+core_initcall(cgroup_wq_init);
+
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6bf981e13c43..4772034b4b17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
need_loop = task_has_mempolicy(tsk) ||
!nodes_intersects(*newmems, tsk->mems_allowed);
- if (need_loop)
+ if (need_loop) {
+ local_irq_disable();
write_seqcount_begin(&tsk->mems_allowed_seq);
+ }
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
@@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
tsk->mems_allowed = *newmems;
- if (need_loop)
+ if (need_loop) {
write_seqcount_end(&tsk->mems_allowed_seq);
+ local_irq_enable();
+ }
task_unlock(tsk);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d724e7757cd1..72348dc192c1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5680,11 +5680,6 @@ static void swevent_hlist_put(struct perf_event *event)
{
int cpu;
- if (event->cpu != -1) {
- swevent_hlist_put_cpu(event, event->cpu);
- return;
- }
-
for_each_possible_cpu(cpu)
swevent_hlist_put_cpu(event, cpu);
}
@@ -5718,9 +5713,6 @@ static int swevent_hlist_get(struct perf_event *event)
int err;
int cpu, failed_cpu;
- if (event->cpu != -1)
- return swevent_hlist_get_cpu(event, event->cpu);
-
get_online_cpus();
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(event, cpu);
diff --git a/kernel/extable.c b/kernel/extable.c
index 832cb28105bb..763faf037ec1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -61,7 +61,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
static inline int init_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext &&
- addr <= (unsigned long)_einittext)
+ addr < (unsigned long)_einittext)
return 1;
return 0;
}
@@ -69,7 +69,7 @@ static inline int init_kernel_text(unsigned long addr)
int core_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext &&
- addr <= (unsigned long)_etext)
+ addr < (unsigned long)_etext)
return 1;
if (system_state == SYSTEM_BOOTING &&
diff --git a/kernel/fork.c b/kernel/fork.c
index f6d11fc67f72..728d5be9548c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -532,7 +532,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->flags = (current->mm) ?
(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
mm->core_state = NULL;
- mm->nr_ptes = 0;
+ atomic_long_set(&mm->nr_ptes, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
mm_init_aio(mm);
@@ -560,7 +560,7 @@ static void check_mm(struct mm_struct *mm)
"mm:%p idx:%d val:%ld\n", mm, i, x);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
VM_BUG_ON(mm->pmd_huge_pte);
#endif
}
@@ -814,7 +814,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
memcpy(mm, oldmm, sizeof(*mm));
mm_init_cpumask(mm);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
mm->pmd_huge_pte = NULL;
#endif
if (!mm_init(mm, tsk))
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 8807061ca004..9328b80eaf14 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -207,6 +207,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
return ret;
}
+static atomic_t reset_hung_task = ATOMIC_INIT(0);
+
+void reset_hung_task_detector(void)
+{
+ atomic_set(&reset_hung_task, 1);
+}
+EXPORT_SYMBOL_GPL(reset_hung_task_detector);
+
/*
* kthread which checks for tasks stuck in D state
*/
@@ -220,6 +228,9 @@ static int watchdog(void *dummy)
while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
timeout = sysctl_hung_task_timeout_secs;
+ if (atomic_xchg(&reset_hung_task, 0))
+ continue;
+
check_hung_uninterruptible_tasks(timeout);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a3bb14fbe5c6..dc04c166c54d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -214,7 +214,7 @@ void irq_enable(struct irq_desc *desc)
}
/**
- * irq_disable - Mark interupt disabled
+ * irq_disable - Mark interrupt disabled
* @desc: irq descriptor which should be disabled
*
* If the chip does not implement the irq_disable callback, we
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3e59f951d42f..481a13c43b17 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -786,7 +786,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
}
/*
- * Interrupts explicitely requested as threaded interupts want to be
+ * Interrupts explicitly requested as threaded interrupts want to be
* preemtible - many of them need to sleep and wait for slow busses to
* complete.
*/
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 1162f1030f18..3320b84cc60f 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -14,6 +14,7 @@ enum {
_IRQ_NO_BALANCING = IRQ_NO_BALANCING,
_IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
_IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
+ _IRQ_IS_POLLED = IRQ_IS_POLLED,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -26,6 +27,7 @@ enum {
#define IRQ_NOAUTOEN GOT_YOU_MORON
#define IRQ_NESTED_THREAD GOT_YOU_MORON
#define IRQ_PER_CPU_DEVID GOT_YOU_MORON
+#define IRQ_IS_POLLED GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
@@ -147,3 +149,8 @@ static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
{
return desc->status_use_accessors & _IRQ_NESTED_THREAD;
}
+
+static inline bool irq_settings_is_polled(struct irq_desc *desc)
+{
+ return desc->status_use_accessors & _IRQ_IS_POLLED;
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7b5f012bde9d..a1d8cc63b56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -67,8 +67,13 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
raw_spin_lock(&desc->lock);
- /* PER_CPU and nested thread interrupts are never polled */
- if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
+ /*
+ * PER_CPU, nested thread interrupts and interrupts explicitely
+ * marked polled are excluded from polling.
+ */
+ if (irq_settings_is_per_cpu(desc) ||
+ irq_settings_is_nested_thread(desc) ||
+ irq_settings_is_polled(desc))
goto out;
/*
@@ -268,7 +273,8 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
- if (desc->istate & IRQS_POLL_INPROGRESS)
+ if (desc->istate & IRQS_POLL_INPROGRESS ||
+ irq_settings_is_polled(desc))
return;
/* we get here again via the threaded handler */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 2a74f307c5ec..490afc03627e 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -921,7 +921,7 @@ static int kimage_load_segment(struct kimage *image,
* reinitialize them.
*
* - A machine specific part that includes the syscall number
- * and the copies the image to it's final destination. And
+ * and then copies the image to it's final destination. And
* jumps into the image at entry.
*
* kexec does not sync, or unmount filesystems so if you need
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
deleted file mode 100644
index 4a9a86d12c8b..000000000000
--- a/kernel/modsign_certificate.S
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name) \
- .globl VMLINUX_SYMBOL(name); \
- VMLINUX_SYMBOL(name):
-
- .section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
- .incbin "signing_key.x509"
- .incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
deleted file mode 100644
index 7cbd4507a7e6..000000000000
--- a/kernel/modsign_pubkey.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
- pr_notice("Initialise module verification\n");
-
- modsign_keyring = keyring_alloc(".module_sign",
- KUIDT_INIT(0), KGIDT_INIT(0),
- current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA, NULL);
- if (IS_ERR(modsign_keyring))
- panic("Can't allocate module signing keyring\n");
-
- return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
- key_ref_t key;
- const u8 *p, *end;
- size_t plen;
-
- pr_notice("Loading module verification certificates\n");
-
- end = modsign_certificate_list_end;
- p = modsign_certificate_list;
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(modsign_keyring, 1),
- "asymmetric",
- NULL,
- p,
- plen,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW,
- KEY_ALLOC_NOT_IN_QUOTA);
- if (IS_ERR(key))
- pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- else
- pr_notice("MODSIGN: Loaded cert '%s'\n",
- key_ref_to_ptr(key)->description);
- p += plen;
- }
-
- return 0;
-
-dodgy_cert:
- pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
- return 0;
-}
-late_initcall(load_module_signing_keys);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 24f9247b7d02..915e123a430f 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -9,6 +9,4 @@
* 2 of the Licence, or (at your option) any later version.
*/
-extern struct key *modsign_keyring;
-
extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
diff --git a/kernel/module.c b/kernel/module.c
index af5ebd21d77b..f5a3b1e8ec51 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -641,8 +641,6 @@ static int module_unload_init(struct module *mod)
/* Hold reference count during initialization. */
__this_cpu_write(mod->refptr->incs, 1);
- /* Backwards compatibility macros put refcount during init. */
- mod->waiter = current;
return 0;
}
@@ -768,16 +766,9 @@ static int __try_stop_module(void *_sref)
static int try_stop_module(struct module *mod, int flags, int *forced)
{
- if (flags & O_NONBLOCK) {
- struct stopref sref = { mod, flags, forced };
+ struct stopref sref = { mod, flags, forced };
- return stop_machine(__try_stop_module, &sref, NULL);
- } else {
- /* We don't need to stop the machine for this. */
- mod->state = MODULE_STATE_GOING;
- synchronize_sched();
- return 0;
- }
+ return stop_machine(__try_stop_module, &sref, NULL);
}
unsigned long module_refcount(struct module *mod)
@@ -810,21 +801,6 @@ EXPORT_SYMBOL(module_refcount);
/* This exists whether we can unload or not */
static void free_module(struct module *mod);
-static void wait_for_zero_refcount(struct module *mod)
-{
- /* Since we might sleep for some time, release the mutex first */
- mutex_unlock(&module_mutex);
- for (;;) {
- pr_debug("Looking at refcount...\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (module_refcount(mod) == 0)
- break;
- schedule();
- }
- current->state = TASK_RUNNING;
- mutex_lock(&module_mutex);
-}
-
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
unsigned int, flags)
{
@@ -839,6 +815,11 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
+ if (!(flags & O_NONBLOCK)) {
+ printk(KERN_WARNING
+ "waiting module removal not supported: please upgrade");
+ }
+
if (mutex_lock_interruptible(&module_mutex) != 0)
return -EINTR;
@@ -856,8 +837,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
/* Doing init or already dying? */
if (mod->state != MODULE_STATE_LIVE) {
- /* FIXME: if (force), slam module count and wake up
- waiter --RR */
+ /* FIXME: if (force), slam module count damn the torpedoes */
pr_debug("%s already dying\n", mod->name);
ret = -EBUSY;
goto out;
@@ -873,18 +853,11 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
}
}
- /* Set this up before setting mod->state */
- mod->waiter = current;
-
/* Stop the machine so refcounts can't move and disable module. */
ret = try_stop_module(mod, flags, &forced);
if (ret != 0)
goto out;
- /* Never wait if forced. */
- if (!forced && module_refcount(mod) != 0)
- wait_for_zero_refcount(mod);
-
mutex_unlock(&module_mutex);
/* Final destruction now no one is using it. */
if (mod->exit != NULL)
@@ -1002,9 +975,6 @@ void module_put(struct module *module)
__this_cpu_inc(module->refptr->decs);
trace_module_put(module, _RET_IP_);
- /* Maybe they're waiting for us to drop reference? */
- if (unlikely(!module_is_live(module)))
- wake_up_process(module->waiter);
preempt_enable();
}
}
@@ -2728,7 +2698,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
return 0;
}
-static void find_module_sections(struct module *mod, struct load_info *info)
+static int find_module_sections(struct module *mod, struct load_info *info)
{
mod->kp = section_objs(info, "__param",
sizeof(*mod->kp), &mod->num_kp);
@@ -2758,6 +2728,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
#ifdef CONFIG_CONSTRUCTORS
mod->ctors = section_objs(info, ".ctors",
sizeof(*mod->ctors), &mod->num_ctors);
+ if (!mod->ctors)
+ mod->ctors = section_objs(info, ".init_array",
+ sizeof(*mod->ctors), &mod->num_ctors);
+ else if (find_sec(info, ".init_array")) {
+ /*
+ * This shouldn't happen with same compiler and binutils
+ * building all parts of the module.
+ */
+ printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
+ mod->name);
+ return -EINVAL;
+ }
#endif
#ifdef CONFIG_TRACEPOINTS
@@ -2795,6 +2777,8 @@ static void find_module_sections(struct module *mod, struct load_info *info)
info->debug = section_objs(info, "__verbose",
sizeof(*info->debug), &info->num_debug);
+
+ return 0;
}
static int move_module(struct module *mod, struct load_info *info)
@@ -3248,7 +3232,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Now we've got everything in the final locations, we can
* find optional sections. */
- find_module_sections(mod, info);
+ err = find_module_sections(mod, info);
+ if (err)
+ goto free_unload;
err = check_module_license_and_versions(mod);
if (err)
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index f2970bddc5ea..be5b8fac4bd0 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -14,6 +14,7 @@
#include <crypto/public_key.h>
#include <crypto/hash.h>
#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
#include "module-internal.h"
/*
@@ -28,7 +29,7 @@
*/
struct module_signature {
u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */
- u8 hash; /* Digest algorithm [enum pkey_hash_algo] */
+ u8 hash; /* Digest algorithm [enum hash_algo] */
u8 id_type; /* Key identifier type [enum pkey_id_type] */
u8 signer_len; /* Length of signer's name */
u8 key_id_len; /* Length of key identifier */
@@ -39,7 +40,7 @@ struct module_signature {
/*
* Digest the module contents.
*/
-static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
+static struct public_key_signature *mod_make_digest(enum hash_algo hash,
const void *mod,
unsigned long modlen)
{
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
/* Allocate the hashing algorithm we're going to need and find out how
* big the hash operational data will be.
*/
- tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+ tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
if (IS_ERR(tfm))
return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
pr_debug("Look up: \"%s\"\n", id);
- key = keyring_search(make_key_ref(modsign_keyring, 1),
+ key = keyring_search(make_key_ref(system_trusted_keyring, 1),
&key_type_asymmetric, id);
if (IS_ERR(key))
pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
return -ENOPKG;
if (ms.hash >= PKEY_HASH__LAST ||
- !pkey_hash_algo[ms.hash])
+ !hash_algo_name[ms.hash])
return -ENOPKG;
key = request_asymmetric_key(sig, ms.signer_len,
diff --git a/kernel/padata.c b/kernel/padata.c
index 07af2c95dcfe..2abd25d79cc8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
static int padata_cpu_hash(struct parallel_data *pd)
{
+ unsigned int seq_nr;
int cpu_index;
/*
@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd)
* seq_nr mod. number of cpus in use.
*/
- spin_lock(&pd->seq_lock);
- cpu_index = pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
- pd->seq_nr++;
- spin_unlock(&pd->seq_lock);
+ seq_nr = atomic_inc_return(&pd->seq_nr);
+ cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
return padata_index_to_cpu(pd, cpu_index);
}
@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
padata_init_pqueues(pd);
padata_init_squeues(pd);
setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
- pd->seq_nr = 0;
+ atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 0);
pd->pinst = pinst;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 10c22cae83a0..b38109e204af 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -792,7 +792,8 @@ void free_basic_memory_bitmaps(void)
{
struct memory_bitmap *bm1, *bm2;
- BUG_ON(!(forbidden_pages_map && free_pages_map));
+ if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+ return;
bm1 = forbidden_pages_map;
bm2 = free_pages_map;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 24850270c802..98d357584cd6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -70,6 +70,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
+ data->free_bitmaps = false;
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 0c9a934cfec1..1254f312d024 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
/*
* Test whether RCU thinks that the current CPU is idle.
*/
-bool __rcu_is_watching(void)
+bool notrace __rcu_is_watching(void)
{
return rcu_dynticks_nesting;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4c06ddfea7cd..dd081987a8ec 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -664,7 +664,7 @@ void rcu_nmi_exit(void)
* rcu_is_watching(), the caller of __rcu_is_watching() must have at
* least disabled preemption.
*/
-bool __rcu_is_watching(void)
+bool notrace __rcu_is_watching(void)
{
return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
}
@@ -675,7 +675,7 @@ bool __rcu_is_watching(void)
* If the current CPU is in its idle loop and is neither in an interrupt
* or NMI handler, return true.
*/
-bool rcu_is_watching(void)
+bool notrace rcu_is_watching(void)
{
int ret;
diff --git a/kernel/smp.c b/kernel/smp.c
index 46116100f0ee..bd9f94028838 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -15,7 +15,6 @@
#include "smpboot.h"
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
enum {
CSD_FLAG_LOCK = 0x01,
CSD_FLAG_WAIT = 0x02,
@@ -140,8 +139,7 @@ static void csd_unlock(struct call_single_data *csd)
* for execution on the given CPU. data must already have
* ->func, ->info, and ->flags set.
*/
-static
-void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
+static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
{
struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
unsigned long flags;
@@ -464,7 +462,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)
return 0;
}
EXPORT_SYMBOL(smp_call_function);
-#endif /* USE_GENERIC_SMP_HELPERS */
/* Setup configured maximum number of CPUs to activate */
unsigned int setup_max_cpus = NR_CPUS;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b24988353458..11025ccc06dd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -6,8 +6,6 @@
* Distribute under GPLv2.
*
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
- *
- * Remote softirq infrastructure is by Jens Axboe.
*/
#include <linux/export.h>
@@ -627,146 +625,17 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
-/*
- * Remote softirq bits
- */
-
-DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
-EXPORT_PER_CPU_SYMBOL(softirq_work_list);
-
-static void __local_trigger(struct call_single_data *cp, int softirq)
-{
- struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
-
- list_add_tail(&cp->list, head);
-
- /* Trigger the softirq only if the list was previously empty. */
- if (head->next == &cp->list)
- raise_softirq_irqoff(softirq);
-}
-
-#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
-static void remote_softirq_receive(void *data)
-{
- struct call_single_data *cp = data;
- unsigned long flags;
- int softirq;
-
- softirq = *(int *)cp->info;
- local_irq_save(flags);
- __local_trigger(cp, softirq);
- local_irq_restore(flags);
-}
-
-static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
- if (cpu_online(cpu)) {
- cp->func = remote_softirq_receive;
- cp->info = &softirq;
- cp->flags = 0;
-
- __smp_call_function_single(cpu, cp, 0);
- return 0;
- }
- return 1;
-}
-#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
-static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
- return 1;
-}
-#endif
-
-/**
- * __send_remote_softirq - try to schedule softirq work on a remote cpu
- * @cp: private SMP call function data area
- * @cpu: the remote cpu
- * @this_cpu: the currently executing cpu
- * @softirq: the softirq for the work
- *
- * Attempt to schedule softirq work on a remote cpu. If this cannot be
- * done, the work is instead queued up on the local cpu.
- *
- * Interrupts must be disabled.
- */
-void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
-{
- if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
- __local_trigger(cp, softirq);
-}
-EXPORT_SYMBOL(__send_remote_softirq);
-
-/**
- * send_remote_softirq - try to schedule softirq work on a remote cpu
- * @cp: private SMP call function data area
- * @cpu: the remote cpu
- * @softirq: the softirq for the work
- *
- * Like __send_remote_softirq except that disabling interrupts and
- * computing the current cpu is done for the caller.
- */
-void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
-{
- unsigned long flags;
- int this_cpu;
-
- local_irq_save(flags);
- this_cpu = smp_processor_id();
- __send_remote_softirq(cp, cpu, this_cpu, softirq);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(send_remote_softirq);
-
-static int remote_softirq_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- /*
- * If a CPU goes away, splice its entries to the current CPU
- * and trigger a run of the softirq
- */
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
- int i;
-
- local_irq_disable();
- for (i = 0; i < NR_SOFTIRQS; i++) {
- struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
- struct list_head *local_head;
-
- if (list_empty(head))
- continue;
-
- local_head = &__get_cpu_var(softirq_work_list[i]);
- list_splice_init(head, local_head);
- raise_softirq_irqoff(i);
- }
- local_irq_enable();
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block remote_softirq_cpu_notifier = {
- .notifier_call = remote_softirq_cpu_notify,
-};
-
void __init softirq_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
- int i;
-
per_cpu(tasklet_vec, cpu).tail =
&per_cpu(tasklet_vec, cpu).head;
per_cpu(tasklet_hi_vec, cpu).tail =
&per_cpu(tasklet_hi_vec, cpu).head;
- for (i = 0; i < NR_SOFTIRQS; i++)
- INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
}
- register_hotcpu_notifier(&remote_softirq_cpu_notifier);
-
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
new file mode 100644
index 000000000000..4aef390671cb
--- /dev/null
+++ b/kernel/system_certificates.S
@@ -0,0 +1,10 @@
+#include <linux/export.h>
+#include <linux/init.h>
+
+ __INITRODATA
+
+ .globl VMLINUX_SYMBOL(system_certificate_list)
+VMLINUX_SYMBOL(system_certificate_list):
+ .incbin "kernel/x509_certificate_list"
+ .globl VMLINUX_SYMBOL(system_certificate_list_end)
+VMLINUX_SYMBOL(system_certificate_list_end):
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
new file mode 100644
index 000000000000..564dd93430a2
--- /dev/null
+++ b/kernel/system_keyring.c
@@ -0,0 +1,105 @@
+/* System trusted keyring for trusted public keys
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "module-internal.h"
+
+struct key *system_trusted_keyring;
+EXPORT_SYMBOL_GPL(system_trusted_keyring);
+
+extern __initconst const u8 system_certificate_list[];
+extern __initconst const u8 system_certificate_list_end[];
+
+/*
+ * Load the compiled-in keys
+ */
+static __init int system_trusted_keyring_init(void)
+{
+ pr_notice("Initialise system trusted keyring\n");
+
+ system_trusted_keyring =
+ keyring_alloc(".system_keyring",
+ KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(system_trusted_keyring))
+ panic("Can't allocate system trusted keyring\n");
+
+ set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+ return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(system_trusted_keyring_init);
+
+/*
+ * Load the compiled-in list of X.509 certificates.
+ */
+static __init int load_system_certificate_list(void)
+{
+ key_ref_t key;
+ const u8 *p, *end;
+ size_t plen;
+
+ pr_notice("Loading compiled-in X.509 certificates\n");
+
+ end = system_certificate_list_end;
+ p = system_certificate_list;
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+ "asymmetric",
+ NULL,
+ p,
+ plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_TRUSTED);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return 0;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+ return 0;
+}
+late_initcall(load_system_certificate_list);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 9f4618eb51c8..13d2f7cd65db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -673,17 +673,18 @@ err:
nlmsg_free(rep_skb);
}
-static struct genl_ops taskstats_ops = {
- .cmd = TASKSTATS_CMD_GET,
- .doit = taskstats_user_cmd,
- .policy = taskstats_cmd_get_policy,
- .flags = GENL_ADMIN_PERM,
-};
-
-static struct genl_ops cgroupstats_ops = {
- .cmd = CGROUPSTATS_CMD_GET,
- .doit = cgroupstats_user_cmd,
- .policy = cgroupstats_cmd_get_policy,
+static const struct genl_ops taskstats_ops[] = {
+ {
+ .cmd = TASKSTATS_CMD_GET,
+ .doit = taskstats_user_cmd,
+ .policy = taskstats_cmd_get_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = CGROUPSTATS_CMD_GET,
+ .doit = cgroupstats_user_cmd,
+ .policy = cgroupstats_cmd_get_policy,
+ },
};
/* Needed early in initialization */
@@ -702,26 +703,13 @@ static int __init taskstats_init(void)
{
int rc;
- rc = genl_register_family(&family);
+ rc = genl_register_family_with_ops(&family, taskstats_ops);
if (rc)
return rc;
- rc = genl_register_ops(&family, &taskstats_ops);
- if (rc < 0)
- goto err;
-
- rc = genl_register_ops(&family, &cgroupstats_ops);
- if (rc < 0)
- goto err_cgroup_ops;
-
family_registered = 1;
pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
return 0;
-err_cgroup_ops:
- genl_unregister_ops(&family, &taskstats_ops);
-err:
- genl_unregister_family(&family);
- return rc;
}
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 03cf44ac54d3..0e9f9eaade2f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -367,9 +367,6 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
if (FTRACE_WARN_ON(ops == &global_ops))
return -EINVAL;
@@ -428,9 +425,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
- if (ftrace_disabled)
- return -ENODEV;
-
if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
return -EBUSY;
@@ -2088,10 +2082,15 @@ static void ftrace_startup_enable(int command)
static int ftrace_startup(struct ftrace_ops *ops, int command)
{
bool hash_enable = true;
+ int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
+ ret = __register_ftrace_function(ops);
+ if (ret)
+ return ret;
+
ftrace_start_up++;
command |= FTRACE_UPDATE_CALLS;
@@ -2113,12 +2112,17 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return 0;
}
-static void ftrace_shutdown(struct ftrace_ops *ops, int command)
+static int ftrace_shutdown(struct ftrace_ops *ops, int command)
{
bool hash_disable = true;
+ int ret;
if (unlikely(ftrace_disabled))
- return;
+ return -ENODEV;
+
+ ret = __unregister_ftrace_function(ops);
+ if (ret)
+ return ret;
ftrace_start_up--;
/*
@@ -2153,9 +2157,10 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
}
if (!command || !ftrace_enabled)
- return;
+ return 0;
ftrace_run_update_code(command);
+ return 0;
}
static void ftrace_startup_sysctl(void)
@@ -3060,16 +3065,13 @@ static void __enable_ftrace_function_probe(void)
if (i == FTRACE_FUNC_HASHSIZE)
return;
- ret = __register_ftrace_function(&trace_probe_ops);
- if (!ret)
- ret = ftrace_startup(&trace_probe_ops, 0);
+ ret = ftrace_startup(&trace_probe_ops, 0);
ftrace_probe_registered = 1;
}
static void __disable_ftrace_function_probe(void)
{
- int ret;
int i;
if (!ftrace_probe_registered)
@@ -3082,9 +3084,7 @@ static void __disable_ftrace_function_probe(void)
}
/* no more funcs left */
- ret = __unregister_ftrace_function(&trace_probe_ops);
- if (!ret)
- ftrace_shutdown(&trace_probe_ops, 0);
+ ftrace_shutdown(&trace_probe_ops, 0);
ftrace_probe_registered = 0;
}
@@ -3307,7 +3307,11 @@ void unregister_ftrace_function_probe_all(char *glob)
static LIST_HEAD(ftrace_commands);
static DEFINE_MUTEX(ftrace_cmd_mutex);
-int register_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only register ftrace commands from __init, so mark this
+ * __init too.
+ */
+__init int register_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p;
int ret = 0;
@@ -3326,7 +3330,11 @@ int register_ftrace_command(struct ftrace_func_command *cmd)
return ret;
}
-int unregister_ftrace_command(struct ftrace_func_command *cmd)
+/*
+ * Currently we only unregister ftrace commands from __init, so mark
+ * this __init too.
+ */
+__init int unregister_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p, *n;
int ret = -ENODEV;
@@ -3641,7 +3649,7 @@ __setup("ftrace_filter=", set_ftrace_filter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
static int __init set_graph_function(char *str)
{
@@ -3659,7 +3667,7 @@ static void __init set_ftrace_early_graph(char *buf)
func = strsep(&buf, ",");
/* we allow only one expression at a time */
ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
- func);
+ FTRACE_GRAPH_MAX_FUNCS, func);
if (ret)
printk(KERN_DEBUG "ftrace: function %s not "
"traceable\n", func);
@@ -3776,15 +3784,25 @@ static const struct file_operations ftrace_notrace_fops = {
static DEFINE_MUTEX(graph_lock);
int ftrace_graph_count;
-int ftrace_graph_filter_enabled;
+int ftrace_graph_notrace_count;
unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+
+struct ftrace_graph_data {
+ unsigned long *table;
+ size_t size;
+ int *count;
+ const struct seq_operations *seq_ops;
+};
static void *
__g_next(struct seq_file *m, loff_t *pos)
{
- if (*pos >= ftrace_graph_count)
+ struct ftrace_graph_data *fgd = m->private;
+
+ if (*pos >= *fgd->count)
return NULL;
- return &ftrace_graph_funcs[*pos];
+ return &fgd->table[*pos];
}
static void *
@@ -3796,10 +3814,12 @@ g_next(struct seq_file *m, void *v, loff_t *pos)
static void *g_start(struct seq_file *m, loff_t *pos)
{
+ struct ftrace_graph_data *fgd = m->private;
+
mutex_lock(&graph_lock);
/* Nothing, tell g_show to print all functions are enabled */
- if (!ftrace_graph_filter_enabled && !*pos)
+ if (!*fgd->count && !*pos)
return (void *)1;
return __g_next(m, pos);
@@ -3835,38 +3855,88 @@ static const struct seq_operations ftrace_graph_seq_ops = {
};
static int
-ftrace_graph_open(struct inode *inode, struct file *file)
+__ftrace_graph_open(struct inode *inode, struct file *file,
+ struct ftrace_graph_data *fgd)
{
int ret = 0;
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
mutex_lock(&graph_lock);
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
- ftrace_graph_filter_enabled = 0;
- ftrace_graph_count = 0;
- memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
+ *fgd->count = 0;
+ memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));
}
mutex_unlock(&graph_lock);
- if (file->f_mode & FMODE_READ)
- ret = seq_open(file, &ftrace_graph_seq_ops);
+ if (file->f_mode & FMODE_READ) {
+ ret = seq_open(file, fgd->seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = fgd;
+ }
+ } else
+ file->private_data = fgd;
return ret;
}
static int
+ftrace_graph_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_graph_data *fgd;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL);
+ if (fgd == NULL)
+ return -ENOMEM;
+
+ fgd->table = ftrace_graph_funcs;
+ fgd->size = FTRACE_GRAPH_MAX_FUNCS;
+ fgd->count = &ftrace_graph_count;
+ fgd->seq_ops = &ftrace_graph_seq_ops;
+
+ return __ftrace_graph_open(inode, file, fgd);
+}
+
+static int
+ftrace_graph_notrace_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_graph_data *fgd;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL);
+ if (fgd == NULL)
+ return -ENOMEM;
+
+ fgd->table = ftrace_graph_notrace_funcs;
+ fgd->size = FTRACE_GRAPH_MAX_FUNCS;
+ fgd->count = &ftrace_graph_notrace_count;
+ fgd->seq_ops = &ftrace_graph_seq_ops;
+
+ return __ftrace_graph_open(inode, file, fgd);
+}
+
+static int
ftrace_graph_release(struct inode *inode, struct file *file)
{
- if (file->f_mode & FMODE_READ)
+ if (file->f_mode & FMODE_READ) {
+ struct seq_file *m = file->private_data;
+
+ kfree(m->private);
seq_release(inode, file);
+ } else {
+ kfree(file->private_data);
+ }
+
return 0;
}
static int
-ftrace_set_func(unsigned long *array, int *idx, char *buffer)
+ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -3879,7 +3949,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
/* decode regex */
type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
- if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+ if (!not && *idx >= size)
return -EBUSY;
search_len = strlen(search);
@@ -3907,7 +3977,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
fail = 0;
if (!exists) {
array[(*idx)++] = rec->ip;
- if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+ if (*idx >= size)
goto out;
}
} else {
@@ -3925,8 +3995,6 @@ out:
if (fail)
return -EINVAL;
- ftrace_graph_filter_enabled = !!(*idx);
-
return 0;
}
@@ -3935,36 +4003,33 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_parser parser;
- ssize_t read, ret;
+ ssize_t read, ret = 0;
+ struct ftrace_graph_data *fgd = file->private_data;
if (!cnt)
return 0;
- mutex_lock(&graph_lock);
-
- if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
- ret = -ENOMEM;
- goto out_unlock;
- }
+ if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
+ return -ENOMEM;
read = trace_get_user(&parser, ubuf, cnt, ppos);
if (read >= 0 && trace_parser_loaded((&parser))) {
parser.buffer[parser.idx] = 0;
+ mutex_lock(&graph_lock);
+
/* we allow only one expression at a time */
- ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
- parser.buffer);
- if (ret)
- goto out_free;
+ ret = ftrace_set_func(fgd->table, fgd->count, fgd->size,
+ parser.buffer);
+
+ mutex_unlock(&graph_lock);
}
- ret = read;
+ if (!ret)
+ ret = read;
-out_free:
trace_parser_put(&parser);
-out_unlock:
- mutex_unlock(&graph_lock);
return ret;
}
@@ -3976,6 +4041,14 @@ static const struct file_operations ftrace_graph_fops = {
.llseek = ftrace_filter_lseek,
.release = ftrace_graph_release,
};
+
+static const struct file_operations ftrace_graph_notrace_fops = {
+ .open = ftrace_graph_notrace_open,
+ .read = seq_read,
+ .write = ftrace_graph_write,
+ .llseek = ftrace_filter_lseek,
+ .release = ftrace_graph_release,
+};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
@@ -3997,6 +4070,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
trace_create_file("set_graph_function", 0444, d_tracer,
NULL,
&ftrace_graph_fops);
+ trace_create_file("set_graph_notrace", 0444, d_tracer,
+ NULL,
+ &ftrace_graph_notrace_fops);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
return 0;
@@ -4290,12 +4366,15 @@ core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) \
- ({ \
- (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
- 0; \
+# define ftrace_startup(ops, command) \
+ ({ \
+ int ___ret = __register_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
})
-# define ftrace_shutdown(ops, command) do { } while (0)
+# define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops)
+
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
@@ -4320,12 +4399,21 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
*/
preempt_disable_notrace();
trace_recursion_set(TRACE_CONTROL_BIT);
+
+ /*
+ * Control funcs (perf) uses RCU. Only trace if
+ * RCU is currently active.
+ */
+ if (!rcu_is_watching())
+ goto out;
+
do_for_each_ftrace_op(op, ftrace_control_list) {
if (!(op->flags & FTRACE_OPS_FL_STUB) &&
!ftrace_function_local_disabled(op) &&
ftrace_ops_test(op, ip, regs))
op->func(ip, parent_ip, op, regs);
} while_for_each_ftrace_op(op);
+ out:
trace_recursion_clear(TRACE_CONTROL_BIT);
preempt_enable_notrace();
}
@@ -4695,9 +4783,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
mutex_lock(&ftrace_lock);
- ret = __register_ftrace_function(ops);
- if (!ret)
- ret = ftrace_startup(ops, 0);
+ ret = ftrace_startup(ops, 0);
mutex_unlock(&ftrace_lock);
@@ -4716,9 +4802,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
int ret;
mutex_lock(&ftrace_lock);
- ret = __unregister_ftrace_function(ops);
- if (!ret)
- ftrace_shutdown(ops, 0);
+ ret = ftrace_shutdown(ops, 0);
mutex_unlock(&ftrace_lock);
return ret;
@@ -4912,6 +4996,13 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
return NOTIFY_DONE;
}
+/* Just a place holder for function graph */
+static struct ftrace_ops fgraph_ops __read_mostly = {
+ .func = ftrace_stub,
+ .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
+ FTRACE_OPS_FL_RECURSION_SAFE,
+};
+
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
@@ -4938,7 +5029,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
ftrace_graph_return = retfunc;
ftrace_graph_entry = entryfunc;
- ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
+ ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET);
out:
mutex_unlock(&ftrace_lock);
@@ -4955,7 +5046,7 @@ void unregister_ftrace_graph(void)
ftrace_graph_active--;
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
+ ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9fea7dfd5d3..9d20cd9743ef 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,13 +235,33 @@ void trace_array_put(struct trace_array *this_tr)
mutex_unlock(&trace_types_lock);
}
-int filter_current_check_discard(struct ring_buffer *buffer,
- struct ftrace_event_call *call, void *rec,
- struct ring_buffer_event *event)
+int filter_check_discard(struct ftrace_event_file *file, void *rec,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
{
- return filter_check_discard(call, rec, buffer, event);
+ if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
+ !filter_match_preds(file->filter, rec)) {
+ ring_buffer_discard_commit(buffer, event);
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(filter_check_discard);
+
+int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
+ !filter_match_preds(call->filter, rec)) {
+ ring_buffer_discard_commit(buffer, event);
+ return 1;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(filter_current_check_discard);
+EXPORT_SYMBOL_GPL(call_filter_check_discard);
cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
@@ -843,9 +863,12 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
if (isspace(ch)) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
- } else {
+ } else if (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
+ } else {
+ ret = -EINVAL;
+ goto out;
}
*ppos += read;
@@ -1261,21 +1284,6 @@ int is_tracing_stopped(void)
}
/**
- * ftrace_off_permanent - disable all ftrace code permanently
- *
- * This should only be called when a serious anomally has
- * been detected. This will turn off the function tracing,
- * ring buffers, and other tracing utilites. It takes no
- * locks and can be called from any context.
- */
-void ftrace_off_permanent(void)
-{
- tracing_disabled = 1;
- ftrace_stop();
- tracing_off_permanent();
-}
-
-/**
* tracing_start - quick start of the tracer
*
* If tracing is enabled but was stopped by tracing_stop,
@@ -1631,7 +1639,7 @@ trace_function(struct trace_array *tr,
entry->ip = ip;
entry->parent_ip = parent_ip;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
}
@@ -1715,7 +1723,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
entry->size = trace.nr_entries;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
@@ -1817,7 +1825,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
trace.entries = entry->caller;
save_stack_trace_user(&trace);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out_drop_count:
@@ -2009,7 +2017,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, tbuffer, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event)) {
+ if (!call_filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
@@ -2064,7 +2072,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
memcpy(&entry->buf, tbuffer, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event)) {
+ if (!call_filter_check_discard(call, entry, buffer, event)) {
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
@@ -2761,7 +2769,7 @@ static void show_snapshot_main_help(struct seq_file *m)
seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
seq_printf(m, "# Takes a snapshot of the main buffer.\n");
- seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n");
+ seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
seq_printf(m, "# (Doesn't have to be '2' works with any number that\n");
seq_printf(m, "# is not a '0' or '1')\n");
}
@@ -2965,6 +2973,11 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
return 0;
}
+bool tracing_is_disabled(void)
+{
+ return (tracing_disabled) ? true: false;
+}
+
/*
* Open and update trace_array ref count.
* Must have the current trace_array passed to it.
@@ -5455,12 +5468,12 @@ static struct ftrace_func_command ftrace_snapshot_cmd = {
.func = ftrace_trace_snapshot_callback,
};
-static int register_snapshot_cmd(void)
+static __init int register_snapshot_cmd(void)
{
return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
-static inline int register_snapshot_cmd(void) { return 0; }
+static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
@@ -6254,6 +6267,17 @@ void trace_init_global_iter(struct trace_iterator *iter)
iter->trace = iter->tr->current_trace;
iter->cpu_file = RING_BUFFER_ALL_CPUS;
iter->trace_buffer = &global_trace.trace_buffer;
+
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+
+ /* Annotate start of buffers if we had overruns */
+ if (ring_buffer_overruns(iter->trace_buffer->buffer))
+ iter->iter_flags |= TRACE_FILE_ANNOTATE;
+
+ /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+ if (trace_clocks[iter->tr->clock_id].in_ns)
+ iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 73d08aa25b55..ea189e027b80 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -193,8 +193,8 @@ struct trace_array {
#ifdef CONFIG_FTRACE_SYSCALLS
int sys_refcount_enter;
int sys_refcount_exit;
- DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
- DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
+ struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls];
+ struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls];
#endif
int stop_count;
int clock_id;
@@ -515,6 +515,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
+bool tracing_is_disabled(void);
struct dentry *trace_create_file(const char *name,
umode_t mode,
struct dentry *parent,
@@ -712,6 +713,8 @@ extern unsigned long trace_flags;
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
+#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
+#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
extern enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags);
@@ -731,15 +734,16 @@ extern void __trace_graph_return(struct trace_array *tr,
#ifdef CONFIG_DYNAMIC_FTRACE
/* TODO: make this variable */
#define FTRACE_GRAPH_MAX_FUNCS 32
-extern int ftrace_graph_filter_enabled;
extern int ftrace_graph_count;
extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern int ftrace_graph_notrace_count;
+extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
static inline int ftrace_graph_addr(unsigned long addr)
{
int i;
- if (!ftrace_graph_filter_enabled)
+ if (!ftrace_graph_count)
return 1;
for (i = 0; i < ftrace_graph_count; i++) {
@@ -759,11 +763,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 0;
}
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+ int i;
+
+ if (!ftrace_graph_notrace_count)
+ return 0;
+
+ for (i = 0; i < ftrace_graph_notrace_count; i++) {
+ if (addr == ftrace_graph_notrace_funcs[i])
+ return 1;
+ }
+
+ return 0;
+}
#else
static inline int ftrace_graph_addr(unsigned long addr)
{
return 1;
}
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+ return 0;
+}
#endif /* CONFIG_DYNAMIC_FTRACE */
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
@@ -987,9 +1011,9 @@ struct filter_pred {
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
-extern void print_event_filter(struct ftrace_event_call *call,
+extern void print_event_filter(struct ftrace_event_file *file,
struct trace_seq *s);
-extern int apply_event_filter(struct ftrace_event_call *call,
+extern int apply_event_filter(struct ftrace_event_file *file,
char *filter_string);
extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string);
@@ -1000,20 +1024,6 @@ extern int filter_assign_type(const char *type);
struct ftrace_event_field *
trace_find_event_field(struct ftrace_event_call *call, char *name);
-static inline int
-filter_check_discard(struct ftrace_event_call *call, void *rec,
- struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
- !filter_match_preds(call->filter, rec)) {
- ring_buffer_discard_commit(buffer, event);
- return 1;
- }
-
- return 0;
-}
-
extern void trace_event_enable_cmd_record(bool enable);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index d594da0dc03c..697fb9bac8f0 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -78,7 +78,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
entry->line = f->line;
entry->correct = val == expect;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 78e27e3b52ac..e854f420e033 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,12 @@ static int total_ref_count;
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
+ if (tp_event->perf_perm) {
+ int ret = tp_event->perf_perm(tp_event, p_event);
+ if (ret)
+ return ret;
+ }
+
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event) &&
perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
@@ -173,7 +179,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
int perf_trace_init(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event;
- int event_id = p_event->attr.config;
+ u64 event_id = p_event->attr.config;
int ret = -EINVAL;
mutex_lock(&event_mutex);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 368a4d50cc30..f919a2e21bf3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -989,7 +989,7 @@ static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call;
+ struct ftrace_event_file *file;
struct trace_seq *s;
int r = -ENODEV;
@@ -1004,12 +1004,12 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
mutex_lock(&event_mutex);
- call = event_file_data(filp);
- if (call)
- print_event_filter(call, s);
+ file = event_file_data(filp);
+ if (file)
+ print_event_filter(file, s);
mutex_unlock(&event_mutex);
- if (call)
+ if (file)
r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
@@ -1021,7 +1021,7 @@ static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_call *call;
+ struct ftrace_event_file *file;
char *buf;
int err = -ENODEV;
@@ -1039,9 +1039,9 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
buf[cnt] = '\0';
mutex_lock(&event_mutex);
- call = event_file_data(filp);
- if (call)
- err = apply_event_filter(call, buf);
+ file = event_file_data(filp);
+ if (file)
+ err = apply_event_filter(file, buf);
mutex_unlock(&event_mutex);
free_page((unsigned long) buf);
@@ -1062,6 +1062,9 @@ static int subsystem_open(struct inode *inode, struct file *filp)
struct trace_array *tr;
int ret;
+ if (tracing_is_disabled())
+ return -ENODEV;
+
/* Make sure the system still exists */
mutex_lock(&trace_types_lock);
mutex_lock(&event_mutex);
@@ -1108,6 +1111,9 @@ static int system_tr_open(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
int ret;
+ if (tracing_is_disabled())
+ return -ENODEV;
+
if (trace_array_get(tr) < 0)
return -ENODEV;
@@ -1124,11 +1130,12 @@ static int system_tr_open(struct inode *inode, struct file *filp)
if (ret < 0) {
trace_array_put(tr);
kfree(dir);
+ return ret;
}
filp->private_data = dir;
- return ret;
+ return 0;
}
static int subsystem_release(struct inode *inode, struct file *file)
@@ -1539,7 +1546,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
return -1;
}
}
- trace_create_file("filter", 0644, file->dir, call,
+ trace_create_file("filter", 0644, file->dir, file,
&ftrace_event_filter_fops);
trace_create_file("format", 0444, file->dir, call,
@@ -1577,6 +1584,7 @@ static void event_remove(struct ftrace_event_call *call)
if (file->event_call != call)
continue;
ftrace_event_enable_disable(file, 0);
+ destroy_preds(file);
/*
* The do_for_each_event_file() is
* a double loop. After finding the call for this
@@ -1700,7 +1708,7 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
{
event_remove(call);
trace_destroy_fields(call);
- destroy_preds(call);
+ destroy_call_preds(call);
}
static int probe_remove_event_call(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 97daa8cf958d..2468f56dc5db 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,10 +637,18 @@ static void append_filter_err(struct filter_parse_state *ps,
free_page((unsigned long) buf);
}
+static inline struct event_filter *event_filter(struct ftrace_event_file *file)
+{
+ if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ return file->event_call->filter;
+ else
+ return file->filter;
+}
+
/* caller must hold event_mutex */
-void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
+void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s)
{
- struct event_filter *filter = call->filter;
+ struct event_filter *filter = event_filter(file);
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
@@ -766,11 +774,21 @@ static void __free_preds(struct event_filter *filter)
filter->n_preds = 0;
}
-static void filter_disable(struct ftrace_event_call *call)
+static void call_filter_disable(struct ftrace_event_call *call)
{
call->flags &= ~TRACE_EVENT_FL_FILTERED;
}
+static void filter_disable(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call_filter_disable(call);
+ else
+ file->flags &= ~FTRACE_EVENT_FL_FILTERED;
+}
+
static void __free_filter(struct event_filter *filter)
{
if (!filter)
@@ -781,16 +799,30 @@ static void __free_filter(struct event_filter *filter)
kfree(filter);
}
+void destroy_call_preds(struct ftrace_event_call *call)
+{
+ __free_filter(call->filter);
+ call->filter = NULL;
+}
+
+static void destroy_file_preds(struct ftrace_event_file *file)
+{
+ __free_filter(file->filter);
+ file->filter = NULL;
+}
+
/*
- * Called when destroying the ftrace_event_call.
- * The call is being freed, so we do not need to worry about
- * the call being currently used. This is for module code removing
+ * Called when destroying the ftrace_event_file.
+ * The file is being freed, so we do not need to worry about
+ * the file being currently used. This is for module code removing
* the tracepoints from within it.
*/
-void destroy_preds(struct ftrace_event_call *call)
+void destroy_preds(struct ftrace_event_file *file)
{
- __free_filter(call->filter);
- call->filter = NULL;
+ if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ destroy_call_preds(file->event_call);
+ else
+ destroy_file_preds(file);
}
static struct event_filter *__alloc_filter(void)
@@ -825,28 +857,56 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
return 0;
}
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static inline void __remove_filter(struct ftrace_event_file *file)
{
+ struct ftrace_event_call *call = file->event_call;
+
+ filter_disable(file);
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ remove_filter_string(call->filter);
+ else
+ remove_filter_string(file->filter);
+}
+
+static void filter_free_subsystem_preds(struct event_subsystem *system,
+ struct trace_array *tr)
+{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
- filter_disable(call);
- remove_filter_string(call->filter);
+ __remove_filter(file);
}
}
-static void filter_free_subsystem_filters(struct event_subsystem *system)
+static inline void __free_subsystem_filter(struct ftrace_event_file *file)
{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) {
+ __free_filter(call->filter);
+ call->filter = NULL;
+ } else {
+ __free_filter(file->filter);
+ file->filter = NULL;
+ }
+}
+
+static void filter_free_subsystem_filters(struct event_subsystem *system,
+ struct trace_array *tr)
+{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
- __free_filter(call->filter);
- call->filter = NULL;
+ __free_subsystem_filter(file);
}
}
@@ -1617,15 +1677,85 @@ fail:
return err;
}
+static inline void event_set_filtered_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags |= TRACE_EVENT_FL_FILTERED;
+ else
+ file->flags |= FTRACE_EVENT_FL_FILTERED;
+}
+
+static inline void event_set_filter(struct ftrace_event_file *file,
+ struct event_filter *filter)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ rcu_assign_pointer(call->filter, filter);
+ else
+ rcu_assign_pointer(file->filter, filter);
+}
+
+static inline void event_clear_filter(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ RCU_INIT_POINTER(call->filter, NULL);
+ else
+ RCU_INIT_POINTER(file->filter, NULL);
+}
+
+static inline void
+event_set_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+ else
+ file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline void
+event_clear_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
+ call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+ else
+ file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER;
+}
+
+static inline bool
+event_no_set_filter_flag(struct ftrace_event_file *file)
+{
+ struct ftrace_event_call *call = file->event_call;
+
+ if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER)
+ return true;
+
+ if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) &&
+ (call->flags & TRACE_EVENT_FL_NO_SET_FILTER))
+ return true;
+
+ return false;
+}
+
struct filter_list {
struct list_head list;
struct event_filter *filter;
};
static int replace_system_preds(struct event_subsystem *system,
+ struct trace_array *tr,
struct filter_parse_state *ps,
char *filter_string)
{
+ struct ftrace_event_file *file;
struct ftrace_event_call *call;
struct filter_list *filter_item;
struct filter_list *tmp;
@@ -1633,8 +1763,8 @@ static int replace_system_preds(struct event_subsystem *system,
bool fail = true;
int err;
- list_for_each_entry(call, &ftrace_events, list) {
-
+ list_for_each_entry(file, &tr->events, list) {
+ call = file->event_call;
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -1644,18 +1774,20 @@ static int replace_system_preds(struct event_subsystem *system,
*/
err = replace_preds(call, NULL, ps, filter_string, true);
if (err)
- call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+ event_set_no_set_filter_flag(file);
else
- call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
+ event_clear_no_set_filter_flag(file);
}
- list_for_each_entry(call, &ftrace_events, list) {
+ list_for_each_entry(file, &tr->events, list) {
struct event_filter *filter;
+ call = file->event_call;
+
if (strcmp(call->class->system, system->name) != 0)
continue;
- if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+ if (event_no_set_filter_flag(file))
continue;
filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
@@ -1676,17 +1808,17 @@ static int replace_system_preds(struct event_subsystem *system,
err = replace_preds(call, filter, ps, filter_string, false);
if (err) {
- filter_disable(call);
+ filter_disable(file);
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
append_filter_err(ps, filter);
} else
- call->flags |= TRACE_EVENT_FL_FILTERED;
+ event_set_filtered_flag(file);
/*
* Regardless of if this returned an error, we still
* replace the filter for the call.
*/
- filter = call->filter;
- rcu_assign_pointer(call->filter, filter_item->filter);
+ filter = event_filter(file);
+ event_set_filter(file, filter_item->filter);
filter_item->filter = filter;
fail = false;
@@ -1816,6 +1948,7 @@ static int create_filter(struct ftrace_event_call *call,
* and always remembers @filter_str.
*/
static int create_system_filter(struct event_subsystem *system,
+ struct trace_array *tr,
char *filter_str, struct event_filter **filterp)
{
struct event_filter *filter = NULL;
@@ -1824,7 +1957,7 @@ static int create_system_filter(struct event_subsystem *system,
err = create_filter_start(filter_str, true, &ps, &filter);
if (!err) {
- err = replace_system_preds(system, ps, filter_str);
+ err = replace_system_preds(system, tr, ps, filter_str);
if (!err) {
/* System filters just show a default message */
kfree(filter->filter_string);
@@ -1840,20 +1973,25 @@ static int create_system_filter(struct event_subsystem *system,
}
/* caller must hold event_mutex */
-int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+int apply_event_filter(struct ftrace_event_file *file, char *filter_string)
{
+ struct ftrace_event_call *call = file->event_call;
struct event_filter *filter;
int err;
if (!strcmp(strstrip(filter_string), "0")) {
- filter_disable(call);
- filter = call->filter;
+ filter_disable(file);
+ filter = event_filter(file);
+
if (!filter)
return 0;
- RCU_INIT_POINTER(call->filter, NULL);
+
+ event_clear_filter(file);
+
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
+
return 0;
}
@@ -1866,14 +2004,15 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
* string
*/
if (filter) {
- struct event_filter *tmp = call->filter;
+ struct event_filter *tmp;
+ tmp = event_filter(file);
if (!err)
- call->flags |= TRACE_EVENT_FL_FILTERED;
+ event_set_filtered_flag(file);
else
- filter_disable(call);
+ filter_disable(file);
- rcu_assign_pointer(call->filter, filter);
+ event_set_filter(file, filter);
if (tmp) {
/* Make sure the call is done with the filter */
@@ -1889,6 +2028,7 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
char *filter_string)
{
struct event_subsystem *system = dir->subsystem;
+ struct trace_array *tr = dir->tr;
struct event_filter *filter;
int err = 0;
@@ -1901,18 +2041,18 @@ int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
}
if (!strcmp(strstrip(filter_string), "0")) {
- filter_free_subsystem_preds(system);
+ filter_free_subsystem_preds(system, tr);
remove_filter_string(system->filter);
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
synchronize_sched();
- filter_free_subsystem_filters(system);
+ filter_free_subsystem_filters(system, tr);
__free_filter(filter);
goto out_unlock;
}
- err = create_system_filter(system, filter_string, &filter);
+ err = create_system_filter(system, tr, filter_string, &filter);
if (filter) {
/*
* No event actually uses the system filter
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d21a74670088..7c3e3e72e2b6 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -180,7 +180,7 @@ struct ftrace_event_call __used event_##call = { \
.event.type = etype, \
.class = &event_class_ftrace_##call, \
.print_fmt = print, \
- .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
+ .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
}; \
struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b5c09242683d..0b99120d395c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -82,9 +82,9 @@ static struct trace_array *graph_array;
* to fill in space into DURATION column.
*/
enum {
- DURATION_FILL_FULL = -1,
- DURATION_FILL_START = -2,
- DURATION_FILL_END = -3,
+ FLAGS_FILL_FULL = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
+ FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
+ FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
};
static enum print_line_t
@@ -114,16 +114,37 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
return -EBUSY;
}
+ /*
+ * The curr_ret_stack is an index to ftrace return stack of
+ * current task. Its value should be in [0, FTRACE_RETFUNC_
+ * DEPTH) when the function graph tracer is used. To support
+ * filtering out specific functions, it makes the index
+ * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
+ * so when it sees a negative index the ftrace will ignore
+ * the record. And the index gets recovered when returning
+ * from the filtered function by adding the FTRACE_NOTRACE_
+ * DEPTH and then it'll continue to record functions normally.
+ *
+ * The curr_ret_stack is initialized to -1 and get increased
+ * in this function. So it can be less than -1 only if it was
+ * filtered out via ftrace_graph_notrace_addr() which can be
+ * set from set_graph_notrace file in debugfs by user.
+ */
+ if (current->curr_ret_stack < -1)
+ return -EBUSY;
+
calltime = trace_clock_local();
index = ++current->curr_ret_stack;
+ if (ftrace_graph_notrace_addr(func))
+ current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
barrier();
current->ret_stack[index].ret = ret;
current->ret_stack[index].func = func;
current->ret_stack[index].calltime = calltime;
current->ret_stack[index].subtime = 0;
current->ret_stack[index].fp = frame_pointer;
- *depth = index;
+ *depth = current->curr_ret_stack;
return 0;
}
@@ -137,7 +158,17 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
index = current->curr_ret_stack;
- if (unlikely(index < 0)) {
+ /*
+ * A negative index here means that it's just returned from a
+ * notrace'd function. Recover index to get an original
+ * return address. See ftrace_push_return_trace().
+ *
+ * TODO: Need to check whether the stack gets corrupted.
+ */
+ if (index < 0)
+ index += FTRACE_NOTRACE_DEPTH;
+
+ if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
ftrace_graph_stop();
WARN_ON(1);
/* Might as well panic, otherwise we have no where to go */
@@ -193,6 +224,15 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
trace.rettime = trace_clock_local();
barrier();
current->curr_ret_stack--;
+ /*
+ * The curr_ret_stack can be less than -1 only if it was
+ * filtered out and it's about to return from the function.
+ * Recover the index and continue to trace normal functions.
+ */
+ if (current->curr_ret_stack < -1) {
+ current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
+ return ret;
+ }
/*
* The trace should run after decrementing the ret counter
@@ -230,7 +270,7 @@ int __trace_graph_entry(struct trace_array *tr,
return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
return 1;
@@ -259,10 +299,20 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
/* trace it when it is-nested-in or is a function enabled. */
if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
- ftrace_graph_ignore_irqs()) ||
+ ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
(max_depth && trace->depth >= max_depth))
return 0;
+ /*
+ * Do not trace a function if it's filtered by set_graph_notrace.
+ * Make the index of ret stack negative to indicate that it should
+ * ignore further functions. But it needs its own ret stack entry
+ * to recover the original index in order to continue tracing after
+ * returning from the function.
+ */
+ if (ftrace_graph_notrace_addr(trace->func))
+ return 1;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -335,7 +385,7 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
}
@@ -652,7 +702,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
}
/* No overhead */
- ret = print_graph_duration(DURATION_FILL_START, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_START);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -664,7 +714,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = print_graph_duration(DURATION_FILL_END, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_END);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -729,14 +779,14 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
return TRACE_TYPE_HANDLED;
/* No real adata, just filling the column with spaces */
- switch (duration) {
- case DURATION_FILL_FULL:
+ switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
+ case FLAGS_FILL_FULL:
ret = trace_seq_puts(s, " | ");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
- case DURATION_FILL_START:
+ case FLAGS_FILL_START:
ret = trace_seq_puts(s, " ");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
- case DURATION_FILL_END:
+ case FLAGS_FILL_END:
ret = trace_seq_puts(s, " |");
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
}
@@ -852,7 +902,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
}
/* No time */
- ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);
if (ret != TRACE_TYPE_HANDLED)
return ret;
@@ -1172,7 +1222,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
return TRACE_TYPE_PARTIAL_LINE;
/* No time */
- ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+ ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL);
if (ret != TRACE_TYPE_HANDLED)
return ret;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 243f6834d026..dae9541ada9e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -835,7 +835,7 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
entry->ip = (unsigned long)tp->rp.kp.addr;
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!filter_check_discard(ftrace_file, entry, buffer, event))
trace_buffer_unlock_commit_regs(buffer, event,
irq_flags, pc, regs);
}
@@ -884,7 +884,7 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!filter_check_discard(ftrace_file, entry, buffer, event))
trace_buffer_unlock_commit_regs(buffer, event,
irq_flags, pc, regs);
}
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b3dcfb2f0fef..0abd9b863474 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -323,7 +323,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->rw = *rw;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
@@ -353,7 +353,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->map = *map;
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4e98e3b257a3..3f34dc9b40f3 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -45,7 +45,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
@@ -101,7 +101,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 847f88a6194b..7af67360b330 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -43,46 +43,15 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
/* The root directory for all stat files */
static struct dentry *stat_dir;
-/*
- * Iterate through the rbtree using a post order traversal path
- * to release the next node.
- * It won't necessary release one at each iteration
- * but it will at least advance closer to the next one
- * to be released.
- */
-static struct rb_node *release_next(struct tracer_stat *ts,
- struct rb_node *node)
+static void __reset_stat_session(struct stat_session *session)
{
- struct stat_node *snode;
- struct rb_node *parent = rb_parent(node);
-
- if (node->rb_left)
- return node->rb_left;
- else if (node->rb_right)
- return node->rb_right;
- else {
- if (!parent)
- ;
- else if (parent->rb_left == node)
- parent->rb_left = NULL;
- else
- parent->rb_right = NULL;
+ struct stat_node *snode, *n;
- snode = container_of(node, struct stat_node, node);
- if (ts->stat_release)
- ts->stat_release(snode->stat);
+ rbtree_postorder_for_each_entry_safe(snode, n, &session->stat_root, node) {
+ if (session->ts->stat_release)
+ session->ts->stat_release(snode->stat);
kfree(snode);
-
- return parent;
}
-}
-
-static void __reset_stat_session(struct stat_session *session)
-{
- struct rb_node *node = session->stat_root.rb_node;
-
- while (node)
- node = release_next(session->ts, node);
session->stat_root = RB_ROOT;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 559329d9bd2f..e4b6d11bdf78 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -302,6 +302,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
+ struct ftrace_event_file *ftrace_file;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -314,7 +315,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
- if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
+
+ /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
+ ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
+ if (!ftrace_file)
+ return;
+
+ if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -336,8 +343,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
- if (!filter_current_check_discard(buffer, sys_data->enter_event,
- entry, event))
+ if (!filter_check_discard(ftrace_file, entry, buffer, event))
trace_current_buffer_unlock_commit(buffer, event,
irq_flags, pc);
}
@@ -345,6 +351,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
struct trace_array *tr = data;
+ struct ftrace_event_file *ftrace_file;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -356,7 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0)
return;
- if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
+
+ /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
+ ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
+ if (!ftrace_file)
+ return;
+
+ if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -377,8 +390,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
- if (!filter_current_check_discard(buffer, sys_data->exit_event,
- entry, event))
+ if (!filter_check_discard(ftrace_file, entry, buffer, event))
trace_current_buffer_unlock_commit(buffer, event,
irq_flags, pc);
}
@@ -397,7 +409,7 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,
if (!tr->sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
if (!ret) {
- set_bit(num, tr->enabled_enter_syscalls);
+ rcu_assign_pointer(tr->enter_syscall_files[num], file);
tr->sys_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
@@ -415,10 +427,15 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_enter--;
- clear_bit(num, tr->enabled_enter_syscalls);
+ rcu_assign_pointer(tr->enter_syscall_files[num], NULL);
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
mutex_unlock(&syscall_trace_lock);
+ /*
+ * Callers expect the event to be completely disabled on
+ * return, so wait for current handlers to finish.
+ */
+ synchronize_sched();
}
static int reg_event_syscall_exit(struct ftrace_event_file *file,
@@ -435,7 +452,7 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,
if (!tr->sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
if (!ret) {
- set_bit(num, tr->enabled_exit_syscalls);
+ rcu_assign_pointer(tr->exit_syscall_files[num], file);
tr->sys_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
@@ -453,10 +470,15 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_exit--;
- clear_bit(num, tr->enabled_exit_syscalls);
+ rcu_assign_pointer(tr->exit_syscall_files[num], NULL);
if (!tr->sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
mutex_unlock(&syscall_trace_lock);
+ /*
+ * Callers expect the event to be completely disabled on
+ * return, so wait for current handlers to finish.
+ */
+ synchronize_sched();
}
static int __init init_syscall_trace(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 272261b5f94f..b6dcc42ef7f5 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -128,6 +128,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
if (is_ret)
tu->consumer.ret_handler = uretprobe_dispatcher;
init_trace_uprobe_filter(&tu->filter);
+ tu->call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;
return tu;
error:
@@ -561,7 +562,7 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
for (i = 0; i < tu->nr_args; i++)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
- if (!filter_current_check_discard(buffer, call, entry, event))
+ if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, 0);
}
diff --git a/kernel/up.c b/kernel/up.c
index 630d72bf7e41..509403e3fbc6 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -22,6 +22,17 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
}
EXPORT_SYMBOL(smp_call_function_single);
+void __smp_call_function_single(int cpu, struct call_single_data *csd,
+ int wait)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ csd->func(csd->info);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__smp_call_function_single);
+
int on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
diff --git a/kernel/user.c b/kernel/user.c
index 5bbb91988e69..a3a0dbfda329 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_KEYS_KERBEROS_CACHE
+ .krb_cache_register_sem =
+ __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem),
+#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 13fb1134ba58..240fb62cf394 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
set_cred_user_ns(new, ns);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
return 0;
}
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
do {
parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ key_put(ns->persistent_keyring_register);
+#endif
proc_free_inum(ns->proc_inum);
kmem_cache_free(user_ns_cachep, ns);
ns = parent;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 987293d03ebc..c66912be990f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -305,6 +305,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
+/* I: attributes used when instantiating ordered pools on demand */
+static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
+
struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -518,14 +521,21 @@ static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
-/* allocate ID and assign it to @pool */
+/**
+ * worker_pool_assign_id - allocate ID and assing it to @pool
+ * @pool: the pool pointer of interest
+ *
+ * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
+ * successfully, -errno on failure.
+ */
static int worker_pool_assign_id(struct worker_pool *pool)
{
int ret;
lockdep_assert_held(&wq_pool_mutex);
- ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
+ ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
+ GFP_KERNEL);
if (ret >= 0) {
pool->id = ret;
return 0;
@@ -1320,7 +1330,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
debug_work_activate(work);
- /* if dying, only works from the same workqueue are allowed */
+ /* if draining, only works from the same workqueue are allowed */
if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
@@ -1736,16 +1746,17 @@ static struct worker *create_worker(struct worker_pool *pool)
if (IS_ERR(worker->task))
goto fail;
+ set_user_nice(worker->task, pool->attrs->nice);
+
+ /* prevent userland from meddling with cpumask of workqueue workers */
+ worker->task->flags |= PF_NO_SETAFFINITY;
+
/*
* set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
* online CPUs. It'll be re-applied when any of the CPUs come up.
*/
- set_user_nice(worker->task, pool->attrs->nice);
set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
- /* prevent userland from meddling with cpumask of workqueue workers */
- worker->task->flags |= PF_NO_SETAFFINITY;
-
/*
* The caller is responsible for ensuring %POOL_DISASSOCIATED
* remains stable across this function. See the comments above the
@@ -4106,7 +4117,7 @@ out_unlock:
static int alloc_and_link_pwqs(struct workqueue_struct *wq)
{
bool highpri = wq->flags & WQ_HIGHPRI;
- int cpu;
+ int cpu, ret;
if (!(wq->flags & WQ_UNBOUND)) {
wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
@@ -4126,6 +4137,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
mutex_unlock(&wq->mutex);
}
return 0;
+ } else if (wq->flags & __WQ_ORDERED) {
+ ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+ /* there should only be single pwq for ordering guarantee */
+ WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+ wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+ "ordering guarantee broken for workqueue %s\n", wq->name);
+ return ret;
} else {
return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
@@ -5009,10 +5027,6 @@ static int __init init_workqueues(void)
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
int i, cpu;
- /* make sure we have enough bits for OFFQ pool ID */
- BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
- WORK_CPU_END * NR_STD_WORKER_POOLS);
-
WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
@@ -5051,13 +5065,23 @@ static int __init init_workqueues(void)
}
}
- /* create default unbound wq attrs */
+ /* create default unbound and ordered wq attrs */
for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
struct workqueue_attrs *attrs;
BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
attrs->nice = std_nice[i];
unbound_std_wq_attrs[i] = attrs;
+
+ /*
+ * An ordered wq should have only one pwq as ordering is
+ * guaranteed by max_active which is enforced by pwqs.
+ * Turn off NUMA so that dfl_pwq is used for all nodes.
+ */
+ BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ attrs->nice = std_nice[i];
+ attrs->no_numa = true;
+ ordered_wq_attrs[i] = attrs;
}
system_wq = alloc_workqueue("events", 0, 0);