From 42d35d48ce7cefb9429880af19d1c329d1554e7a Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 29 Dec 2008 15:49:53 -0800 Subject: futex: make futex_(get|put)_key() calls symmetric Impact: cleanup This patch makes the calls to futex_get_key_refs() and futex_drop_key_refs() explicitly symmetric by only "putting" keys we successfully "got". Also cleanup a couple return points that didn't "put" after a successful "get". Build and boot tested on an x86_64 system. Signed-off-by: Darren Hart Cc: Signed-off-by: Ingo Molnar --- kernel/futex.c | 67 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index b4f87bac91c1..c5ac55cc0c16 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -723,8 +723,8 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) } spin_unlock(&hb->lock); -out: put_futex_key(fshared, &key); +out: return ret; } @@ -748,7 +748,7 @@ retryfull: goto out; ret = get_futex_key(uaddr2, fshared, &key2); if (unlikely(ret != 0)) - goto out; + goto out_put_key1; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -770,12 +770,12 @@ retry: * but we might get them from range checking */ ret = op_ret; - goto out; + goto out_put_keys; #endif if (unlikely(op_ret != -EFAULT)) { ret = op_ret; - goto out; + goto out_put_keys; } /* @@ -789,7 +789,7 @@ retry: ret = futex_handle_fault((unsigned long)uaddr2, attempt); if (ret) - goto out; + goto out_put_keys; goto retry; } @@ -827,10 +827,11 @@ retry: spin_unlock(&hb1->lock); if (hb1 != hb2) spin_unlock(&hb2->lock); -out: +out_put_keys: put_futex_key(fshared, &key2); +out_put_key1: put_futex_key(fshared, &key1); - +out: return ret; } @@ -847,13 +848,13 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, struct futex_q *this, *next; int ret, drop_count = 0; - retry: +retry: ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; ret = get_futex_key(uaddr2, fshared, &key2); if (unlikely(ret != 0)) - goto out; + goto out_put_key1; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -875,7 +876,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, if (!ret) goto retry; - return ret; + goto out_put_keys; } if (curval != *cmpval) { ret = -EAGAIN; @@ -920,9 +921,11 @@ out_unlock: while (--drop_count >= 0) drop_futex_key_refs(&key1); -out: +out_put_keys: put_futex_key(fshared, &key2); +out_put_key1: put_futex_key(fshared, &key1); +out: return ret; } @@ -983,7 +986,7 @@ static int unqueue_me(struct futex_q *q) int ret = 0; /* In the common case we don't take the spinlock, which is nice. */ - retry: +retry: lock_ptr = q->lock_ptr; barrier(); if (lock_ptr != NULL) { @@ -1165,11 +1168,11 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.bitset = bitset; - retry: +retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) - goto out_release_sem; + goto out; hb = queue_lock(&q); @@ -1197,6 +1200,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, if (unlikely(ret)) { queue_unlock(&q, hb); + put_futex_key(fshared, &q.key); ret = get_user(uval, uaddr); @@ -1206,7 +1210,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, } ret = -EWOULDBLOCK; if (uval != val) - goto out_unlock_release_sem; + goto out_unlock_put_key; /* Only actually queue if *uaddr contained val. */ queue_me(&q, hb); @@ -1298,11 +1302,11 @@ static int futex_wait(u32 __user *uaddr, int fshared, return -ERESTART_RESTARTBLOCK; } - out_unlock_release_sem: +out_unlock_put_key: queue_unlock(&q, hb); - - out_release_sem: put_futex_key(fshared, &q.key); + +out: return ret; } @@ -1351,16 +1355,16 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } q.pi_state = NULL; - retry: +retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) - goto out_release_sem; + goto out; - retry_unlocked: +retry_unlocked: hb = queue_lock(&q); - retry_locked: +retry_locked: ret = lock_taken = 0; /* @@ -1381,14 +1385,14 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, */ if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { ret = -EDEADLK; - goto out_unlock_release_sem; + goto out_unlock_put_key; } /* * Surprise - we got the lock. Just return to userspace: */ if (unlikely(!curval)) - goto out_unlock_release_sem; + goto out_unlock_put_key; uval = curval; @@ -1424,7 +1428,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, * We took the lock due to owner died take over. */ if (unlikely(lock_taken)) - goto out_unlock_release_sem; + goto out_unlock_put_key; /* * We dont have the lock. Look up the PI state (or create it if @@ -1463,7 +1467,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, goto retry_locked; } default: - goto out_unlock_release_sem; + goto out_unlock_put_key; } } @@ -1554,16 +1558,17 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, destroy_hrtimer_on_stack(&to->timer); return ret != -EINTR ? ret : -ERESTARTNOINTR; - out_unlock_release_sem: +out_unlock_put_key: queue_unlock(&q, hb); - out_release_sem: +out_put_key: put_futex_key(fshared, &q.key); +out: if (to) destroy_hrtimer_on_stack(&to->timer); return ret; - uaddr_faulted: +uaddr_faulted: /* * We have to r/w *(int __user *)uaddr, and we have to modify it * atomically. Therefore, if we continue to fault after get_user() @@ -1576,7 +1581,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, if (attempt++) { ret = futex_handle_fault((unsigned long)uaddr, attempt); if (ret) - goto out_release_sem; + goto out_put_key; goto retry_unlocked; } @@ -1668,9 +1673,9 @@ retry_unlocked: out_unlock: spin_unlock(&hb->lock); -out: put_futex_key(fshared, &key); +out: return ret; pi_faulted: -- cgit v1.2.3 From 90621c40cc4ab7b0a414311ce37e7cc7173403b6 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 29 Dec 2008 19:43:21 -0800 Subject: futex: catch certain assymetric (get|put)_futex_key calls Impact: add debug check Following up on my previous key reference accounting patches, this patch will catch puts on keys that haven't been "got". This won't catch nested get/put mismatches though. Build and boot tested, with minimal desktop activity and a run of the open_posix_testsuite in LTP for testing. No warnings logged. Signed-off-by: Darren Hart Cc: Signed-off-by: Ingo Molnar --- kernel/futex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index c5ac55cc0c16..206d4c906885 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -170,8 +170,11 @@ static void get_futex_key_refs(union futex_key *key) */ static void drop_futex_key_refs(union futex_key *key) { - if (!key->both.ptr) + if (!key->both.ptr) { + /* If we're here then we tried to put a key we failed to get */ + WARN_ON_ONCE(1); return; + } switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: -- cgit v1.2.3 From 4f6b434fee2402b3decdeae9d16eb648725ae426 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 9 Dec 2008 19:50:34 -0500 Subject: don't reallocate buffer in every audit_sockaddr() No need to do that more than once per process lifetime; allocating/freeing on each sendto/accept/etc. is bloody pointless. Signed-off-by: Al Viro --- kernel/auditsc.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 4819f3711973..c2e43ebb1b68 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -174,12 +174,6 @@ struct audit_aux_data_socketcall { unsigned long args[0]; }; -struct audit_aux_data_sockaddr { - struct audit_aux_data d; - int len; - char a[0]; -}; - struct audit_aux_data_fd_pair { struct audit_aux_data d; int fd[2]; @@ -234,7 +228,8 @@ struct audit_context { struct audit_context *previous; /* For nested syscalls */ struct audit_aux_data *aux; struct audit_aux_data *aux_pids; - + struct sockaddr_storage *sockaddr; + size_t sockaddr_len; /* Save things to print about task_struct */ pid_t pid, ppid; uid_t uid, euid, suid, fsuid; @@ -921,6 +916,7 @@ static inline void audit_free_context(struct audit_context *context) free_tree_refs(context); audit_free_aux(context); kfree(context->filterkey); + kfree(context->sockaddr); kfree(context); context = previous; } while (context); @@ -1383,13 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_format(ab, " a%d=%lx", i, axs->args[i]); break; } - case AUDIT_SOCKADDR: { - struct audit_aux_data_sockaddr *axs = (void *)aux; - - audit_log_format(ab, "saddr="); - audit_log_n_hex(ab, axs->a, axs->len); - break; } - case AUDIT_FD_PAIR: { struct audit_aux_data_fd_pair *axs = (void *)aux; audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]); @@ -1421,6 +1410,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_end(ab); } + if (context->sockaddr_len) { + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR); + if (ab) { + audit_log_format(ab, "saddr="); + audit_log_n_hex(ab, (void *)context->sockaddr, + context->sockaddr_len); + audit_log_end(ab); + } + } + for (aux = context->aux_pids; aux; aux = aux->next) { struct audit_aux_data_pids *axs = (void *)aux; @@ -1689,6 +1688,7 @@ void audit_syscall_exit(int valid, long return_code) context->aux_pids = NULL; context->target_pid = 0; context->target_sid = 0; + context->sockaddr_len = 0; kfree(context->filterkey); context->filterkey = NULL; tsk->audit_context = context; @@ -2468,22 +2468,20 @@ int __audit_fd_pair(int fd1, int fd2) */ int audit_sockaddr(int len, void *a) { - struct audit_aux_data_sockaddr *ax; struct audit_context *context = current->audit_context; if (likely(!context || context->dummy)) return 0; - ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL); - if (!ax) - return -ENOMEM; - - ax->len = len; - memcpy(ax->a, a, len); + if (!context->sockaddr) { + void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL); + if (!p) + return -ENOMEM; + context->sockaddr = p; + } - ax->d.type = AUDIT_SOCKADDR; - ax->d.next = context->aux; - context->aux = (void *)ax; + context->sockaddr_len = len; + memcpy(context->sockaddr, a, len); return 0; } -- cgit v1.2.3 From f3298dc4f2277874d40cb4fc3a6e277317d6603b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 03:16:51 -0500 Subject: sanitize audit_socketcall * don't bother with allocations * now that it can't fail, make it return void Signed-off-by: Al Viro --- kernel/auditsc.c | 66 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index c2e43ebb1b68..5cda66466e14 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -168,12 +168,6 @@ struct audit_aux_data_execve { struct mm_struct *mm; }; -struct audit_aux_data_socketcall { - struct audit_aux_data d; - int nargs; - unsigned long args[0]; -}; - struct audit_aux_data_fd_pair { struct audit_aux_data d; int fd[2]; @@ -247,6 +241,14 @@ struct audit_context { struct audit_tree_refs *trees, *first_trees; int tree_count; + int type; + union { + struct { + int nargs; + long args[6]; + } socketcall; + }; + #if AUDIT_DEBUG int put_count; int ino_count; @@ -1226,6 +1228,27 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver); } +static void show_special(struct audit_context *context) +{ + struct audit_buffer *ab; + int i; + + ab = audit_log_start(context, GFP_KERNEL, context->type); + if (!ab) + return; + + switch (context->type) { + case AUDIT_SOCKETCALL: { + int nargs = context->socketcall.nargs; + audit_log_format(ab, "nargs=%d", nargs); + for (i = 0; i < nargs; i++) + audit_log_format(ab, " a%d=%lx", i, + context->socketcall.args[i]); + break; } + } + audit_log_end(ab); +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { const struct cred *cred; @@ -1372,13 +1395,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_execve_info(context, &ab, axi); break; } - case AUDIT_SOCKETCALL: { - struct audit_aux_data_socketcall *axs = (void *)aux; - audit_log_format(ab, "nargs=%d", axs->nargs); - for (i=0; inargs; i++) - audit_log_format(ab, " a%d=%lx", i, axs->args[i]); - break; } - case AUDIT_FD_PAIR: { struct audit_aux_data_fd_pair *axs = (void *)aux; audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]); @@ -1410,6 +1426,9 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_end(ab); } + if (context->type) + show_special(context); + if (context->sockaddr_len) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR); if (ab) { @@ -1689,6 +1708,7 @@ void audit_syscall_exit(int valid, long return_code) context->target_pid = 0; context->target_sid = 0; context->sockaddr_len = 0; + context->type = 0; kfree(context->filterkey); context->filterkey = NULL; tsk->audit_context = context; @@ -2406,27 +2426,17 @@ int audit_bprm(struct linux_binprm *bprm) * @nargs: number of args * @args: args array * - * Returns 0 for success or NULL context or < 0 on error. */ -int audit_socketcall(int nargs, unsigned long *args) +void audit_socketcall(int nargs, unsigned long *args) { - struct audit_aux_data_socketcall *ax; struct audit_context *context = current->audit_context; if (likely(!context || context->dummy)) - return 0; - - ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL); - if (!ax) - return -ENOMEM; - - ax->nargs = nargs; - memcpy(ax->args, args, nargs * sizeof(unsigned long)); + return; - ax->d.type = AUDIT_SOCKETCALL; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->type = AUDIT_SOCKETCALL; + context->socketcall.nargs = nargs; + memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long)); } /** -- cgit v1.2.3 From a33e6751003c5ade603737d828b1519d980ce392 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 03:40:06 -0500 Subject: sanitize audit_ipc_obj() * get rid of allocations * make it return void * simplify callers Signed-off-by: Al Viro --- kernel/auditsc.c | 88 ++++++++++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 51 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 5cda66466e14..73504313264f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -247,6 +247,12 @@ struct audit_context { int nargs; long args[6]; } socketcall; + struct { + uid_t uid; + gid_t gid; + mode_t mode; + u32 osid; + } ipc; }; #if AUDIT_DEBUG @@ -605,19 +611,12 @@ static int audit_filter_rules(struct task_struct *tsk, } } /* Find ipc objects that match */ - if (ctx) { - struct audit_aux_data *aux; - for (aux = ctx->aux; aux; - aux = aux->next) { - if (aux->type == AUDIT_IPC) { - struct audit_aux_data_ipcctl *axi = (void *)aux; - if (security_audit_rule_match(axi->osid, f->type, f->op, f->lsm_rule, ctx)) { - ++result; - break; - } - } - } - } + if (!ctx || ctx->type != AUDIT_IPC) + break; + if (security_audit_rule_match(ctx->ipc.osid, + f->type, f->op, + f->lsm_rule, ctx)) + ++result; } break; case AUDIT_ARG0: @@ -1228,7 +1227,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver); } -static void show_special(struct audit_context *context) +static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; int i; @@ -1245,6 +1244,23 @@ static void show_special(struct audit_context *context) audit_log_format(ab, " a%d=%lx", i, context->socketcall.args[i]); break; } + case AUDIT_IPC: { + u32 osid = context->ipc.osid; + + audit_log_format(ab, "ouid=%u ogid=%u mode=%#o", + context->ipc.uid, context->ipc.gid, context->ipc.mode); + if (osid) { + char *ctx = NULL; + u32 len; + if (security_secid_to_secctx(osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", osid); + *call_panic = 1; + } else { + audit_log_format(ab, " obj=%s", ctx); + security_release_secctx(ctx, len); + } + } + break; } } audit_log_end(ab); } @@ -1363,26 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs); break; } - case AUDIT_IPC: { - struct audit_aux_data_ipcctl *axi = (void *)aux; - audit_log_format(ab, - "ouid=%u ogid=%u mode=%#o", - axi->uid, axi->gid, axi->mode); - if (axi->osid != 0) { - char *ctx = NULL; - u32 len; - if (security_secid_to_secctx( - axi->osid, &ctx, &len)) { - audit_log_format(ab, " osid=%u", - axi->osid); - call_panic = 1; - } else { - audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); - } - } - break; } - case AUDIT_IPC_SET_PERM: { struct audit_aux_data_ipcctl *axi = (void *)aux; audit_log_format(ab, @@ -1427,7 +1423,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } if (context->type) - show_special(context); + show_special(context, &call_panic); if (context->sockaddr_len) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR); @@ -2349,25 +2345,15 @@ int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) * audit_ipc_obj - record audit data for ipc object * @ipcp: ipc permissions * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_ipc_obj(struct kern_ipc_perm *ipcp) +void __audit_ipc_obj(struct kern_ipc_perm *ipcp) { - struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - ax->uid = ipcp->uid; - ax->gid = ipcp->gid; - ax->mode = ipcp->mode; - security_ipc_getsecid(ipcp, &ax->osid); - ax->d.type = AUDIT_IPC; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->ipc.uid = ipcp->uid; + context->ipc.gid = ipcp->gid; + context->ipc.mode = ipcp->mode; + security_ipc_getsecid(ipcp, &context->ipc.osid); + context->type = AUDIT_IPC; } /** -- cgit v1.2.3 From e816f370cbadd2afea9f1a42f232d0636137d563 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 03:47:15 -0500 Subject: sanitize audit_ipc_set_perm() * get rid of allocations * make it return void * simplify callers Signed-off-by: Al Viro --- kernel/auditsc.c | 59 +++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 73504313264f..fbed62e05bce 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -151,16 +151,6 @@ struct audit_aux_data_mq_getsetattr { struct mq_attr mqstat; }; -struct audit_aux_data_ipcctl { - struct audit_aux_data d; - struct ipc_perm p; - unsigned long qbytes; - uid_t uid; - gid_t gid; - mode_t mode; - u32 osid; -}; - struct audit_aux_data_execve { struct audit_aux_data d; int argc; @@ -252,6 +242,11 @@ struct audit_context { gid_t gid; mode_t mode; u32 osid; + int has_perm; + uid_t perm_uid; + gid_t perm_gid; + mode_t perm_mode; + unsigned long qbytes; } ipc; }; @@ -1260,6 +1255,19 @@ static void show_special(struct audit_context *context, int *call_panic) security_release_secctx(ctx, len); } } + if (context->ipc.has_perm) { + audit_log_end(ab); + ab = audit_log_start(context, GFP_KERNEL, + AUDIT_IPC_SET_PERM); + audit_log_format(ab, + "qbytes=%lx ouid=%u ogid=%u mode=%#o", + context->ipc.qbytes, + context->ipc.perm_uid, + context->ipc.perm_gid, + context->ipc.perm_mode); + if (!ab) + return; + } break; } } audit_log_end(ab); @@ -1379,13 +1387,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs); break; } - case AUDIT_IPC_SET_PERM: { - struct audit_aux_data_ipcctl *axi = (void *)aux; - audit_log_format(ab, - "qbytes=%lx ouid=%u ogid=%u mode=%#o", - axi->qbytes, axi->uid, axi->gid, axi->mode); - break; } - case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; audit_log_execve_info(context, &ab, axi); @@ -2352,6 +2353,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) context->ipc.uid = ipcp->uid; context->ipc.gid = ipcp->gid; context->ipc.mode = ipcp->mode; + context->ipc.has_perm = 0; security_ipc_getsecid(ipcp, &context->ipc.osid); context->type = AUDIT_IPC; } @@ -2363,26 +2365,17 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) * @gid: msgq group id * @mode: msgq mode (permissions) * - * Returns 0 for success or NULL context or < 0 on error. + * Called only after audit_ipc_obj(). */ -int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) +void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) { - struct audit_aux_data_ipcctl *ax; struct audit_context *context = current->audit_context; - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - ax->qbytes = qbytes; - ax->uid = uid; - ax->gid = gid; - ax->mode = mode; - - ax->d.type = AUDIT_IPC_SET_PERM; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->ipc.qbytes = qbytes; + context->ipc.perm_uid = uid; + context->ipc.perm_gid = gid; + context->ipc.perm_mode = mode; + context->ipc.has_perm = 1; } int audit_bprm(struct linux_binprm *bprm) -- cgit v1.2.3 From 7392906ea915b9a2c14dea32b3604b4e178f82f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 06:58:59 -0500 Subject: sanitize audit_mq_getsetattr() * get rid of allocations * make it return void * don't duplicate parts of audit_dummy_context() Signed-off-by: Al Viro --- kernel/auditsc.c | 54 +++++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fbed62e05bce..c50178c7e245 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -145,12 +145,6 @@ struct audit_aux_data_mq_notify { struct sigevent notification; }; -struct audit_aux_data_mq_getsetattr { - struct audit_aux_data d; - mqd_t mqdes; - struct mq_attr mqstat; -}; - struct audit_aux_data_execve { struct audit_aux_data d; int argc; @@ -248,6 +242,10 @@ struct audit_context { mode_t perm_mode; unsigned long qbytes; } ipc; + struct { + mqd_t mqdes; + struct mq_attr mqstat; + } mq_getsetattr; }; #if AUDIT_DEBUG @@ -1269,6 +1267,15 @@ static void show_special(struct audit_context *context, int *call_panic) return; } break; } + case AUDIT_MQ_GETSETATTR: { + struct mq_attr *attr = &context->mq_getsetattr.mqstat; + audit_log_format(ab, + "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld " + "mq_curmsgs=%ld ", + context->mq_getsetattr.mqdes, + attr->mq_flags, attr->mq_maxmsg, + attr->mq_msgsize, attr->mq_curmsgs); + break; } } audit_log_end(ab); } @@ -1377,16 +1384,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts axi->notification.sigev_signo); break; } - case AUDIT_MQ_GETSETATTR: { - struct audit_aux_data_mq_getsetattr *axi = (void *)aux; - audit_log_format(ab, - "mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld " - "mq_curmsgs=%ld ", - axi->mqdes, - axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg, - axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs); - break; } - case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; audit_log_execve_info(context, &ab, axi); @@ -2316,30 +2313,13 @@ int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) * @mqdes: MQ descriptor * @mqstat: MQ flags * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) +void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { - struct audit_aux_data_mq_getsetattr *ax; struct audit_context *context = current->audit_context; - - if (!audit_enabled) - return 0; - - if (likely(!context)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - ax->mqdes = mqdes; - ax->mqstat = *mqstat; - - ax->d.type = AUDIT_MQ_GETSETATTR; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->mq_getsetattr.mqdes = mqdes; + context->mq_getsetattr.mqstat = *mqstat; + context->type = AUDIT_MQ_GETSETATTR; } /** -- cgit v1.2.3 From 20114f71b27cafeb7c7e41d2b0f0b68c3fbb022b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2008 07:16:12 -0500 Subject: sanitize audit_mq_notify() * don't copy_from_user() twice * don't bother with allocations * don't duplicate parts of audit_dummy_context() * make it return void Signed-off-by: Al Viro --- kernel/auditsc.c | 56 ++++++++++++++++---------------------------------------- 1 file changed, 16 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index c50178c7e245..3ece960de894 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -139,12 +139,6 @@ struct audit_aux_data_mq_sendrecv { struct timespec abs_timeout; }; -struct audit_aux_data_mq_notify { - struct audit_aux_data d; - mqd_t mqdes; - struct sigevent notification; -}; - struct audit_aux_data_execve { struct audit_aux_data d; int argc; @@ -246,6 +240,10 @@ struct audit_context { mqd_t mqdes; struct mq_attr mqstat; } mq_getsetattr; + struct { + mqd_t mqdes; + int sigev_signo; + } mq_notify; }; #if AUDIT_DEBUG @@ -1267,6 +1265,11 @@ static void show_special(struct audit_context *context, int *call_panic) return; } break; } + case AUDIT_MQ_NOTIFY: { + audit_log_format(ab, "mqdes=%d sigev_signo=%d", + context->mq_notify.mqdes, + context->mq_notify.sigev_signo); + break; } case AUDIT_MQ_GETSETATTR: { struct mq_attr *attr = &context->mq_getsetattr.mqstat; audit_log_format(ab, @@ -1376,14 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec); break; } - case AUDIT_MQ_NOTIFY: { - struct audit_aux_data_mq_notify *axi = (void *)aux; - audit_log_format(ab, - "mqdes=%d sigev_signo=%d", - axi->mqdes, - axi->notification.sigev_signo); - break; } - case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; audit_log_execve_info(context, &ab, axi); @@ -2274,38 +2269,19 @@ int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, * @mqdes: MQ descriptor * @u_notification: Notification event * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) +void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { - struct audit_aux_data_mq_notify *ax; struct audit_context *context = current->audit_context; - if (!audit_enabled) - return 0; - - if (likely(!context)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - if (u_notification != NULL) { - if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) { - kfree(ax); - return -EFAULT; - } - } else - memset(&ax->notification, 0, sizeof(ax->notification)); - - ax->mqdes = mqdes; + if (notification) + context->mq_notify.sigev_signo = notification->sigev_signo; + else + context->mq_notify.sigev_signo = 0; - ax->d.type = AUDIT_MQ_NOTIFY; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->mq_notify.mqdes = mqdes; + context->type = AUDIT_MQ_NOTIFY; } /** -- cgit v1.2.3 From c32c8af43b9adde8d6f938d8e6328c13b8de79ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 03:46:48 -0500 Subject: sanitize AUDIT_MQ_SENDRECV * logging the original value of *msg_prio in mq_timedreceive(2) is insane - the argument is write-only (i.e. syscall always ignores the original value and only overwrites it). * merge __audit_mq_timed{send,receive} * don't do copy_from_user() twice * don't mess with allocations in auditsc part * ... and don't bother checking !audit_enabled and !context in there - we'd already checked for audit_dummy_context(). Signed-off-by: Al Viro --- kernel/auditsc.c | 127 +++++++++++++------------------------------------------ 1 file changed, 29 insertions(+), 98 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3ece960de894..140c47453470 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -131,14 +131,6 @@ struct audit_aux_data_mq_open { struct mq_attr attr; }; -struct audit_aux_data_mq_sendrecv { - struct audit_aux_data d; - mqd_t mqdes; - size_t msg_len; - unsigned int msg_prio; - struct timespec abs_timeout; -}; - struct audit_aux_data_execve { struct audit_aux_data d; int argc; @@ -244,6 +236,12 @@ struct audit_context { mqd_t mqdes; int sigev_signo; } mq_notify; + struct { + mqd_t mqdes; + size_t msg_len; + unsigned int msg_prio; + struct timespec abs_timeout; + } mq_sendrecv; }; #if AUDIT_DEBUG @@ -1265,6 +1263,16 @@ static void show_special(struct audit_context *context, int *call_panic) return; } break; } + case AUDIT_MQ_SENDRECV: { + audit_log_format(ab, + "mqdes=%d msg_len=%zd msg_prio=%u " + "abs_timeout_sec=%ld abs_timeout_nsec=%ld", + context->mq_sendrecv.mqdes, + context->mq_sendrecv.msg_len, + context->mq_sendrecv.msg_prio, + context->mq_sendrecv.abs_timeout.tv_sec, + context->mq_sendrecv.abs_timeout.tv_nsec); + break; } case AUDIT_MQ_NOTIFY: { audit_log_format(ab, "mqdes=%d sigev_signo=%d", context->mq_notify.mqdes, @@ -1370,15 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts axi->attr.mq_curmsgs); break; } - case AUDIT_MQ_SENDRECV: { - struct audit_aux_data_mq_sendrecv *axi = (void *)aux; - audit_log_format(ab, - "mqdes=%d msg_len=%zd msg_prio=%u " - "abs_timeout_sec=%ld abs_timeout_nsec=%ld", - axi->mqdes, axi->msg_len, axi->msg_prio, - axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec); - break; } - case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; audit_log_execve_info(context, &ab, axi); @@ -2171,97 +2170,29 @@ int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) } /** - * __audit_mq_timedsend - record audit data for a POSIX MQ timed send + * __audit_mq_sendrecv - record audit data for a POSIX MQ timed send/receive * @mqdes: MQ descriptor * @msg_len: Message length * @msg_prio: Message priority - * @u_abs_timeout: Message timeout in absolute time - * - * Returns 0 for success or NULL context or < 0 on error. - */ -int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, - const struct timespec __user *u_abs_timeout) -{ - struct audit_aux_data_mq_sendrecv *ax; - struct audit_context *context = current->audit_context; - - if (!audit_enabled) - return 0; - - if (likely(!context)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - if (u_abs_timeout != NULL) { - if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { - kfree(ax); - return -EFAULT; - } - } else - memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); - - ax->mqdes = mqdes; - ax->msg_len = msg_len; - ax->msg_prio = msg_prio; - - ax->d.type = AUDIT_MQ_SENDRECV; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; -} - -/** - * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive - * @mqdes: MQ descriptor - * @msg_len: Message length - * @u_msg_prio: Message priority - * @u_abs_timeout: Message timeout in absolute time + * @abs_timeout: Message timeout in absolute time * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, - unsigned int __user *u_msg_prio, - const struct timespec __user *u_abs_timeout) +void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, + const struct timespec *abs_timeout) { - struct audit_aux_data_mq_sendrecv *ax; struct audit_context *context = current->audit_context; + struct timespec *p = &context->mq_sendrecv.abs_timeout; - if (!audit_enabled) - return 0; - - if (likely(!context)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - if (u_msg_prio != NULL) { - if (get_user(ax->msg_prio, u_msg_prio)) { - kfree(ax); - return -EFAULT; - } - } else - ax->msg_prio = 0; - - if (u_abs_timeout != NULL) { - if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) { - kfree(ax); - return -EFAULT; - } - } else - memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout)); + if (abs_timeout) + memcpy(p, abs_timeout, sizeof(struct timespec)); + else + memset(p, 0, sizeof(struct timespec)); - ax->mqdes = mqdes; - ax->msg_len = msg_len; + context->mq_sendrecv.mqdes = mqdes; + context->mq_sendrecv.msg_len = msg_len; + context->mq_sendrecv.msg_prio = msg_prio; - ax->d.type = AUDIT_MQ_SENDRECV; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->type = AUDIT_MQ_SENDRECV; } /** -- cgit v1.2.3 From 564f6993ffef656aebaf46cf2f1f6cb4f5c97207 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 04:02:26 -0500 Subject: sanitize audit_mq_open() * don't bother with allocations * don't do double copy_from_user() * don't duplicate parts of check for audit_dummy_context() Signed-off-by: Al Viro --- kernel/auditsc.c | 65 ++++++++++++++++++++------------------------------------ 1 file changed, 23 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 140c47453470..83e946f1cdde 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -124,13 +124,6 @@ struct audit_aux_data { /* Number of target pids per aux struct. */ #define AUDIT_AUX_PIDS 16 -struct audit_aux_data_mq_open { - struct audit_aux_data d; - int oflag; - mode_t mode; - struct mq_attr attr; -}; - struct audit_aux_data_execve { struct audit_aux_data d; int argc; @@ -242,6 +235,11 @@ struct audit_context { unsigned int msg_prio; struct timespec abs_timeout; } mq_sendrecv; + struct { + int oflag; + mode_t mode; + struct mq_attr attr; + } mq_open; }; #if AUDIT_DEBUG @@ -1263,6 +1261,16 @@ static void show_special(struct audit_context *context, int *call_panic) return; } break; } + case AUDIT_MQ_OPEN: { + audit_log_format(ab, + "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " + "mq_msgsize=%ld mq_curmsgs=%ld", + context->mq_open.oflag, context->mq_open.mode, + context->mq_open.attr.mq_flags, + context->mq_open.attr.mq_maxmsg, + context->mq_open.attr.mq_msgsize, + context->mq_open.attr.mq_curmsgs); + break; } case AUDIT_MQ_SENDRECV: { audit_log_format(ab, "mqdes=%d msg_len=%zd msg_prio=%u " @@ -1368,15 +1376,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts continue; /* audit_panic has been called */ switch (aux->type) { - case AUDIT_MQ_OPEN: { - struct audit_aux_data_mq_open *axi = (void *)aux; - audit_log_format(ab, - "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " - "mq_msgsize=%ld mq_curmsgs=%ld", - axi->oflag, axi->mode, axi->attr.mq_flags, - axi->attr.mq_maxmsg, axi->attr.mq_msgsize, - axi->attr.mq_curmsgs); - break; } case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; @@ -2135,38 +2134,20 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) * @mode: mode bits * @u_attr: queue attributes * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr) +void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) { - struct audit_aux_data_mq_open *ax; struct audit_context *context = current->audit_context; - if (!audit_enabled) - return 0; - - if (likely(!context)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_ATOMIC); - if (!ax) - return -ENOMEM; - - if (u_attr != NULL) { - if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) { - kfree(ax); - return -EFAULT; - } - } else - memset(&ax->attr, 0, sizeof(ax->attr)); + if (attr) + memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr)); + else + memset(&context->mq_open.attr, 0, sizeof(struct mq_attr)); - ax->oflag = oflag; - ax->mode = mode; + context->mq_open.oflag = oflag; + context->mq_open.mode = mode; - ax->d.type = AUDIT_MQ_OPEN; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->type = AUDIT_MQ_OPEN; } /** -- cgit v1.2.3 From 157cf649a735a2f7e8dba0ed08e6e38b6c30d886 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 04:57:47 -0500 Subject: sanitize audit_fd_pair() * no allocations * return void Signed-off-by: Al Viro --- kernel/auditsc.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 83e946f1cdde..327e65d50674 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -131,11 +131,6 @@ struct audit_aux_data_execve { struct mm_struct *mm; }; -struct audit_aux_data_fd_pair { - struct audit_aux_data d; - int fd[2]; -}; - struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; @@ -241,6 +236,7 @@ struct audit_context { struct mq_attr attr; } mq_open; }; + int fds[2]; #if AUDIT_DEBUG int put_count; @@ -1382,11 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_execve_info(context, &ab, axi); break; } - case AUDIT_FD_PAIR: { - struct audit_aux_data_fd_pair *axs = (void *)aux; - audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]); - break; } - case AUDIT_BPRM_FCAPS: { struct audit_aux_data_bprm_fcaps *axs = (void *)aux; audit_log_format(ab, "fver=%x", axs->fcap_ver); @@ -1416,6 +1407,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (context->type) show_special(context, &call_panic); + if (context->fds[0] >= 0) { + ab = audit_log_start(context, GFP_KERNEL, AUDIT_FD_PAIR); + if (ab) { + audit_log_format(ab, "fd0=%d fd1=%d", + context->fds[0], context->fds[1]); + audit_log_end(ab); + } + } + if (context->sockaddr_len) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR); if (ab) { @@ -1696,6 +1696,7 @@ void audit_syscall_exit(int valid, long return_code) context->target_sid = 0; context->sockaddr_len = 0; context->type = 0; + context->fds[0] = -1; kfree(context->filterkey); context->filterkey = NULL; tsk->audit_context = context; @@ -2291,29 +2292,12 @@ void audit_socketcall(int nargs, unsigned long *args) * @fd1: the first file descriptor * @fd2: the second file descriptor * - * Returns 0 for success or NULL context or < 0 on error. */ -int __audit_fd_pair(int fd1, int fd2) +void __audit_fd_pair(int fd1, int fd2) { struct audit_context *context = current->audit_context; - struct audit_aux_data_fd_pair *ax; - - if (likely(!context)) { - return 0; - } - - ax = kmalloc(sizeof(*ax), GFP_KERNEL); - if (!ax) { - return -ENOMEM; - } - - ax->fd[0] = fd1; - ax->fd[1] = fd2; - - ax->d.type = AUDIT_FD_PAIR; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->fds[0] = fd1; + context->fds[1] = fd2; } /** -- cgit v1.2.3 From 57f71a0af4244d9ba3c0bce74b1d2e66e8d520bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jan 2009 14:52:57 -0500 Subject: sanitize audit_log_capset() * no allocations * return void * don't duplicate checked for dummy context Signed-off-by: Al Viro --- kernel/auditsc.c | 44 ++++++++++++++++---------------------------- kernel/capability.c | 4 +--- 2 files changed, 17 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 327e65d50674..c76a58215f54 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -235,6 +235,10 @@ struct audit_context { mode_t mode; struct mq_attr attr; } mq_open; + struct { + pid_t pid; + struct audit_cap_data cap; + } capset; }; int fds[2]; @@ -1291,6 +1295,12 @@ static void show_special(struct audit_context *context, int *call_panic) attr->mq_flags, attr->mq_maxmsg, attr->mq_msgsize, attr->mq_curmsgs); break; } + case AUDIT_CAPSET: { + audit_log_format(ab, "pid=%d", context->capset.pid); + audit_log_cap(ab, "cap_pi", &context->capset.cap.inheritable); + audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); + audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); + break; } } audit_log_end(ab); } @@ -1392,14 +1402,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_cap(ab, "new_pe", &axs->new_pcap.effective); break; } - case AUDIT_CAPSET: { - struct audit_aux_data_capset *axs = (void *)aux; - audit_log_format(ab, "pid=%d", axs->pid); - audit_log_cap(ab, "cap_pi", &axs->cap.inheritable); - audit_log_cap(ab, "cap_pp", &axs->cap.permitted); - audit_log_cap(ab, "cap_pe", &axs->cap.effective); - break; } - } audit_log_end(ab); } @@ -2456,29 +2458,15 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, * Record the aguments userspace sent to sys_capset for later printing by the * audit system if applicable */ -int __audit_log_capset(pid_t pid, +void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old) { - struct audit_aux_data_capset *ax; struct audit_context *context = current->audit_context; - - if (likely(!audit_enabled || !context || context->dummy)) - return 0; - - ax = kmalloc(sizeof(*ax), GFP_KERNEL); - if (!ax) - return -ENOMEM; - - ax->d.type = AUDIT_CAPSET; - ax->d.next = context->aux; - context->aux = (void *)ax; - - ax->pid = pid; - ax->cap.effective = new->cap_effective; - ax->cap.inheritable = new->cap_effective; - ax->cap.permitted = new->cap_permitted; - - return 0; + context->capset.pid = pid; + context->capset.cap.effective = new->cap_effective; + context->capset.cap.inheritable = new->cap_effective; + context->capset.cap.permitted = new->cap_permitted; + context->type = AUDIT_CAPSET; } /** diff --git a/kernel/capability.c b/kernel/capability.c index 36b4b4daebec..c598d9d5be4f 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -280,9 +280,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (ret < 0) goto error; - ret = audit_log_capset(pid, new, current_cred()); - if (ret < 0) - return ret; + audit_log_capset(pid, new, current_cred()); return commit_creds(new); -- cgit v1.2.3 From 1a9d0797b8977d413435277bf9661efbbd584693 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 12:04:02 -0500 Subject: audit_update_lsm_rules() misses the audit_inode_hash[] ones Signed-off-by: Al Viro --- kernel/auditfilter.c | 77 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 9fd85a4640a0..0febaa0f784c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1778,6 +1778,41 @@ unlock_and_return: return result; } +static int update_lsm_rule(struct audit_entry *entry) +{ + struct audit_entry *nentry; + struct audit_watch *watch; + struct audit_tree *tree; + int err = 0; + + if (!security_audit_rule_known(&entry->rule)) + return 0; + + watch = entry->rule.watch; + tree = entry->rule.tree; + nentry = audit_dupe_rule(&entry->rule, watch); + if (IS_ERR(nentry)) { + /* save the first error encountered for the + * return value */ + err = PTR_ERR(nentry); + audit_panic("error updating LSM filters"); + if (watch) + list_del(&entry->rule.rlist); + list_del_rcu(&entry->list); + } else { + if (watch) { + list_add(&nentry->rule.rlist, &watch->rules); + list_del(&entry->rule.rlist); + } else if (tree) + list_replace_init(&entry->rule.rlist, + &nentry->rule.rlist); + list_replace_rcu(&entry->list, &nentry->list); + } + call_rcu(&entry->rcu, audit_free_rule_rcu); + + return err; +} + /* This function will re-initialize the lsm_rule field of all applicable rules. * It will traverse the filter lists serarching for rules that contain LSM * specific filter fields. When such a rule is found, it is copied, the @@ -1785,42 +1820,24 @@ unlock_and_return: * updated rule. */ int audit_update_lsm_rules(void) { - struct audit_entry *entry, *n, *nentry; - struct audit_watch *watch; - struct audit_tree *tree; + struct audit_entry *e, *n; int i, err = 0; /* audit_filter_mutex synchronizes the writers */ mutex_lock(&audit_filter_mutex); for (i = 0; i < AUDIT_NR_FILTERS; i++) { - list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { - if (!security_audit_rule_known(&entry->rule)) - continue; - - watch = entry->rule.watch; - tree = entry->rule.tree; - nentry = audit_dupe_rule(&entry->rule, watch); - if (IS_ERR(nentry)) { - /* save the first error encountered for the - * return value */ - if (!err) - err = PTR_ERR(nentry); - audit_panic("error updating LSM filters"); - if (watch) - list_del(&entry->rule.rlist); - list_del_rcu(&entry->list); - } else { - if (watch) { - list_add(&nentry->rule.rlist, - &watch->rules); - list_del(&entry->rule.rlist); - } else if (tree) - list_replace_init(&entry->rule.rlist, - &nentry->rule.rlist); - list_replace_rcu(&entry->list, &nentry->list); - } - call_rcu(&entry->rcu, audit_free_rule_rcu); + list_for_each_entry_safe(e, n, &audit_filter_list[i], list) { + int res = update_lsm_rule(e); + if (!err) + err = res; + } + } + for (i=0; i< AUDIT_INODE_BUCKETS; i++) { + list_for_each_entry_safe(e, n, &audit_inode_hash[i], list) { + int res = update_lsm_rule(e); + if (!err) + err = res; } } -- cgit v1.2.3 From 0590b9335a1c72a3f0defcc6231287f7817e07c8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Dec 2008 23:45:27 -0500 Subject: fixing audit rule ordering mess, part 1 Problem: ordering between the rules on exit chain is currently lost; all watch and inode rules are listed after everything else _and_ exit,never on one kind doesn't stop exit,always on another from being matched. Solution: assign priorities to rules, keep track of the current highest-priority matching rule and its result (always/never). Signed-off-by: Al Viro --- kernel/audit.h | 5 +--- kernel/auditfilter.c | 17 +++++++++-- kernel/auditsc.c | 79 ++++++++++++++++++++++++++++------------------------ 3 files changed, 58 insertions(+), 43 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.h b/kernel/audit.h index 9d6717412fec..16f18cac661b 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -159,11 +159,8 @@ static inline int audit_signal_info(int sig, struct task_struct *t) return __audit_signal_info(sig, t); return 0; } -extern enum audit_state audit_filter_inodes(struct task_struct *, - struct audit_context *); -extern void audit_set_auditable(struct audit_context *); +extern void audit_filter_inodes(struct task_struct *, struct audit_context *); #else #define audit_signal_info(s,t) AUDIT_DISABLED #define audit_filter_inodes(t,c) AUDIT_DISABLED -#define audit_set_auditable(c) #endif diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 0febaa0f784c..995a2e86808d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -919,6 +919,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old, new->action = old->action; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) new->mask[i] = old->mask[i]; + new->prio = old->prio; new->buflen = old->buflen; new->inode_f = old->inode_f; new->watch = NULL; @@ -987,9 +988,8 @@ static void audit_update_watch(struct audit_parent *parent, /* If the update involves invalidating rules, do the inode-based * filtering now, so we don't omit records. */ - if (invalidating && current->audit_context && - audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT) - audit_set_auditable(current->audit_context); + if (invalidating && current->audit_context) + audit_filter_inodes(current, current->audit_context); nwatch = audit_dupe_watch(owatch); if (IS_ERR(nwatch)) { @@ -1258,6 +1258,9 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp, return ret; } +static u64 prio_low = ~0ULL/2; +static u64 prio_high = ~0ULL/2 - 1; + /* Add rule to given filterlist if not a duplicate. */ static inline int audit_add_rule(struct audit_entry *entry, struct list_head *list) @@ -1319,6 +1322,14 @@ static inline int audit_add_rule(struct audit_entry *entry, } } + entry->rule.prio = ~0ULL; + if (entry->rule.listnr == AUDIT_FILTER_EXIT) { + if (entry->rule.flags & AUDIT_FILTER_PREPEND) + entry->rule.prio = ++prio_high; + else + entry->rule.prio = --prio_low; + } + if (entry->rule.flags & AUDIT_FILTER_PREPEND) { list_add_rcu(&entry->list, list); entry->rule.flags &= ~AUDIT_FILTER_PREPEND; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index c76a58215f54..19d2c2747c8d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -165,14 +165,14 @@ struct audit_tree_refs { struct audit_context { int dummy; /* must be the first element */ int in_syscall; /* 1 if task is in a syscall */ - enum audit_state state; + enum audit_state state, current_state; unsigned int serial; /* serial number for record */ struct timespec ctime; /* time of syscall entry */ int major; /* syscall number */ unsigned long argv[4]; /* syscall arguments */ int return_valid; /* return code is valid */ long return_code;/* syscall return code */ - int auditable; /* 1 if record should be written */ + u64 prio; int name_count; struct audit_names names[AUDIT_NAMES]; char * filterkey; /* key for rule that triggered record */ @@ -630,8 +630,16 @@ static int audit_filter_rules(struct task_struct *tsk, return 0; } } - if (rule->filterkey && ctx) - ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); + + if (ctx) { + if (rule->prio <= ctx->prio) + return 0; + if (rule->filterkey) { + kfree(ctx->filterkey); + ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); + } + ctx->prio = rule->prio; + } switch (rule->action) { case AUDIT_NEVER: *state = AUDIT_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; @@ -685,6 +693,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, audit_filter_rules(tsk, &e->rule, ctx, NULL, &state)) { rcu_read_unlock(); + ctx->current_state = state; return state; } } @@ -698,15 +707,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, * buckets applicable to the inode numbers in audit_names[]. * Regarding audit_state, same rules apply as for audit_filter_syscall(). */ -enum audit_state audit_filter_inodes(struct task_struct *tsk, - struct audit_context *ctx) +void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) { int i; struct audit_entry *e; enum audit_state state; if (audit_pid && tsk->tgid == audit_pid) - return AUDIT_DISABLED; + return; rcu_read_lock(); for (i = 0; i < ctx->name_count; i++) { @@ -723,17 +731,20 @@ enum audit_state audit_filter_inodes(struct task_struct *tsk, if ((e->rule.mask[word] & bit) == bit && audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { rcu_read_unlock(); - return state; + ctx->current_state = state; + return; } } } rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; } -void audit_set_auditable(struct audit_context *ctx) +static void audit_set_auditable(struct audit_context *ctx) { - ctx->auditable = 1; + if (!ctx->prio) { + ctx->prio = 1; + ctx->current_state = AUDIT_RECORD_CONTEXT; + } } static inline struct audit_context *audit_get_context(struct task_struct *tsk, @@ -764,23 +775,11 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, else context->return_code = return_code; - if (context->in_syscall && !context->dummy && !context->auditable) { - enum audit_state state; - - state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); - if (state == AUDIT_RECORD_CONTEXT) { - context->auditable = 1; - goto get_context; - } - - state = audit_filter_inodes(tsk, context); - if (state == AUDIT_RECORD_CONTEXT) - context->auditable = 1; - + if (context->in_syscall && !context->dummy) { + audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); + audit_filter_inodes(tsk, context); } -get_context: - tsk->audit_context = NULL; return context; } @@ -790,8 +789,7 @@ static inline void audit_free_names(struct audit_context *context) int i; #if AUDIT_DEBUG == 2 - if (context->auditable - ||context->put_count + context->ino_count != context->name_count) { + if (context->put_count + context->ino_count != context->name_count) { printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d" " name_count=%d put_count=%d" " ino_count=%d [NOT freeing]\n", @@ -842,6 +840,7 @@ static inline void audit_zero_context(struct audit_context *context, { memset(context, 0, sizeof(*context)); context->state = state; + context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; } static inline struct audit_context *audit_alloc_context(enum audit_state state) @@ -1543,7 +1542,7 @@ void audit_free(struct task_struct *tsk) * We use GFP_ATOMIC here because we might be doing this * in the context of the idle thread */ /* that can happen only if we are called from do_exit() */ - if (context->in_syscall && context->auditable) + if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) audit_log_exit(context, tsk); audit_free_context(context); @@ -1627,15 +1626,17 @@ void audit_syscall_entry(int arch, int major, state = context->state; context->dummy = !audit_n_rules; - if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)) + if (!context->dummy && state == AUDIT_BUILD_CONTEXT) { + context->prio = 0; state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]); + } if (likely(state == AUDIT_DISABLED)) return; context->serial = 0; context->ctime = CURRENT_TIME; context->in_syscall = 1; - context->auditable = !!(state == AUDIT_RECORD_CONTEXT); + context->current_state = state; context->ppid = 0; } @@ -1643,17 +1644,20 @@ void audit_finish_fork(struct task_struct *child) { struct audit_context *ctx = current->audit_context; struct audit_context *p = child->audit_context; - if (!p || !ctx || !ctx->auditable) + if (!p || !ctx) + return; + if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT) return; p->arch = ctx->arch; p->major = ctx->major; memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); p->ctime = ctx->ctime; p->dummy = ctx->dummy; - p->auditable = ctx->auditable; p->in_syscall = ctx->in_syscall; p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); p->ppid = current->pid; + p->prio = ctx->prio; + p->current_state = ctx->current_state; } /** @@ -1677,11 +1681,11 @@ void audit_syscall_exit(int valid, long return_code) if (likely(!context)) return; - if (context->in_syscall && context->auditable) + if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) audit_log_exit(context, tsk); context->in_syscall = 0; - context->auditable = 0; + context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; if (context->previous) { struct audit_context *new_context = context->previous; @@ -2091,7 +2095,10 @@ int auditsc_get_stamp(struct audit_context *ctx, t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; - ctx->auditable = 1; + if (!ctx->prio) { + ctx->prio = 1; + ctx->current_state = AUDIT_RECORD_CONTEXT; + } return 1; } -- cgit v1.2.3 From e45aa212ea81d39b38ba158df344dc3a500153e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2008 01:17:50 -0500 Subject: audit rules ordering, part 2 Fix the actual rule listing; add per-type lists _not_ used for matching, with all exit,... sitting on one such list. Simplifies "do something for all rules" logics, while we are at it... Signed-off-by: Al Viro --- kernel/audit_tree.c | 1 + kernel/auditfilter.c | 95 +++++++++++++++++++++------------------------------- 2 files changed, 40 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 8b509441f49a..48bddad2a3dc 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -450,6 +450,7 @@ static void kill_rules(struct audit_tree *tree) audit_log_end(ab); rule->tree = NULL; list_del_rcu(&entry->list); + list_del(&entry->rule.list); call_rcu(&entry->rcu, audit_free_rule_rcu); } } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 995a2e86808d..5d4edc6f7a32 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -86,6 +86,14 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { #error Fix audit_filter_list initialiser #endif }; +static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = { + LIST_HEAD_INIT(audit_rules_list[0]), + LIST_HEAD_INIT(audit_rules_list[1]), + LIST_HEAD_INIT(audit_rules_list[2]), + LIST_HEAD_INIT(audit_rules_list[3]), + LIST_HEAD_INIT(audit_rules_list[4]), + LIST_HEAD_INIT(audit_rules_list[5]), +}; DEFINE_MUTEX(audit_filter_mutex); @@ -1007,12 +1015,15 @@ static void audit_update_watch(struct audit_parent *parent, list_del_rcu(&oentry->list); nentry = audit_dupe_rule(&oentry->rule, nwatch); - if (IS_ERR(nentry)) + if (IS_ERR(nentry)) { + list_del(&oentry->rule.list); audit_panic("error updating watch, removing"); - else { + } else { int h = audit_hash_ino((u32)ino); list_add(&nentry->rule.rlist, &nwatch->rules); list_add_rcu(&nentry->list, &audit_inode_hash[h]); + list_replace(&oentry->rule.list, + &nentry->rule.list); } call_rcu(&oentry->rcu, audit_free_rule_rcu); @@ -1077,6 +1088,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent) audit_log_end(ab); } list_del(&r->rlist); + list_del(&r->list); list_del_rcu(&e->list); call_rcu(&e->rcu, audit_free_rule_rcu); } @@ -1331,9 +1343,13 @@ static inline int audit_add_rule(struct audit_entry *entry, } if (entry->rule.flags & AUDIT_FILTER_PREPEND) { + list_add(&entry->rule.list, + &audit_rules_list[entry->rule.listnr]); list_add_rcu(&entry->list, list); entry->rule.flags &= ~AUDIT_FILTER_PREPEND; } else { + list_add_tail(&entry->rule.list, + &audit_rules_list[entry->rule.listnr]); list_add_tail_rcu(&entry->list, list); } #ifdef CONFIG_AUDITSYSCALL @@ -1415,6 +1431,7 @@ static inline int audit_del_rule(struct audit_entry *entry, audit_remove_tree_rule(&e->rule); list_del_rcu(&e->list); + list_del(&e->rule.list); call_rcu(&e->rcu, audit_free_rule_rcu); #ifdef CONFIG_AUDITSYSCALL @@ -1443,30 +1460,16 @@ out: static void audit_list(int pid, int seq, struct sk_buff_head *q) { struct sk_buff *skb; - struct audit_entry *entry; + struct audit_krule *r; int i; /* This is a blocking read, so use audit_filter_mutex instead of rcu * iterator to sync with list writers. */ for (i=0; irule); - if (unlikely(!rule)) - break; - skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, - rule, sizeof(*rule)); - if (skb) - skb_queue_tail(q, skb); - kfree(rule); - } - } - for (i = 0; i < AUDIT_INODE_BUCKETS; i++) { - list_for_each_entry(entry, &audit_inode_hash[i], list) { + list_for_each_entry(r, &audit_rules_list[i], list) { struct audit_rule *rule; - rule = audit_krule_to_rule(&entry->rule); + rule = audit_krule_to_rule(r); if (unlikely(!rule)) break; skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1, @@ -1485,30 +1488,16 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q) static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) { struct sk_buff *skb; - struct audit_entry *e; + struct audit_krule *r; int i; /* This is a blocking read, so use audit_filter_mutex instead of rcu * iterator to sync with list writers. */ for (i=0; irule); - if (unlikely(!data)) - break; - skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, - data, sizeof(*data) + data->buflen); - if (skb) - skb_queue_tail(q, skb); - kfree(data); - } - } - for (i=0; i< AUDIT_INODE_BUCKETS; i++) { - list_for_each_entry(e, &audit_inode_hash[i], list) { + list_for_each_entry(r, &audit_rules_list[i], list) { struct audit_rule_data *data; - data = audit_krule_to_data(&e->rule); + data = audit_krule_to_data(r); if (unlikely(!data)) break; skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1, @@ -1789,35 +1778,37 @@ unlock_and_return: return result; } -static int update_lsm_rule(struct audit_entry *entry) +static int update_lsm_rule(struct audit_krule *r) { + struct audit_entry *entry = container_of(r, struct audit_entry, rule); struct audit_entry *nentry; struct audit_watch *watch; struct audit_tree *tree; int err = 0; - if (!security_audit_rule_known(&entry->rule)) + if (!security_audit_rule_known(r)) return 0; - watch = entry->rule.watch; - tree = entry->rule.tree; - nentry = audit_dupe_rule(&entry->rule, watch); + watch = r->watch; + tree = r->tree; + nentry = audit_dupe_rule(r, watch); if (IS_ERR(nentry)) { /* save the first error encountered for the * return value */ err = PTR_ERR(nentry); audit_panic("error updating LSM filters"); if (watch) - list_del(&entry->rule.rlist); + list_del(&r->rlist); list_del_rcu(&entry->list); + list_del(&r->list); } else { if (watch) { list_add(&nentry->rule.rlist, &watch->rules); - list_del(&entry->rule.rlist); + list_del(&r->rlist); } else if (tree) - list_replace_init(&entry->rule.rlist, - &nentry->rule.rlist); + list_replace_init(&r->rlist, &nentry->rule.rlist); list_replace_rcu(&entry->list, &nentry->list); + list_replace(&r->list, &nentry->rule.list); } call_rcu(&entry->rcu, audit_free_rule_rcu); @@ -1831,27 +1822,19 @@ static int update_lsm_rule(struct audit_entry *entry) * updated rule. */ int audit_update_lsm_rules(void) { - struct audit_entry *e, *n; + struct audit_krule *r, *n; int i, err = 0; /* audit_filter_mutex synchronizes the writers */ mutex_lock(&audit_filter_mutex); for (i = 0; i < AUDIT_NR_FILTERS; i++) { - list_for_each_entry_safe(e, n, &audit_filter_list[i], list) { - int res = update_lsm_rule(e); - if (!err) - err = res; - } - } - for (i=0; i< AUDIT_INODE_BUCKETS; i++) { - list_for_each_entry_safe(e, n, &audit_inode_hash[i], list) { - int res = update_lsm_rule(e); + list_for_each_entry_safe(r, n, &audit_rules_list[i], list) { + int res = update_lsm_rule(r); if (!err) err = res; } } - mutex_unlock(&audit_filter_mutex); return err; -- cgit v1.2.3 From e048e02c89db7bd49d1a5fac77a11c8fb3603087 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Dec 2008 03:51:22 -0500 Subject: make sure that filterkey of task,always rules is reported Signed-off-by: Al Viro --- kernel/auditsc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 19d2c2747c8d..8cbddff6c283 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -652,7 +652,7 @@ static int audit_filter_rules(struct task_struct *tsk, * completely disabled for this task. Since we only have the task * structure at this point, we can only check uid and gid. */ -static enum audit_state audit_filter_task(struct task_struct *tsk) +static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) { struct audit_entry *e; enum audit_state state; @@ -660,6 +660,8 @@ static enum audit_state audit_filter_task(struct task_struct *tsk) rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { + if (state == AUDIT_RECORD_CONTEXT) + *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); rcu_read_unlock(); return state; } @@ -866,18 +868,21 @@ int audit_alloc(struct task_struct *tsk) { struct audit_context *context; enum audit_state state; + char *key = NULL; if (likely(!audit_ever_enabled)) return 0; /* Return if not auditing. */ - state = audit_filter_task(tsk); + state = audit_filter_task(tsk, &key); if (likely(state == AUDIT_DISABLED)) return 0; if (!(context = audit_alloc_context(state))) { + kfree(key); audit_log_lost("out of memory in audit_alloc"); return -ENOMEM; } + context->filterkey = key; tsk->audit_context = context; set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT); @@ -1703,8 +1708,10 @@ void audit_syscall_exit(int valid, long return_code) context->sockaddr_len = 0; context->type = 0; context->fds[0] = -1; - kfree(context->filterkey); - context->filterkey = NULL; + if (context->state != AUDIT_RECORD_CONTEXT) { + kfree(context->filterkey); + context->filterkey = NULL; + } tsk->audit_context = context; } } -- cgit v1.2.3 From 36c4f1b18c8a7d0adb4085e7f531860b837bb6b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2008 01:50:28 -0500 Subject: clean up audit_rule_{add,del} a bit Signed-off-by: Al Viro --- kernel/auditfilter.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 5d4edc6f7a32..e6e3829cadd1 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1114,12 +1114,16 @@ static void audit_inotify_unregister(struct list_head *in_list) /* Find an existing audit rule. * Caller must hold audit_filter_mutex to prevent stale rule data. */ static struct audit_entry *audit_find_rule(struct audit_entry *entry, - struct list_head *list) + struct list_head **p) { struct audit_entry *e, *found = NULL; + struct list_head *list; int h; - if (entry->rule.watch) { + if (entry->rule.inode_f) { + h = audit_hash_ino(entry->rule.inode_f->val); + *p = list = &audit_inode_hash[h]; + } else if (entry->rule.watch) { /* we don't know the inode number, so must walk entire hash */ for (h = 0; h < AUDIT_INODE_BUCKETS; h++) { list = &audit_inode_hash[h]; @@ -1130,6 +1134,8 @@ static struct audit_entry *audit_find_rule(struct audit_entry *entry, } } goto out; + } else { + *p = list = &audit_filter_list[entry->rule.listnr]; } list_for_each_entry(e, list, list) @@ -1274,14 +1280,13 @@ static u64 prio_low = ~0ULL/2; static u64 prio_high = ~0ULL/2 - 1; /* Add rule to given filterlist if not a duplicate. */ -static inline int audit_add_rule(struct audit_entry *entry, - struct list_head *list) +static inline int audit_add_rule(struct audit_entry *entry) { struct audit_entry *e; - struct audit_field *inode_f = entry->rule.inode_f; struct audit_watch *watch = entry->rule.watch; struct audit_tree *tree = entry->rule.tree; struct nameidata *ndp = NULL, *ndw = NULL; + struct list_head *list; int h, err; #ifdef CONFIG_AUDITSYSCALL int dont_count = 0; @@ -1292,13 +1297,8 @@ static inline int audit_add_rule(struct audit_entry *entry, dont_count = 1; #endif - if (inode_f) { - h = audit_hash_ino(inode_f->val); - list = &audit_inode_hash[h]; - } - mutex_lock(&audit_filter_mutex); - e = audit_find_rule(entry, list); + e = audit_find_rule(entry, &list); mutex_unlock(&audit_filter_mutex); if (e) { err = -EEXIST; @@ -1372,15 +1372,14 @@ error: } /* Remove an existing rule from filterlist. */ -static inline int audit_del_rule(struct audit_entry *entry, - struct list_head *list) +static inline int audit_del_rule(struct audit_entry *entry) { struct audit_entry *e; - struct audit_field *inode_f = entry->rule.inode_f; struct audit_watch *watch, *tmp_watch = entry->rule.watch; struct audit_tree *tree = entry->rule.tree; + struct list_head *list; LIST_HEAD(inotify_list); - int h, ret = 0; + int ret = 0; #ifdef CONFIG_AUDITSYSCALL int dont_count = 0; @@ -1390,13 +1389,8 @@ static inline int audit_del_rule(struct audit_entry *entry, dont_count = 1; #endif - if (inode_f) { - h = audit_hash_ino(inode_f->val); - list = &audit_inode_hash[h]; - } - mutex_lock(&audit_filter_mutex); - e = audit_find_rule(entry, list); + e = audit_find_rule(entry, &list); if (!e) { mutex_unlock(&audit_filter_mutex); ret = -ENOENT; @@ -1603,8 +1597,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, if (IS_ERR(entry)) return PTR_ERR(entry); - err = audit_add_rule(entry, - &audit_filter_list[entry->rule.listnr]); + err = audit_add_rule(entry); audit_log_rule_change(loginuid, sessionid, sid, "add", &entry->rule, !err); @@ -1620,8 +1613,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, if (IS_ERR(entry)) return PTR_ERR(entry); - err = audit_del_rule(entry, - &audit_filter_list[entry->rule.listnr]); + err = audit_del_rule(entry); audit_log_rule_change(loginuid, sessionid, sid, "remove", &entry->rule, !err); -- cgit v1.2.3 From 5af75d8d58d0f9f7b7c0515b35786b22892d5f12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Dec 2008 05:59:26 -0500 Subject: audit: validate comparison operations, store them in sane form Don't store the field->op in the messy (and very inconvenient for e.g. audit_comparator()) form; translate to dense set of values and do full validation of userland-submitted value while we are at it. ->audit_init_rule() and ->audit_match_rule() get new values now; in-tree instances updated. Signed-off-by: Al Viro --- kernel/audit_tree.c | 2 +- kernel/auditfilter.c | 132 +++++++++++++++++++++++++-------------------------- 2 files changed, 66 insertions(+), 68 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 48bddad2a3dc..8ad9545b8db9 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -618,7 +618,7 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) if (pathname[0] != '/' || rule->listnr != AUDIT_FILTER_EXIT || - op & ~AUDIT_EQUAL || + op != Audit_equal || rule->inode_f || rule->watch || rule->tree) return -EINVAL; rule->tree = alloc_tree(pathname); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index e6e3829cadd1..fbf24d121d97 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -252,7 +252,8 @@ static inline int audit_to_inode(struct audit_krule *krule, struct audit_field *f) { if (krule->listnr != AUDIT_FILTER_EXIT || - krule->watch || krule->inode_f || krule->tree) + krule->watch || krule->inode_f || krule->tree || + (f->op != Audit_equal && f->op != Audit_not_equal)) return -EINVAL; krule->inode_f = f; @@ -270,7 +271,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len, if (path[0] != '/' || path[len-1] == '/' || krule->listnr != AUDIT_FILTER_EXIT || - op & ~AUDIT_EQUAL || + op != Audit_equal || krule->inode_f || krule->watch || krule->tree) return -EINVAL; @@ -420,12 +421,32 @@ exit_err: return ERR_PTR(err); } +static u32 audit_ops[] = +{ + [Audit_equal] = AUDIT_EQUAL, + [Audit_not_equal] = AUDIT_NOT_EQUAL, + [Audit_bitmask] = AUDIT_BIT_MASK, + [Audit_bittest] = AUDIT_BIT_TEST, + [Audit_lt] = AUDIT_LESS_THAN, + [Audit_gt] = AUDIT_GREATER_THAN, + [Audit_le] = AUDIT_LESS_THAN_OR_EQUAL, + [Audit_ge] = AUDIT_GREATER_THAN_OR_EQUAL, +}; + +static u32 audit_to_op(u32 op) +{ + u32 n; + for (n = Audit_equal; n < Audit_bad && audit_ops[n] != op; n++) + ; + return n; +} + + /* Translate struct audit_rule to kernel's rule respresentation. * Exists for backward compatibility with userspace. */ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) { struct audit_entry *entry; - struct audit_field *ino_f; int err = 0; int i; @@ -435,12 +456,28 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 n; + + n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); + + /* Support for legacy operators where + * AUDIT_NEGATE bit signifies != and otherwise assumes == */ + if (n & AUDIT_NEGATE) + f->op = Audit_not_equal; + else if (!n) + f->op = Audit_equal; + else + f->op = audit_to_op(n); + + entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1; - f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS); f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); f->val = rule->values[i]; err = -EINVAL; + if (f->op == Audit_bad) + goto exit_free; + switch(f->type) { default: goto exit_free; @@ -462,11 +499,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) case AUDIT_EXIT: case AUDIT_SUCCESS: /* bit ops are only useful on syscall args */ - if (f->op == AUDIT_BIT_MASK || - f->op == AUDIT_BIT_TEST) { - err = -EINVAL; + if (f->op == Audit_bitmask || f->op == Audit_bittest) goto exit_free; - } break; case AUDIT_ARG0: case AUDIT_ARG1: @@ -475,11 +509,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) break; /* arch is only allowed to be = or != */ case AUDIT_ARCH: - if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL) - && (f->op != AUDIT_NEGATE) && (f->op)) { - err = -EINVAL; + if (f->op != Audit_not_equal && f->op != Audit_equal) goto exit_free; - } entry->rule.arch_f = f; break; case AUDIT_PERM: @@ -496,33 +527,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) goto exit_free; break; } - - entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1; - - /* Support for legacy operators where - * AUDIT_NEGATE bit signifies != and otherwise assumes == */ - if (f->op & AUDIT_NEGATE) - f->op = AUDIT_NOT_EQUAL; - else if (!f->op) - f->op = AUDIT_EQUAL; - else if (f->op == AUDIT_OPERATORS) { - err = -EINVAL; - goto exit_free; - } } - ino_f = entry->rule.inode_f; - if (ino_f) { - switch(ino_f->op) { - case AUDIT_NOT_EQUAL: - entry->rule.inode_f = NULL; - case AUDIT_EQUAL: - break; - default: - err = -EINVAL; - goto exit_free; - } - } + if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal) + entry->rule.inode_f = NULL; exit_nofree: return entry; @@ -538,7 +546,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, { int err = 0; struct audit_entry *entry; - struct audit_field *ino_f; void *bufp; size_t remain = datasz - sizeof(struct audit_rule_data); int i; @@ -554,11 +561,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, struct audit_field *f = &entry->rule.fields[i]; err = -EINVAL; - if (!(data->fieldflags[i] & AUDIT_OPERATORS) || - data->fieldflags[i] & ~AUDIT_OPERATORS) + + f->op = audit_to_op(data->fieldflags[i]); + if (f->op == Audit_bad) goto exit_free; - f->op = data->fieldflags[i] & AUDIT_OPERATORS; f->type = data->fields[i]; f->val = data->values[i]; f->lsm_str = NULL; @@ -670,18 +677,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, } } - ino_f = entry->rule.inode_f; - if (ino_f) { - switch(ino_f->op) { - case AUDIT_NOT_EQUAL: - entry->rule.inode_f = NULL; - case AUDIT_EQUAL: - break; - default: - err = -EINVAL; - goto exit_free; - } - } + if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal) + entry->rule.inode_f = NULL; exit_nofree: return entry; @@ -721,10 +718,10 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule) rule->fields[i] = krule->fields[i].type; if (krule->vers_ops == 1) { - if (krule->fields[i].op & AUDIT_NOT_EQUAL) + if (krule->fields[i].op == Audit_not_equal) rule->fields[i] |= AUDIT_NEGATE; } else { - rule->fields[i] |= krule->fields[i].op; + rule->fields[i] |= audit_ops[krule->fields[i].op]; } } for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i]; @@ -752,7 +749,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) struct audit_field *f = &krule->fields[i]; data->fields[i] = f->type; - data->fieldflags[i] = f->op; + data->fieldflags[i] = audit_ops[f->op]; switch(f->type) { case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: @@ -1626,28 +1623,29 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, return err; } -int audit_comparator(const u32 left, const u32 op, const u32 right) +int audit_comparator(u32 left, u32 op, u32 right) { switch (op) { - case AUDIT_EQUAL: + case Audit_equal: return (left == right); - case AUDIT_NOT_EQUAL: + case Audit_not_equal: return (left != right); - case AUDIT_LESS_THAN: + case Audit_lt: return (left < right); - case AUDIT_LESS_THAN_OR_EQUAL: + case Audit_le: return (left <= right); - case AUDIT_GREATER_THAN: + case Audit_gt: return (left > right); - case AUDIT_GREATER_THAN_OR_EQUAL: + case Audit_ge: return (left >= right); - case AUDIT_BIT_MASK: + case Audit_bitmask: return (left & right); - case AUDIT_BIT_TEST: + case Audit_bittest: return ((left & right) == right); + default: + BUG(); + return 0; } - BUG(); - return 0; } /* Compare given dentry name with last component in given path, -- cgit v1.2.3 From 7b574b7b0124ed344911f5d581e9bc2d83bbeb19 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 4 Jan 2009 12:00:45 -0800 Subject: cgroups: fix a race between cgroup_clone and umount The race is calling cgroup_clone() while umounting the ns cgroup subsys, and thus cgroup_clone() might access invalid cgroup_fs, or kill_sb() is called after cgroup_clone() created a new dir in it. The BUG I triggered is BUG_ON(root->number_of_cgroups != 1); ------------[ cut here ]------------ kernel BUG at kernel/cgroup.c:1093! invalid opcode: 0000 [#1] SMP ... Process umount (pid: 5177, ti=e411e000 task=e40c4670 task.ti=e411e000) ... Call Trace: [] ? deactivate_super+0x3f/0x51 [] ? mntput_no_expire+0xb3/0xdd [] ? sys_umount+0x265/0x2ac [] ? sys_oldumount+0xd/0xf [] ? sysenter_do_call+0x12/0x31 ... EIP: [] cgroup_kill_sb+0x23/0xe0 SS:ESP 0068:e411ef2c ---[ end trace c766c1be3bf944ac ]--- Cc: Serge E. Hallyn Signed-off-by: Li Zefan Cc: Paul Menage Cc: "Serge E. Hallyn" Cc: Balbir Singh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 48348dde6d81..891a84eb9d30 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2945,7 +2945,11 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, parent = task_cgroup(tsk, subsys->subsys_id); /* Pin the hierarchy */ - atomic_inc(&parent->root->sb->s_active); + if (!atomic_inc_not_zero(&parent->root->sb->s_active)) { + /* We race with the final deactivate_super() */ + mutex_unlock(&cgroup_mutex); + return 0; + } /* Keep the cgroup alive */ get_css_set(cg); -- cgit v1.2.3 From ca4787b779dd698a2a33a328aa5fa90a3e954077 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 5 Jan 2009 08:40:10 -0600 Subject: kernel/module.c: compare symbol values when marking symbols as exported in /proc/kallsyms. When there are two symbols in a module with the same name, one of which is exported, both will be marked as exported in /proc/kallsyms. There aren't any instances of this in the current kernel, but it is easy to construct a simple module with two compilation units that exhibits the problem. $ objdump -j .text -t testmod.ko | grep foo 00000000 l F .text 00000032 foo 00000080 g F .text 00000001 foo $ sudo insmod testmod.ko $ grep "T foo" /proc/kallsyms c28e8000 T foo [testmod] c28e8080 T foo [testmod] Fix this by comparing the symbol values once we've found the exported symbol table entry matching the symbol name. Tested using Ksplice: $ ksplice-create --patch=this_commit.patch --id=bar . $ sudo ksplice-apply ksplice-bar.tar.gz Done! $ grep "T foo" /proc/kallsyms c28e8080 T foo [testmod] Signed-off-by: Tim Abbott Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- kernel/module.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index dd2a54155b54..895c5675edb7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1725,15 +1725,15 @@ static const struct kernel_symbol *lookup_symbol(const char *name, return NULL; } -static int is_exported(const char *name, const struct module *mod) +static int is_exported(const char *name, unsigned long value, + const struct module *mod) { - if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) - return 1; + const struct kernel_symbol *ks; + if (!mod) + ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else - if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) - return 1; - else - return 0; + ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + return ks != NULL && ks->value == value; } /* As per nm */ @@ -2504,7 +2504,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); - *exported = is_exported(name, mod); + *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } -- cgit v1.2.3 From d1e99d7ae4e6bbd1ebb5e81ecd3af2b8793efee0 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 8 Dec 2008 14:26:29 +0800 Subject: module: fix warning of unused function when !CONFIG_PROC_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this warning: kernel/module.c:824: warning: ‘print_unload_info’ defined but not used print_unload_info() just was used when CONFIG_PROC_FS was defined. This patch mark print_unload_info() inline to solve the problem. Signed-off-by: Jianjun Kong Signed-off-by: Rusty Russell CC: Ingo Molnar CC: Américo Wang --- kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 895c5675edb7..d3d254571bda 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -820,7 +820,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) return ret; } -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { struct module_use *use; int printed_something = 0; @@ -893,7 +893,7 @@ void module_put(struct module *module) EXPORT_SYMBOL(module_put); #else /* !CONFIG_MODULE_UNLOAD */ -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. */ seq_printf(m, " - -"); -- cgit v1.2.3 From 088af9a6e05d51e7c3dc85d45d8b7a52c3ee08d7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Dec 2008 12:31:18 +0100 Subject: module: fix module loading failure of large kernel modules for parisc When creating the final layout of a kernel module in memory, allow the module loader to reserve some additional memory in front of a given section. This is currently only needed for the parisc port which needs to put the stub entries there to fulfill the 17/22bit PCREL relocations with large kernel modules like xfs. Signed-off-by: Helge Deller Signed-off-by: Rusty Russell (renamed fn) --- kernel/module.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index d3d254571bda..4299aefc20b8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1578,11 +1578,21 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return ret; } +/* Additional bytes needed by arch in front of individual sections */ +unsigned int __weak arch_mod_section_prepend(struct module *mod, + unsigned int section) +{ + /* default implementation just returns zero */ + return 0; +} + /* Update size with this section: return offset. */ -static long get_offset(unsigned int *size, Elf_Shdr *sechdr) +static long get_offset(struct module *mod, unsigned int *size, + Elf_Shdr *sechdr, unsigned int section) { long ret; + *size += arch_mod_section_prepend(mod, section); ret = ALIGN(*size, sechdr->sh_addralign ?: 1); *size = ret + sechdr->sh_size; return ret; @@ -1622,7 +1632,7 @@ static void layout_sections(struct module *mod, || strncmp(secstrings + s->sh_name, ".init", 5) == 0) continue; - s->sh_entsize = get_offset(&mod->core_size, s); + s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", secstrings + s->sh_name); } if (m == 0) @@ -1640,7 +1650,7 @@ static void layout_sections(struct module *mod, || strncmp(secstrings + s->sh_name, ".init", 5) != 0) continue; - s->sh_entsize = (get_offset(&mod->init_size, s) + s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", secstrings + s->sh_name); } -- cgit v1.2.3 From 9ea09af3bd3090e8349ca2899ca2011bd94cda85 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Dec 2008 12:36:30 +0100 Subject: stop_machine: introduce stop_machine_create/destroy. Introduce stop_machine_create/destroy. With this interface subsystems that need a non-failing stop_machine environment can create the stop_machine machine threads before actually calling stop_machine. When the threads aren't needed anymore they can be killed with stop_machine_destroy again. When stop_machine gets called and the threads aren't present they will be created and destroyed automatically. This restores the old behaviour of stop_machine. This patch also converts cpu hotplug to the new interface since it is special: cpu_down calls __stop_machine instead of stop_machine. However the kstop threads will only be created when stop_machine gets called. Changing the code so that the threads would be created automatically on __stop_machine is currently not possible: when __stop_machine gets called we hold cpu_add_remove_lock, which is the same lock that create_rt_workqueue would take. So the workqueue needs to be created before the cpu hotplug code locks cpu_add_remove_lock. Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell --- kernel/cpu.c | 6 +++++- kernel/stop_machine.c | 55 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 47fff3b63cbf..30e74dd6d01b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -269,8 +269,11 @@ out_release: int __ref cpu_down(unsigned int cpu) { - int err = 0; + int err; + err = stop_machine_create(); + if (err) + return err; cpu_maps_update_begin(); if (cpu_hotplug_disabled) { @@ -297,6 +300,7 @@ int __ref cpu_down(unsigned int cpu) out: cpu_maps_update_done(); + stop_machine_destroy(); return err; } EXPORT_SYMBOL(cpu_down); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 286c41722e8c..0cd415ee62a2 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -38,7 +38,10 @@ struct stop_machine_data { static unsigned int num_threads; static atomic_t thread_ack; static DEFINE_MUTEX(lock); - +/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ +static DEFINE_MUTEX(setup_lock); +/* Users of stop_machine. */ +static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const cpumask_t *active_cpus; @@ -109,6 +112,43 @@ static int chill(void *unused) return 0; } +int stop_machine_create(void) +{ + mutex_lock(&setup_lock); + if (refcount) + goto done; + stop_machine_wq = create_rt_workqueue("kstop"); + if (!stop_machine_wq) + goto err_out; + stop_machine_work = alloc_percpu(struct work_struct); + if (!stop_machine_work) + goto err_out; +done: + refcount++; + mutex_unlock(&setup_lock); + return 0; + +err_out: + if (stop_machine_wq) + destroy_workqueue(stop_machine_wq); + mutex_unlock(&setup_lock); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(stop_machine_create); + +void stop_machine_destroy(void) +{ + mutex_lock(&setup_lock); + refcount--; + if (refcount) + goto done; + destroy_workqueue(stop_machine_wq); + free_percpu(stop_machine_work); +done: + mutex_unlock(&setup_lock); +} +EXPORT_SYMBOL_GPL(stop_machine_destroy); + int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { struct work_struct *sm_work; @@ -146,19 +186,14 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { int ret; + ret = stop_machine_create(); + if (ret) + return ret; /* No CPUs can come up or down during this. */ get_online_cpus(); ret = __stop_machine(fn, data, cpus); put_online_cpus(); - + stop_machine_destroy(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); - -static int __init stop_machine_init(void) -{ - stop_machine_wq = create_rt_workqueue("kstop"); - stop_machine_work = alloc_percpu(struct work_struct); - return 0; -} -core_initcall(stop_machine_init); -- cgit v1.2.3 From 9e01892c4234070bbcf3a9f582514c8b91464375 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Dec 2008 12:36:31 +0100 Subject: module: convert to stop_machine_create/destroy. The module code relies on a non-failing stop_machine call. So we create the kstop threads in advance and with that make sure the call won't fail. Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell --- kernel/module.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 4299aefc20b8..f47cce910f25 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -757,8 +757,16 @@ sys_delete_module(const char __user *name_user, unsigned int flags) return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; + /* Create stop_machine threads since free_module relies on + * a non-failing stop_machine call. */ + ret = stop_machine_create(); + if (ret) + return ret; + + if (mutex_lock_interruptible(&module_mutex) != 0) { + ret = -EINTR; + goto out_stop; + } mod = find_module(name); if (!mod) { @@ -817,6 +825,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags) out: mutex_unlock(&module_mutex); +out_stop: + stop_machine_destroy(); return ret; } @@ -1875,6 +1885,13 @@ static noinline struct module *load_module(void __user *umod, /* vmalloc barfs on "unusual" numbers. Check here */ if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) return ERR_PTR(-ENOMEM); + + /* Create stop_machine threads since the error path relies on + * a non-failing stop_machine call. */ + err = stop_machine_create(); + if (err) + goto free_hdr; + if (copy_from_user(hdr, umod, len) != 0) { err = -EFAULT; goto free_hdr; @@ -2258,6 +2275,7 @@ static noinline struct module *load_module(void __user *umod, /* Get rid of temporary copy */ vfree(hdr); + stop_machine_destroy(); /* Done! */ return mod; @@ -2280,6 +2298,7 @@ static noinline struct module *load_module(void __user *umod, kfree(args); free_hdr: vfree(hdr); + stop_machine_destroy(); return ERR_PTR(err); truncated: -- cgit v1.2.3 From 56ff5efad96182f4d3cb3dc6b07396762c658f16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 9 Dec 2008 09:34:39 -0500 Subject: zero i_uid/i_gid on inode allocation ... and don't bother in callers. Don't bother with zeroing i_blocks, while we are at it - it's already been zeroed. i_mode is not worth the effort; it has no common default value. Signed-off-by: Al Viro --- kernel/cgroup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 48348dde6d81..f7c5099a0572 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -573,7 +573,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info; } -- cgit v1.2.3