diff options
Diffstat (limited to 'kernel')
34 files changed, 293 insertions, 147 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 4414e93d8750..ce6d8ea3131e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -61,8 +61,11 @@ #include "audit.h" -/* No auditing will take place until audit_initialized != 0. +/* No auditing will take place until audit_initialized == AUDIT_INITIALIZED. * (Initialization happens after skb_init is called.) */ +#define AUDIT_DISABLED -1 +#define AUDIT_UNINITIALIZED 0 +#define AUDIT_INITIALIZED 1 static int audit_initialized; #define AUDIT_OFF 0 @@ -965,6 +968,9 @@ static int __init audit_init(void) { int i; + if (audit_initialized == AUDIT_DISABLED) + return 0; + printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, @@ -976,7 +982,7 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); - audit_initialized = 1; + audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; @@ -999,13 +1005,21 @@ __initcall(audit_init); static int __init audit_enable(char *str) { audit_default = !!simple_strtol(str, NULL, 0); - printk(KERN_INFO "audit: %s%s\n", - audit_default ? "enabled" : "disabled", - audit_initialized ? "" : " (after initialization)"); - if (audit_initialized) { + if (!audit_default) + audit_initialized = AUDIT_DISABLED; + + printk(KERN_INFO "audit: %s", audit_default ? "enabled" : "disabled"); + + if (audit_initialized == AUDIT_INITIALIZED) { audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; + } else if (audit_initialized == AUDIT_UNINITIALIZED) { + printk(" (after initialization)"); + } else { + printk(" (until reboot)"); } + printk("\n"); + return 1; } @@ -1107,9 +1121,7 @@ unsigned int audit_serial(void) static inline void audit_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { - if (ctx) - auditsc_get_stamp(ctx, t, serial); - else { + if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { *t = CURRENT_TIME; *serial = audit_serial(); } @@ -1146,7 +1158,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int reserve; unsigned long timeout_start = jiffies; - if (!audit_initialized) + if (audit_initialized != AUDIT_INITIALIZED) return NULL; if (unlikely(audit_filter_type(type))) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..2a3f0afc4d2a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1459,7 +1459,6 @@ void audit_free(struct task_struct *tsk) /** * audit_syscall_entry - fill in an audit record at syscall entry - * @tsk: task being audited * @arch: architecture type * @major: major syscall type (function) * @a1: additional syscall register 1 @@ -1548,9 +1547,25 @@ void audit_syscall_entry(int arch, int major, context->ppid = 0; } +void audit_finish_fork(struct task_struct *child) +{ + struct audit_context *ctx = current->audit_context; + struct audit_context *p = child->audit_context; + if (!p || !ctx || !ctx->auditable) + return; + p->arch = ctx->arch; + p->major = ctx->major; + memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); + p->ctime = ctx->ctime; + p->dummy = ctx->dummy; + p->auditable = ctx->auditable; + p->in_syscall = ctx->in_syscall; + p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); + p->ppid = current->pid; +} + /** * audit_syscall_exit - deallocate audit context after a system call - * @tsk: task being audited * @valid: success/failure flag * @return_code: syscall return value * @@ -1942,15 +1957,18 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); * * Also sets the context as auditable. */ -void auditsc_get_stamp(struct audit_context *ctx, +int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { + if (!ctx->in_syscall) + return 0; if (!ctx->serial) ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; ctx->auditable = 1; + return 1; } /* global counter which is incremented every time something logs in */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fe00b3b983a8..2606d0fb4e54 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -702,7 +702,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, * any child cgroups exist. This is theoretically supportable * but involves complex error handling, so it's being left until * later */ - if (!list_empty(&cgrp->children)) + if (root->number_of_cgroups > 1) return -EBUSY; /* Process each subsystem */ @@ -1024,7 +1024,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, if (ret == -EBUSY) { mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); - goto drop_new_super; + goto free_cg_links; } /* EBUSY should be the only error here */ @@ -1073,10 +1073,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, return simple_set_mnt(mnt, sb); + free_cg_links: + free_cg_links(&tmp_cg_links); drop_new_super: up_write(&sb->s_umount); deactivate_super(sb); - free_cg_links(&tmp_cg_links); return ret; } @@ -2934,9 +2935,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, again: root = subsys->root; if (root == &rootnode) { - printk(KERN_INFO - "Not cloning cgroup for unused subsystem %s\n", - subsys->name); mutex_unlock(&cgroup_mutex); return 0; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 5a732c5ef08b..8ea32e8d68b0 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -462,7 +462,7 @@ out: * It must be called by the arch code on the new cpu, before the new cpu * enables interrupts and before the "boot" cpu returns from __cpu_up(). */ -void notify_cpu_starting(unsigned int cpu) +void __cpuinit notify_cpu_starting(unsigned int cpu) { unsigned long val = CPU_STARTING; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index da7ff6137f37..96c0ba13b8cd 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -585,7 +585,7 @@ static int generate_sched_domains(cpumask_t **domains, int i, j, k; /* indices for partition finding loops */ cpumask_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ - int ndoms; /* number of sched domains in result */ + int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ doms = NULL; diff --git a/kernel/exit.c b/kernel/exit.c index 2d8be7ebb0f7..30fcdf16737a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1321,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options, * group, which consolidates times for all threads in the * group including the group leader. */ + thread_group_cputime(p, &cputime); spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; - thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, cputime_add(cputime.utime, diff --git a/kernel/extable.c b/kernel/extable.c index a26cb2e17023..adf0cc9c02d6 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr) return 1; return module_text_address(addr) != NULL; } + +/* + * On some architectures (PPC64, IA64) function pointers + * are actually only tokens to some data that then holds the + * real function address. As a result, to find if a function + * pointer is part of the kernel text, we need to do some + * special dereferencing first. + */ +int func_ptr_is_kernel_text(void *ptr) +{ + unsigned long addr; + addr = (unsigned long) dereference_function_descriptor(ptr); + if (core_kernel_text(addr)) + return 1; + return module_text_address(addr) != NULL; +} diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0e206f..495da2e9a8b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -315,17 +315,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; + struct address_space *mapping = file->f_mapping; + get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - - /* insert tmp into the share list, just after mpnt */ - spin_lock(&file->f_mapping->i_mmap_lock); + spin_lock(&mapping->i_mmap_lock); + if (tmp->vm_flags & VM_SHARED) + mapping->i_mmap_writable++; tmp->vm_truncate_count = mpnt->vm_truncate_count; - flush_dcache_mmap_lock(file->f_mapping); + flush_dcache_mmap_lock(mapping); + /* insert tmp into the share list, just after mpnt */ vma_prio_tree_add(tmp, mpnt); - flush_dcache_mmap_unlock(file->f_mapping); - spin_unlock(&file->f_mapping->i_mmap_lock); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); } /* @@ -1398,6 +1401,7 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + audit_finish_fork(p); tracehook_report_clone(trace, regs, clone_flags, nr, p); /* diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c9767e641980..64c1c7253dae 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -25,6 +25,8 @@ static inline void unregister_handler_proc(unsigned int irq, struct irqaction *action) { } #endif +extern int irq_select_affinity_usr(unsigned int irq); + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c498a1b8c621..801addda3c43 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -82,24 +82,27 @@ int irq_can_set_affinity(unsigned int irq) int irq_set_affinity(unsigned int irq, cpumask_t cpumask) { struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; if (!desc->chip->set_affinity) return -EINVAL; + spin_lock_irqsave(&desc->lock, flags); + #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); - spin_unlock_irqrestore(&desc->lock, flags); - } else - set_pending_irq(irq, cpumask); + } else { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = cpumask; + } #else desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); #endif + desc->status |= IRQ_AFFINITY_SET; + spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -107,24 +110,59 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) /* * Generic version of the affinity autoselector. */ -int irq_select_affinity(unsigned int irq) +int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { cpumask_t mask; - struct irq_desc *desc; if (!irq_can_set_affinity(irq)) return 0; cpus_and(mask, cpu_online_map, irq_default_affinity); - desc = irq_to_desc(irq); + /* + * Preserve an userspace affinity setup, but make sure that + * one of the targets is online. + */ + if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { + if (cpus_intersects(desc->affinity, cpu_online_map)) + mask = desc->affinity; + else + desc->status &= ~IRQ_AFFINITY_SET; + } + desc->affinity = mask; desc->chip->set_affinity(irq, mask); return 0; } +#else +static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d) +{ + return irq_select_affinity(irq); +} #endif +/* + * Called when affinity is set via /proc/irq + */ +int irq_select_affinity_usr(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + int ret; + + spin_lock_irqsave(&desc->lock, flags); + ret = do_irq_select_affinity(irq, desc); + spin_unlock_irqrestore(&desc->lock, flags); + + return ret; +} + +#else +static inline int do_irq_select_affinity(int irq, struct irq_desc *desc) +{ + return 0; +} #endif /** @@ -327,7 +365,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, * IRQF_TRIGGER_* but the PIC does not support multiple * flow-types? */ - pr_warning("No set_type function for IRQ %d (%s)\n", irq, + pr_debug("No set_type function for IRQ %d (%s)\n", irq, chip ? (chip->name ? : "unknown") : "unknown"); return 0; } @@ -445,8 +483,12 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new) /* Undo nested disables: */ desc->depth = 1; + /* Exclude IRQ from balancing if requested */ + if (new->flags & IRQF_NOBALANCING) + desc->status |= IRQ_NO_BALANCING; + /* Set default affinity mask once everything is setup */ - irq_select_affinity(irq); + do_irq_select_affinity(irq, desc); } else if ((new->flags & IRQF_TRIGGER_MASK) && (new->flags & IRQF_TRIGGER_MASK) @@ -459,10 +501,6 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new) *p = new; - /* Exclude IRQ from balancing */ - if (new->flags & IRQF_NOBALANCING) - desc->status |= IRQ_NO_BALANCING; - /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 90b920d3f52b..9db681d95814 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -1,17 +1,6 @@ #include <linux/irq.h> -void set_pending_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; - spin_unlock_irqrestore(&desc->lock, flags); -} - void move_masked_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 4d161c70ba55..d257e7d6a8a4 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -62,7 +62,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, if (!cpus_intersects(new_value, cpu_online_map)) /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - return irq_select_affinity(irq) ? -EINVAL : count; + return irq_select_affinity_usr(irq) ? -EINVAL : count; irq_set_affinity(irq, new_value); diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c56923..449db466bdbc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 06e157119d2b..e4bdda8dcf04 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_contention_point(struct lock_class *class, unsigned long ip) +static int lock_point(unsigned long points[], unsigned long ip) { int i; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { - if (class->contention_point[i] == 0) { - class->contention_point[i] = ip; + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; break; } - if (class->contention_point[i] == ip) + if (points[i] == ip) break; } @@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - int i, point; + int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - point = lock_contention_point(hlock_class(hlock), ip); + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); stats = get_lock_stats(hlock_class(hlock)); - if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[point]++; + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock) +__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3083,6 +3091,7 @@ found_it: put_lock_stats(stats); lock->cpu = cpu; + lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock) +void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock); + __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } @@ -3276,10 +3285,10 @@ void __init lockdep_info(void) { printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 20dbcbf9c7dd..13716b813896 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length) static void snprint_time(char *buf, size_t bufsiz, s64 nr) { - unsigned long rem; + s64 div; + s32 rem; nr += 5; /* for display rounding */ - rem = do_div(nr, 1000); /* XXX: do_div_signed */ - snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); + div = div_s64_rem(nr, 1000, &rem); + snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10); } static void seq_time(struct seq_file *m, s64 time) @@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + for (i = 0; i < LOCKSTAT_POINTS; i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contending_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); + } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.2\n"); + seq_printf(m, "lock_stat version 0.3\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/mutex.c b/kernel/mutex.c index 12c779dc65d4..4f45d4b658ef 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -static void noinline __sched +static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); /*** @@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif -static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); /*** * mutex_unlock - release the mutex @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map); + lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); @@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) /* * Release the lock, slowpath: */ -static noinline void +static __used noinline void __mutex_unlock_slowpath(atomic_t *lock_count) { __mutex_unlock_common_slowpath(lock_count, 1); @@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); -static noinline void __sched +static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); diff --git a/kernel/notifier.c b/kernel/notifier.c index 4282c0a40a57..61d5aa5eced3 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, while (nb && nr_to_call) { next_nb = rcu_dereference(nb->next); + +#ifdef CONFIG_DEBUG_NOTIFIERS + if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { + WARN(1, "Invalid notifier called!"); + nb = next_nb; + continue; + } +#endif ret = nb->notifier_call(nb, val, v); if (nr_calls) diff --git a/kernel/panic.c b/kernel/panic.c index 6513aac8e992..13f06349a786 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -21,6 +21,7 @@ #include <linux/debug_locks.h> #include <linux/random.h> #include <linux/kallsyms.h> +#include <linux/dmi.h> int panic_on_oops; static unsigned long tainted_mask; @@ -167,6 +168,7 @@ static const struct tnt tnts[] = { * 'M' - System experienced a machine check exception. * 'B' - System has hit bad_page. * 'U' - Userspace-defined naughtiness. + * 'D' - Kernel has oopsed before * 'A' - ACPI table overridden. * 'W' - Taint on warning. * 'C' - modules from drivers/staging are loaded. @@ -320,36 +322,27 @@ void oops_exit(void) } #ifdef WANT_WARN_ON_SLOWPATH -void warn_on_slowpath(const char *file, int line) -{ - char function[KSYM_SYMBOL_LEN]; - unsigned long caller = (unsigned long) __builtin_return_address(0); - sprint_symbol(function, caller); - - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, - line, function); - print_modules(); - dump_stack(); - print_oops_end_marker(); - add_taint(TAINT_WARN); -} -EXPORT_SYMBOL(warn_on_slowpath); - - void warn_slowpath(const char *file, int line, const char *fmt, ...) { va_list args; char function[KSYM_SYMBOL_LEN]; unsigned long caller = (unsigned long)__builtin_return_address(0); + const char *board; + sprint_symbol(function, caller); printk(KERN_WARNING "------------[ cut here ]------------\n"); printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line, function); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (board) + printk(KERN_WARNING "Hardware name: %s\n", board); + + if (fmt) { + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + } print_modules(); dump_stack(); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 895337b16a24..157de3a47832 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -58,21 +58,21 @@ void thread_group_cputime( struct task_struct *tsk, struct task_cputime *times) { - struct signal_struct *sig; + struct task_cputime *totals, *tot; int i; - struct task_cputime *tot; - sig = tsk->signal; - if (unlikely(!sig) || !sig->cputime.totals) { + totals = tsk->signal->cputime.totals; + if (!totals) { times->utime = tsk->utime; times->stime = tsk->stime; times->sum_exec_runtime = tsk->se.sum_exec_runtime; return; } + times->stime = times->utime = cputime_zero; times->sum_exec_runtime = 0; for_each_possible_cpu(i) { - tot = per_cpu_ptr(tsk->signal->cputime.totals, i); + tot = per_cpu_ptr(totals, i); times->utime = cputime_add(times->utime, tot->utime); times->stime = cputime_add(times->stime, tot->stime); times->sum_exec_runtime += tot->sum_exec_runtime; @@ -311,7 +311,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_cputime cputime; thread_group_cputime(p, &cputime); - switch (which_clock) { + switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5e79c662294b..a140e44eebba 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -197,6 +197,11 @@ static int common_timer_create(struct k_itimer *new_timer) return 0; } +static int no_timer_create(struct k_itimer *new_timer) +{ + return -EOPNOTSUPP; +} + /* * Return nonzero if we know a priori this clockid_t value is bogus. */ @@ -248,6 +253,7 @@ static __init int init_posix_timers(void) .clock_getres = hrtimer_get_res, .clock_get = posix_get_monotonic_raw, .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b7713b53d07a..6da14358537c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -633,7 +633,7 @@ void swsusp_close(fmode_t mode) return; } - blkdev_put(resume_bdev, mode); /* move up */ + blkdev_put(resume_bdev, mode); } static int swsusp_header_init(void) diff --git a/kernel/profile.c b/kernel/profile.c index 5b7d1ac7124c..dc41827fbfee 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -351,7 +351,7 @@ out: put_cpu(); } -static int __devinit profile_cpu_callback(struct notifier_block *info, +static int __cpuinit profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { int node, cpu = (unsigned long)__cpu; @@ -596,7 +596,7 @@ out_cleanup: #define create_hash_tables() ({ 0; }) #endif -int create_proc_profile(void) +int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */ { struct proc_dir_entry *entry; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1e68e4c39e2c..4c8bcd7dd8e0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) return (copied == sizeof(data)) ? 0 : -EIO; } -#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE +#if defined CONFIG_COMPAT #include <linux/compat.h> int compat_ptrace_request(struct task_struct *child, compat_long_t request, @@ -709,4 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, unlock_kernel(); return ret; } -#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */ +#endif /* CONFIG_COMPAT */ diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 37f72e551542..e503a002f330 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) /* OK, time to rat on our buddy... */ - printk(KERN_ERR "RCU detected CPU stalls:"); + printk(KERN_ERR "INFO: RCU detected CPU stalls:"); for_each_possible_cpu(cpu) { if (cpu_isset(cpu, rcp->cpumask)) printk(" %d", cpu); @@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp) { unsigned long flags; - printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", + printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", smp_processor_id(), jiffies, jiffies - rcp->gp_start); dump_stack(); diff --git a/kernel/relay.c b/kernel/relay.c index 32b0befdcb6a..09ac2008f77b 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1317,12 +1317,9 @@ static ssize_t relay_file_splice_read(struct file *in, if (ret < 0) break; else if (!ret) { - if (spliced) - break; - if (flags & SPLICE_F_NONBLOCK) { + if (flags & SPLICE_F_NONBLOCK) ret = -EAGAIN; - break; - } + break; } *ppos += ret; diff --git a/kernel/sched.c b/kernel/sched.c index 9b1e79371c20..3e70963120a0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1453,9 +1453,10 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); static unsigned long cpu_avg_load_per_task(int cpu) { struct rq *rq = cpu_rq(cpu); + unsigned long nr_running = ACCESS_ONCE(rq->nr_running); - if (rq->nr_running) - rq->avg_load_per_task = rq->load.weight / rq->nr_running; + if (nr_running) + rq->avg_load_per_task = rq->load.weight / nr_running; else rq->avg_load_per_task = 0; @@ -4202,7 +4203,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal) if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); - account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@ -4338,7 +4338,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) return; /* * Is the spinlock portion underflowing? @@ -6586,7 +6586,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) req = list_entry(rq->migration_queue.next, struct migration_req, list); list_del_init(&req->list); + spin_unlock_irq(&rq->lock); complete(&req->done); + spin_lock_irq(&rq->lock); } spin_unlock_irq(&rq->lock); break; diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 81787248b60f..e8ab096ddfe3 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -118,13 +118,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) /* * scd->clock = clamp(scd->tick_gtod + delta, - * max(scd->tick_gtod, scd->clock), - * max(scd->clock, scd->tick_gtod + TICK_NSEC)); + * max(scd->tick_gtod, scd->clock), + * scd->tick_gtod + TICK_NSEC); */ clock = scd->tick_gtod + delta; min_clock = wrap_max(scd->tick_gtod, scd->clock); - max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); + max_clock = scd->tick_gtod + TICK_NSEC; clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 3953e4aed733..1ab790c67b17 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024; /* * Zero means infinite timeout - no checking done: */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; +unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; unsigned long __read_mostly sysctl_hung_task_warnings = 10; @@ -188,7 +188,7 @@ static void check_hung_task(struct task_struct *t, unsigned long now) if ((long)(now - t->last_switch_timestamp) < sysctl_hung_task_timeout_secs) return; - if (sysctl_hung_task_warnings < 0) + if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d16..5fc3a0cfb994 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms) struct task_cputime cputime; cputime_t cutime, cstime; - spin_lock_irq(¤t->sighand->siglock); thread_group_cputime(current, &cputime); + spin_lock_irq(¤t->sighand->siglock); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d048fa2d902..3d56fe7570da 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -176,6 +176,9 @@ extern struct ctl_table random_table[]; #ifdef CONFIG_INOTIFY_USER extern struct ctl_table inotify_table[]; #endif +#ifdef CONFIG_EPOLL +extern struct ctl_table epoll_table[]; +#endif #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT int sysctl_legacy_va_layout; @@ -1325,6 +1328,13 @@ static struct ctl_table fs_table[] = { .child = inotify_table, }, #endif +#ifdef CONFIG_EPOLL + { + .procname = "epoll", + .mode = 0555, + .child = epoll_table, + }, +#endif #endif { .ctl_name = KERN_SETUID_DUMPABLE, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e7acfb482a68..fa05e88aa76f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -518,6 +518,28 @@ void update_wall_time(void) /* correct the clock when NTP error is too big */ clocksource_adjust(offset); + /* + * Since in the loop above, we accumulate any amount of time + * in xtime_nsec over a second into xtime.tv_sec, its possible for + * xtime_nsec to be fairly small after the loop. Further, if we're + * slightly speeding the clocksource up in clocksource_adjust(), + * its possible the required corrective factor to xtime_nsec could + * cause it to underflow. + * + * Now, we cannot simply roll the accumulated second back, since + * the NTP subsystem has been notified via second_overflow. So + * instead we push xtime_nsec forward by the amount we underflowed, + * and add that amount into the error. + * + * We'll correct this error next time through this function, when + * xtime_nsec is not as small. + */ + if (unlikely((s64)clock->xtime_nsec < 0)) { + s64 neg = -(s64)clock->xtime_nsec; + clock->xtime_nsec = 0; + clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + } + /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f780e9552f91..668bbb5ef2bd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1215,7 +1215,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, out: if (resched) - preempt_enable_notrace(); + preempt_enable_no_resched_notrace(); else preempt_enable_notrace(); return NULL; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index f28484618ff0..e62cbf78eab6 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -18,12 +18,14 @@ struct header_iter { static struct trace_array *mmio_trace_array; static bool overrun_detected; +static unsigned long prev_overruns; static void mmio_reset_data(struct trace_array *tr) { int cpu; overrun_detected = false; + prev_overruns = 0; tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) @@ -128,16 +130,12 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { - int cpu; unsigned long cnt = 0; -/* FIXME: */ -#if 0 - for_each_online_cpu(cpu) { - cnt += iter->overrun[cpu]; - iter->overrun[cpu] = 0; - } -#endif - (void)cpu; + unsigned long over = ring_buffer_overruns(iter->tr->buffer); + + if (over > prev_overruns) + cnt = over - prev_overruns; + prev_overruns = over; return cnt; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index be682b62fe58..3bdb44bde4b7 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -184,11 +184,16 @@ static struct file_operations stack_max_size_fops = { static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - long i = (long)m->private; + long i; (*pos)++; - i++; + if (v == SEQ_START_TOKEN) + i = 0; + else { + i = *(long *)v; + i++; + } if (i >= max_stack_trace.nr_entries || stack_dump_trace[i] == ULONG_MAX) @@ -201,12 +206,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - void *t = &m->private; + void *t = SEQ_START_TOKEN; loff_t l = 0; local_irq_disable(); __raw_spin_lock(&max_stack_lock); + if (*pos == 0) + return SEQ_START_TOKEN; + for (; t && l < *pos; t = t_next(m, t, &l)) ; @@ -235,10 +243,10 @@ static int trace_lookup_stack(struct seq_file *m, long i) static int t_show(struct seq_file *m, void *v) { - long i = *(long *)v; + long i; int size; - if (i < 0) { + if (v == SEQ_START_TOKEN) { seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", @@ -246,6 +254,8 @@ static int t_show(struct seq_file *m, void *v) return 0; } + i = *(long *)v; + if (i >= max_stack_trace.nr_entries || stack_dump_trace[i] == ULONG_MAX) return 0; @@ -275,10 +285,6 @@ static int stack_trace_open(struct inode *inode, struct file *file) int ret; ret = seq_open(file, &stack_trace_seq_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = (void *)-1; - } return ret; } |