Diffstat (limited to 'fs/proc')
 -rw-r--r--  fs/proc/array.c     | 125
 -rw-r--r--  fs/proc/base.c      | 247
 -rw-r--r--  fs/proc/generic.c   |  52
 -rw-r--r--  fs/proc/inode.c     | 279
 -rw-r--r--  fs/proc/proc_misc.c |  25
 -rw-r--r--  fs/proc/proc_tty.c  |  15
 6 files changed, 595 insertions(+), 148 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c0381..ee4814dd98f9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,8 @@
#include <linux/mman.h>
#include <linux/proc_fs.h>
#include <linux/ioport.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
@@ -76,9 +78,7 @@
#include <linux/rcupdate.h>
#include <linux/delayacct.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/io.h>
#include <asm/processor.h>
#include "internal.h"
@@ -87,10 +87,10 @@
do { memcpy(buffer, string, strlen(string)); \
buffer += strlen(string); } while (0)
-static inline char * task_name(struct task_struct *p, char * buf)
+static inline char *task_name(struct task_struct *p, char *buf)
{
int i;
- char * name;
+ char *name;
char tcomm[sizeof(p->comm)];
get_task_comm(tcomm, p);
@@ -138,7 +138,7 @@ static const char *task_state_array[] = {
"X (dead)" /* 32 */
};
-static inline const char * get_task_state(struct task_struct *tsk)
+static inline const char *get_task_state(struct task_struct *tsk)
{
unsigned int state = (tsk->state & (TASK_RUNNING |
TASK_INTERRUPTIBLE |
@@ -156,7 +156,7 @@ static inline const char * get_task_state(struct task_struct *tsk)
return *p;
}
-static inline char * task_state(struct task_struct *p, char *buffer)
+static inline char *task_state(struct task_struct *p, char *buffer)
{
struct group_info *group_info;
int g;
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
rcu_read_lock();
buffer += sprintf(buffer,
"State:\t%s\n"
- "SleepAVG:\t%lu%%\n"
"Tgid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
@@ -173,9 +172,8 @@ static inline char * task_state(struct task_struct *p, char *buffer)
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
- (p->sleep_avg/1024)*100/(1020000000/1024),
- p->tgid, p->pid,
- pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
+ p->tgid, p->pid,
+ pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
@@ -193,15 +191,15 @@ static inline char * task_state(struct task_struct *p, char *buffer)
get_group_info(group_info);
task_unlock(p);
- for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++)
- buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g));
+ for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
+ buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g));
put_group_info(group_info);
buffer += sprintf(buffer, "\n");
return buffer;
}
-static char * render_sigset_t(const char *header, sigset_t *set, char *buffer)
+static char *render_sigset_t(const char *header, sigset_t *set, char *buffer)
{
int i, len;
@@ -241,7 +239,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
}
}
-static inline char * task_sig(struct task_struct *p, char *buffer)
+static inline char *task_sig(struct task_struct *p, char *buffer)
{
unsigned long flags;
sigset_t pending, shpending, blocked, ignored, caught;
@@ -291,14 +289,23 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_effective));
}
-int proc_pid_status(struct task_struct *task, char * buffer)
+static inline char *task_context_switch_counts(struct task_struct *p,
+ char *buffer)
{
- char * orig = buffer;
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
+
+int proc_pid_status(struct task_struct *task, char *buffer)
+{
+ char *orig = buffer;
struct mm_struct *mm = get_task_mm(task);
buffer = task_name(task, buffer);
buffer = task_state(task, buffer);
-
+
if (mm) {
buffer = task_mem(mm, buffer);
mmput(mm);
@@ -309,10 +316,61 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_context_switch_counts(task, buffer);
return buffer - orig;
}
-static int do_task_stat(struct task_struct *task, char * buffer, int whole)
+/*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+static cputime_t task_utime(struct task_struct *p)
+{
+ return p->utime;
+}
+
+static cputime_t task_stime(struct task_struct *p)
+{
+ return p->stime;
+}
+#else
+static cputime_t task_utime(struct task_struct *p)
+{
+ clock_t utime = cputime_to_clock_t(p->utime),
+ total = utime + cputime_to_clock_t(p->stime);
+ u64 temp;
+
+ /*
+ * Use CFS's precise accounting:
+ */
+ temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+ if (total) {
+ temp *= utime;
+ do_div(temp, total);
+ }
+ utime = (clock_t)temp;
+
+ return clock_t_to_cputime(utime);
+}
+
+static cputime_t task_stime(struct task_struct *p)
+{
+ clock_t stime;
+
+ /*
+ * Use CFS's precise accounting. (we subtract utime from
+ * the total, to make sure the total observed by userspace
+ * grows monotonically - apps rely on that):
+ */
+ stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+ cputime_to_clock_t(task_utime(p));
+
+ return clock_t_to_cputime(stime);
+}
+#endif
+
+static int do_task_stat(struct task_struct *task, char *buffer, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
long priority, nice;
@@ -320,7 +378,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
sigset_t sigign, sigcatch;
char state;
int res;
- pid_t ppid = 0, pgid = -1, sid = -1;
+ pid_t ppid = 0, pgid = -1, sid = -1;
int num_threads = 0;
struct mm_struct *mm;
unsigned long long start_time;
@@ -370,8 +428,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
- utime = cputime_add(utime, t->utime);
- stime = cputime_add(stime, t->stime);
+ utime = cputime_add(utime, task_utime(t));
+ stime = cputime_add(stime, task_stime(t));
t = next_thread(t);
} while (t != task);
@@ -389,13 +447,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
}
rcu_read_unlock();
- if (!whole || num_threads<2)
+ if (!whole || num_threads < 2)
wchan = get_wchan(task);
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- utime = task->utime;
- stime = task->stime;
+ utime = task_utime(task);
+ stime = task_stime(task);
}
/* scale priority and nice values from timeslices to -20..20 */
@@ -405,12 +463,13 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
/* Temporary variable needed for gcc-2.96 */
/* convert timespec -> nsec*/
- start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
- + task->start_time.tv_nsec;
+ start_time =
+ (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
+ + task->real_start_time.tv_nsec;
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time);
- res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %u %lu \
+ res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n",
task->pid,
@@ -436,7 +495,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
start_time,
vsize,
mm ? get_mm_rss(mm) : 0,
- rsslim,
+ rsslim,
mm ? mm->start_code : 0,
mm ? mm->end_code : 0,
mm ? mm->start_stack : 0,
@@ -458,17 +517,17 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
task->rt_priority,
task->policy,
(unsigned long long)delayacct_blkio_ticks(task));
- if(mm)
+ if (mm)
mmput(mm);
return res;
}
-int proc_tid_stat(struct task_struct *task, char * buffer)
+int proc_tid_stat(struct task_struct *task, char *buffer)
{
return do_task_stat(task, buffer, 0);
}
-int proc_tgid_stat(struct task_struct *task, char * buffer)
+int proc_tgid_stat(struct task_struct *task, char *buffer)
{
return do_task_stat(task, buffer, 1);
}
@@ -477,12 +536,12 @@ int proc_pid_statm(struct task_struct *task, char *buffer)
{
int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
struct mm_struct *mm = get_task_mm(task);
-
+
if (mm) {
size = task_statm(mm, &shared, &text, &data, &resident);
mmput(mm);
}
- return sprintf(buffer,"%d %d %d %d %d %d %d\n",
+ return sprintf(buffer, "%d %d %d %d %d %d %d\n",
size, resident, shared, text, lib, data, 0);
}
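The task_utime()/task_stime() helpers added above apportion the scheduler's precise sum_exec_runtime between user and system time in the old utime:stime ratio, and derive stime as the total minus utime so the sum reported to userspace stays monotonic. Below is a minimal userspace sketch of that arithmetic, with illustrative names and plain 64-bit math standing in for cputime_t, clock_t conversions, and do_div():

#include <stdio.h>
#include <stdint.h>

/* Split a precise runtime in the ratio of the sampled utime/stime ticks. */
static uint64_t scaled_utime(uint64_t utime_ticks, uint64_t stime_ticks,
                             uint64_t sum_exec_ticks)
{
        uint64_t total = utime_ticks + stime_ticks;

        if (!total)
                return sum_exec_ticks;  /* whole runtime charged to utime, as above */
        return sum_exec_ticks * utime_ticks / total;
}

int main(void)
{
        uint64_t utime = 30, stime = 10, sum_exec = 48;  /* made-up tick counts */
        uint64_t u = scaled_utime(utime, stime, sum_exec);

        /* stime is computed as total - utime, so utime + stime == sum_exec */
        printf("utime=%llu stime=%llu\n",
               (unsigned long long)u, (unsigned long long)(sum_exec - u));
        return 0;
}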
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4e..19489b0d5554 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -67,12 +67,12 @@
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
-#include <linux/seccomp.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
#include <linux/oom.h>
+#include <linux/elf.h>
#include "internal.h"
/* NOTE:
@@ -204,12 +204,17 @@ static int proc_pid_environ(struct task_struct *task, char * buffer)
int res = 0;
struct mm_struct *mm = get_task_mm(task);
if (mm) {
- unsigned int len = mm->env_end - mm->env_start;
+ unsigned int len;
+
+ res = -ESRCH;
+ if (!ptrace_may_attach(task))
+ goto out;
+
+ len = mm->env_end - mm->env_start;
if (len > PAGE_SIZE)
len = PAGE_SIZE;
res = access_process_vm(task, mm->env_start, buffer, len, 0);
- if (!ptrace_may_attach(task))
- res = -ESRCH;
+out:
mmput(mm);
}
return res;
@@ -279,7 +284,7 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
static int proc_pid_wchan(struct task_struct *task, char *buffer)
{
unsigned long wchan;
- char symname[KSYM_NAME_LEN+1];
+ char symname[KSYM_NAME_LEN];
wchan = get_wchan(task);
@@ -296,7 +301,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
*/
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
- return sprintf(buffer, "%lu %lu %lu\n",
+ return sprintf(buffer, "%llu %llu %lu\n",
task->sched_info.cpu_time,
task->sched_info.run_delay,
task->sched_info.pcnt);
@@ -812,71 +817,6 @@ static const struct file_operations proc_loginuid_operations = {
};
#endif
-#ifdef CONFIG_SECCOMP
-static ssize_t seccomp_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
- char __buf[20];
- size_t len;
-
- if (!tsk)
- return -ESRCH;
- /* no need to print the trailing zero, so use only len */
- len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
- put_task_struct(tsk);
-
- return simple_read_from_buffer(buf, count, ppos, __buf, len);
-}
-
-static ssize_t seccomp_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
- char __buf[20], *end;
- unsigned int seccomp_mode;
- ssize_t result;
-
- result = -ESRCH;
- if (!tsk)
- goto out_no_task;
-
- /* can set it only once to be even more secure */
- result = -EPERM;
- if (unlikely(tsk->seccomp.mode))
- goto out;
-
- result = -EFAULT;
- memset(__buf, 0, sizeof(__buf));
- count = min(count, sizeof(__buf) - 1);
- if (copy_from_user(__buf, buf, count))
- goto out;
-
- seccomp_mode = simple_strtoul(__buf, &end, 0);
- if (*end == '\n')
- end++;
- result = -EINVAL;
- if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
- tsk->seccomp.mode = seccomp_mode;
- set_tsk_thread_flag(tsk, TIF_SECCOMP);
- } else
- goto out;
- result = -EIO;
- if (unlikely(!(end - __buf)))
- goto out;
- result = end - __buf;
-out:
- put_task_struct(tsk);
-out_no_task:
- return result;
-}
-
-static const struct file_operations proc_seccomp_operations = {
- .read = seccomp_read,
- .write = seccomp_write,
-};
-#endif /* CONFIG_SECCOMP */
-
#ifdef CONFIG_FAULT_INJECTION
static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
@@ -929,6 +869,69 @@ static const struct file_operations proc_fault_inject_operations = {
};
#endif
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * Print out various scheduling related per-task fields:
+ */
+static int sched_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ WARN_ON(!inode);
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_show_task(p, m);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct task_struct *p;
+
+ WARN_ON(!inode);
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_set_task(p);
+
+ put_task_struct(p);
+
+ return count;
+}
+
+static int sched_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = single_open(filp, sched_show, NULL);
+ if (!ret) {
+ struct seq_file *m = filp->private_data;
+
+ m->private = inode;
+ }
+ return ret;
+}
+
+static const struct file_operations proc_pid_sched_operations = {
+ .open = sched_open,
+ .read = seq_read,
+ .write = sched_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif
+
static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
@@ -1012,7 +1015,7 @@ static int task_dumpable(struct task_struct *task)
task_lock(task);
mm = task->mm;
if (mm)
- dumpable = mm->dumpable;
+ dumpable = get_dumpable(mm);
task_unlock(task);
if(dumpable == 1)
return 1;
@@ -1783,6 +1786,91 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
#endif
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+ struct mm_struct *mm;
+ char buffer[PROC_NUMBUF];
+ size_t len;
+ int ret;
+
+ if (!task)
+ return -ESRCH;
+
+ ret = 0;
+ mm = get_task_mm(task);
+ if (mm) {
+ len = snprintf(buffer, sizeof(buffer), "%08lx\n",
+ ((mm->flags & MMF_DUMP_FILTER_MASK) >>
+ MMF_DUMP_FILTER_SHIFT));
+ mmput(mm);
+ ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
+ }
+
+ put_task_struct(task);
+
+ return ret;
+}
+
+static ssize_t proc_coredump_filter_write(struct file *file,
+ const char __user *buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+ char buffer[PROC_NUMBUF], *end;
+ unsigned int val;
+ int ret;
+ int i;
+ unsigned long mask;
+
+ ret = -EFAULT;
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ goto out_no_task;
+
+ ret = -EINVAL;
+ val = (unsigned int)simple_strtoul(buffer, &end, 0);
+ if (*end == '\n')
+ end++;
+ if (end - buffer == 0)
+ goto out_no_task;
+
+ ret = -ESRCH;
+ task = get_proc_task(file->f_dentry->d_inode);
+ if (!task)
+ goto out_no_task;
+
+ ret = end - buffer;
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_no_mm;
+
+ for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
+ if (val & mask)
+ set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
+ else
+ clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
+ }
+
+ mmput(mm);
+ out_no_mm:
+ put_task_struct(task);
+ out_no_task:
+ return ret;
+}
+
+static const struct file_operations proc_coredump_filter_operations = {
+ .read = proc_coredump_filter_read,
+ .write = proc_coredump_filter_write,
+};
+#endif
+
/*
* /proc/self:
*/
@@ -1963,6 +2051,9 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("environ", S_IRUSR, pid_environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+ REG("sched", S_IRUGO|S_IWUSR, pid_sched),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
INF("stat", S_IRUGO, tgid_stat),
INF("statm", S_IRUGO, pid_statm),
@@ -1971,9 +2062,6 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("numa_maps", S_IRUGO, numa_maps),
#endif
REG("mem", S_IRUSR|S_IWUSR, mem),
-#ifdef CONFIG_SECCOMP
- REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
-#endif
LNK("cwd", cwd),
LNK("root", root),
LNK("exe", exe),
@@ -2003,6 +2091,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
#endif
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+ REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
+#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUGO, pid_io_accounting),
#endif
@@ -2247,6 +2338,9 @@ static const struct pid_entry tid_base_stuff[] = {
INF("environ", S_IRUSR, pid_environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+ REG("sched", S_IRUGO|S_IWUSR, pid_sched),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
INF("stat", S_IRUGO, tid_stat),
INF("statm", S_IRUGO, pid_statm),
@@ -2255,9 +2349,6 @@ static const struct pid_entry tid_base_stuff[] = {
REG("numa_maps", S_IRUGO, numa_maps),
#endif
REG("mem", S_IRUSR|S_IWUSR, mem),
-#ifdef CONFIG_SECCOMP
- REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
-#endif
LNK("cwd", cwd),
LNK("root", root),
LNK("exe", exe),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8a40e15f5ecb..b5e7155d30d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -20,6 +20,7 @@
#include <linux/namei.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
+#include <linux/completion.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -529,12 +530,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
return -EAGAIN;
dp->low_ino = i;
- spin_lock(&proc_subdir_lock);
- dp->next = dir->subdir;
- dp->parent = dir;
- dir->subdir = dp;
- spin_unlock(&proc_subdir_lock);
-
if (S_ISDIR(dp->mode)) {
if (dp->proc_iops == NULL) {
dp->proc_fops = &proc_dir_operations;
@@ -550,6 +545,13 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
if (dp->proc_iops == NULL)
dp->proc_iops = &proc_file_inode_operations;
}
+
+ spin_lock(&proc_subdir_lock);
+ dp->next = dir->subdir;
+ dp->parent = dir;
+ dir->subdir = dp;
+ spin_unlock(&proc_subdir_lock);
+
return 0;
}
@@ -613,6 +615,9 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
+ ent->pde_users = 0;
+ spin_lock_init(&ent->pde_unload_lock);
+ ent->pde_unload_completion = NULL;
out:
return ent;
}
@@ -649,9 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
ent = proc_create(&parent, name, S_IFDIR | mode, 2);
if (ent) {
- ent->proc_fops = &proc_dir_operations;
- ent->proc_iops = &proc_dir_inode_operations;
-
if (proc_register(parent, ent) < 0) {
kfree(ent);
ent = NULL;
@@ -686,10 +688,6 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
ent = proc_create(&parent,name,mode,nlink);
if (ent) {
- if (S_ISDIR(mode)) {
- ent->proc_fops = &proc_dir_operations;
- ent->proc_iops = &proc_dir_inode_operations;
- }
if (proc_register(parent, ent) < 0) {
kfree(ent);
ent = NULL;
@@ -734,9 +732,35 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = *p;
*p = de->next;
de->next = NULL;
+
+ spin_lock(&de->pde_unload_lock);
+ /*
+ * Stop accepting new callers into module. If you're
+ * dynamically allocating ->proc_fops, save a pointer somewhere.
+ */
+ de->proc_fops = NULL;
+ /* Wait until all existing callers into module are done. */
+ if (de->pde_users > 0) {
+ DECLARE_COMPLETION_ONSTACK(c);
+
+ if (!de->pde_unload_completion)
+ de->pde_unload_completion = &c;
+
+ spin_unlock(&de->pde_unload_lock);
+ spin_unlock(&proc_subdir_lock);
+
+ wait_for_completion(de->pde_unload_completion);
+
+ spin_lock(&proc_subdir_lock);
+ goto continue_removing;
+ }
+ spin_unlock(&de->pde_unload_lock);
+
+continue_removing:
if (S_ISDIR(de->mode))
parent->nlink--;
- proc_kill_inodes(de);
+ if (!S_ISREG(de->mode))
+ proc_kill_inodes(de);
de->nlink = 0;
WARN_ON(de->subdir);
if (!atomic_read(&de->count))
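remove_proc_entry() now refuses new callers by clearing ->proc_fops under pde_unload_lock, then waits on a completion until pde_users drops back to zero; the matching increment/decrement is done by the proc_reg_* wrappers added to fs/proc/inode.c below. The following is a userspace analogue of that drain handshake, using a pthread mutex and condition variable in place of the spinlock and completion; the names are illustrative only:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
        pthread_mutex_t lock;
        pthread_cond_t  drained;
        int             users;  /* plays the role of pde->pde_users */
        bool            live;   /* plays the role of ->proc_fops != NULL */
};

/* Like the wrappers' "check ->proc_fops, then pde_users++" step. */
static bool entry_get(struct entry *e)
{
        bool ok;

        pthread_mutex_lock(&e->lock);
        ok = e->live;
        if (ok)
                e->users++;
        pthread_mutex_unlock(&e->lock);
        return ok;
}

/* Like pde_users_dec(): last user out signals the waiter. */
static void entry_put(struct entry *e)
{
        pthread_mutex_lock(&e->lock);
        if (--e->users == 0)
                pthread_cond_signal(&e->drained);
        pthread_mutex_unlock(&e->lock);
}

/* Like remove_proc_entry(): stop new callers, wait for existing ones. */
static void entry_remove(struct entry *e)
{
        pthread_mutex_lock(&e->lock);
        e->live = false;
        while (e->users > 0)
                pthread_cond_wait(&e->drained, &e->lock);
        pthread_mutex_unlock(&e->lock);
}

int main(void)
{
        struct entry e = {
                .lock    = PTHREAD_MUTEX_INITIALIZER,
                .drained = PTHREAD_COND_INITIALIZER,
                .live    = true,
        };

        if (entry_get(&e)) {
                /* ... use the entry ... */
                entry_put(&e);
        }
        entry_remove(&e);
        printf("removed, users=%d\n", e.users);
        return 0;
}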
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d5ce65c68d7b..0e4d37c93eea 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -10,6 +10,8 @@
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
+#include <linux/completion.h>
+#include <linux/poll.h>
#include <linux/file.h>
#include <linux/limits.h>
#include <linux/init.h>
@@ -111,14 +113,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
inode_init_once(&ei->vfs_inode);
}
-
+
int __init proc_init_inodecache(void)
{
proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode),
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
- init_once, NULL);
+ init_once);
if (proc_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -140,6 +142,264 @@ static const struct super_operations proc_sops = {
.remount_fs = proc_remount,
};
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+ spin_lock(&pde->pde_unload_lock);
+ pde->pde_users--;
+ if (pde->pde_unload_completion && pde->pde_users == 0)
+ complete(pde->pde_unload_completion);
+ spin_unlock(&pde->pde_unload_lock);
+}
+
+static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ loff_t rv = -EINVAL;
+ loff_t (*llseek)(struct file *, loff_t, int);
+
+ spin_lock(&pde->pde_unload_lock);
+ /*
+ * remove_proc_entry() is going to delete PDE (as part of module
+ * cleanup sequence). No new callers into module allowed.
+ */
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ /*
+ * Bump refcount so that remove_proc_entry will wait for ->llseek to
+ * complete.
+ */
+ pde->pde_users++;
+ /*
+ * Save function pointer under lock, to protect against ->proc_fops
+ * NULL'ifying right after ->pde_unload_lock is dropped.
+ */
+ llseek = pde->proc_fops->llseek;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (!llseek)
+ llseek = default_llseek;
+ rv = llseek(file, offset, whence);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ ssize_t rv = -EIO;
+ ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ read = pde->proc_fops->read;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (read)
+ rv = read(file, buf, count, ppos);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ ssize_t rv = -EIO;
+ ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ write = pde->proc_fops->write;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (write)
+ rv = write(file, buf, count, ppos);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ unsigned int rv = DEFAULT_POLLMASK;
+ unsigned int (*poll)(struct file *, struct poll_table_struct *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ poll = pde->proc_fops->poll;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (poll)
+ rv = poll(file, pts);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ long rv = -ENOTTY;
+ long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
+ int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ unlocked_ioctl = pde->proc_fops->unlocked_ioctl;
+ ioctl = pde->proc_fops->ioctl;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (unlocked_ioctl) {
+ rv = unlocked_ioctl(file, cmd, arg);
+ if (rv == -ENOIOCTLCMD)
+ rv = -EINVAL;
+ } else if (ioctl) {
+ lock_kernel();
+ rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+ unlock_kernel();
+ }
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+#ifdef CONFIG_COMPAT
+static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ long rv = -ENOTTY;
+ long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ compat_ioctl = pde->proc_fops->compat_ioctl;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (compat_ioctl)
+ rv = compat_ioctl(file, cmd, arg);
+
+ pde_users_dec(pde);
+ return rv;
+}
+#endif
+
+static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+ int rv = -EIO;
+ int (*mmap)(struct file *, struct vm_area_struct *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ mmap = pde->proc_fops->mmap;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (mmap)
+ rv = mmap(file, vma);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static int proc_reg_open(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *pde = PDE(inode);
+ int rv = 0;
+ int (*open)(struct inode *, struct file *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ open = pde->proc_fops->open;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (open)
+ rv = open(inode, file);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static int proc_reg_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *pde = PDE(inode);
+ int rv = 0;
+ int (*release)(struct inode *, struct file *);
+
+ spin_lock(&pde->pde_unload_lock);
+ if (!pde->proc_fops) {
+ spin_unlock(&pde->pde_unload_lock);
+ return rv;
+ }
+ pde->pde_users++;
+ release = pde->proc_fops->release;
+ spin_unlock(&pde->pde_unload_lock);
+
+ if (release)
+ rv = release(inode, file);
+
+ pde_users_dec(pde);
+ return rv;
+}
+
+static const struct file_operations proc_reg_file_ops = {
+ .llseek = proc_reg_llseek,
+ .read = proc_reg_read,
+ .write = proc_reg_write,
+ .poll = proc_reg_poll,
+ .unlocked_ioctl = proc_reg_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = proc_reg_compat_ioctl,
+#endif
+ .mmap = proc_reg_mmap,
+ .open = proc_reg_open,
+ .release = proc_reg_release,
+};
+
+#ifdef CONFIG_COMPAT
+static const struct file_operations proc_reg_file_ops_no_compat = {
+ .llseek = proc_reg_llseek,
+ .read = proc_reg_read,
+ .write = proc_reg_write,
+ .poll = proc_reg_poll,
+ .unlocked_ioctl = proc_reg_unlocked_ioctl,
+ .mmap = proc_reg_mmap,
+ .open = proc_reg_open,
+ .release = proc_reg_release,
+};
+#endif
+
struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
struct proc_dir_entry *de)
{
@@ -166,8 +426,19 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
inode->i_nlink = de->nlink;
if (de->proc_iops)
inode->i_op = de->proc_iops;
- if (de->proc_fops)
- inode->i_fop = de->proc_fops;
+ if (de->proc_fops) {
+ if (S_ISREG(inode->i_mode)) {
+#ifdef CONFIG_COMPAT
+ if (!de->proc_fops->compat_ioctl)
+ inode->i_fop =
+ &proc_reg_file_ops_no_compat;
+ else
+#endif
+ inode->i_fop = &proc_reg_file_ops;
+ }
+ else
+ inode->i_fop = de->proc_fops;
+ }
}
return inode;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5fd49e47f83a..bee251cb87c8 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -105,6 +105,7 @@ static int uptime_read_proc(char *page, char **start, off_t off,
cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
do_posix_clock_monotonic_gettime(&uptime);
+ monotonic_to_bootbased(&uptime);
cputime_to_timespec(idletime, &idle);
len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
(unsigned long) uptime.tv_sec,
@@ -443,12 +444,17 @@ static int show_stat(struct seq_file *p, void *v)
unsigned long jif;
cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
u64 sum = 0;
+ struct timespec boottime;
+ unsigned int *per_irq_sum;
+
+ per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
+ if (!per_irq_sum)
+ return -ENOMEM;
user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
- jif = - wall_to_monotonic.tv_sec;
- if (wall_to_monotonic.tv_nsec)
- --jif;
+ getboottime(&boottime);
+ jif = boottime.tv_sec;
for_each_possible_cpu(i) {
int j;
@@ -461,8 +467,11 @@ static int show_stat(struct seq_file *p, void *v)
irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
- for (j = 0 ; j < NR_IRQS ; j++)
- sum += kstat_cpu(i).irqs[j];
+ for (j = 0; j < NR_IRQS; j++) {
+ unsigned int temp = kstat_cpu(i).irqs[j];
+ sum += temp;
+ per_irq_sum[j] += temp;
+ }
}
seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n",
@@ -498,9 +507,10 @@ static int show_stat(struct seq_file *p, void *v)
}
seq_printf(p, "intr %llu", (unsigned long long)sum);
-#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64)
+#ifndef CONFIG_SMP
+ /* Touches too many cache lines on SMP setups */
for (i = 0; i < NR_IRQS; i++)
- seq_printf(p, " %u", kstat_irqs(i));
+ seq_printf(p, " %u", per_irq_sum[i]);
#endif
seq_printf(p,
@@ -515,6 +525,7 @@ static int show_stat(struct seq_file *p, void *v)
nr_running(),
nr_iowait());
+ kfree(per_irq_sum);
return 0;
}
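show_stat() above now accumulates a per-IRQ total in the same pass over the CPUs, so the per-interrupt columns of the "intr" line can be printed from a precomputed array instead of calling kstat_irqs() once per IRQ. A tiny standalone sketch of that accumulation, with made-up array sizes and counts standing in for kstat_cpu():

#include <stdio.h>

#define NCPUS 2
#define NIRQS 4

int main(void)
{
        unsigned int counts[NCPUS][NIRQS] = {   /* per-CPU, per-IRQ samples */
                { 10, 0, 3, 7 },
                {  5, 1, 0, 2 },
        };
        unsigned int per_irq_sum[NIRQS] = { 0 };
        unsigned long long sum = 0;
        int cpu, irq;

        for (cpu = 0; cpu < NCPUS; cpu++) {
                for (irq = 0; irq < NIRQS; irq++) {
                        unsigned int temp = counts[cpu][irq];

                        sum += temp;            /* grand total */
                        per_irq_sum[irq] += temp;  /* per-IRQ column */
                }
        }

        printf("intr %llu", sum);
        for (irq = 0; irq < NIRQS; irq++)
                printf(" %u", per_irq_sum[irq]);
        printf("\n");
        return 0;
}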
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index b3a473b0a191..22846225acfa 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -69,7 +69,7 @@ static void show_tty_range(struct seq_file *m, struct tty_driver *p,
static int show_tty_driver(struct seq_file *m, void *v)
{
- struct tty_driver *p = v;
+ struct tty_driver *p = list_entry(v, struct tty_driver, tty_drivers);
dev_t from = MKDEV(p->major, p->minor_start);
dev_t to = from + p->num;
@@ -106,22 +106,13 @@ static int show_tty_driver(struct seq_file *m, void *v)
/* iterator */
static void *t_start(struct seq_file *m, loff_t *pos)
{
- struct list_head *p;
- loff_t l = *pos;
-
mutex_lock(&tty_mutex);
- list_for_each(p, &tty_drivers)
- if (!l--)
- return list_entry(p, struct tty_driver, tty_drivers);
- return NULL;
+ return seq_list_start(&tty_drivers, *pos);
}
static void *t_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
- (*pos)++;
- return p==&tty_drivers ? NULL :
- list_entry(p, struct tty_driver, tty_drivers);
+ return seq_list_next(v, &tty_drivers, pos);
}
static void t_stop(struct seq_file *m, void *v)