summaryrefslogtreecommitdiff
path: root/kernel/kcmp.c
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@gmail.com>2017-07-12 14:34:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-12 16:26:01 -0700
commit0791e3644e5ef21646fe565b9061788d05ec71d4 (patch)
treef16169f6551fe0b2fea513080fcecbe0d4a5cccc /kernel/kcmp.c
parent77493f04b74cdff3a61fb3fb14b1f5a71d88fd5f (diff)
kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files
With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and closed then so we won't find this descriptor in the task fdinfo list. Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrey Vagin <avagin@openvz.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Pavel Emelyanov <xemul@virtuozzo.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Jason Baron <jbaron@akamai.com> Cc: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/kcmp.c')
-rw-r--r--kernel/kcmp.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa998fe0..ea34ed8bb952 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -11,6 +11,10 @@
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
#include <asm/unistd.h>
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
return err;
}
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ struct file *filp, *filp_epoll, *filp_tgt;
+ struct kcmp_epoll_slot slot;
+ struct files_struct *files;
+
+ if (copy_from_user(&slot, uslot, sizeof(slot)))
+ return -EFAULT;
+
+ filp = get_file_raw_ptr(task1, idx1);
+ if (!filp)
+ return -EBADF;
+
+ files = get_files_struct(task2);
+ if (!files)
+ return -EBADF;
+
+ spin_lock(&files->file_lock);
+ filp_epoll = fcheck_files(files, slot.efd);
+ if (filp_epoll)
+ get_file(filp_epoll);
+ else
+ filp_tgt = ERR_PTR(-EBADF);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ if (filp_epoll) {
+ filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+ fput(filp_epoll);
+ } else
+
+ if (IS_ERR(filp_tgt))
+ return PTR_ERR(filp_tgt);
+
+ return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
unsigned long, idx1, unsigned long, idx2)
{
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
ret = -EOPNOTSUPP;
#endif
break;
+ case KCMP_EPOLL_TFD:
+ ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+ break;
default:
ret = -EINVAL;
break;