summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c166
-rw-r--r--net/9p/protocol.c44
-rw-r--r--net/9p/trans_common.c97
-rw-r--r--net/9p/trans_common.h32
-rw-r--r--net/9p/trans_fd.c52
-rw-r--r--net/9p/trans_virtio.c129
-rw-r--r--net/core/scm.c2
-rw-r--r--net/rds/ib.c9
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_rdma.c27
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/unix/garbage.c2
14 files changed, 440 insertions, 127 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a6..a0874cc1f718 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
util.o \
protocol.o \
trans_fd.o \
+ trans_common.o \
9pnet_virtio-objs := \
trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbff..347ec0cd2718 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
return ERR_PTR(-ENOMEM);
}
init_waitqueue_head(req->wq);
- req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
- req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
+ if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) {
+ int alloc_msize = min(c->msize, 4096);
+ req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+ GFP_KERNEL);
+ req->tc->capacity = alloc_msize;
+ req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+ GFP_KERNEL);
+ req->rc->capacity = alloc_msize;
+ } else {
+ req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+ GFP_KERNEL);
+ req->tc->capacity = c->msize;
+ req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+ GFP_KERNEL);
+ req->rc->capacity = c->msize;
+ }
if ((!req->tc) || (!req->rc)) {
printk(KERN_ERR "Couldn't grow tag array\n");
kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
return ERR_PTR(-ENOMEM);
}
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
- req->tc->capacity = c->msize;
req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
- req->rc->capacity = c->msize;
}
p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
{
int8_t type;
int err;
+ int ecode;
err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
return err;
}
- if (type == P9_RERROR || type == P9_RLERROR) {
- int ecode;
-
- if (!p9_is_proto_dotl(c)) {
- char *ename;
+ if (type != P9_RERROR && type != P9_RLERROR)
+ return 0;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
- &ename, &ecode);
- if (err)
- goto out_err;
+ if (!p9_is_proto_dotl(c)) {
+ char *ename;
+
+ if (req->tc->pbuf_size) {
+ /* Handle user buffers */
+ size_t len = req->rc->size - req->rc->offset;
+ if (req->tc->pubuf) {
+ /* User Buffer */
+ err = copy_from_user(
+ &req->rc->sdata[req->rc->offset],
+ req->tc->pubuf, len);
+ if (err) {
+ err = -EFAULT;
+ goto out_err;
+ }
+ } else {
+ /* Kernel Buffer */
+ memmove(&req->rc->sdata[req->rc->offset],
+ req->tc->pkbuf, len);
+ }
+ }
+ err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
+ if (err)
+ goto out_err;
- if (p9_is_proto_dotu(c))
- err = -ecode;
+ if (p9_is_proto_dotu(c))
+ err = -ecode;
- if (!err || !IS_ERR_VALUE(err)) {
- err = p9_errstr2errno(ename, strlen(ename));
+ if (!err || !IS_ERR_VALUE(err)) {
+ err = p9_errstr2errno(ename, strlen(ename));
- P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
+ ename);
- kfree(ename);
- }
- } else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
- err = -ecode;
-
- P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ kfree(ename);
}
+ } else {
+ err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = -ecode;
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ }
- } else
- err = 0;
return err;
@@ -1191,6 +1220,27 @@ error:
}
EXPORT_SYMBOL(p9_client_fsync);
+int p9_client_sync_fs(struct p9_fid *fid)
+{
+ int err = 0;
+ struct p9_req_t *req;
+ struct p9_client *clnt;
+
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
+
+ clnt = fid->clnt;
+ req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+ }
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
+ p9_free_req(clnt, req);
+error:
+ return err;
+}
+EXPORT_SYMBOL(p9_client_sync_fs);
+
int p9_client_clunk(struct p9_fid *fid)
{
int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
if (count < rsize)
rsize = count;
- req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+ /* Don't bother zerocopy form small IO (< 1024) */
+ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+ req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
+ rsize, data, udata);
+ } else {
+ req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+ rsize);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
- if (data) {
- memmove(data, dataptr, count);
- } else {
- err = copy_to_user(udata, dataptr, count);
- if (err) {
- err = -EFAULT;
- goto free_and_error;
+ if (!req->tc->pbuf_size) {
+ if (data) {
+ memmove(data, dataptr, count);
+ } else {
+ err = copy_to_user(udata, dataptr, count);
+ if (err) {
+ err = -EFAULT;
+ goto free_and_error;
+ }
}
}
p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
if (count < rsize)
rsize = count;
- if (data)
- req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
- rsize, data);
- else
- req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
- rsize, udata);
+
+ /* Don't bother zerocopy form small IO (< 1024) */
+ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
+ rsize, data, udata);
+ } else {
+
+ if (data)
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
+ offset, rsize, data);
+ else
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
+ offset, rsize, udata);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (count < rsize)
rsize = count;
- req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+ if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) {
+ req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
+ offset, rsize, data);
+ } else {
+ req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
+ offset, rsize);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
- if (data)
+ if (!req->tc->pbuf_size && data)
memmove(data, dataptr, count);
p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f210928..2ce515b859b3 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
return size - len;
}
+static size_t
+pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
+ size_t size)
+{
+ BUG_ON(pdu->size > P9_IOHDRSZ);
+ pdu->pubuf = (char __user *)udata;
+ pdu->pkbuf = (char *)kdata;
+ pdu->pbuf_size = size;
+ return 0;
+}
+
+static size_t
+pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
+{
+ BUG_ON(pdu->size > P9_READDIRHDRSZ);
+ pdu->pkbuf = (char *)kdata;
+ pdu->pbuf_size = size;
+ return 0;
+}
+
/*
b - int8_t
w - int16_t
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
errcode = -EFAULT;
}
break;
+ case 'E':{
+ int32_t cnt = va_arg(ap, int32_t);
+ const char *k = va_arg(ap, const void *);
+ const char *u = va_arg(ap, const void *);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ cnt);
+ if (!errcode && pdu_write_urw(pdu, k, u, cnt))
+ errcode = -EFAULT;
+ }
+ break;
+ case 'F':{
+ int32_t cnt = va_arg(ap, int32_t);
+ const char *k = va_arg(ap, const void *);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ cnt);
+ if (!errcode && pdu_write_readdir(pdu, k, cnt))
+ errcode = -EFAULT;
+ }
+ break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
{
+ pdu->id = type;
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
{
pdu->offset = 0;
pdu->size = 0;
+ pdu->private = NULL;
+ pdu->pubuf = NULL;
+ pdu->pkbuf = NULL;
+ pdu->pbuf_size = 0;
}
int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 000000000000..d62b9aa58df8
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/scatterlist.h>
+#include "trans_common.h"
+
+/**
+ * p9_release_req_pages - Release pages after the transaction.
+ * @*private: PDU's private page of struct trans_rpage_info
+ */
+void
+p9_release_req_pages(struct trans_rpage_info *rpinfo)
+{
+ int i = 0;
+
+ while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
+ put_page(rpinfo->rp_data[i]);
+ i++;
+ }
+}
+EXPORT_SYMBOL(p9_release_req_pages);
+
+/**
+ * p9_nr_pages - Return number of pages needed to accomodate the payload.
+ */
+int
+p9_nr_pages(struct p9_req_t *req)
+{
+ int start_page, end_page;
+ start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
+ end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
+ PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return end_page - start_page;
+}
+EXPORT_SYMBOL(p9_nr_pages);
+
+/**
+ * payload_gup - Translates user buffer into kernel pages and
+ * pins them either for read/write through get_user_pages_fast().
+ * @req: Request to be sent to server.
+ * @pdata_off: data offset into the first page after translation (gup).
+ * @pdata_len: Total length of the IO. gup may not return requested # of pages.
+ * @nr_pages: number of pages to accomodate the payload
+ * @rw: Indicates if the pages are for read or write.
+ */
+int
+p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
+ int nr_pages, u8 rw)
+{
+ uint32_t first_page_bytes = 0;
+ uint32_t pdata_mapped_pages;
+ struct trans_rpage_info *rpinfo;
+
+ *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+
+ if (*pdata_off)
+ first_page_bytes = min((PAGE_SIZE - *pdata_off),
+ req->tc->pbuf_size);
+
+ rpinfo = req->tc->private;
+ pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
+ nr_pages, rw, &rpinfo->rp_data[0]);
+
+ if (pdata_mapped_pages < 0) {
+ printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
+ "nr_pages:%d\n", pdata_mapped_pages,
+ req->tc->pubuf, nr_pages);
+ pdata_mapped_pages = 0;
+ return -EIO;
+ }
+ rpinfo->rp_nr_pages = pdata_mapped_pages;
+ if (*pdata_off) {
+ *pdata_len = first_page_bytes;
+ *pdata_len += min((req->tc->pbuf_size - *pdata_len),
+ ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
+ } else {
+ *pdata_len = min(req->tc->pbuf_size,
+ (size_t)pdata_mapped_pages << PAGE_SHIFT);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 000000000000..76309223bb02
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* TRUE if it is user context */
+#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
+
+/**
+ * struct trans_rpage_info - To store mapped page information in PDU.
+ * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
+ * @rp_nr_pages: Number of mapped pages
+ * @rp_data: Array of page pointers
+ */
+struct trans_rpage_info {
+ u8 rp_alloc;
+ int rp_nr_pages;
+ struct page *rp_data[0];
+};
+
+void p9_release_req_pages(struct trans_rpage_info *);
+int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
+int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9bf..a30471e51740 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
unsigned long wsched;
};
+static void p9_poll_workfn(struct work_struct *work);
+
static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
-static struct workqueue_struct *p9_mux_wq;
-static struct task_struct *p9_poll_task;
+static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
static void p9_mux_poll_stop(struct p9_conn *m)
{
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
if (n & POLLIN) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
- queue_work(p9_mux_wq, &m->rq);
+ schedule_work(&m->rq);
} else
clear_bit(Rworksched, &m->wsched);
} else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
if (n & POLLOUT) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
} else
clear_bit(Wworksched, &m->wsched);
} else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
container_of(wait, struct p9_poll_wait, wait);
struct p9_conn *m = pwait->conn;
unsigned long flags;
- DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
spin_lock_irqsave(&p9_poll_lock, flags);
if (list_empty(&m->poll_pending_link))
list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
spin_unlock_irqrestore(&p9_poll_lock, flags);
- /* perform the default wake up operation */
- return default_wake_function(&dummy_wait, mode, sync, key);
+ schedule_work(&p9_poll_work);
+ return 1;
}
/**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
if (!test_and_set_bit(Rworksched, &m->wsched)) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
- queue_work(p9_mux_wq, &m->rq);
+ schedule_work(&m->rq);
}
}
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
}
}
}
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
n = p9_fd_poll(m->client, NULL);
if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
return 0;
}
@@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = {
*
*/
-static int p9_poll_proc(void *a)
+static void p9_poll_workfn(struct work_struct *work)
{
unsigned long flags;
P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
- repeat:
+
spin_lock_irqsave(&p9_poll_lock, flags);
while (!list_empty(&p9_poll_pending_list)) {
struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a)
}
spin_unlock_irqrestore(&p9_poll_lock, flags);
- set_current_state(TASK_INTERRUPTIBLE);
- if (list_empty(&p9_poll_pending_list)) {
- P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
- schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- if (!kthread_should_stop())
- goto repeat;
-
P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
- return 0;
}
int p9_trans_fd_init(void)
{
- p9_mux_wq = create_workqueue("v9fs");
- if (!p9_mux_wq) {
- printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
- return -ENOMEM;
- }
-
- p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
- if (IS_ERR(p9_poll_task)) {
- destroy_workqueue(p9_mux_wq);
- printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
- return PTR_ERR(p9_poll_task);
- }
-
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void)
void p9_trans_fd_exit(void)
{
- kthread_stop(p9_poll_task);
+ flush_work_sync(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
-
- destroy_workqueue(p9_mux_wq);
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20e..9b550ed9c711 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -45,6 +45,7 @@
#include <linux/scatterlist.h>
#include <linux/virtio.h>
#include <linux/virtio_9p.h>
+#include "trans_common.h"
#define VIRTQUEUE_NUM 128
@@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq)
rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
req->status = REQ_STATUS_RCVD;
+ if (req->tc->private) {
+ struct trans_rpage_info *rp = req->tc->private;
+ /*Release pages */
+ p9_release_req_pages(rp);
+ if (rp->rp_alloc)
+ kfree(rp);
+ req->tc->private = NULL;
+ }
p9_client_cb(chan->client, req);
} else {
spin_unlock_irqrestore(&chan->lock, flags);
@@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
}
/**
+ * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
+ * this takes a list of pages.
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @pdata_off: Offset into the first page
+ * @**pdata: a list of pages to add into sg.
+ * @count: amount of data to pack into the scatter/gather list
+ */
+static int
+pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
+ struct page **pdata, int count)
+{
+ int s;
+ int i = 0;
+ int index = start;
+
+ if (pdata_off) {
+ s = min((int)(PAGE_SIZE - pdata_off), count);
+ sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+ count -= s;
+ }
+
+ while (count) {
+ BUG_ON(index > limit);
+ s = min((int)PAGE_SIZE, count);
+ sg_set_page(&sg[index++], pdata[i++], s, 0);
+ count -= s;
+ }
+ return index-start;
+}
+
+/**
* p9_virtio_request - issue a request
* @client: client instance issuing the request
* @req: request to be issued
@@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
- int in, out;
+ int in, out, inp, outp;
struct virtio_chan *chan = client->trans;
char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
unsigned long flags;
- int err;
+ size_t pdata_off = 0;
+ struct trans_rpage_info *rpinfo = NULL;
+ int err, pdata_len = 0;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
req_retry:
req->status = REQ_STATUS_SENT;
+ if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
+ int nr_pages = p9_nr_pages(req);
+ int rpinfo_size = sizeof(struct trans_rpage_info) +
+ sizeof(struct page *) * nr_pages;
+
+ if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
+ /* We can use sdata */
+ req->tc->private = req->tc->sdata + req->tc->size;
+ rpinfo = (struct trans_rpage_info *)req->tc->private;
+ rpinfo->rp_alloc = 0;
+ } else {
+ req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
+ if (!req->tc->private) {
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
+ "private kmalloc returned NULL");
+ return -ENOMEM;
+ }
+ rpinfo = (struct trans_rpage_info *)req->tc->private;
+ rpinfo->rp_alloc = 1;
+ }
+
+ err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
+ req->tc->id == P9_TREAD ? 1 : 0);
+ if (err < 0) {
+ if (rpinfo->rp_alloc)
+ kfree(rpinfo);
+ return err;
+ }
+ }
+
spin_lock_irqsave(&chan->lock, flags);
+
+ /* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
- req->tc->size);
- in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
- client->msize);
+ req->tc->size);
+
+ if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
+ /* We have additional write payload buffer to take care */
+ if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+ outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+ pdata_off, rpinfo->rp_data, pdata_len);
+ } else {
+ char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+ req->tc->pkbuf;
+ outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
+ req->tc->pbuf_size);
+ }
+ out += outp;
+ }
+
+ /* Handle in VirtIO ring buffers */
+ if (req->tc->pbuf_size &&
+ ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
+ /*
+ * Take care of additional Read payload.
+ * 11 is the read/write header = PDU Header(7) + IO Size (4).
+ * Arrange in such a way that server places header in the
+ * alloced memory and payload onto the user buffer.
+ */
+ inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+ /*
+ * Running executables in the filesystem may result in
+ * a read request with kernel buffer as opposed to user buffer.
+ */
+ if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+ in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
+ pdata_off, rpinfo->rp_data, pdata_len);
+ } else {
+ char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+ req->tc->pkbuf;
+ in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
+ pbuf, req->tc->pbuf_size);
+ }
+ in += inp;
+ } else {
+ in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
+ client->msize);
+ }
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
if (err < 0) {
@@ -246,6 +362,8 @@ req_retry:
P9_DPRINTK(P9_DEBUG_TRANS,
"9p debug: "
"virtio rpc add_buf returned failure");
+ if (rpinfo && rpinfo->rp_alloc)
+ kfree(rpinfo);
return -EIO;
}
}
@@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = {
.request = p9_virtio_request,
.cancel = p9_virtio_cancel,
.maxsize = PAGE_SIZE*16,
+ .pref = P9_TRANS_PREF_PAYLOAD_SEP,
.def = 0,
.owner = THIS_MODULE,
};
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
int fd = fdp[i];
struct file *file;
- if (fd < 0 || !(file = fget(fd)))
+ if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d65..cce19f95c624 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
rds_ib_sysctl_exit();
rds_ib_recv_exit();
rds_trans_unregister(&rds_ib_transport);
- rds_ib_fmr_exit();
}
struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
INIT_LIST_HEAD(&rds_ib_devices);
- ret = rds_ib_fmr_init();
- if (ret)
- goto out;
-
ret = ib_register_client(&rds_ib_client);
if (ret)
- goto out_fmr_exit;
+ goto out;
ret = rds_ib_sysctl_init();
if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
rds_ib_sysctl_exit();
out_ibreg:
rds_ib_unregister_client();
-out_fmr_exit:
- rds_ib_fmr_exit();
out:
return ret;
}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66d..4297d92788dc 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
void rds_ib_sync_mr(void *trans_private, int dir);
void rds_ib_free_mr(void *trans_private, int invalidate);
void rds_ib_flush_mrs(void);
-int rds_ib_fmr_init(void);
-void rds_ib_fmr_exit(void);
/* ib_recv.c */
int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c8..819c35a0d9cb 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
#include "ib.h"
#include "xlist.h"
-static struct workqueue_struct *rds_ib_fmr_wq;
-
static DEFINE_PER_CPU(unsigned long, clean_list_grace);
#define CLEAN_LIST_BUSY_BIT 0
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
int err = 0, iter = 0;
if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
while (1) {
ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
return ret;
}
-int rds_ib_fmr_init(void)
-{
- rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
- if (!rds_ib_fmr_wq)
- return -ENOMEM;
- return 0;
-}
-
-/*
- * By the time this is called all the IB devices should have been torn down and
- * had their pools freed. As each pool is freed its work struct is waited on,
- * so the pool flushing work queue should be idle by the time we get here.
- */
-void rds_ib_fmr_exit(void)
-{
- destroy_workqueue(rds_ib_fmr_wq);
-}
-
static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
{
struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
/* If we've pinned too many pages, request a flush */
if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
if (invalidate) {
if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
} else {
/* We get here if the user created a MR marked
* as use_once and invalidate at the same time. */
- queue_delayed_work(rds_ib_fmr_wq,
- &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
}
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 59e599498e37..3fc8624fcd17 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -955,7 +955,7 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 437a99e560e1..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* Get the parent directory, calculate the hash for last
* component.
*/
- err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
+ err = kern_path_parent(sunaddr->sun_path, &nd);
if (err)
goto out_mknod_parent;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
/*
* Socket ?
*/
- if (S_ISSOCK(inode->i_mode)) {
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
struct sock *s = sock->sk;