summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:25:24 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:25:24 -0800
commita1703154200c390ab03c10224c586e815d3e31e8 (patch)
treedf90865eed3cfdf7af8664b5453a90e09d17480a /fs/ceph
parent67b5ad9a63caa2ce56ddd2b22b802dae00d72c13 (diff)
parent766fc43973b16f9becb6b7402b3e052dbb84adee (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: rbd: fix cleanup when trying to mount inexistent image net/ceph: make ceph_msgr_wq non-reentrant ceph: fsc->*_wq's aren't used in memory reclaim path ceph: Always free allocated memory in osdmap_decode() ceph: Makefile: Remove unnessary code ceph: associate requests with opening sessions ceph: drop redundant r_mds field ceph: implement DIRLAYOUTHASH feature to get dir layout from MDS ceph: add dir_layout to inode
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/Makefile23
-rw-r--r--fs/ceph/debugfs.c9
-rw-r--r--fs/ceph/dir.c20
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/mds_client.c56
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/super.c13
-rw-r--r--fs/ceph/super.h2
9 files changed, 80 insertions, 51 deletions
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 9e6c4f2e8ff1..bd352125e829 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -2,31 +2,10 @@
# Makefile for CEPH filesystem.
#
-ifneq ($(KERNELRELEASE),)
-
obj-$(CONFIG_CEPH_FS) += ceph.o
-ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
+ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
export.o caps.o snap.o xattr.o \
mds_client.o mdsmap.o strings.o ceph_frag.o \
debugfs.o
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules
-
-modules_install:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules_install
-
-clean:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7ae1b3d55b58..08f65faac112 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -60,10 +60,13 @@ static int mdsc_show(struct seq_file *s, void *p)
for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
req = rb_entry(rp, struct ceph_mds_request, r_node);
- if (req->r_request)
- seq_printf(s, "%lld\tmds%d\t", req->r_tid, req->r_mds);
- else
+ if (req->r_request && req->r_session)
+ seq_printf(s, "%lld\tmds%d\t", req->r_tid,
+ req->r_session->s_mds);
+ else if (!req->r_request)
seq_printf(s, "%lld\t(no request)\t", req->r_tid);
+ else
+ seq_printf(s, "%lld\t(no session)\t", req->r_tid);
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index fa7ca04ee816..0bc68de8edd7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1224,6 +1224,26 @@ void ceph_dentry_lru_del(struct dentry *dn)
}
}
+/*
+ * Return name hash for a given dentry. This is dependent on
+ * the parent directory's hash function.
+ */
+unsigned ceph_dentry_hash(struct dentry *dn)
+{
+ struct inode *dir = dn->d_parent->d_inode;
+ struct ceph_inode_info *dci = ceph_inode(dir);
+
+ switch (dci->i_dir_layout.dl_dir_hash) {
+ case 0: /* for backward compat */
+ case CEPH_STR_HASH_LINUX:
+ return dn->d_name.hash;
+
+ default:
+ return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ dn->d_name.name, dn->d_name.len);
+ }
+}
+
const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir,
.readdir = ceph_readdir,
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 2297d9426992..e41056174bf8 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode);
- cfh->parent_name_hash = parent->d_name.hash;
+ cfh->parent_name_hash = ceph_dentry_hash(parent);
*max_len = connected_handle_length;
type = 2;
} else if (*max_len >= handle_length) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e61de4f7b99d..e835eff551e3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_release_count = 0;
ci->i_symlink = NULL;
+ memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+
ci->i_fragtree = RB_ROOT;
mutex_init(&ci->i_fragtree_mutex);
@@ -689,6 +691,8 @@ static int fill_inode(struct inode *inode,
inode->i_op = &ceph_dir_iops;
inode->i_fop = &ceph_dir_fops;
+ ci->i_dir_layout = iinfo->dir_layout;
+
ci->i_files = le64_to_cpu(info->files);
ci->i_subdirs = le64_to_cpu(info->subdirs);
ci->i_rbytes = le64_to_cpu(info->rbytes);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a50fca1e03be..1e30d194a8e3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -60,7 +60,8 @@ static const struct ceph_connection_operations mds_con_ops;
* parse individual inode info
*/
static int parse_reply_info_in(void **p, void *end,
- struct ceph_mds_reply_info_in *info)
+ struct ceph_mds_reply_info_in *info,
+ int features)
{
int err = -EIO;
@@ -74,6 +75,12 @@ static int parse_reply_info_in(void **p, void *end,
info->symlink = *p;
*p += info->symlink_len;
+ if (features & CEPH_FEATURE_DIRLAYOUTHASH)
+ ceph_decode_copy_safe(p, end, &info->dir_layout,
+ sizeof(info->dir_layout), bad);
+ else
+ memset(&info->dir_layout, 0, sizeof(info->dir_layout));
+
ceph_decode_32_safe(p, end, info->xattr_len, bad);
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
@@ -88,12 +95,13 @@ bad:
* target inode.
*/
static int parse_reply_info_trace(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
int err;
if (info->head->is_dentry) {
- err = parse_reply_info_in(p, end, &info->diri);
+ err = parse_reply_info_in(p, end, &info->diri, features);
if (err < 0)
goto out_bad;
@@ -114,7 +122,7 @@ static int parse_reply_info_trace(void **p, void *end,
}
if (info->head->is_target) {
- err = parse_reply_info_in(p, end, &info->targeti);
+ err = parse_reply_info_in(p, end, &info->targeti, features);
if (err < 0)
goto out_bad;
}
@@ -134,7 +142,8 @@ out_bad:
* parse readdir results
*/
static int parse_reply_info_dir(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
u32 num, i = 0;
int err;
@@ -182,7 +191,7 @@ static int parse_reply_info_dir(void **p, void *end,
*p += sizeof(struct ceph_mds_reply_lease);
/* inode */
- err = parse_reply_info_in(p, end, &info->dir_in[i]);
+ err = parse_reply_info_in(p, end, &info->dir_in[i], features);
if (err < 0)
goto out_bad;
i++;
@@ -205,7 +214,8 @@ out_bad:
* parse fcntl F_GETLK results
*/
static int parse_reply_info_filelock(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
if (*p + sizeof(*info->filelock_reply) > end)
goto bad;
@@ -225,19 +235,21 @@ bad:
* parse extra results
*/
static int parse_reply_info_extra(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
- return parse_reply_info_filelock(p, end, info);
+ return parse_reply_info_filelock(p, end, info, features);
else
- return parse_reply_info_dir(p, end, info);
+ return parse_reply_info_dir(p, end, info, features);
}
/*
* parse entire mds reply
*/
static int parse_reply_info(struct ceph_msg *msg,
- struct ceph_mds_reply_info_parsed *info)
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
{
void *p, *end;
u32 len;
@@ -250,7 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg,
/* trace */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
- err = parse_reply_info_trace(&p, p+len, info);
+ err = parse_reply_info_trace(&p, p+len, info, features);
if (err < 0)
goto out_bad;
}
@@ -258,7 +270,7 @@ static int parse_reply_info(struct ceph_msg *msg,
/* extra */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
- err = parse_reply_info_extra(&p, p+len, info);
+ err = parse_reply_info_extra(&p, p+len, info, features);
if (err < 0)
goto out_bad;
}
@@ -654,7 +666,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else {
/* dir + name */
inode = dir;
- hash = req->r_dentry->d_name.hash;
+ hash = ceph_dentry_hash(req->r_dentry);
is_hash = true;
}
}
@@ -1693,7 +1705,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
struct ceph_msg *msg;
int flags = 0;
- req->r_mds = mds;
req->r_attempts++;
if (req->r_inode) {
struct ceph_cap *cap =
@@ -1780,6 +1791,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
goto finish;
}
+ put_request_session(req);
+
mds = __choose_mds(mdsc, req);
if (mds < 0 ||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
@@ -1797,6 +1810,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
goto finish;
}
}
+ req->r_session = get_session(session);
+
dout("do_request mds%d session %p state %s\n", mds, session,
session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1809,7 +1824,6 @@ static int __do_request(struct ceph_mds_client *mdsc,
}
/* send request */
- req->r_session = get_session(session);
req->r_resend_mds = -1; /* forget any previous mds hint */
if (req->r_request_started == 0) /* note request start time */
@@ -1863,7 +1877,6 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
if (req->r_session &&
req->r_session->s_mds == mds) {
dout(" kicking tid %llu\n", req->r_tid);
- put_request_session(req);
__do_request(mdsc, req);
}
}
@@ -2056,8 +2069,11 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
goto out;
} else {
struct ceph_inode_info *ci = ceph_inode(req->r_inode);
- struct ceph_cap *cap =
- ceph_get_cap_for_mds(ci, req->r_mds);;
+ struct ceph_cap *cap = NULL;
+
+ if (req->r_session)
+ cap = ceph_get_cap_for_mds(ci,
+ req->r_session->s_mds);
dout("already using auth");
if ((!cap || cap != ci->i_auth_cap) ||
@@ -2101,7 +2117,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
dout("handle_reply tid %lld result %d\n", tid, result);
rinfo = &req->r_reply_info;
- err = parse_reply_info(msg, rinfo);
+ err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index aabe563b54db..4e3a9cc0bba6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -35,6 +35,7 @@ struct ceph_cap;
*/
struct ceph_mds_reply_info_in {
struct ceph_mds_reply_inode *in;
+ struct ceph_dir_layout dir_layout;
u32 symlink_len;
char *symlink;
u32 xattr_len;
@@ -165,7 +166,6 @@ struct ceph_mds_request {
struct ceph_mds_client *r_mdsc;
int r_op; /* mds op code */
- int r_mds;
/* operation on what? */
struct inode *r_inode; /* arg1 */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 08b460ae0539..bf6f0f34082a 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -428,7 +428,8 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->supported_features |= CEPH_FEATURE_FLOCK;
+ fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
+ CEPH_FEATURE_DIRLAYOUTHASH;
fsc->client->monc.want_mdsmap = 1;
fsc->mount_options = fsopt;
@@ -443,13 +444,17 @@ struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail_client;
err = -ENOMEM;
- fsc->wb_wq = create_workqueue("ceph-writeback");
+ /*
+ * The number of concurrent works can be high but they don't need
+ * to be processed in parallel, limit concurrency.
+ */
+ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
if (fsc->wb_wq == NULL)
goto fail_bdi;
- fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
+ fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
if (fsc->pg_inv_wq == NULL)
goto fail_wb_wq;
- fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc");
+ fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
if (fsc->trunc_wq == NULL)
goto fail_pg_inv_wq;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 4553d8829edb..20b907d76ae2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -239,6 +239,7 @@ struct ceph_inode_info {
unsigned i_ceph_flags;
unsigned long i_release_count;
+ struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout;
char *i_symlink;
@@ -768,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
+extern unsigned ceph_dentry_hash(struct dentry *dn);
/*
* our d_ops vary depending on whether the inode is live,