summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c17
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c17
-rw-r--r--fs/ceph/mds_client.c13
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/ceph/super.c61
-rw-r--r--fs/ceph/super.h9
-rw-r--r--fs/ceph/xattr.c22
8 files changed, 106 insertions, 46 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f916cd7b1918..f5d9835264aa 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -933,6 +933,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ /* remove from inode's cap rbtree, and clear auth cap */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
/* remove from session list */
spin_lock(&session->s_cap_lock);
if (session->s_cap_iterator == cap) {
@@ -968,11 +973,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
spin_unlock(&session->s_cap_lock);
- /* remove from inode list */
- rb_erase(&cap->ci_node, &ci->i_caps);
- if (ci->i_auth_cap == cap)
- ci->i_auth_cap = NULL;
-
if (removed)
ceph_put_cap(mdsc, cap);
@@ -1081,20 +1081,23 @@ static int send_cap_msg(struct ceph_mds_session *session,
}
/*
- * Queue cap releases when an inode is dropped from our cache. Since
- * inode is about to be destroyed, there is no need for i_ceph_lock.
+ * Queue cap releases when an inode is dropped from our cache.
*/
void ceph_queue_caps_release(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct rb_node *p;
+ /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
+ * may call __ceph_caps_issued_mask() on a freeing inode. */
+ spin_lock(&ci->i_ceph_lock);
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
p = rb_next(p);
__ceph_remove_cap(cap, true);
}
+ spin_unlock(&ci->i_ceph_lock);
}
/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index cec25691cbae..2ffc7fe8da52 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1471,6 +1471,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1478,8 +1479,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 30d9d9e7057d..049cff197d2a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -523,6 +523,7 @@ static void ceph_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ceph_inode_info *ci = ceph_inode(inode);
+ kfree(ci->i_symlink);
kmem_cache_free(ceph_inode_cachep, ci);
}
@@ -554,7 +555,6 @@ void ceph_destroy_inode(struct inode *inode)
ceph_put_snap_realm(mdsc, realm);
}
- kfree(ci->i_symlink);
while ((n = rb_first(&ci->i_fragtree)) != NULL) {
frag = rb_entry(n, struct ceph_inode_frag, node);
rb_erase(n, &ci->i_fragtree);
@@ -741,6 +741,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
int issued = 0, implemented, new_issued;
struct timespec mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
+ struct ceph_buffer *old_blob = NULL;
struct ceph_string *pool_ns = NULL;
struct ceph_cap *new_cap = NULL;
int err = 0;
@@ -799,7 +800,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
ci->i_version = le64_to_cpu(info->version);
inode->i_version++;
inode->i_rdev = le32_to_cpu(info->rdev);
- inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+ /* directories have fl_stripe_unit set to zero */
+ if (le32_to_cpu(info->layout.fl_stripe_unit))
+ inode->i_blkbits =
+ fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+ else
+ inode->i_blkbits = CEPH_BLOCK_SHIFT;
if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
(issued & CEPH_CAP_AUTH_EXCL) == 0) {
@@ -858,7 +864,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
if (ci->i_xattrs.blob)
- ceph_buffer_put(ci->i_xattrs.blob);
+ old_blob = ci->i_xattrs.blob;
ci->i_xattrs.blob = xattr_blob;
if (xattr_blob)
memcpy(ci->i_xattrs.blob->vec.iov_base,
@@ -1004,8 +1010,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
out:
if (new_cap)
ceph_put_cap(mdsc, new_cap);
- if (xattr_blob)
- ceph_buffer_put(xattr_blob);
+ ceph_buffer_put(old_blob);
+ ceph_buffer_put(xattr_blob);
ceph_put_string(pool_ns);
return err;
}
@@ -1624,7 +1630,6 @@ retry_lookup:
if (IS_ERR(realdn)) {
err = PTR_ERR(realdn);
d_drop(dn);
- dn = NULL;
goto next_item;
}
dn = realdn;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6cbd0d805c9d..3139fbd4c34e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1187,6 +1187,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -3401,7 +3410,9 @@ static void delayed_work(struct work_struct *work)
pr_info("mds%d hung\n", s->s_mds);
}
}
- if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+ if (s->s_state == CEPH_MDS_SESSION_NEW ||
+ s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+ s->s_state == CEPH_MDS_SESSION_REJECTED) {
/* this mds is failed or recovering, just wait */
ceph_put_mds_session(s);
continue;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 411e9df0d40e..3a76ae001360 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -563,7 +563,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2a8903025853..ec1640f3167b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -85,7 +85,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-
static int ceph_sync_fs(struct super_block *sb, int wait)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
@@ -178,6 +177,26 @@ static match_table_t fsopt_tokens = {
{-1, NULL}
};
+/*
+ * Remove adjacent slashes and then the trailing slash, unless it is
+ * the only remaining character.
+ *
+ * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
+ */
+static void canonicalize_path(char *path)
+{
+ int i, j = 0;
+
+ for (i = 0; path[i] != '\0'; i++) {
+ if (path[i] != '/' || j < 1 || path[j - 1] != '/')
+ path[j++] = path[i];
+ }
+
+ if (j > 1 && path[j - 1] == '/')
+ j--;
+ path[j] = '\0';
+}
+
static int parse_fsopt_token(char *c, void *private)
{
struct ceph_mount_options *fsopt = private;
@@ -337,6 +356,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
if (ret)
return ret;
+
ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
if (ret)
return ret;
@@ -396,13 +416,17 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
*/
dev_name_end = strchr(dev_name, '/');
if (dev_name_end) {
- if (strlen(dev_name_end) > 1) {
- fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
- if (!fsopt->server_path) {
- err = -ENOMEM;
- goto out;
- }
+ /*
+ * The server_path will include the whole chars from userland
+ * including the leading '/'.
+ */
+ fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
+ if (!fsopt->server_path) {
+ err = -ENOMEM;
+ goto out;
}
+
+ canonicalize_path(fsopt->server_path);
} else {
dev_name_end = dev_name + strlen(dev_name);
}
@@ -725,7 +749,6 @@ static void destroy_caches(void)
ceph_fscache_unregister();
}
-
/*
* ceph_umount_begin - initiate forced umount. Tear down down the
* mount, skipping steps that may hang while waiting for server(s).
@@ -742,6 +765,12 @@ static void ceph_umount_begin(struct super_block *sb)
return;
}
+static int ceph_remount(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+ return 0;
+}
+
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
@@ -750,6 +779,7 @@ static const struct super_operations ceph_super_ops = {
.evict_inode = ceph_evict_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
+ .remount_fs = ceph_remount,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
.umount_begin = ceph_umount_begin,
@@ -805,9 +835,6 @@ out:
return root;
}
-
-
-
/*
* mount: join the ceph cluster, and open root directory.
*/
@@ -821,18 +848,14 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
mutex_lock(&fsc->client->mount_mutex);
if (!fsc->sb->s_root) {
- const char *path;
+ const char *path = fsc->mount_options->server_path ?
+ fsc->mount_options->server_path + 1 : "";
+
err = __ceph_open_session(fsc->client, started);
if (err < 0)
goto out;
- if (!fsc->mount_options->server_path) {
- path = "";
- dout("mount opening path \\t\n");
- } else {
- path = fsc->mount_options->server_path + 1;
- dout("mount opening path %s\n", path);
- }
+ dout("mount opening path '%s'\n", path);
err = ceph_fs_debugfs_init(fsc);
if (err < 0)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 622d5dd9f616..9f18635f78c7 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -70,7 +70,7 @@ struct ceph_mount_options {
char *snapdir_name; /* default ".snap" */
char *mds_namespace; /* default NULL */
- char *server_path; /* default "/" */
+ char *server_path; /* default NULL (means "/") */
};
struct ceph_fs_client {
@@ -476,7 +476,12 @@ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
long long release_count,
long long ordered_count)
{
- smp_mb__before_atomic();
+ /*
+ * Makes sure operations that setup readdir cache (update page
+ * cache and i_size) are strongly ordered w.r.t. the following
+ * atomic64_set() operations.
+ */
+ smp_mb();
atomic64_set(&ci->i_complete_seq[0], release_count);
atomic64_set(&ci->i_complete_seq[1], ordered_count);
}
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 75267cdd5dfd..18b999deed03 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -74,7 +74,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
const char *ns_field = " pool_namespace=";
char buf[128];
size_t len, total_len = 0;
- int ret;
+ ssize_t ret;
pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
@@ -98,11 +98,8 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
if (pool_ns)
total_len += strlen(ns_field) + pool_ns->len;
- if (!size) {
- ret = total_len;
- } else if (total_len > size) {
- ret = -ERANGE;
- } else {
+ ret = total_len;
+ if (size >= total_len) {
memcpy(val, buf, len);
ret = len;
if (pool_name) {
@@ -757,8 +754,11 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
vxattr = ceph_match_vxattr(inode, name);
if (vxattr) {
err = -ENODATA;
- if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
+ if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
err = vxattr->getxattr_cb(ci, value, size);
+ if (size && size < err)
+ err = -ERANGE;
+ }
return err;
}
@@ -951,6 +951,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_cap_flush *prealloc_cf = NULL;
+ struct ceph_buffer *old_blob = NULL;
int issued;
int err;
int dirty = 0;
@@ -1019,13 +1020,15 @@ retry:
struct ceph_buffer *blob;
spin_unlock(&ci->i_ceph_lock);
- dout(" preaallocating new blob size=%d\n", required_blob_size);
+ ceph_buffer_put(old_blob); /* Shouldn't be required */
+ dout(" pre-allocating new blob size=%d\n", required_blob_size);
blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
if (!blob)
goto do_sync_unlocked;
spin_lock(&ci->i_ceph_lock);
+ /* prealloc_blob can't be released while holding i_ceph_lock */
if (ci->i_xattrs.prealloc_blob)
- ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ old_blob = ci->i_xattrs.prealloc_blob;
ci->i_xattrs.prealloc_blob = blob;
goto retry;
}
@@ -1041,6 +1044,7 @@ retry:
}
spin_unlock(&ci->i_ceph_lock);
+ ceph_buffer_put(old_blob);
if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem);
if (dirty)