diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 17 | ||||
-rw-r--r-- | fs/ceph/dir.c | 6 | ||||
-rw-r--r-- | fs/ceph/inode.c | 17 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 13 | ||||
-rw-r--r-- | fs/ceph/snap.c | 7 | ||||
-rw-r--r-- | fs/ceph/super.c | 61 | ||||
-rw-r--r-- | fs/ceph/super.h | 9 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 22 |
8 files changed, 106 insertions, 46 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f916cd7b1918..f5d9835264aa 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -933,6 +933,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + /* remove from inode's cap rbtree, and clear auth cap */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -968,11 +973,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_unlock(&session->s_cap_lock); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - if (removed) ceph_put_cap(mdsc, cap); @@ -1081,20 +1081,23 @@ static int send_cap_msg(struct ceph_mds_session *session, } /* - * Queue cap releases when an inode is dropped from our cache. Since - * inode is about to be destroyed, there is no need for i_ceph_lock. + * Queue cap releases when an inode is dropped from our cache. */ void ceph_queue_caps_release(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct rb_node *p; + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) + * may call __ceph_caps_issued_mask() on a freeing inode. */ + spin_lock(&ci->i_ceph_lock); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); __ceph_remove_cap(cap, true); } + spin_unlock(&ci->i_ceph_lock); } /* diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index cec25691cbae..2ffc7fe8da52 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1471,6 +1471,7 @@ void ceph_dentry_lru_del(struct dentry *dn) unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { struct ceph_inode_info *dci = ceph_inode(dir); + unsigned hash; switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ @@ -1478,8 +1479,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) return dn->d_name.hash; default: - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + spin_lock(&dn->d_lock); + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, dn->d_name.name, dn->d_name.len); + spin_unlock(&dn->d_lock); + return hash; } } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 30d9d9e7057d..049cff197d2a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -523,6 +523,7 @@ static void ceph_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct ceph_inode_info *ci = ceph_inode(inode); + kfree(ci->i_symlink); kmem_cache_free(ceph_inode_cachep, ci); } @@ -554,7 +555,6 @@ void ceph_destroy_inode(struct inode *inode) ceph_put_snap_realm(mdsc, realm); } - kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); rb_erase(n, &ci->i_fragtree); @@ -741,6 +741,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; + struct ceph_buffer *old_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; int err = 0; @@ -799,7 +800,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + /* directories have fl_stripe_unit set to zero */ + if (le32_to_cpu(info->layout.fl_stripe_unit)) + inode->i_blkbits = + fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + else + inode->i_blkbits = CEPH_BLOCK_SHIFT; if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { @@ -858,7 +864,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { if (ci->i_xattrs.blob) - ceph_buffer_put(ci->i_xattrs.blob); + old_blob = ci->i_xattrs.blob; ci->i_xattrs.blob = xattr_blob; if (xattr_blob) memcpy(ci->i_xattrs.blob->vec.iov_base, @@ -1004,8 +1010,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, out: if (new_cap) ceph_put_cap(mdsc, new_cap); - if (xattr_blob) - ceph_buffer_put(xattr_blob); + ceph_buffer_put(old_blob); + ceph_buffer_put(xattr_blob); ceph_put_string(pool_ns); return err; } @@ -1624,7 +1630,6 @@ retry_lookup: if (IS_ERR(realdn)) { err = PTR_ERR(realdn); d_drop(dn); - dn = NULL; goto next_item; } dn = realdn; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6cbd0d805c9d..3139fbd4c34e 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1187,6 +1187,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (drop && + ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { @@ -3401,7 +3410,9 @@ static void delayed_work(struct work_struct *work) pr_info("mds%d hung\n", s->s_mds); } } - if (s->s_state < CEPH_MDS_SESSION_OPEN) { + if (s->s_state == CEPH_MDS_SESSION_NEW || + s->s_state == CEPH_MDS_SESSION_RESTARTING || + s->s_state == CEPH_MDS_SESSION_REJECTED) { /* this mds is failed or recovering, just wait */ ceph_put_mds_session(s); continue; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 411e9df0d40e..3a76ae001360 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -563,7 +563,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) old_snapc = NULL; update_snapc: - if (ci->i_head_snapc) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ci->i_head_snapc = NULL; + } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); dout(" new snapc is %p\n", new_snapc); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2a8903025853..ec1640f3167b 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -85,7 +85,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); @@ -178,6 +177,26 @@ static match_table_t fsopt_tokens = { {-1, NULL} }; +/* + * Remove adjacent slashes and then the trailing slash, unless it is + * the only remaining character. + * + * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". + */ +static void canonicalize_path(char *path) +{ + int i, j = 0; + + for (i = 0; path[i] != '\0'; i++) { + if (path[i] != '/' || j < 1 || path[j - 1] != '/') + path[j++] = path[i]; + } + + if (j > 1 && path[j - 1] == '/') + j--; + path[j] = '\0'; +} + static int parse_fsopt_token(char *c, void *private) { struct ceph_mount_options *fsopt = private; @@ -337,6 +356,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); if (ret) return ret; + ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; @@ -396,13 +416,17 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, */ dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { - if (strlen(dev_name_end) > 1) { - fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); - if (!fsopt->server_path) { - err = -ENOMEM; - goto out; - } + /* + * The server_path will include the whole chars from userland + * including the leading '/'. + */ + fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); + if (!fsopt->server_path) { + err = -ENOMEM; + goto out; } + + canonicalize_path(fsopt->server_path); } else { dev_name_end = dev_name + strlen(dev_name); } @@ -725,7 +749,6 @@ static void destroy_caches(void) ceph_fscache_unregister(); } - /* * ceph_umount_begin - initiate forced umount. Tear down down the * mount, skipping steps that may hang while waiting for server(s). @@ -742,6 +765,12 @@ static void ceph_umount_begin(struct super_block *sb) return; } +static int ceph_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + return 0; +} + static const struct super_operations ceph_super_ops = { .alloc_inode = ceph_alloc_inode, .destroy_inode = ceph_destroy_inode, @@ -750,6 +779,7 @@ static const struct super_operations ceph_super_ops = { .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, + .remount_fs = ceph_remount, .show_options = ceph_show_options, .statfs = ceph_statfs, .umount_begin = ceph_umount_begin, @@ -805,9 +835,6 @@ out: return root; } - - - /* * mount: join the ceph cluster, and open root directory. */ @@ -821,18 +848,14 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path; + const char *path = fsc->mount_options->server_path ? + fsc->mount_options->server_path + 1 : ""; + err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; - if (!fsc->mount_options->server_path) { - path = ""; - dout("mount opening path \\t\n"); - } else { - path = fsc->mount_options->server_path + 1; - dout("mount opening path %s\n", path); - } + dout("mount opening path '%s'\n", path); err = ceph_fs_debugfs_init(fsc); if (err < 0) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 622d5dd9f616..9f18635f78c7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -70,7 +70,7 @@ struct ceph_mount_options { char *snapdir_name; /* default ".snap" */ char *mds_namespace; /* default NULL */ - char *server_path; /* default "/" */ + char *server_path; /* default NULL (means "/") */ }; struct ceph_fs_client { @@ -476,7 +476,12 @@ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, long long release_count, long long ordered_count) { - smp_mb__before_atomic(); + /* + * Makes sure operations that setup readdir cache (update page + * cache and i_size) are strongly ordered w.r.t. the following + * atomic64_set() operations. + */ + smp_mb(); atomic64_set(&ci->i_complete_seq[0], release_count); atomic64_set(&ci->i_complete_seq[1], ordered_count); } diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 75267cdd5dfd..18b999deed03 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -74,7 +74,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, const char *ns_field = " pool_namespace="; char buf[128]; size_t len, total_len = 0; - int ret; + ssize_t ret; pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); @@ -98,11 +98,8 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, if (pool_ns) total_len += strlen(ns_field) + pool_ns->len; - if (!size) { - ret = total_len; - } else if (total_len > size) { - ret = -ERANGE; - } else { + ret = total_len; + if (size >= total_len) { memcpy(val, buf, len); ret = len; if (pool_name) { @@ -757,8 +754,11 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, vxattr = ceph_match_vxattr(inode, name); if (vxattr) { err = -ENODATA; - if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) + if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) { err = vxattr->getxattr_cb(ci, value, size); + if (size && size < err) + err = -ERANGE; + } return err; } @@ -951,6 +951,7 @@ int __ceph_setxattr(struct inode *inode, const char *name, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; + struct ceph_buffer *old_blob = NULL; int issued; int err; int dirty = 0; @@ -1019,13 +1020,15 @@ retry: struct ceph_buffer *blob; spin_unlock(&ci->i_ceph_lock); - dout(" preaallocating new blob size=%d\n", required_blob_size); + ceph_buffer_put(old_blob); /* Shouldn't be required */ + dout(" pre-allocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); + /* prealloc_blob can't be released while holding i_ceph_lock */ if (ci->i_xattrs.prealloc_blob) - ceph_buffer_put(ci->i_xattrs.prealloc_blob); + old_blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = blob; goto retry; } @@ -1041,6 +1044,7 @@ retry: } spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); if (dirty) |