summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorVarun Wadekar <vwadekar@nvidia.com>2012-08-13 14:23:04 +0530
committerVarun Wadekar <vwadekar@nvidia.com>2012-08-13 14:23:04 +0530
commit82cfe176157cf926133dea3a8879135bf20b9af2 (patch)
tree8efe310a6dd0d411399ce299aecfaf3bd96de393 /fs
parent564e6b431e9696d6b7186d120d563a74ae290092 (diff)
parent8067fa23092f2c4e18c29c90f7d5bb765dbf8954 (diff)
Merge commit 'v3.4.8' into android-t114-3.4-rebased
Linux v3.4.8 Conflicts: drivers/net/tun.c kernel/power/suspend.c Change-Id: Ia26546425cd20f127dbf4dd58cfca41bda47d23d Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/async-thread.c9
-rw-r--r--fs/cifs/cifssmb.c30
-rw-r--r--fs/cifs/connect.c27
-rw-r--r--fs/cifs/readdir.c7
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exofs/ore.c8
-rw-r--r--fs/exofs/ore_raid.c67
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/bitmap.c12
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/extents.c46
-rw-r--r--fs/ext4/ialloc.c3
-rw-r--r--fs/ext4/inode.c41
-rw-r--r--fs/ext4/ioctl.c1
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c174
-rw-r--r--fs/locks.c6
-rw-r--r--fs/nfs/file.c7
-rw-r--r--fs/nfs/idmap.c26
-rw-r--r--fs/nfs/objlayout/objio_osd.c25
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/select.c10
-rw-r--r--fs/ubifs/sb.c8
-rw-r--r--fs/udf/super.c2
24 files changed, 364 insertions, 165 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 42704149b723..58b7d14b08ee 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
work->ordered_func(work);
- /* now take the lock again and call the freeing code */
+ /* now take the lock again and drop our item from the list */
spin_lock(&workers->order_lock);
list_del(&work->order_list);
+ spin_unlock(&workers->order_lock);
+
+ /*
+ * we don't want to call the ordered free functions
+ * with the lock held though
+ */
work->ordered_free(work);
+ spin_lock(&workers->order_lock);
}
spin_unlock(&workers->order_lock);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6b79efd7d75f..3a75ee5a6b33 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -89,6 +89,32 @@ static struct {
/* Forward declarations */
static void cifs_readv_complete(struct work_struct *work);
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+
+static inline void
+cifs_kmap_lock(void)
+{
+ mutex_lock(&cifs_kmap_mutex);
+}
+
+static inline void
+cifs_kmap_unlock(void)
+{
+ mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
+
/* Mark as invalid, all open files on tree connections since they
were closed when session to server was lost */
static void mark_open_files_invalid(struct cifs_tcon *pTcon)
@@ -1557,6 +1583,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
+ cifs_kmap_lock();
list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
if (remaining >= PAGE_CACHE_SIZE) {
/* enough data to fill the page */
@@ -1606,6 +1633,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
page_cache_release(page);
}
}
+ cifs_kmap_unlock();
/* issue the read if we have any iovecs left to fill */
if (rdata->nr_iov > 1) {
@@ -2194,7 +2222,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
* and set the iov_len properly for each one. It may also set
* wdata->bytes too.
*/
+ cifs_kmap_lock();
wdata->marshal_iov(iov, wdata);
+ cifs_kmap_unlock();
cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 402fa0f9bc01..65a78e9a5d40 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -238,8 +238,8 @@ static const match_table_t cifs_mount_option_tokens = {
enum {
Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
Opt_sec_ntlmsspi, Opt_sec_ntlmssp,
- Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2i,
- Opt_sec_nontlm, Opt_sec_lanman,
+ Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2,
+ Opt_sec_ntlmv2i, Opt_sec_lanman,
Opt_sec_none,
Opt_sec_err
@@ -253,8 +253,9 @@ static const match_table_t cifs_secflavor_tokens = {
{ Opt_sec_ntlmssp, "ntlmssp" },
{ Opt_ntlm, "ntlm" },
{ Opt_sec_ntlmi, "ntlmi" },
+ { Opt_sec_ntlmv2, "nontlm" },
+ { Opt_sec_ntlmv2, "ntlmv2" },
{ Opt_sec_ntlmv2i, "ntlmv2i" },
- { Opt_sec_nontlm, "nontlm" },
{ Opt_sec_lanman, "lanman" },
{ Opt_sec_none, "none" },
@@ -1163,7 +1164,7 @@ static int cifs_parse_security_flavors(char *value,
case Opt_sec_ntlmi:
vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN;
break;
- case Opt_sec_nontlm:
+ case Opt_sec_ntlmv2:
vol->secFlg |= CIFSSEC_MAY_NTLMV2;
break;
case Opt_sec_ntlmv2i:
@@ -3348,6 +3349,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
+ * larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT (1<<24)
+#endif /* CONFIG_HIGHMEM */
+
static unsigned int
cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
{
@@ -3378,6 +3391,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
wsize = min_t(unsigned int, wsize,
server->maxBuf - sizeof(WRITE_REQ) + 4);
+ /* limit to the amount that we can kmap at once */
+ wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
+
/* hard limit of CIFS_MAX_WSIZE */
wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
@@ -3419,6 +3435,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
if (!(server->capabilities & CAP_LARGE_READ_X))
rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
+ /* limit to the amount that we can kmap at once */
+ rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
+
/* hard limit of CIFS_MAX_RSIZE */
rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
dentry = d_lookup(parent, name);
if (dentry) {
- /* FIXME: check for inode number changes? */
- if (dentry->d_inode != NULL)
+ inode = dentry->d_inode;
+ /* update inode in place if i_ino didn't change */
+ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+ cifs_fattr_to_inode(inode, fattr);
return dentry;
+ }
d_drop(dentry);
dput(dentry);
}
diff --git a/fs/exec.c b/fs/exec.c
index 421e0815f848..9ba382d7e726 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1024,7 +1024,7 @@ static void flush_old_files(struct files_struct * files)
unsigned long set, i;
j++;
- i = j * __NFDBITS;
+ i = j * BITS_PER_LONG;
fdt = files_fdtable(files);
if (i >= fdt->max_fds)
break;
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
out:
ios->numdevs = devs_in_group;
ios->pages_consumed = cur_pg;
- if (unlikely(ret)) {
- if (length == ios->length)
- return ret;
- else
- ios->length -= length;
- }
- return 0;
+ return ret;
}
int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..fff2070c6751 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
* ios->sp2d[p][*], xor is calculated the same way. These pages are
* allocated/freed and don't go through cache
*/
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
{
- struct ore_io_state *ios_read;
struct ore_striping_info read_si;
struct __stripe_pages_2d *sp2d = ios->sp2d;
u64 offset = ios->si.first_stripe_start;
- u64 last_stripe_end;
- unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
- int ret;
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
if (offset == ios->offset) /* Go to start collect $200 */
goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
+ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+ offset, ios->offset, min_p, max_p);
+
for (c = 0; ; c++) {
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
}
read_last_stripe:
+ return 0;
+}
+
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+ struct ore_striping_info read_si;
+ struct __stripe_pages_2d *sp2d = ios->sp2d;
+ u64 offset;
+ u64 last_stripe_end;
+ unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+
offset = ios->offset + ios->length;
if (offset % PAGE_SIZE)
_add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
- /* unaligned IO must be within a single stripe */
-
if (min_p == sp2d->pages_in_unit) {
/* Didn't do it yet */
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
}
+ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+ offset, last_stripe_end, min_p, max_p);
+
while (offset < last_stripe_end) {
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
@@ -568,6 +579,15 @@ read_last_stripe:
}
read_it:
+ return 0;
+}
+
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+ struct ore_io_state *ios_read;
+ unsigned i;
+ int ret;
+
ios_read = ios->ios_read_4_write;
if (!ios_read)
return 0;
@@ -591,6 +611,8 @@ read_it:
}
_mark_read4write_pages_uptodate(ios_read, ret);
+ ore_put_io_state(ios_read);
+ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
return 0;
}
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
/* If first stripe, Read in all read4write pages
* (if needed) before we calculate the first parity.
*/
- _read_4_write(ios);
+ _read_4_write_first_stripe(ios);
}
+ if (!cur_len) /* If last stripe r4w pages of last stripe */
+ _read_4_write_last_stripe(ios);
+ _read_4_write_execute(ios);
for (i = 0; i < num_pages; i++) {
pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
{
- struct ore_layout *layout = ios->layout;
-
if (ios->parity_pages) {
+ struct ore_layout *layout = ios->layout;
unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
- unsigned stripe_size = ios->si.bytes_in_stripe;
- u64 last_stripe, first_stripe;
if (_sp2d_alloc(pages_in_unit, layout->group_width,
layout->parity, &ios->sp2d)) {
return -ENOMEM;
}
-
- /* Round io down to last full strip */
- first_stripe = div_u64(ios->offset, stripe_size);
- last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-
- /* If an IO spans more then a single stripe it must end at
- * a stripe boundary. The reminder at the end is pushed into the
- * next IO.
- */
- if (last_stripe != first_stripe) {
- ios->length = last_stripe * stripe_size - ios->offset;
-
- BUG_ON(!ios->length);
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
- PAGE_SIZE;
- ios->si.length = ios->length; /*make it consistent */
- }
}
return 0;
}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8da837be0c82..df76291d692b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -584,7 +584,8 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
if (bitmap_bh == NULL)
continue;
- x = ext4_count_free(bitmap_bh, sb->s_blocksize);
+ x = ext4_count_free(bitmap_bh->b_data,
+ EXT4_BLOCKS_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
i, ext4_free_group_clusters(sb, gdp), x);
bitmap_count += x;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index fa3af81ac565..bbde5d582978 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -11,21 +11,15 @@
#include <linux/jbd2.h>
#include "ext4.h"
-#ifdef EXT4FS_DEBUG
-
static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
+unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
{
unsigned int i, sum = 0;
- if (!map)
- return 0;
for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
+ sum += nibblemap[bitmap[i] & 0xf] +
+ nibblemap[(bitmap[i] >> 4) & 0xf];
return sum;
}
-#endif /* EXT4FS_DEBUG */
-
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0e01e90add8b..47d1c8cda0cb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1140,8 +1140,7 @@ struct ext4_sb_info {
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
- unsigned long s_overhead_last; /* Last calculated overhead */
- unsigned long s_blocks_last; /* Last seen block count */
+ unsigned long s_overhead; /* # of fs overhead clusters */
unsigned int s_cluster_ratio; /* Number of blocks per cluster */
unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
@@ -1783,7 +1782,7 @@ struct mmpd_data {
# define NORET_AND noreturn,
/* bitmap.c */
-extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
+extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
/* balloc.c */
extern unsigned int ext4_block_group(struct super_block *sb,
@@ -1950,6 +1949,7 @@ extern int ext4_group_extend(struct super_block *sb,
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
+extern int ext4_calculate_overhead(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index abcdeab67f52..6b7daa45f6bf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2503,10 +2503,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
{
struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode);
- struct ext4_ext_path *path;
+ struct ext4_ext_path *path = NULL;
ext4_fsblk_t partial_cluster = 0;
handle_t *handle;
- int i, err;
+ int i = 0, err;
ext_debug("truncate since %u to %u\n", start, end);
@@ -2539,8 +2539,12 @@ again:
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
- if (!ex)
+ if (!ex) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ path = NULL;
goto cont;
+ }
ee_block = le32_to_cpu(ex->ee_block);
@@ -2570,8 +2574,6 @@ again:
if (err < 0)
goto out;
}
- ext4_ext_drop_refs(path);
- kfree(path);
}
cont:
@@ -2580,19 +2582,27 @@ cont:
* after i_size and walking into the tree depth-wise.
*/
depth = ext_depth(inode);
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
- if (path == NULL) {
- ext4_journal_stop(handle);
- return -ENOMEM;
- }
- path[0].p_depth = depth;
- path[0].p_hdr = ext_inode_hdr(inode);
+ if (path) {
+ int k = i = depth;
+ while (--k > 0)
+ path[k].p_block =
+ le16_to_cpu(path[k].p_hdr->eh_entries)+1;
+ } else {
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+ GFP_NOFS);
+ if (path == NULL) {
+ ext4_journal_stop(handle);
+ return -ENOMEM;
+ }
+ path[0].p_depth = depth;
+ path[0].p_hdr = ext_inode_hdr(inode);
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
- err = -EIO;
- goto out;
+ if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+ err = -EIO;
+ goto out;
+ }
}
- i = err = 0;
+ err = 0;
while (i >= 0 && err == 0) {
if (i == depth) {
@@ -2706,8 +2716,10 @@ cont:
out:
ext4_ext_drop_refs(path);
kfree(path);
- if (err == -EAGAIN)
+ if (err == -EAGAIN) {
+ path = NULL;
goto again;
+ }
ext4_journal_stop(handle);
return err;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8900f8b2ad48..0ee374d27fdb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1013,7 +1013,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
if (!bitmap_bh)
continue;
- x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
+ x = ext4_count_free(bitmap_bh->b_data,
+ EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c77b0bd2c711..55a654d86182 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -279,6 +279,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
used = ei->i_reserved_data_blocks;
}
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
+ "with only %d reserved metadata blocks\n", __func__,
+ inode->i_ino, ei->i_allocated_meta_blocks,
+ ei->i_reserved_meta_blocks);
+ WARN_ON(1);
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
+ }
+
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
@@ -1104,6 +1113,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
+ ext4_lblk_t save_last_lblock;
+ int save_len;
+
+ /*
+ * We will charge metadata quota at writeout time; this saves
+ * us from metadata over-estimation, though we may go over by
+ * a small amount in the end. Here we just reserve for data.
+ */
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
+ if (ret)
+ return ret;
/*
* recalculate the amount of metadata blocks to reserve
@@ -1112,32 +1132,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
+ /*
+ * ext4_calc_metadata_amount() has side effects, which we have
+ * to be prepared undo if we fail to claim space.
+ */
+ save_len = ei->i_da_metadata_calc_len;
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock;
md_needed = EXT4_NUM_B2C(sbi,
ext4_calc_metadata_amount(inode, lblock));
trace_ext4_da_reserve_space(inode, md_needed);
- spin_unlock(&ei->i_block_reservation_lock);
/*
- * We will charge metadata quota at writeout time; this saves
- * us from metadata over-estimation, though we may go over by
- * a small amount in the end. Here we just reserve for data.
- */
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
- if (ret)
- return ret;
- /*
* We do still charge estimated metadata to the sb though;
* we cannot afford to run out of free blocks.
*/
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
+ ei->i_da_metadata_calc_len = save_len;
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
- spin_lock(&ei->i_block_reservation_lock);
ei->i_reserved_data_blocks++;
ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 1365903a5144..9727522e2716 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -261,7 +261,6 @@ group_extend_out:
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
mnt_drop_write_file(filp);
- mnt_drop_write(filp->f_path.mnt);
if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me)))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 53589ff8824b..3407a62590d6 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1141,7 +1141,7 @@ static void ext4_update_super(struct super_block *sb,
struct ext4_new_group_data *group_data = flex_gd->groups;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- int i;
+ int i, ret;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
/*
@@ -1216,6 +1216,11 @@ static void ext4_update_super(struct super_block *sb,
&sbi->s_flex_groups[flex_group].free_inodes);
}
+ /*
+ * Update the fs overhead information
+ */
+ ext4_calculate_overhead(sb);
+
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:"
"%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7449e77b9728..65f3e0d203ae 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2944,6 +2944,114 @@ static void ext4_destroy_lazyinit_thread(void)
kthread_stop(ext4_lazyinit_task);
}
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc. This is because multiple metadata blocks from
+ * different block group can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time. We will still calculate the superblock for
+ * older file systems --- and if we come across with a bigalloc file
+ * system with zero in s_overhead_clusters the estimate will be close to
+ * correct especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock. If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
+static int count_overhead(struct super_block *sb, ext4_group_t grp,
+ char *buf)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp;
+ ext4_fsblk_t first_block, last_block, b;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ int s, j, count = 0;
+
+ first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
+ (grp * EXT4_BLOCKS_PER_GROUP(sb));
+ last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+ for (i = 0; i < ngroups; i++) {
+ gdp = ext4_get_group_desc(sb, i, NULL);
+ b = ext4_block_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_table(sb, gdp);
+ if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
+ for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
+ int c = EXT4_B2C(sbi, b - first_block);
+ ext4_set_bit(c, buf);
+ count++;
+ }
+ if (i != grp)
+ continue;
+ s = 0;
+ if (ext4_bg_has_super(sb, grp)) {
+ ext4_set_bit(s++, buf);
+ count++;
+ }
+ for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+ count++;
+ }
+ }
+ if (!count)
+ return 0;
+ return EXT4_CLUSTERS_PER_GROUP(sb) -
+ ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
+}
+
+/*
+ * Compute the overhead and stash it in sbi->s_overhead
+ */
+int ext4_calculate_overhead(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ ext4_fsblk_t overhead = 0;
+ char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+
+ memset(buf, 0, PAGE_SIZE);
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * Compute the overhead (FS structures). This is constant
+ * for a given filesystem unless the number of block groups
+ * changes so we cache the previous value until it does.
+ */
+
+ /*
+ * All of the blocks before first_data_block are overhead
+ */
+ overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
+
+ /*
+ * Add the overhead found in each block group
+ */
+ for (i = 0; i < ngroups; i++) {
+ int blks;
+
+ blks = count_overhead(sb, i, buf);
+ overhead += blks;
+ if (blks)
+ memset(buf, 0, PAGE_SIZE);
+ cond_resched();
+ }
+ sbi->s_overhead = overhead;
+ smp_wmb();
+ free_page((unsigned long) buf);
+ return 0;
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -3559,6 +3667,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
/*
+ * Get the # of file system overhead blocks from the
+ * superblock if present.
+ */
+ if (es->s_overhead_clusters)
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ else {
+ ret = ext4_calculate_overhead(sb);
+ if (ret)
+ goto failed_mount_wq;
+ }
+
+ /*
* The maximum number of concurrent works can be high and
* concurrency isn't really necessary. Limit it to 1.
*/
@@ -4421,67 +4541,21 @@ restore_opts:
return err;
}
-/*
- * Note: calculating the overhead so we can be compatible with
- * historical BSD practice is quite difficult in the face of
- * clusters/bigalloc. This is because multiple metadata blocks from
- * different block group can end up in the same allocation cluster.
- * Calculating the exact overhead in the face of clustered allocation
- * requires either O(all block bitmaps) in memory or O(number of block
- * groups**2) in time. We will still calculate the superblock for
- * older file systems --- and if we come across with a bigalloc file
- * system with zero in s_overhead_clusters the estimate will be close to
- * correct especially for very large cluster sizes --- but for newer
- * file systems, it's better to calculate this figure once at mkfs
- * time, and store it in the superblock. If the superblock value is
- * present (even for non-bigalloc file systems), we will use it.
- */
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- struct ext4_group_desc *gdp;
+ ext4_fsblk_t overhead = 0;
u64 fsid;
s64 bfree;
- if (test_opt(sb, MINIX_DF)) {
- sbi->s_overhead_last = 0;
- } else if (es->s_overhead_clusters) {
- sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
- } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
- ext4_fsblk_t overhead = 0;
-
- /*
- * Compute the overhead (FS structures). This is constant
- * for a given filesystem unless the number of block groups
- * changes so we cache the previous value until it does.
- */
-
- /*
- * All of the blocks before first_data_block are
- * overhead
- */
- overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
-
- /*
- * Add the overhead found in each block group
- */
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- overhead += ext4_num_overhead_clusters(sb, i, gdp);
- cond_resched();
- }
- sbi->s_overhead_last = overhead;
- smp_wmb();
- sbi->s_blocks_last = ext4_blocks_count(es);
- }
+ if (!test_opt(sb, MINIX_DF))
+ overhead = sbi->s_overhead;
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
- buf->f_blocks = (ext4_blocks_count(es) -
- EXT4_C2B(sbi, sbi->s_overhead_last));
+ buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case that few free space is available */
diff --git a/fs/locks.c b/fs/locks.c
index 6a64f154db04..fcc50ab71cc6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
return 0;
}
-static int assign_type(struct file_lock *fl, int type)
+static int assign_type(struct file_lock *fl, long type)
{
switch (type) {
case F_RDLCK:
@@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = {
/*
* Initialize a lease, use the default lock manager operations
*/
-static int lease_init(struct file *filp, int type, struct file_lock *fl)
+static int lease_init(struct file *filp, long type, struct file_lock *fl)
{
if (assign_type(fl, type) != 0)
return -EINVAL;
@@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
}
/* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, int type)
+static struct file_lock *lease_alloc(struct file *filp, long type)
{
struct file_lock *fl = locks_alloc_lock();
int error = -ENOMEM;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9b709fd328..f0f439dfeaa3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -451,8 +451,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
- /* Only do I/O if gfp is a superset of GFP_KERNEL */
- if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
+ /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not
+ * doing this memory reclaim for a fs-related allocation.
+ */
+ if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL &&
+ !(current->flags & PF_FSTRANS)) {
int how = FLUSH_SYNC;
/* Don't let kswapd deadlock waiting for OOM RPC calls */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 93aa3a4c4b0f..929ba0111724 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -205,12 +205,18 @@ static int nfs_idmap_init_keyring(void)
if (ret < 0)
goto failed_put_key;
+ ret = register_key_type(&key_type_id_resolver_legacy);
+ if (ret < 0)
+ goto failed_reg_legacy;
+
set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
cred->thread_keyring = keyring;
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
id_resolver_cache = cred;
return 0;
+failed_reg_legacy:
+ unregister_key_type(&key_type_id_resolver);
failed_put_key:
key_put(keyring);
failed_put_cred:
@@ -222,6 +228,7 @@ static void nfs_idmap_quit_keyring(void)
{
key_revoke(id_resolver_cache->thread_keyring);
unregister_key_type(&key_type_id_resolver);
+ unregister_key_type(&key_type_id_resolver_legacy);
put_cred(id_resolver_cache);
}
@@ -385,7 +392,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
};
static struct key_type key_type_id_resolver_legacy = {
- .name = "id_resolver",
+ .name = "id_legacy",
.instantiate = user_instantiate,
.match = user_match,
.revoke = user_revoke,
@@ -658,6 +665,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
if (ret < 0)
goto out2;
+ BUG_ON(idmap->idmap_key_cons != NULL);
idmap->idmap_key_cons = cons;
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
@@ -671,8 +679,7 @@ out2:
out1:
kfree(msg);
out0:
- key_revoke(cons->key);
- key_revoke(cons->authkey);
+ complete_request_key(cons, ret);
return ret;
}
@@ -706,11 +713,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
struct idmap *idmap = (struct idmap *)rpci->private;
- struct key_construction *cons = idmap->idmap_key_cons;
+ struct key_construction *cons;
struct idmap_msg im;
size_t namelen_in;
int ret;
+ /* If instantiation is successful, anyone waiting for key construction
+ * will have been woken up and someone else may now have used
+ * idmap_key_cons - so after this point we may no longer touch it.
+ */
+ cons = ACCESS_ONCE(idmap->idmap_key_cons);
+ idmap->idmap_key_cons = NULL;
+
if (mlen != sizeof(im)) {
ret = -ENOSPC;
goto out;
@@ -723,7 +737,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
ret = mlen;
- complete_request_key(idmap->idmap_key_cons, -ENOKEY);
+ complete_request_key(cons, -ENOKEY);
goto out_incomplete;
}
@@ -740,7 +754,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
}
out:
- complete_request_key(idmap->idmap_key_cons, ret);
+ complete_request_key(cons, ret);
out_incomplete:
return ret;
}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..1afe74c42c8a 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -453,7 +453,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
rdata->args.offset, rdata->args.count);
- return ore_read(objios->ios);
+ ret = ore_read(objios->ios);
+ if (unlikely(ret))
+ objio_free_result(&objios->oir);
+ return ret;
}
/*
@@ -484,8 +487,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
struct objio_state *objios = priv;
struct nfs_write_data *wdata = objios->oir.rpcdata;
pgoff_t index = offset / PAGE_SIZE;
- struct page *page = find_get_page(wdata->inode->i_mapping, index);
+ struct page *page;
+ loff_t i_size = i_size_read(wdata->inode);
+
+ if (offset >= i_size) {
+ *uptodate = true;
+ dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+ return ZERO_PAGE(0);
+ }
+ page = find_get_page(wdata->inode->i_mapping, index);
if (!page) {
page = find_or_create_page(wdata->inode->i_mapping,
index, GFP_NOFS);
@@ -506,8 +517,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
static void __r4w_put_page(void *priv, struct page *page)
{
- dprintk("%s: index=0x%lx\n", __func__, page->index);
- page_cache_release(page);
+ dprintk("%s: index=0x%lx\n", __func__,
+ (page == ZERO_PAGE(0)) ? -1UL : page->index);
+ if (ZERO_PAGE(0) != page)
+ page_cache_release(page);
return;
}
@@ -537,8 +550,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
wdata->args.offset, wdata->args.count);
ret = ore_write(objios->ios);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ objio_free_result(&objios->oir);
return ret;
+ }
if (objios->sync)
_write_done(objios->ios, objios);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 74c00bc92b9a..283d15e9f46d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2233,7 +2233,7 @@ out_acl:
if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(1);
+ WRITE32(0);
}
if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
if ((buflen -= 4) < 0)
diff --git a/fs/select.c b/fs/select.c
index 17d33d09fc16..0baa0a351a1c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -345,8 +345,8 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
struct fdtable *fdt;
/* handle last in-complete long-word first */
- set = ~(~0UL << (n & (__NFDBITS-1)));
- n /= __NFDBITS;
+ set = ~(~0UL << (n & (BITS_PER_LONG-1)));
+ n /= BITS_PER_LONG;
fdt = files_fdtable(current->files);
open_fds = fdt->open_fds + n;
max = 0;
@@ -373,7 +373,7 @@ get_max:
max++;
set >>= 1;
} while (set);
- max += n * __NFDBITS;
+ max += n * BITS_PER_LONG;
}
return max;
@@ -435,11 +435,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
in = *inp++; out = *outp++; ex = *exp++;
all_bits = in | out | ex;
if (all_bits == 0) {
- i += __NFDBITS;
+ i += BITS_PER_LONG;
continue;
}
- for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
+ for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
int fput_needed;
if (i >= n)
break;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 771f7fb6ce92..a7be8e2b4b9d 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -724,8 +724,12 @@ static int fixup_free_space(struct ubifs_info *c)
lnum = ubifs_next_log_lnum(c, lnum);
}
- /* Fixup the current log head */
- err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+ /*
+ * Fixup the log head which contains the only a CS node at the
+ * beginning.
+ */
+ err = fixup_leb(c, c->lhead_lnum,
+ ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
if (err)
goto out;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8d86a8706c0e..e660ffdd91b6 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1283,7 +1283,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
BUG_ON(ident != TAG_IDENT_LVD);
lvd = (struct logicalVolDesc *)bh->b_data;
table_len = le32_to_cpu(lvd->mapTableLength);
- if (sizeof(*lvd) + table_len > sb->s_blocksize) {
+ if (table_len > sb->s_blocksize - sizeof(*lvd)) {
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));