summaryrefslogtreecommitdiff
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-03-06 11:32:21 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2010-03-06 11:32:21 -0800
commit66ce3cf84deba6cc71dcf43c9d56a4278e5f712d (patch)
tree7580bcc42fc7c52620b98d78ebdc654bd7ed83ea /fs/xfs/linux-2.6
parent05c5cb31ec47cacf38db56d9efaa37ca9d473132 (diff)
parent9b1f56d60acfd634728f91f34922066c6f80ede6 (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (21 commits) xfs: return inode fork offset in bulkstat for fsr xfs: Increase the default size of the reserved blocks pool xfs: truncate delalloc extents when IO fails in writeback xfs: check for more work before sleeping in xfssyncd xfs: Fix a build warning in xfs_aops.c xfs: fix locking for inode cache radix tree tag updates xfs: remove xfs_ipin/xfs_iunpin xfs: cleanup xfs_iunpin_wait/xfs_iunpin_nowait xfs: kill xfs_lrw.h xfs: factor common xfs_trans_bjoin code xfs: stop passing opaque handles to xfs_log.c routines xfs: split xfs_bmap_btalloc xfs: fix xfs_fsblock_t tracing xfs: fix inode pincount check in fsync xfs: Non-blocking inode locking in IO completion xfs: implement optimized fdatasync xfs: remove wrapper for the fsync file operation xfs: remove wrappers for read/write file operations xfs: merge xfs_lrw.c into xfs_file.c xfs: fix dquota trace format ...
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c221
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c854
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c796
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h29
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h22
9 files changed, 1003 insertions, 956 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 66abe36c1213..9083357f9e44 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -39,6 +39,7 @@
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
+#include "xfs_bmap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
@@ -163,14 +164,17 @@ xfs_ioend_new_eof(
}
/*
- * Update on-disk file size now that data has been written to disk.
- * The current in-memory file size is i_size. If a write is beyond
- * eof i_new_size will be the intended file size until i_size is
- * updated. If this write does not extend all the way to the valid
- * file size then restrict this update to the end of the write.
+ * Update on-disk file size now that data has been written to disk. The
+ * current in-memory file size is i_size. If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated. If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
*/
-
-STATIC void
+STATIC int
xfs_setfilesize(
xfs_ioend_t *ioend)
{
@@ -181,16 +185,40 @@ xfs_setfilesize(
ASSERT(ioend->io_type != IOMAP_READ);
if (unlikely(ioend->io_error))
- return;
+ return 0;
+
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ return EAGAIN;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = xfs_ioend_new_eof(ioend);
if (isize) {
ip->i_d.di_size = isize;
- xfs_mark_inode_dirty_sync(ip);
+ xfs_mark_inode_dirty(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return 0;
+}
+
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+ xfs_ioend_t *ioend,
+ int wait)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining)) {
+ struct workqueue_struct *wq;
+
+ wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+ xfsconvertd_workqueue : xfsdatad_workqueue;
+ queue_work(wq, &ioend->io_work);
+ if (wait)
+ flush_workqueue(wq);
+ }
}
/*
@@ -198,11 +226,11 @@ xfs_setfilesize(
*/
STATIC void
xfs_end_io(
- struct work_struct *work)
+ struct work_struct *work)
{
- xfs_ioend_t *ioend =
- container_of(work, xfs_ioend_t, io_work);
- struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ int error = 0;
/*
* For unwritten extents we need to issue transactions to convert a
@@ -210,7 +238,6 @@ xfs_end_io(
*/
if (ioend->io_type == IOMAP_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
- int error;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
@@ -222,30 +249,23 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
- if (ioend->io_type != IOMAP_READ)
- xfs_setfilesize(ioend);
- xfs_destroy_ioend(ioend);
-}
-
-/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
- xfs_ioend_t *ioend,
- int wait)
-{
- if (atomic_dec_and_test(&ioend->io_remaining)) {
- struct workqueue_struct *wq;
-
- wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
- xfsconvertd_workqueue : xfsdatad_workqueue;
- queue_work(wq, &ioend->io_work);
- if (wait)
- flush_workqueue(wq);
+ if (ioend->io_type != IOMAP_READ) {
+ error = xfs_setfilesize(ioend);
+ ASSERT(!error || error == EAGAIN);
}
+
+ /*
+ * If we didn't complete processing of the ioend, requeue it to the
+ * tail of the workqueue for another attempt later. Otherwise destroy
+ * it.
+ */
+ if (error == EAGAIN) {
+ atomic_inc(&ioend->io_remaining);
+ xfs_finish_ioend(ioend, 0);
+ /* ensure we don't spin on blocked ioends */
+ delay(1);
+ } else
+ xfs_destroy_ioend(ioend);
}
/*
@@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
* but don't update the inode size until I/O completion.
*/
if (xfs_ioend_new_eof(ioend))
- xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
+ xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE, bio);
@@ -874,6 +894,118 @@ xfs_cluster_write(
}
}
+STATIC void
+xfs_vm_invalidatepage(
+ struct page *page,
+ unsigned long offset)
+{
+ trace_xfs_invalidatepage(page->mapping->host, page, offset);
+ block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+ struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct buffer_head *bh, *head;
+ loff_t offset = page_offset(page);
+ ssize_t len = 1 << inode->i_blkbits;
+
+ if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+ goto out_invalidate;
+
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard on page %p, inode 0x%llx, offset %llu.",
+ page, ip->i_ino, offset);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ bh = head = page_buffers(page);
+ do {
+ int done;
+ xfs_fileoff_t offset_fsb;
+ xfs_bmbt_irec_t imap;
+ int nimaps = 1;
+ int error;
+ xfs_fsblock_t firstblock;
+ xfs_bmap_free_t flist;
+
+ if (!buffer_delay(bh))
+ goto next_buffer;
+
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+
+ /*
+ * Map the range first and check that it is a delalloc extent
+ * before trying to unmap the range. Otherwise we will be
+ * trying to remove a real extent (which requires a
+ * transaction) or a hole, which is probably a bad idea...
+ */
+ error = xfs_bmapi(NULL, ip, offset_fsb, 1,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+ &nimaps, NULL, NULL);
+
+ if (error) {
+ /* something screwed, just bail */
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard failed delalloc mapping lookup.");
+ break;
+ }
+ if (!nimaps) {
+ /* nothing there */
+ goto next_buffer;
+ }
+ if (imap.br_startblock != DELAYSTARTBLOCK) {
+ /* been converted, ignore */
+ goto next_buffer;
+ }
+ WARN_ON(imap.br_blockcount == 0);
+
+ /*
+ * Note: while we initialise the firstblock/flist pair, they
+ * should never be used because blocks should never be
+ * allocated or freed for a delalloc extent and hence we need
+ * don't cancel or finish them after the xfs_bunmapi() call.
+ */
+ xfs_bmap_init(&flist, &firstblock);
+ error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
+ &flist, NULL, &done);
+
+ ASSERT(!flist.xbf_count && !flist.xbf_first);
+ if (error) {
+ /* something screwed, just bail */
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard unable to remove delalloc mapping.");
+ break;
+ }
+next_buffer:
+ offset += len;
+
+ } while ((bh = bh->b_this_page) != head);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+ xfs_vm_invalidatepage(page, 0);
+ return;
+}
+
/*
* Calling this without startio set means we are being asked to make a dirty
* page ready for freeing it's buffers. When called with startio set then
@@ -1125,7 +1257,7 @@ error:
*/
if (err != -EAGAIN) {
if (!unmapped)
- block_invalidatepage(page, 0);
+ xfs_aops_discard_page(page);
ClearPageUptodate(page);
}
return err;
@@ -1535,15 +1667,6 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
-STATIC void
-xfs_vm_invalidatepage(
- struct page *page,
- unsigned long offset)
-{
- trace_xfs_invalidatepage(page->mapping->host, page, offset);
- block_invalidatepage(page, offset);
-}
-
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index e4caeb28ce2e..42dd3bcfba6b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
+#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -34,52 +35,279 @@
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_ioctl.h"
+#include "xfs_trace.h"
#include <linux/dcache.h>
static const struct vm_operations_struct xfs_file_vm_ops;
-STATIC ssize_t
-xfs_file_aio_read(
- struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos)
+/*
+ * xfs_iozero
+ *
+ * xfs_iozero clears the specified range of buffer supplied,
+ * and marks all the affected blocks as valid and modified. If
+ * an affected block is not allocated, it will be allocated. If
+ * an affected block is not completely overwritten, and is not
+ * valid before the operation, it will be read from disk before
+ * being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+ struct xfs_inode *ip, /* inode */
+ loff_t pos, /* offset in file */
+ size_t count) /* size of data to zero */
{
- struct file *file = iocb->ki_filp;
- int ioflags = 0;
+ struct page *page;
+ struct address_space *mapping;
+ int status;
- BUG_ON(iocb->ki_pos != pos);
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
- return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
- nr_segs, &iocb->ki_pos, ioflags);
+ mapping = VFS_I(ip)->i_mapping;
+ do {
+ unsigned offset, bytes;
+ void *fsdata;
+
+ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+ bytes = PAGE_CACHE_SIZE - offset;
+ if (bytes > count)
+ bytes = count;
+
+ status = pagecache_write_begin(NULL, mapping, pos, bytes,
+ AOP_FLAG_UNINTERRUPTIBLE,
+ &page, &fsdata);
+ if (status)
+ break;
+
+ zero_user(page, offset, bytes);
+
+ status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+ page, fsdata);
+ WARN_ON(status <= 0); /* can't return less than zero! */
+ pos += bytes;
+ count -= bytes;
+ status = 0;
+ } while (count);
+
+ return (-status);
+}
+
+STATIC int
+xfs_file_fsync(
+ struct file *file,
+ struct dentry *dentry,
+ int datasync)
+{
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
+ struct xfs_trans *tp;
+ int error = 0;
+ int log_flushed = 0;
+
+ xfs_itrace_entry(ip);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -XFS_ERROR(EIO);
+
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+
+ /*
+ * We always need to make sure that the required inode state is safe on
+ * disk. The inode might be clean but we still might need to force the
+ * log because of committed transactions that haven't hit the disk yet.
+ * Likewise, there could be unflushed non-transactional changes to the
+ * inode core that have to go to disk and this requires us to issue
+ * a synchronous transaction to capture these changes correctly.
+ *
+ * This code relies on the assumption that if the i_update_core field
+ * of the inode is clear and the inode is unpinned then it is clean
+ * and no action is required.
+ */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+ /*
+ * First check if the VFS inode is marked dirty. All the dirtying
+ * of non-transactional updates no goes through mark_inode_dirty*,
+ * which allows us to distinguish beteeen pure timestamp updates
+ * and i_size updates which need to be caught for fdatasync.
+ * After that also theck for the dirty state in the XFS inode, which
+ * might gets cleared when the inode gets written out via the AIL
+ * or xfs_iflush_cluster.
+ */
+ if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
+ ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+ ip->i_update_core) {
+ /*
+ * Kick off a transaction to log the inode core to get the
+ * updates. The sync transaction will also force the log.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0,
+ XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return -error;
+ }
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Note - it's possible that we might have pushed ourselves out
+ * of the way during trans_reserve which would flush the inode.
+ * But there's no guarantee that the inode buffer has actually
+ * gone out yet (it's delwri). Plus the buffer could be pinned
+ * anyway if it's part of an inode in another recent
+ * transaction. So we play it safe and fire off the
+ * transaction anyway.
+ */
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(tp, ip);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_trans_set_sync(tp);
+ error = _xfs_trans_commit(tp, 0, &log_flushed);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ } else {
+ /*
+ * Timestamps/size haven't changed since last inode flush or
+ * inode transaction commit. That means either nothing got
+ * written or a transaction committed which caught the updates.
+ * If the latter happened and the transaction hasn't hit the
+ * disk yet, the inode will be still be pinned. If it is,
+ * force the log.
+ */
+ if (xfs_ipincount(ip)) {
+ error = _xfs_log_force_lsn(ip->i_mount,
+ ip->i_itemp->ili_last_lsn,
+ XFS_LOG_SYNC, &log_flushed);
+ }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ }
+
+ if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If the log write didn't issue an ordered tag we need
+ * to flush the disk cache for the data device now.
+ */
+ if (!log_flushed)
+ xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
+
+ /*
+ * If this inode is on the RT dev we need to flush that
+ * cache as well.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
+ }
+
+ return -error;
}
STATIC ssize_t
-xfs_file_aio_write(
+xfs_file_aio_read(
struct kiocb *iocb,
- const struct iovec *iov,
+ const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ size_t size = 0;
+ ssize_t ret = 0;
int ioflags = 0;
+ xfs_fsize_t n;
+ unsigned long seg;
+
+ XFS_STATS_INC(xs_read_calls);
BUG_ON(iocb->ki_pos != pos);
+
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
- &iocb->ki_pos, ioflags);
+
+ /* START copy & waste from filemap.c */
+ for (seg = 0; seg < nr_segs; seg++) {
+ const struct iovec *iv = &iovp[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ size += iv->iov_len;
+ if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+ return XFS_ERROR(-EINVAL);
+ }
+ /* END copy & waste from filemap.c */
+
+ if (unlikely(ioflags & IO_ISDIRECT)) {
+ xfs_buftarg_t *target =
+ XFS_IS_REALTIME_INODE(ip) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+ if ((iocb->ki_pos & target->bt_smask) ||
+ (size & target->bt_smask)) {
+ if (iocb->ki_pos == ip->i_size)
+ return 0;
+ return -XFS_ERROR(EINVAL);
+ }
+ }
+
+ n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+ if (n <= 0 || size == 0)
+ return 0;
+
+ if (n < size)
+ size = n;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (unlikely(ioflags & IO_ISDIRECT))
+ mutex_lock(&inode->i_mutex);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
+ int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+ int iolock = XFS_IOLOCK_SHARED;
+
+ ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
+ dmflags, &iolock);
+ if (ret) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ if (unlikely(ioflags & IO_ISDIRECT))
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
+ }
+
+ if (unlikely(ioflags & IO_ISDIRECT)) {
+ if (inode->i_mapping->nrpages) {
+ ret = -xfs_flushinval_pages(ip,
+ (iocb->ki_pos & PAGE_CACHE_MASK),
+ -1, FI_REMAPF_LOCKED);
+ }
+ mutex_unlock(&inode->i_mutex);
+ if (ret) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return ret;
+ }
+ }
+
+ trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
+
+ ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
+ if (ret > 0)
+ XFS_STATS_ADD(xs_read_bytes, ret);
+
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return ret;
}
STATIC ssize_t
@@ -87,16 +315,44 @@ xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
- size_t len,
+ size_t count,
unsigned int flags)
{
+ struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
+ struct xfs_mount *mp = ip->i_mount;
int ioflags = 0;
+ ssize_t ret;
+
+ XFS_STATS_INC(xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
- infilp, ppos, pipe, len, flags, ioflags);
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
+ int iolock = XFS_IOLOCK_SHARED;
+ int error;
+
+ error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
+ FILP_DELAY_FLAG(infilp), &iolock);
+ if (error) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return -error;
+ }
+ }
+
+ trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+
+ ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+ if (ret > 0)
+ XFS_STATS_ADD(xs_read_bytes, ret);
+
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return ret;
}
STATIC ssize_t
@@ -104,16 +360,538 @@ xfs_file_splice_write(
struct pipe_inode_info *pipe,
struct file *outfilp,
loff_t *ppos,
- size_t len,
+ size_t count,
unsigned int flags)
{
+ struct inode *inode = outfilp->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fsize_t isize, new_size;
int ioflags = 0;
+ ssize_t ret;
+
+ XFS_STATS_INC(xs_write_calls);
if (outfilp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
- pipe, outfilp, ppos, len, flags, ioflags);
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
+ int iolock = XFS_IOLOCK_EXCL;
+ int error;
+
+ error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
+ FILP_DELAY_FLAG(outfilp), &iolock);
+ if (error) {
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return -error;
+ }
+ }
+
+ new_size = *ppos + count;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (new_size > ip->i_size)
+ ip->i_new_size = new_size;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+
+ ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+ if (ret > 0)
+ XFS_STATS_ADD(xs_write_bytes, ret);
+
+ isize = i_size_read(inode);
+ if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
+ *ppos = isize;
+
+ if (*ppos > ip->i_size) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (*ppos > ip->i_size)
+ ip->i_size = *ppos;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ if (ip->i_new_size) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ip->i_new_size = 0;
+ if (ip->i_d.di_size > ip->i_size)
+ ip->i_d.di_size = ip->i_size;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF. We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int /* error (positive) */
+xfs_zero_last_block(
+ xfs_inode_t *ip,
+ xfs_fsize_t offset,
+ xfs_fsize_t isize)
+{
+ xfs_fileoff_t last_fsb;
+ xfs_mount_t *mp = ip->i_mount;
+ int nimaps;
+ int zero_offset;
+ int zero_len;
+ int error = 0;
+ xfs_bmbt_irec_t imap;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+ if (zero_offset == 0) {
+ /*
+ * There are no extra bytes in the last block on disk to
+ * zero, so return.
+ */
+ return 0;
+ }
+
+ last_fsb = XFS_B_TO_FSBT(mp, isize);
+ nimaps = 1;
+ error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
+ &nimaps, NULL, NULL);
+ if (error) {
+ return error;
+ }
+ ASSERT(nimaps > 0);
+ /*
+ * If the block underlying isize is just a hole, then there
+ * is nothing to zero.
+ */
+ if (imap.br_startblock == HOLESTARTBLOCK) {
+ return 0;
+ }
+ /*
+ * Zero the part of the last block beyond the EOF, and write it
+ * out sync. We need to drop the ilock while we do this so we
+ * don't deadlock when the buffer cache calls back to us.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ zero_len = mp->m_sb.sb_blocksize - zero_offset;
+ if (isize + zero_len > offset)
+ zero_len = offset - isize;
+ error = xfs_iozero(ip, isize, zero_len);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(error >= 0);
+ return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF. This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file. This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated. If fill is set,
+ * then any holes in the range are filled and zeroed. If not, the holes
+ * are left alone as holes.
+ */
+
+int /* error (positive) */
+xfs_zero_eof(
+ xfs_inode_t *ip,
+ xfs_off_t offset, /* starting I/O offset */
+ xfs_fsize_t isize) /* current inode size */
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_fileoff_t start_zero_fsb;
+ xfs_fileoff_t end_zero_fsb;
+ xfs_fileoff_t zero_count_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_fileoff_t zero_off;
+ xfs_fsize_t zero_len;
+ int nimaps;
+ int error = 0;
+ xfs_bmbt_irec_t imap;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+ ASSERT(offset > isize);
+
+ /*
+ * First handle zeroing the block on which isize resides.
+ * We only zero a part of that block so it is handled specially.
+ */
+ error = xfs_zero_last_block(ip, offset, isize);
+ if (error) {
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+ return error;
+ }
+
+ /*
+ * Calculate the range between the new size and the old
+ * where blocks needing to be zeroed may exist. To get the
+ * block where the last byte in the file currently resides,
+ * we need to subtract one from the size and truncate back
+ * to a block boundary. We subtract 1 in case the size is
+ * exactly on a block boundary.
+ */
+ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+ start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+ end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+ ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+ if (last_fsb == end_zero_fsb) {
+ /*
+ * The size was only incremented on its last block.
+ * We took care of that above, so just return.
+ */
+ return 0;
+ }
+
+ ASSERT(start_zero_fsb <= end_zero_fsb);
+ while (start_zero_fsb <= end_zero_fsb) {
+ nimaps = 1;
+ zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+ error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+ 0, NULL, 0, &imap, &nimaps, NULL, NULL);
+ if (error) {
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+ return error;
+ }
+ ASSERT(nimaps > 0);
+
+ if (imap.br_state == XFS_EXT_UNWRITTEN ||
+ imap.br_startblock == HOLESTARTBLOCK) {
+ /*
+ * This loop handles initializing pages that were
+ * partially initialized by the code below this
+ * loop. It basically zeroes the part of the page
+ * that sits on a hole and sets the page as P_HOLE
+ * and calls remapf if it is a mapped file.
+ */
+ start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+ ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+ continue;
+ }
+
+ /*
+ * There are blocks we need to zero.
+ * Drop the inode lock while we're doing the I/O.
+ * We'll still have the iolock to protect us.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+ zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+ if ((zero_off + zero_len) > offset)
+ zero_len = offset - zero_off;
+
+ error = xfs_iozero(ip, zero_off, zero_len);
+ if (error) {
+ goto out_lock;
+ }
+
+ start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+ ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
+
+ return 0;
+
+out_lock:
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(error >= 0);
+ return error;
+}
+
+STATIC ssize_t
+xfs_file_aio_write(
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned long nr_segs,
+ loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t ret = 0, error = 0;
+ int ioflags = 0;
+ xfs_fsize_t isize, new_size;
+ int iolock;
+ int eventsent = 0;
+ size_t ocount = 0, count;
+ int need_i_mutex;
+
+ XFS_STATS_INC(xs_write_calls);
+
+ BUG_ON(iocb->ki_pos != pos);
+
+ if (unlikely(file->f_flags & O_DIRECT))
+ ioflags |= IO_ISDIRECT;
+ if (file->f_mode & FMODE_NOCMTIME)
+ ioflags |= IO_INVIS;
+
+ error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+ if (error)
+ return error;
+
+ count = ocount;
+ if (count == 0)
+ return 0;
+
+ xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+relock:
+ if (ioflags & IO_ISDIRECT) {
+ iolock = XFS_IOLOCK_SHARED;
+ need_i_mutex = 0;
+ } else {
+ iolock = XFS_IOLOCK_EXCL;
+ need_i_mutex = 1;
+ mutex_lock(&inode->i_mutex);
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
+
+start:
+ error = -generic_write_checks(file, &pos, &count,
+ S_ISBLK(inode->i_mode));
+ if (error) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+ goto out_unlock_mutex;
+ }
+
+ if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
+ !(ioflags & IO_INVIS) && !eventsent)) {
+ int dmflags = FILP_DELAY_FLAG(file);
+
+ if (need_i_mutex)
+ dmflags |= DM_FLAGS_IMUX;
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
+ pos, count, dmflags, &iolock);
+ if (error) {
+ goto out_unlock_internal;
+ }
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ eventsent = 1;
+
+ /*
+ * The iolock was dropped and reacquired in XFS_SEND_DATA
+ * so we have to recheck the size when appending.
+ * We will only "goto start;" once, since having sent the
+ * event prevents another call to XFS_SEND_DATA, which is
+ * what allows the size to change in the first place.
+ */
+ if ((file->f_flags & O_APPEND) && pos != ip->i_size)
+ goto start;
+ }
+
+ if (ioflags & IO_ISDIRECT) {
+ xfs_buftarg_t *target =
+ XFS_IS_REALTIME_INODE(ip) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ if ((pos & target->bt_smask) || (count & target->bt_smask)) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+ return XFS_ERROR(-EINVAL);
+ }
+
+ if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+ iolock = XFS_IOLOCK_EXCL;
+ need_i_mutex = 1;
+ mutex_lock(&inode->i_mutex);
+ xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
+ goto start;
+ }
+ }
+
+ new_size = pos + count;
+ if (new_size > ip->i_size)
+ ip->i_new_size = new_size;
+
+ if (likely(!(ioflags & IO_INVIS)))
+ file_update_time(file);
+
+ /*
+ * If the offset is beyond the size of the file, we have a couple
+ * of things to do. First, if there is already space allocated
+ * we need to either create holes or zero the disk or ...
+ *
+ * If there is a page where the previous size lands, we need
+ * to zero it out up to the new size.
+ */
+
+ if (pos > ip->i_size) {
+ error = xfs_zero_eof(ip, pos, ip->i_size);
+ if (error) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ goto out_unlock_internal;
+ }
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're writing the file then make sure to clear the
+ * setuid and setgid bits if the process is not being run
+ * by root. This keeps people from modifying setuid and
+ * setgid binaries.
+ */
+ error = -file_remove_suid(file);
+ if (unlikely(error))
+ goto out_unlock_internal;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = mapping->backing_dev_info;
+
+ if ((ioflags & IO_ISDIRECT)) {
+ if (mapping->nrpages) {
+ WARN_ON(need_i_mutex == 0);
+ error = xfs_flushinval_pages(ip,
+ (pos & PAGE_CACHE_MASK),
+ -1, FI_REMAPF_LOCKED);
+ if (error)
+ goto out_unlock_internal;
+ }
+
+ if (need_i_mutex) {
+ /* demote the lock now the cached pages are gone */
+ xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ mutex_unlock(&inode->i_mutex);
+
+ iolock = XFS_IOLOCK_SHARED;
+ need_i_mutex = 0;
+ }
+
+ trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
+ ret = generic_file_direct_write(iocb, iovp,
+ &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+ /*
+ * direct-io write to a hole: fall through to buffered I/O
+ * for completing the rest of the request.
+ */
+ if (ret >= 0 && ret != count) {
+ XFS_STATS_ADD(xs_write_bytes, ret);
+
+ pos += ret;
+ count -= ret;
+
+ ioflags &= ~IO_ISDIRECT;
+ xfs_iunlock(ip, iolock);
+ goto relock;
+ }
+ } else {
+ int enospc = 0;
+ ssize_t ret2 = 0;
+
+write_retry:
+ trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
+ ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
+ pos, &iocb->ki_pos, count, ret);
+ /*
+ * if we just got an ENOSPC, flush the inode now we
+ * aren't holding any page locks and retry *once*
+ */
+ if (ret2 == -ENOSPC && !enospc) {
+ error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+ if (error)
+ goto out_unlock_internal;
+ enospc = 1;
+ goto write_retry;
+ }
+ ret = ret2;
+ }
+
+ current->backing_dev_info = NULL;
+
+ isize = i_size_read(inode);
+ if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
+ iocb->ki_pos = isize;
+
+ if (iocb->ki_pos > ip->i_size) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (iocb->ki_pos > ip->i_size)
+ ip->i_size = iocb->ki_pos;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ if (ret == -ENOSPC &&
+ DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
+ xfs_iunlock(ip, iolock);
+ if (need_i_mutex)
+ mutex_unlock(&inode->i_mutex);
+ error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
+ DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
+ 0, 0, 0); /* Delay flag intentionally unused */
+ if (need_i_mutex)
+ mutex_lock(&inode->i_mutex);
+ xfs_ilock(ip, iolock);
+ if (error)
+ goto out_unlock_internal;
+ goto start;
+ }
+
+ error = -ret;
+ if (ret <= 0)
+ goto out_unlock_internal;
+
+ XFS_STATS_ADD(xs_write_bytes, ret);
+
+ /* Handle various SYNC-type writes */
+ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+ loff_t end = pos + ret - 1;
+ int error2;
+
+ xfs_iunlock(ip, iolock);
+ if (need_i_mutex)
+ mutex_unlock(&inode->i_mutex);
+
+ error2 = filemap_write_and_wait_range(mapping, pos, end);
+ if (!error)
+ error = error2;
+ if (need_i_mutex)
+ mutex_lock(&inode->i_mutex);
+ xfs_ilock(ip, iolock);
+
+ error2 = -xfs_file_fsync(file, file->f_path.dentry,
+ (file->f_flags & __O_SYNC) ? 0 : 1);
+ if (!error)
+ error = error2;
+ }
+
+ out_unlock_internal:
+ if (ip->i_new_size) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ip->i_new_size = 0;
+ /*
+ * If this was a direct or synchronous I/O that failed (such
+ * as ENOSPC) then part of the I/O may have been written to
+ * disk before the error occured. In this case the on-disk
+ * file size may have been adjusted beyond the in-memory file
+ * size and now needs to be truncated back.
+ */
+ if (ip->i_d.di_size > ip->i_size)
+ ip->i_d.di_size = ip->i_size;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+ xfs_iunlock(ip, iolock);
+ out_unlock_mutex:
+ if (need_i_mutex)
+ mutex_unlock(&inode->i_mutex);
+ return -error;
}
STATIC int
@@ -160,28 +938,6 @@ xfs_file_release(
return -xfs_release(XFS_I(inode));
}
-/*
- * We ignore the datasync flag here because a datasync is effectively
- * identical to an fsync. That is, datasync implies that we need to write
- * only the metadata needed to be able to access the data that is written
- * if we crash after the call completes. Hence if we are writing beyond
- * EOF we have to log the inode size change as well, which makes it a
- * full fsync. If we don't write beyond EOF, the inode core will be
- * clean in memory and so we don't need to log the inode, just like
- * fsync.
- */
-STATIC int
-xfs_file_fsync(
- struct file *file,
- struct dentry *dentry,
- int datasync)
-{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- return -xfs_fsync(ip);
-}
-
STATIC int
xfs_file_readdir(
struct file *filp,
@@ -203,9 +959,9 @@ xfs_file_readdir(
*
* Try to give it an estimate that's good enough, maybe at some
* point we can change the ->readdir prototype to include the
- * buffer size.
+ * buffer size. For now we use the current glibc buffer size.
*/
- bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
+ bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
error = xfs_readdir(ip, dirent, bufsize,
(xfs_off_t *)&filp->f_pos, filldir);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e8566bbf0f00..61a99608731e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -91,6 +91,16 @@ xfs_mark_inode_dirty_sync(
mark_inode_dirty_sync(inode);
}
+void
+xfs_mark_inode_dirty(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+ mark_inode_dirty(inode);
+}
+
/*
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 5af0c81ca1ae..facfb323a706 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -88,7 +88,6 @@
#include <xfs_super.h>
#include <xfs_globals.h>
#include <xfs_fs_subr.h>
-#include <xfs_lrw.h>
#include <xfs_buf.h>
/*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
deleted file mode 100644
index eac6f80d786d..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ /dev/null
@@ -1,796 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_inode_item.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/writeback.h>
-
-
-/*
- * xfs_iozero
- *
- * xfs_iozero clears the specified range of buffer supplied,
- * and marks all the affected blocks as valid and modified. If
- * an affected block is not allocated, it will be allocated. If
- * an affected block is not completely overwritten, and is not
- * valid before the operation, it will be read from disk before
- * being partially zeroed.
- */
-STATIC int
-xfs_iozero(
- struct xfs_inode *ip, /* inode */
- loff_t pos, /* offset in file */
- size_t count) /* size of data to zero */
-{
- struct page *page;
- struct address_space *mapping;
- int status;
-
- mapping = VFS_I(ip)->i_mapping;
- do {
- unsigned offset, bytes;
- void *fsdata;
-
- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
- bytes = PAGE_CACHE_SIZE - offset;
- if (bytes > count)
- bytes = count;
-
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
-
- zero_user(page, offset, bytes);
-
- status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
- page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
- pos += bytes;
- count -= bytes;
- status = 0;
- } while (count);
-
- return (-status);
-}
-
-ssize_t /* bytes read, or (-) error */
-xfs_read(
- xfs_inode_t *ip,
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned int segs,
- loff_t *offset,
- int ioflags)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- xfs_mount_t *mp = ip->i_mount;
- size_t size = 0;
- ssize_t ret = 0;
- xfs_fsize_t n;
- unsigned long seg;
-
-
- XFS_STATS_INC(xs_read_calls);
-
- /* START copy & waste from filemap.c */
- for (seg = 0; seg < segs; seg++) {
- const struct iovec *iv = &iovp[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- size += iv->iov_len;
- if (unlikely((ssize_t)(size|iv->iov_len) < 0))
- return XFS_ERROR(-EINVAL);
- }
- /* END copy & waste from filemap.c */
-
- if (unlikely(ioflags & IO_ISDIRECT)) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((*offset & target->bt_smask) ||
- (size & target->bt_smask)) {
- if (*offset == ip->i_size) {
- return (0);
- }
- return -XFS_ERROR(EINVAL);
- }
- }
-
- n = XFS_MAXIOFFSET(mp) - *offset;
- if ((n <= 0) || (size == 0))
- return 0;
-
- if (n < size)
- size = n;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
- int iolock = XFS_IOLOCK_SHARED;
-
- ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
- dmflags, &iolock);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_unlock(&inode->i_mutex);
- return ret;
- }
- }
-
- if (unlikely(ioflags & IO_ISDIRECT)) {
- if (inode->i_mapping->nrpages)
- ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
- -1, FI_REMAPF_LOCKED);
- mutex_unlock(&inode->i_mutex);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
- }
- }
-
- trace_xfs_file_read(ip, size, *offset, ioflags);
-
- iocb->ki_pos = *offset;
- ret = generic_file_aio_read(iocb, iovp, segs, *offset);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
-}
-
-ssize_t
-xfs_splice_read(
- xfs_inode_t *ip,
- struct file *infilp,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t count,
- int flags,
- int ioflags)
-{
- xfs_mount_t *mp = ip->i_mount;
- ssize_t ret;
-
- XFS_STATS_INC(xs_read_calls);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_SHARED;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
- FILP_DELAY_FLAG(infilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return -error;
- }
- }
-
- trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
-}
-
-ssize_t
-xfs_splice_write(
- xfs_inode_t *ip,
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t count,
- int flags,
- int ioflags)
-{
- xfs_mount_t *mp = ip->i_mount;
- ssize_t ret;
- struct inode *inode = outfilp->f_mapping->host;
- xfs_fsize_t isize, new_size;
-
- XFS_STATS_INC(xs_write_calls);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_EXCL;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
- FILP_DELAY_FLAG(outfilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
- }
- }
-
- new_size = *ppos + count;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (new_size > ip->i_size)
- ip->i_new_size = new_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
- *ppos = isize;
-
- if (*ppos > ip->i_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (*ppos > ip->i_size)
- ip->i_size = *ppos;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
-
- if (ip->i_new_size) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ip->i_new_size = 0;
- if (ip->i_d.di_size > ip->i_size)
- ip->i_d.di_size = ip->i_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF. We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int /* error (positive) */
-xfs_zero_last_block(
- xfs_inode_t *ip,
- xfs_fsize_t offset,
- xfs_fsize_t isize)
-{
- xfs_fileoff_t last_fsb;
- xfs_mount_t *mp = ip->i_mount;
- int nimaps;
- int zero_offset;
- int zero_len;
- int error = 0;
- xfs_bmbt_irec_t imap;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- zero_offset = XFS_B_FSB_OFFSET(mp, isize);
- if (zero_offset == 0) {
- /*
- * There are no extra bytes in the last block on disk to
- * zero, so return.
- */
- return 0;
- }
-
- last_fsb = XFS_B_TO_FSBT(mp, isize);
- nimaps = 1;
- error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
- &nimaps, NULL, NULL);
- if (error) {
- return error;
- }
- ASSERT(nimaps > 0);
- /*
- * If the block underlying isize is just a hole, then there
- * is nothing to zero.
- */
- if (imap.br_startblock == HOLESTARTBLOCK) {
- return 0;
- }
- /*
- * Zero the part of the last block beyond the EOF, and write it
- * out sync. We need to drop the ilock while we do this so we
- * don't deadlock when the buffer cache calls back to us.
- */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- zero_len = mp->m_sb.sb_blocksize - zero_offset;
- if (isize + zero_len > offset)
- zero_len = offset - isize;
- error = xfs_iozero(ip, isize, zero_len);
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ASSERT(error >= 0);
- return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF. This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file. This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated. If fill is set,
- * then any holes in the range are filled and zeroed. If not, the holes
- * are left alone as holes.
- */
-
-int /* error (positive) */
-xfs_zero_eof(
- xfs_inode_t *ip,
- xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
-{
- xfs_mount_t *mp = ip->i_mount;
- xfs_fileoff_t start_zero_fsb;
- xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t zero_count_fsb;
- xfs_fileoff_t last_fsb;
- xfs_fileoff_t zero_off;
- xfs_fsize_t zero_len;
- int nimaps;
- int error = 0;
- xfs_bmbt_irec_t imap;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(offset > isize);
-
- /*
- * First handle zeroing the block on which isize resides.
- * We only zero a part of that block so it is handled specially.
- */
- error = xfs_zero_last_block(ip, offset, isize);
- if (error) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- return error;
- }
-
- /*
- * Calculate the range between the new size and the old
- * where blocks needing to be zeroed may exist. To get the
- * block where the last byte in the file currently resides,
- * we need to subtract one from the size and truncate back
- * to a block boundary. We subtract 1 in case the size is
- * exactly on a block boundary.
- */
- last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
- start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
- ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
- if (last_fsb == end_zero_fsb) {
- /*
- * The size was only incremented on its last block.
- * We took care of that above, so just return.
- */
- return 0;
- }
-
- ASSERT(start_zero_fsb <= end_zero_fsb);
- while (start_zero_fsb <= end_zero_fsb) {
- nimaps = 1;
- zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
- error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
- 0, NULL, 0, &imap, &nimaps, NULL, NULL);
- if (error) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- return error;
- }
- ASSERT(nimaps > 0);
-
- if (imap.br_state == XFS_EXT_UNWRITTEN ||
- imap.br_startblock == HOLESTARTBLOCK) {
- /*
- * This loop handles initializing pages that were
- * partially initialized by the code below this
- * loop. It basically zeroes the part of the page
- * that sits on a hole and sets the page as P_HOLE
- * and calls remapf if it is a mapped file.
- */
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- continue;
- }
-
- /*
- * There are blocks we need to zero.
- * Drop the inode lock while we're doing the I/O.
- * We'll still have the iolock to protect us.
- */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
- zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
- if ((zero_off + zero_len) > offset)
- zero_len = offset - zero_off;
-
- error = xfs_iozero(ip, zero_off, zero_len);
- if (error) {
- goto out_lock;
- }
-
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- }
-
- return 0;
-
-out_lock:
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ASSERT(error >= 0);
- return error;
-}
-
-ssize_t /* bytes written, or (-) error */
-xfs_write(
- struct xfs_inode *xip,
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned int nsegs,
- loff_t *offset,
- int ioflags)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- unsigned long segs = nsegs;
- xfs_mount_t *mp;
- ssize_t ret = 0, error = 0;
- xfs_fsize_t isize, new_size;
- int iolock;
- int eventsent = 0;
- size_t ocount = 0, count;
- loff_t pos;
- int need_i_mutex;
-
- XFS_STATS_INC(xs_write_calls);
-
- error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
- if (error)
- return error;
-
- count = ocount;
- pos = *offset;
-
- if (count == 0)
- return 0;
-
- mp = xip->i_mount;
-
- xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
-relock:
- if (ioflags & IO_ISDIRECT) {
- iolock = XFS_IOLOCK_SHARED;
- need_i_mutex = 0;
- } else {
- iolock = XFS_IOLOCK_EXCL;
- need_i_mutex = 1;
- mutex_lock(&inode->i_mutex);
- }
-
- xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
-
-start:
- error = -generic_write_checks(file, &pos, &count,
- S_ISBLK(inode->i_mode));
- if (error) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
- goto out_unlock_mutex;
- }
-
- if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
- !(ioflags & IO_INVIS) && !eventsent)) {
- int dmflags = FILP_DELAY_FLAG(file);
-
- if (need_i_mutex)
- dmflags |= DM_FLAGS_IMUX;
-
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
- pos, count, dmflags, &iolock);
- if (error) {
- goto out_unlock_internal;
- }
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- eventsent = 1;
-
- /*
- * The iolock was dropped and reacquired in XFS_SEND_DATA
- * so we have to recheck the size when appending.
- * We will only "goto start;" once, since having sent the
- * event prevents another call to XFS_SEND_DATA, which is
- * what allows the size to change in the first place.
- */
- if ((file->f_flags & O_APPEND) && pos != xip->i_size)
- goto start;
- }
-
- if (ioflags & IO_ISDIRECT) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(xip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- if ((pos & target->bt_smask) || (count & target->bt_smask)) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
- return XFS_ERROR(-EINVAL);
- }
-
- if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
- iolock = XFS_IOLOCK_EXCL;
- need_i_mutex = 1;
- mutex_lock(&inode->i_mutex);
- xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
- goto start;
- }
- }
-
- new_size = pos + count;
- if (new_size > xip->i_size)
- xip->i_new_size = new_size;
-
- if (likely(!(ioflags & IO_INVIS)))
- file_update_time(file);
-
- /*
- * If the offset is beyond the size of the file, we have a couple
- * of things to do. First, if there is already space allocated
- * we need to either create holes or zero the disk or ...
- *
- * If there is a page where the previous size lands, we need
- * to zero it out up to the new size.
- */
-
- if (pos > xip->i_size) {
- error = xfs_zero_eof(xip, pos, xip->i_size);
- if (error) {
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- goto out_unlock_internal;
- }
- }
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
-
- /*
- * If we're writing the file then make sure to clear the
- * setuid and setgid bits if the process is not being run
- * by root. This keeps people from modifying setuid and
- * setgid binaries.
- */
- error = -file_remove_suid(file);
- if (unlikely(error))
- goto out_unlock_internal;
-
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
-
- if ((ioflags & IO_ISDIRECT)) {
- if (mapping->nrpages) {
- WARN_ON(need_i_mutex == 0);
- error = xfs_flushinval_pages(xip,
- (pos & PAGE_CACHE_MASK),
- -1, FI_REMAPF_LOCKED);
- if (error)
- goto out_unlock_internal;
- }
-
- if (need_i_mutex) {
- /* demote the lock now the cached pages are gone */
- xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
- mutex_unlock(&inode->i_mutex);
-
- iolock = XFS_IOLOCK_SHARED;
- need_i_mutex = 0;
- }
-
- trace_xfs_file_direct_write(xip, count, *offset, ioflags);
- ret = generic_file_direct_write(iocb, iovp,
- &segs, pos, offset, count, ocount);
-
- /*
- * direct-io write to a hole: fall through to buffered I/O
- * for completing the rest of the request.
- */
- if (ret >= 0 && ret != count) {
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- pos += ret;
- count -= ret;
-
- ioflags &= ~IO_ISDIRECT;
- xfs_iunlock(xip, iolock);
- goto relock;
- }
- } else {
- int enospc = 0;
- ssize_t ret2 = 0;
-
-write_retry:
- trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
- ret2 = generic_file_buffered_write(iocb, iovp, segs,
- pos, offset, count, ret);
- /*
- * if we just got an ENOSPC, flush the inode now we
- * aren't holding any page locks and retry *once*
- */
- if (ret2 == -ENOSPC && !enospc) {
- error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
- if (error)
- goto out_unlock_internal;
- enospc = 1;
- goto write_retry;
- }
- ret = ret2;
- }
-
- current->backing_dev_info = NULL;
-
- isize = i_size_read(inode);
- if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
- *offset = isize;
-
- if (*offset > xip->i_size) {
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- if (*offset > xip->i_size)
- xip->i_size = *offset;
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- }
-
- if (ret == -ENOSPC &&
- DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
- xfs_iunlock(xip, iolock);
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
- error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
- DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
- 0, 0, 0); /* Delay flag intentionally unused */
- if (need_i_mutex)
- mutex_lock(&inode->i_mutex);
- xfs_ilock(xip, iolock);
- if (error)
- goto out_unlock_internal;
- goto start;
- }
-
- error = -ret;
- if (ret <= 0)
- goto out_unlock_internal;
-
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- /* Handle various SYNC-type writes */
- if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
- loff_t end = pos + ret - 1;
- int error2;
-
- xfs_iunlock(xip, iolock);
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
-
- error2 = filemap_write_and_wait_range(mapping, pos, end);
- if (!error)
- error = error2;
- if (need_i_mutex)
- mutex_lock(&inode->i_mutex);
- xfs_ilock(xip, iolock);
-
- error2 = xfs_fsync(xip);
- if (!error)
- error = error2;
- }
-
- out_unlock_internal:
- if (xip->i_new_size) {
- xfs_ilock(xip, XFS_ILOCK_EXCL);
- xip->i_new_size = 0;
- /*
- * If this was a direct or synchronous I/O that failed (such
- * as ENOSPC) then part of the I/O may have been written to
- * disk before the error occured. In this case the on-disk
- * file size may have been adjusted beyond the in-memory file
- * size and now needs to be truncated back.
- */
- if (xip->i_d.di_size > xip->i_size)
- xip->i_d.di_size = xip->i_size;
- xfs_iunlock(xip, XFS_ILOCK_EXCL);
- }
- xfs_iunlock(xip, iolock);
- out_unlock_mutex:
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
- return -error;
-}
-
-/*
- * If the underlying (data/log/rt) device is readonly, there are some
- * operations that cannot proceed.
- */
-int
-xfs_dev_is_read_only(
- xfs_mount_t *mp,
- char *message)
-{
- if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
- xfs_readonly_buftarg(mp->m_logdev_targp) ||
- (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
- cmn_err(CE_NOTE,
- "XFS: %s required on read-only device.", message);
- cmn_err(CE_NOTE,
- "XFS: write access unavailable, cannot proceed.");
- return EROFS;
- }
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
deleted file mode 100644
index 342ae8c0d011..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_LRW_H__
-#define __XFS_LRW_H__
-
-struct xfs_mount;
-struct xfs_inode;
-struct xfs_buf;
-
-extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
-
-extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-
-#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a9f6d20aff41..05cd85317f6f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -607,7 +607,8 @@ xfssyncd(
set_freezable();
timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
for (;;) {
- timeleft = schedule_timeout_interruptible(timeleft);
+ if (list_empty(&mp->m_sync_list))
+ timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
if (kthread_should_stop() && list_empty(&mp->m_sync_list))
@@ -627,8 +628,7 @@ xfssyncd(
list_add_tail(&mp->m_sync_work.w_list,
&mp->m_sync_list);
}
- list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
- list_move(&work->w_list, &tmp);
+ list_splice_init(&mp->m_sync_list, &tmp);
spin_unlock(&mp->m_sync_lock);
list_for_each_entry_safe(work, n, &tmp, w_list) {
@@ -688,12 +688,12 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- read_lock(&pag->pag_ici_lock);
+ write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
- read_unlock(&pag->pag_ici_lock);
+ write_unlock(&pag->pag_ici_lock);
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 856eb3c8d605..5a107601e969 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -52,22 +52,6 @@
#include "quota/xfs_dquot.h"
/*
- * Format fsblock number into a static buffer & return it.
- */
-STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
-{
- static char rval[50];
-
- if (bno == NULLFSBLOCK)
- sprintf(rval, "NULLFSBLOCK");
- else if (isnullstartblock(bno))
- sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
- else
- sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
- return rval;
-}
-
-/*
* We include this last to have the helpers above available for the trace
* event implementations.
*/
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index a4574dcf5065..fcaa62f0799e 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -197,13 +197,13 @@ TRACE_EVENT(xfs_iext_insert,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
- "offset %lld block %s count %lld flag %d caller %pf",
+ "offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- xfs_fmtfsblock(__entry->startblock),
+ (__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -241,13 +241,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
- "offset %lld block %s count %lld flag %d caller %pf",
+ "offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
- xfs_fmtfsblock(__entry->startblock),
+ (__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
@@ -593,7 +593,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
TP_ARGS(dqp),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(__be32, id)
+ __field(u32, id)
__field(unsigned, flags)
__field(unsigned, nrefs)
__field(unsigned long long, res_bcount)
@@ -606,7 +606,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
), \
TP_fast_assign(
__entry->dev = dqp->q_mount->m_super->s_dev;
- __entry->id = dqp->q_core.d_id;
+ __entry->id = be32_to_cpu(dqp->q_core.d_id);
__entry->flags = dqp->dq_flags;
__entry->nrefs = dqp->q_nrefs;
__entry->res_bcount = dqp->q_res_bcount;
@@ -622,10 +622,10 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
be64_to_cpu(dqp->q_core.d_ino_softlimit);
),
TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
- "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
- "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
+ "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
+ "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
MAJOR(__entry->dev), MINOR(__entry->dev),
- be32_to_cpu(__entry->id),
+ __entry->id,
__print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
__entry->nrefs,
__entry->res_bcount,
@@ -881,7 +881,7 @@ TRACE_EVENT(name, \
), \
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
"offset 0x%llx count %zd flags %s " \
- "startoff 0x%llx startblock %s blockcount 0x%llx", \
+ "startoff 0x%llx startblock %lld blockcount 0x%llx", \
MAJOR(__entry->dev), MINOR(__entry->dev), \
__entry->ino, \
__entry->size, \
@@ -890,7 +890,7 @@ TRACE_EVENT(name, \
__entry->count, \
__print_flags(__entry->flags, "|", BMAPI_FLAGS), \
__entry->startoff, \
- xfs_fmtfsblock(__entry->startblock), \
+ (__int64_t)__entry->startblock, \
__entry->blockcount) \
)
DEFINE_IOMAP_EVENT(xfs_iomap_enter);