From 282a8e0391c377b64c7d065b18fc2f6267a24dad Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:39:50 -0800 Subject: dax: add tracepoint infrastructure, PMD tracing Tracepoints are the standard way to capture debugging and tracing information in many parts of the kernel, including the XFS and ext4 filesystems. Create a tracepoint header for FS DAX and add the first DAX tracepoints to the PMD fault handler. This allows the tracing for DAX to be done in the same way as the filesystem tracing so that developers can look at them together and get a coherent idea of what the system is doing. I added both an entry and exit tracepoint because future patches will add tracepoints to child functions of dax_iomap_pmd_fault() like dax_pmd_load_hole() and dax_pmd_insert_mapping(). We want those messages to be wrapped by the parent function tracepoints so the code flow is more easily understood. Having entry and exit tracepoints for faults also allows us to easily see what filesystems functions were called during the fault. These filesystem functions get executed via iomap_begin() and iomap_end() calls, for example, and will have their own tracepoints. For PMD faults we primarily want to understand the type of mapping, the fault flags, the faulting address and whether it fell back to 4k faults. If it fell back to 4k faults the tracepoints should let us understand why. I named the new tracepoint header file "fs_dax.h" to allow for device DAX to have its own separate tracing header in the same directory at some point. Here is an example output for these events from a successful PMD fault: big-1441 [005] .... 32.582758: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003 big-1441 [005] .... 32.582776: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 big-1441 [005] .... 32.583292: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE Link: http://lkml.kernel.org/r/1484085142-2297-3-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Suggested-by: Dave Chinner Reviewed-by: Jan Kara Acked-by: Steven Rostedt Cc: Dave Jiang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index e9cf8b4cd234..dc11a2b90731 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -35,6 +35,9 @@ #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* We choose 4096 entries - same as per-zone page wait tables */ #define DAX_WAIT_TABLE_BITS 12 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS) @@ -1341,6 +1344,16 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, loff_t pos; int error; + /* + * Check whether offset isn't beyond end of file now. Caller is + * supposed to hold locks serializing us with truncate / punch hole so + * this is a reliable test. + */ + pgoff = linear_page_index(vma, pmd_addr); + max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; + + trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0); + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; @@ -1351,16 +1364,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((pmd_addr + PMD_SIZE) > vma->vm_end) goto fallback; - /* - * Check whether offset isn't beyond end of file now. Caller is - * supposed to hold locks serializing us with truncate / punch hole so - * this is a reliable test. - */ - pgoff = linear_page_index(vma, pmd_addr); - max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; - - if (pgoff > max_pgoff) - return VM_FAULT_SIGBUS; + if (pgoff > max_pgoff) { + result = VM_FAULT_SIGBUS; + goto out; + } /* If the PMD would extend beyond the file size */ if ((pgoff | PG_PMD_COLOUR) > max_pgoff) @@ -1432,6 +1439,9 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, split_huge_pmd(vma, pmd, address); count_vm_event(THP_FAULT_FALLBACK); } +out: + trace_dax_pmd_fault_done(inode, vma, address, flags, pgoff, max_pgoff, + result); return result; } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); -- cgit v1.2.3 From 653b2ea3396fda0b5e23a37b3356dd2ca82fe5c1 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:39:57 -0800 Subject: dax: add tracepoints to dax_pmd_load_hole() Add tracepoints to dax_pmd_load_hole(), following the same logging conventions as the tracepoints in dax_iomap_pmd_fault(). Here is an example PMD fault showing the new tracepoints: read_big-1478 [004] .... 238.242188: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003 read_big-1478 [004] .... 238.242191: dax_pmd_fault: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10400000 vm_start 0x10200000 vm_end 0x10600000 pgoff 0x200 max_pgoff 0x1400 read_big-1478 [004] .... 238.242390: dax_pmd_load_hole: dev 259:0 ino 0x1003 shared address 0x10400000 zero_page ffffea0002c20000 radix_entry 0x1e read_big-1478 [004] .... 238.242392: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared ALLOW_RETRY|KILLABLE|USER address 0x10400000 vm_start 0x10200000 vm_end 0x10600000 pgoff 0x200 max_pgoff 0x1400 NOPAGE Link: http://lkml.kernel.org/r/1484085142-2297-5-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Acked-by: Steven Rostedt Cc: Dave Chinner Cc: Dave Jiang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index dc11a2b90731..a622961a717c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1299,33 +1299,39 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, { struct address_space *mapping = vma->vm_file->f_mapping; unsigned long pmd_addr = address & PMD_MASK; + struct inode *inode = mapping->host; struct page *zero_page; + void *ret = NULL; spinlock_t *ptl; pmd_t pmd_entry; - void *ret; zero_page = mm_get_huge_zero_page(vma->vm_mm); if (unlikely(!zero_page)) - return VM_FAULT_FALLBACK; + goto fallback; ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0, RADIX_DAX_PMD | RADIX_DAX_HZP); if (IS_ERR(ret)) - return VM_FAULT_FALLBACK; + goto fallback; *entryp = ret; ptl = pmd_lock(vma->vm_mm, pmd); if (!pmd_none(*pmd)) { spin_unlock(ptl); - return VM_FAULT_FALLBACK; + goto fallback; } pmd_entry = mk_pmd(zero_page, vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry); spin_unlock(ptl); + trace_dax_pmd_load_hole(inode, vma, address, zero_page, ret); return VM_FAULT_NOPAGE; + +fallback: + trace_dax_pmd_load_hole_fallback(inode, vma, address, zero_page, ret); + return VM_FAULT_FALLBACK; } int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, -- cgit v1.2.3 From 27a7ffaccd915675c88045ba83493804a0267ab7 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:40:00 -0800 Subject: dax: add tracepoints to dax_pmd_insert_mapping() Add tracepoints to dax_pmd_insert_mapping(), following the same logging conventions as the tracepoints in dax_iomap_pmd_fault(). Here is an example PMD fault showing the new tracepoints: big-1504 [001] .... 326.960743: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003 big-1504 [001] .... 326.960753: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 big-1504 [001] .... 326.960981: dax_pmd_insert_mapping: dev 259:0 ino 0x1003 shared write address 0x10505000 length 0x200000 pfn 0x100600 DEV|MAP radix_entry 0xc000e big-1504 [001] .... 326.960986: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE Link: http://lkml.kernel.org/r/1484085142-2297-6-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Acked-by: Steven Rostedt Cc: Dave Chinner Cc: Dave Jiang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index a622961a717c..06c5dffaa7bc 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1262,15 +1262,16 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, { struct address_space *mapping = vma->vm_file->f_mapping; struct block_device *bdev = iomap->bdev; + struct inode *inode = mapping->host; struct blk_dax_ctl dax = { .sector = dax_iomap_sector(iomap, pos), .size = PMD_SIZE, }; long length = dax_map_atomic(bdev, &dax); - void *ret; + void *ret = NULL; if (length < 0) /* dax_map_atomic() failed */ - return VM_FAULT_FALLBACK; + goto fallback; if (length < PMD_SIZE) goto unmap_fallback; if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) @@ -1283,13 +1284,18 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector, RADIX_DAX_PMD); if (IS_ERR(ret)) - return VM_FAULT_FALLBACK; + goto fallback; *entryp = ret; + trace_dax_pmd_insert_mapping(inode, vma, address, write, length, + dax.pfn, ret); return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write); unmap_fallback: dax_unmap_atomic(bdev, &dax); +fallback: + trace_dax_pmd_insert_mapping_fallback(inode, vma, address, write, + length, dax.pfn, ret); return VM_FAULT_FALLBACK; } -- cgit v1.2.3 From d8a849e1bc123790bbbf1facba94452a3aef5736 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 22 Feb 2017 15:40:03 -0800 Subject: mm, dax: make pmd_fault() and friends be the same as fault() Instead of passing in multiple parameters in the pmd_fault() handler, a vmf can be passed in just like a fault() handler. This will simplify code and remove the need for the actual pmd fault handlers to allocate a vmf. Related functions are also modified to do the same. [dave.jiang@intel.com: fix issue with xfs_tests stall when DAX option is off] Link: http://lkml.kernel.org/r/148469861071.195597.3619476895250028518.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/1484085142-2297-7-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 06c5dffaa7bc..01fdbc86ee8c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1340,18 +1340,17 @@ fallback: return VM_FAULT_FALLBACK; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops) +int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, + struct iomap_ops *ops) { struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long pmd_addr = address & PMD_MASK; - bool write = flags & FAULT_FLAG_WRITE; + unsigned long pmd_addr = vmf->address & PMD_MASK; + bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT; struct inode *inode = mapping->host; int result = VM_FAULT_FALLBACK; struct iomap iomap = { 0 }; pgoff_t max_pgoff, pgoff; - struct vm_fault vmf; void *entry; loff_t pos; int error; @@ -1364,7 +1363,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pgoff = linear_page_index(vma, pmd_addr); max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; - trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0); + trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0); /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) @@ -1408,21 +1407,17 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (IS_ERR(entry)) goto finish_iomap; - vmf.pgoff = pgoff; - vmf.flags = flags; - vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; - switch (iomap.type) { case IOMAP_MAPPED: - result = dax_pmd_insert_mapping(vma, pmd, &vmf, address, - &iomap, pos, write, &entry); + result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf, + vmf->address, &iomap, pos, write, &entry); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) goto unlock_entry; - result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, - &entry); + result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address, + &iomap, &entry); break; default: WARN_ON_ONCE(1); @@ -1448,12 +1443,11 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, } fallback: if (result == VM_FAULT_FALLBACK) { - split_huge_pmd(vma, pmd, address); + split_huge_pmd(vma, vmf->pmd, vmf->address); count_vm_event(THP_FAULT_FALLBACK); } out: - trace_dax_pmd_fault_done(inode, vma, address, flags, pgoff, max_pgoff, - result); + trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result); return result; } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); -- cgit v1.2.3 From f42003917b4569a2f4f0c79c35e1e3df2859f81a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 22 Feb 2017 15:40:06 -0800 Subject: mm, dax: change pmd_fault() to take only vmf parameter pmd_fault() and related functions really only need the vmf parameter since the additional parameters are all included in the vmf struct. Remove the additional parameter and simplify pmd_fault() and friends. Link: http://lkml.kernel.org/r/1484085142-2297-8-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Dave Jiang Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 54 +++++++++++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) (limited to 'fs/dax.c') diff --git a/fs/dax.c b/fs/dax.c index 01fdbc86ee8c..d800197aba34 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1256,11 +1256,10 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault); */ #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) -static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, - struct vm_fault *vmf, unsigned long address, - struct iomap *iomap, loff_t pos, bool write, void **entryp) +static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, + loff_t pos, void **entryp) { - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct block_device *bdev = iomap->bdev; struct inode *inode = mapping->host; struct blk_dax_ctl dax = { @@ -1287,31 +1286,30 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, goto fallback; *entryp = ret; - trace_dax_pmd_insert_mapping(inode, vma, address, write, length, - dax.pfn, ret); - return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write); + trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret); + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, + dax.pfn, vmf->flags & FAULT_FLAG_WRITE); unmap_fallback: dax_unmap_atomic(bdev, &dax); fallback: - trace_dax_pmd_insert_mapping_fallback(inode, vma, address, write, - length, dax.pfn, ret); + trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, + dax.pfn, ret); return VM_FAULT_FALLBACK; } -static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, - struct vm_fault *vmf, unsigned long address, - struct iomap *iomap, void **entryp) +static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, + void **entryp) { - struct address_space *mapping = vma->vm_file->f_mapping; - unsigned long pmd_addr = address & PMD_MASK; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; + unsigned long pmd_addr = vmf->address & PMD_MASK; struct inode *inode = mapping->host; struct page *zero_page; void *ret = NULL; spinlock_t *ptl; pmd_t pmd_entry; - zero_page = mm_get_huge_zero_page(vma->vm_mm); + zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); if (unlikely(!zero_page)) goto fallback; @@ -1322,27 +1320,27 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, goto fallback; *entryp = ret; - ptl = pmd_lock(vma->vm_mm, pmd); - if (!pmd_none(*pmd)) { + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); + if (!pmd_none(*(vmf->pmd))) { spin_unlock(ptl); goto fallback; } - pmd_entry = mk_pmd(zero_page, vma->vm_page_prot); + pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); - set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry); + set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); spin_unlock(ptl); - trace_dax_pmd_load_hole(inode, vma, address, zero_page, ret); + trace_dax_pmd_load_hole(inode, vmf, zero_page, ret); return VM_FAULT_NOPAGE; fallback: - trace_dax_pmd_load_hole_fallback(inode, vma, address, zero_page, ret); + trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret); return VM_FAULT_FALLBACK; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops) +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops) { + struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -1363,7 +1361,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, pgoff = linear_page_index(vma, pmd_addr); max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; - trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0); + trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) @@ -1409,15 +1407,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, switch (iomap.type) { case IOMAP_MAPPED: - result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf, - vmf->address, &iomap, pos, write, &entry); + result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) goto unlock_entry; - result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address, - &iomap, &entry); + result = dax_pmd_load_hole(vmf, &iomap, &entry); break; default: WARN_ON_ONCE(1); @@ -1447,7 +1443,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, count_vm_event(THP_FAULT_FALLBACK); } out: - trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result); + trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result); return result; } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); -- cgit v1.2.3