summaryrefslogtreecommitdiff
path: root/include/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 18:56:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 18:56:14 -0700
commitb0a1ea51bda4c2bcdde460221e1772f3a4f8c44f (patch)
tree9684c11b72718cd7e96e5eb93298690269ecf447 /include/trace
parent33e247c7e58d335d70ecb84fd869091e2e4b8dcb (diff)
parent69d7fde5909b614114343974cfc52cb8ff30b544 (diff)
Merge branch 'for-4.3/blkcg' of git://git.kernel.dk/linux-block
Pull blk-cg updates from Jens Axboe: "A bit later in the cycle, but this has been in the block tree for a a while. This is basically four patchsets from Tejun, that improve our buffered cgroup writeback. It was dependent on the other cgroup changes, but they went in earlier in this cycle. Series 1 is set of 5 patches that has cgroup writeback updates: - bdi_writeback iteration fix which could lead to some wb's being skipped or repeated during e.g. sync under memory pressure. - Simplification of wb work wait mechanism. - Writeback tracepoints updated to report cgroup. Series 2 is is a set of updates for the CFQ cgroup writeback handling: cfq has always charged all async IOs to the root cgroup. It didn't have much choice as writeback didn't know about cgroups and there was no way to tell who to blame for a given writeback IO. writeback finally grew support for cgroups and now tags each writeback IO with the appropriate cgroup to charge it against. This patchset updates cfq so that it follows the blkcg each bio is tagged with. Async cfq_queues are now shared across cfq_group, which is per-cgroup, instead of per-request_queue cfq_data. This makes all IOs follow the weight based IO resource distribution implemented by cfq. - Switched from GFP_ATOMIC to GFP_NOWAIT as suggested by Jeff. - Other misc review points addressed, acks added and rebased. Series 3 is the blkcg policy cleanup patches: This patchset contains assorted cleanups for blkcg_policy methods and blk[c]g_policy_data handling. - alloc/free added for blkg_policy_data. exit dropped. - alloc/free added for blkcg_policy_data. - blk-throttle's async percpu allocation is replaced with direct allocation. - all methods now take blk[c]g_policy_data instead of blkcg_gq or blkcg. And finally, series 4 is a set of patches cleaning up the blkcg stats handling: blkcg's stats have always been somwhat of a mess. This patchset tries to improve the situation a bit. - The following patches added to consolidate blkcg entry point and blkg creation. This is in itself is an improvement and helps colllecting common stats on bio issue. - per-blkg stats now accounted on bio issue rather than request completion so that bio based and request based drivers can behave the same way. The issue was spotted by Vivek. - cfq-iosched implements custom recursive stats and blk-throttle implements custom per-cpu stats. This patchset make blkcg core support both by default. - cfq-iosched and blk-throttle keep track of the same stats multiple times. Unify them" * 'for-4.3/blkcg' of git://git.kernel.dk/linux-block: (45 commits) blkcg: use CGROUP_WEIGHT_* scale for io.weight on the unified hierarchy blkcg: s/CFQ_WEIGHT_*/CFQ_WEIGHT_LEGACY_*/ blkcg: implement interface for the unified hierarchy blkcg: misc preparations for unified hierarchy interface blkcg: separate out tg_conf_updated() from tg_set_conf() blkcg: move body parsing from blkg_conf_prep() to its callers blkcg: mark existing cftypes as legacy blkcg: rename subsystem name from blkio to io blkcg: refine error codes returned during blkcg configuration blkcg: remove unnecessary NULL checks from __cfqg_set_weight_device() blkcg: reduce stack usage of blkg_rwstat_recursive_sum() blkcg: remove cfqg_stats->sectors blkcg: move io_service_bytes and io_serviced stats into blkcg_gq blkcg: make blkg_[rw]stat_recursive_sum() to be able to index into blkcg_gq blkcg: make blkcg_[rw]stat per-cpu blkcg: add blkg_[rw]stat->aux_cnt and replace cfq_group->dead_stats with it blkcg: consolidate blkg creation in blkcg_bio_issue_check() blk-throttle: improve queue bypass handling blkcg: move root blkg lookup optimization from throtl_lookup_tg() to __blkg_lookup() blkcg: inline [__]blkg_lookup() ...
Diffstat (limited to 'include/trace')
-rw-r--r--include/trace/events/writeback.h180
1 files changed, 141 insertions, 39 deletions
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index a7aa607a4c55..fff846b512e6 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -131,6 +131,66 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
TP_ARGS(inode, flags)
);
+#ifdef CREATE_TRACE_POINTS
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+ return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+ struct cgroup *cgrp = wb->memcg_css->cgroup;
+ char *path;
+
+ path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
+ WARN_ON_ONCE(path != buf);
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+ if (wbc->wb)
+ return __trace_wb_cgroup_size(wbc->wb);
+ else
+ return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+ struct writeback_control *wbc)
+{
+ if (wbc->wb)
+ __trace_wb_assign_cgroup(buf, wbc->wb);
+ else
+ strcpy(buf, "/");
+}
+
+#else /* CONFIG_CGROUP_WRITEBACK */
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+ return 2;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+ strcpy(buf, "/");
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+ return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+ struct writeback_control *wbc)
+{
+ strcpy(buf, "/");
+}
+
+#endif /* CONFIG_CGROUP_WRITEBACK */
+#endif /* CREATE_TRACE_POINTS */
+
DECLARE_EVENT_CLASS(writeback_write_inode_template,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
@@ -141,6 +201,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
__array(char, name, 32)
__field(unsigned long, ino)
__field(int, sync_mode)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -148,12 +209,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
dev_name(inode_to_bdi(inode)->dev), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
- TP_printk("bdi %s: ino=%lu sync_mode=%d",
+ TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
__entry->name,
__entry->ino,
- __entry->sync_mode
+ __entry->sync_mode,
+ __get_str(cgroup)
)
);
@@ -172,8 +235,8 @@ DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode,
);
DECLARE_EVENT_CLASS(writeback_work_class,
- TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
- TP_ARGS(bdi, work),
+ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work),
+ TP_ARGS(wb, work),
TP_STRUCT__entry(
__array(char, name, 32)
__field(long, nr_pages)
@@ -183,10 +246,11 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__field(int, range_cyclic)
__field(int, for_background)
__field(int, reason)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
strncpy(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -194,9 +258,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
__entry->reason = work->reason;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
- "kupdate=%d range_cyclic=%d background=%d reason=%s",
+ "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
@@ -204,13 +269,14 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->for_kupdate,
__entry->range_cyclic,
__entry->for_background,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
+ __print_symbolic(__entry->reason, WB_WORK_REASON),
+ __get_str(cgroup)
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
DEFINE_EVENT(writeback_work_class, name, \
- TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
- TP_ARGS(bdi, work))
+ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \
+ TP_ARGS(wb, work))
DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -230,26 +296,42 @@ TRACE_EVENT(writeback_pages_written,
);
DECLARE_EVENT_CLASS(writeback_class,
- TP_PROTO(struct backing_dev_info *bdi),
- TP_ARGS(bdi),
+ TP_PROTO(struct bdi_writeback *wb),
+ TP_ARGS(wb),
TP_STRUCT__entry(
__array(char, name, 32)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
- TP_printk("bdi %s",
- __entry->name
+ TP_printk("bdi %s: cgroup=%s",
+ __entry->name,
+ __get_str(cgroup)
)
);
#define DEFINE_WRITEBACK_EVENT(name) \
DEFINE_EVENT(writeback_class, name, \
- TP_PROTO(struct backing_dev_info *bdi), \
- TP_ARGS(bdi))
+ TP_PROTO(struct bdi_writeback *wb), \
+ TP_ARGS(wb))
DEFINE_WRITEBACK_EVENT(writeback_nowork);
DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+
+TRACE_EVENT(writeback_bdi_register,
+ TP_PROTO(struct backing_dev_info *bdi),
+ TP_ARGS(bdi),
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ ),
+ TP_fast_assign(
+ strncpy(__entry->name, dev_name(bdi->dev), 32);
+ ),
+ TP_printk("bdi %s",
+ __entry->name
+ )
+);
DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
@@ -265,6 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class,
__field(int, range_cyclic)
__field(long, range_start)
__field(long, range_end)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -278,11 +361,12 @@ DECLARE_EVENT_CLASS(wbc_class,
__entry->range_cyclic = wbc->range_cyclic;
__entry->range_start = (long)wbc->range_start;
__entry->range_end = (long)wbc->range_end;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
"bgrd=%d reclm=%d cyclic=%d "
- "start=0x%lx end=0x%lx",
+ "start=0x%lx end=0x%lx cgroup=%s",
__entry->name,
__entry->nr_to_write,
__entry->pages_skipped,
@@ -292,7 +376,9 @@ DECLARE_EVENT_CLASS(wbc_class,
__entry->for_reclaim,
__entry->range_cyclic,
__entry->range_start,
- __entry->range_end)
+ __entry->range_end,
+ __get_str(cgroup)
+ )
)
#define DEFINE_WBC_EVENT(name) \
@@ -312,6 +398,7 @@ TRACE_EVENT(writeback_queue_io,
__field(long, age)
__field(int, moved)
__field(int, reason)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
@@ -321,13 +408,15 @@ TRACE_EVENT(writeback_queue_io,
(jiffies - *older_than_this) * 1000 / HZ : -1;
__entry->moved = moved;
__entry->reason = work->reason;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
- TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
+ TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
__entry->name,
__entry->older, /* older_than_this in jiffies */
__entry->age, /* older_than_this in relative milliseconds */
__entry->moved,
- __print_symbolic(__entry->reason, WB_WORK_REASON)
+ __print_symbolic(__entry->reason, WB_WORK_REASON),
+ __get_str(cgroup)
)
);
@@ -381,11 +470,11 @@ TRACE_EVENT(global_dirty_state,
TRACE_EVENT(bdi_dirty_ratelimit,
- TP_PROTO(struct backing_dev_info *bdi,
+ TP_PROTO(struct bdi_writeback *wb,
unsigned long dirty_rate,
unsigned long task_ratelimit),
- TP_ARGS(bdi, dirty_rate, task_ratelimit),
+ TP_ARGS(wb, dirty_rate, task_ratelimit),
TP_STRUCT__entry(
__array(char, bdi, 32)
@@ -395,36 +484,39 @@ TRACE_EVENT(bdi_dirty_ratelimit,
__field(unsigned long, dirty_ratelimit)
__field(unsigned long, task_ratelimit)
__field(unsigned long, balanced_dirty_ratelimit)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
- strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
- __entry->write_bw = KBps(bdi->wb.write_bandwidth);
- __entry->avg_write_bw = KBps(bdi->wb.avg_write_bandwidth);
+ strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ __entry->write_bw = KBps(wb->write_bandwidth);
+ __entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
- __entry->dirty_ratelimit = KBps(bdi->wb.dirty_ratelimit);
+ __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->balanced_dirty_ratelimit =
- KBps(bdi->wb.balanced_dirty_ratelimit);
+ KBps(wb->balanced_dirty_ratelimit);
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
TP_printk("bdi %s: "
"write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
- "balanced_dirty_ratelimit=%lu",
+ "balanced_dirty_ratelimit=%lu cgroup=%s",
__entry->bdi,
__entry->write_bw, /* write bandwidth */
__entry->avg_write_bw, /* avg write bandwidth */
__entry->dirty_rate, /* bdi dirty rate */
__entry->dirty_ratelimit, /* base ratelimit */
__entry->task_ratelimit, /* ratelimit with position control */
- __entry->balanced_dirty_ratelimit /* the balanced ratelimit */
+ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
+ __get_str(cgroup)
)
);
TRACE_EVENT(balance_dirty_pages,
- TP_PROTO(struct backing_dev_info *bdi,
+ TP_PROTO(struct bdi_writeback *wb,
unsigned long thresh,
unsigned long bg_thresh,
unsigned long dirty,
@@ -437,7 +529,7 @@ TRACE_EVENT(balance_dirty_pages,
long pause,
unsigned long start_time),
- TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
+ TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
dirty_ratelimit, task_ratelimit,
dirtied, period, pause, start_time),
@@ -456,11 +548,12 @@ TRACE_EVENT(balance_dirty_pages,
__field( long, pause)
__field(unsigned long, period)
__field( long, think)
+ __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
),
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
+ strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -478,6 +571,7 @@ TRACE_EVENT(balance_dirty_pages,
__entry->period = period * 1000 / HZ;
__entry->pause = pause * 1000 / HZ;
__entry->paused = (jiffies - start_time) * 1000 / HZ;
+ __trace_wb_assign_cgroup(__get_str(cgroup), wb);
),
@@ -486,7 +580,7 @@ TRACE_EVENT(balance_dirty_pages,
"bdi_setpoint=%lu bdi_dirty=%lu "
"dirty_ratelimit=%lu task_ratelimit=%lu "
"dirtied=%u dirtied_pause=%u "
- "paused=%lu pause=%ld period=%lu think=%ld",
+ "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
__entry->bdi,
__entry->limit,
__entry->setpoint,
@@ -500,7 +594,8 @@ TRACE_EVENT(balance_dirty_pages,
__entry->paused, /* ms */
__entry->pause, /* ms */
__entry->period, /* ms */
- __entry->think /* ms */
+ __entry->think, /* ms */
+ __get_str(cgroup)
)
);
@@ -514,6 +609,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
__field(unsigned long, ino)
__field(unsigned long, state)
__field(unsigned long, dirtied_when)
+ __dynamic_array(char, cgroup,
+ __trace_wb_cgroup_size(inode_to_wb(inode)))
),
TP_fast_assign(
@@ -522,14 +619,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
+ __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
),
- TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu",
+ TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
__entry->dirtied_when,
- (jiffies - __entry->dirtied_when) / HZ
+ (jiffies - __entry->dirtied_when) / HZ,
+ __get_str(cgroup)
)
);
@@ -585,6 +684,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
__field(unsigned long, writeback_index)
__field(long, nr_to_write)
__field(unsigned long, wrote)
+ __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
),
TP_fast_assign(
@@ -596,10 +696,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->nr_to_write = nr_to_write;
__entry->wrote = nr_to_write - wbc->nr_to_write;
+ __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
),
TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
- "index=%lu to_write=%ld wrote=%lu",
+ "index=%lu to_write=%ld wrote=%lu cgroup=%s",
__entry->name,
__entry->ino,
show_inode_state(__entry->state),
@@ -607,7 +708,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
(jiffies - __entry->dirtied_when) / HZ,
__entry->writeback_index,
__entry->nr_to_write,
- __entry->wrote
+ __entry->wrote,
+ __get_str(cgroup)
)
);