summaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_nl.c
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2011-05-20 16:39:13 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2011-05-24 10:08:58 +0200
commit99432fcc528d7a5ac8494a4c07ad4726670c96e2 (patch)
tree0b86df2b3e86af1eab14b987e81b7f4a5f88c090 /drivers/block/drbd/drbd_nl.c
parent21423fa79119a80e335de0c82ec29f67ed59f1bc (diff)
drbd: Take a more conservative approach when deciding max_bio_size
The old (optimistic) implementation could shrink the bio size on an primary device. Shrinking the bio size on a primary device is bad. Since there we might get BIOs with the old (bigger) size shortly after we published the new size. The new implementation is more conservative, and eventually increases the max_bio_size on a primary device (which is valid). It does so, when it knows the local limit AND the remote limit. We cache the last seen max_bio_size of the peer in the meta data, and rely on that, to make the operation of single nodes more efficient. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r--drivers/block/drbd/drbd_nl.c96
1 files changed, 68 insertions, 28 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9dfe58a09625..7c64ec042124 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -278,8 +278,14 @@ static int _try_outdate_peer_async(void *data)
/* Not using
drbd_request_state(mdev, NS(pdsk, nps));
- here, because we might were able to re-establish the connection in the
- meantime.
+ here, because we might were able to re-establish the connection
+ in the meantime. This can only partially be solved in the state's
+ engine is_valid_state() and is_valid_state_transition()
+ functions.
+
+ nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+ pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+ therefore we have to have the pre state change check here.
*/
spin_lock_irq(&mdev->req_lock);
ns = mdev->state;
@@ -786,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
return 0;
}
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
struct request_queue * const q = mdev->rq_queue;
- struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
- int max_segments = mdev->ldev->dc.max_bio_bvecs;
- int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+ int max_hw_sectors = max_bio_size >> 9;
+ int max_segments = 0;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+ max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+ max_segments = mdev->ldev->dc.max_bio_bvecs;
+ put_ldev(mdev);
+ }
blk_queue_logical_block_size(q, 512);
blk_queue_max_hw_sectors(q, max_hw_sectors);
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
- blk_queue_stack_limits(q, b);
- dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
- if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
- dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
- q->backing_dev_info.ra_pages,
- b->backing_dev_info.ra_pages);
- q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ blk_queue_stack_limits(q, b);
+
+ if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+ dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+ q->backing_dev_info.ra_pages,
+ b->backing_dev_info.ra_pages);
+ q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ }
+ put_ldev(mdev);
}
}
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+ int now, new, local, peer;
+
+ now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+ local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
+ peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+ mdev->local_max_bio_size = local;
+ put_ldev(mdev);
+ }
+
+ /* We may ignore peer limits if the peer is modern enough.
+ Because new from 8.3.8 onwards the peer can use multiple
+ BIOs for a single peer_request */
+ if (mdev->state.conn >= C_CONNECTED) {
+ if (mdev->agreed_pro_version < 94)
+ peer = mdev->peer_max_bio_size;
+ else if (mdev->agreed_pro_version == 94)
+ peer = DRBD_MAX_SIZE_H80_PACKET;
+ else /* drbd 8.3.8 onwards */
+ peer = DRBD_MAX_BIO_SIZE;
+ }
+
+ new = min_t(int, local, peer);
+
+ if (mdev->state.role == R_PRIMARY && new < now)
+ dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+ if (new != now)
+ dev_info(DEV, "max BIO size = %u\n", new);
+
+ drbd_setup_queue_param(mdev, new);
+}
+
/* serialize deconfig (worker exiting, doing cleanup)
* and reconfig (drbdsetup disk, drbdsetup net)
*
@@ -878,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
- unsigned int max_bio_size;
enum drbd_state_rv rv;
int cp_discovered = 0;
int logical_block_size;
@@ -1130,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;
- max_bio_size = DRBD_MAX_BIO_SIZE;
- if (mdev->state.conn == C_CONNECTED) {
- /* We are Primary, Connected, and now attach a new local
- * backing store. We must not increase the user visible maximum
- * bio size on this device to something the peer may not be
- * able to handle. */
- if (mdev->agreed_pro_version < 94)
- max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
- else if (mdev->agreed_pro_version == 94)
- max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
- /* else: drbd 8.3.9 and later, stay with default */
- }
-
- drbd_setup_queue_param(mdev, max_bio_size);
+ drbd_reconsider_max_bio_size(mdev);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,