drbd: introduce WRITE_SAME support

We will support WRITE_SAME, if
 * all peers support WRITE_SAME (both in kernel and DRBD version),
 * all peer devices support WRITE_SAME
 * logical_block_size is identical on all peers.

We may at some point introduce a fallback on the receiving side
for devices/kernels that do not support WRITE_SAME,
by open-coding a submit loop. But not yet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Lars Ellenberg
2016-06-14 00:26:31 +02:00
committed by Jens Axboe
parent 60bac04012
commit 9104d31a75
10 changed files with 360 additions and 80 deletions

View File

@@ -920,6 +920,31 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
}
/* communicated if (agreed_features & DRBD_FF_WSAME) */
void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
{
if (q) {
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
p->qlim->discard_enabled = blk_queue_discard(q);
p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
} else {
q = device->rq_queue;
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
p->qlim->alignment_offset = 0;
p->qlim->io_min = cpu_to_be32(queue_io_min(q));
p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
p->qlim->discard_enabled = 0;
p->qlim->discard_zeroes_data = 0;
p->qlim->write_same_capable = 0;
}
}
int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
{
struct drbd_device *device = peer_device->device;
@@ -928,29 +953,37 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
sector_t d_size, u_size;
int q_order_type;
unsigned int max_bio_size;
unsigned int packet_size;
sock = &peer_device->connection->data;
p = drbd_prepare_command(peer_device, sock);
if (!p)
return -EIO;
packet_size = sizeof(*p);
if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
packet_size += sizeof(p->qlim[0]);
memset(p, 0, packet_size);
if (get_ldev_if_state(device, D_NEGOTIATING)) {
D_ASSERT(device, device->ldev->backing_bdev);
struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
d_size = drbd_get_max_capacity(device->ldev);
rcu_read_lock();
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
rcu_read_unlock();
q_order_type = drbd_queue_order_type(device);
max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
max_bio_size = queue_max_hw_sectors(q) << 9;
max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
assign_p_sizes_qlim(device, p, q);
put_ldev(device);
} else {
d_size = 0;
u_size = 0;
q_order_type = QUEUE_ORDERED_NONE;
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
assign_p_sizes_qlim(device, p, NULL);
}
sock = &peer_device->connection->data;
p = drbd_prepare_command(peer_device, sock);
if (!p)
return -EIO;
if (peer_device->connection->agreed_pro_version <= 94)
max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
else if (peer_device->connection->agreed_pro_version < 100)
@@ -962,7 +995,8 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
p->max_bio_size = cpu_to_be32(max_bio_size);
p->queue_order_type = cpu_to_be16(q_order_type);
p->dds_flags = cpu_to_be16(flags);
return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0);
return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
}
/**
@@ -1577,6 +1611,9 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
? 0 : MSG_MORE);
if (err)
return err;
/* REQ_OP_WRITE_SAME has only one segment */
if (bio_op(bio) == REQ_OP_WRITE_SAME)
break;
}
return 0;
}
@@ -1595,6 +1632,9 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
if (err)
return err;
/* REQ_OP_WRITE_SAME has only one segment */
if (bio_op(bio) == REQ_OP_WRITE_SAME)
break;
}
return 0;
}
@@ -1626,6 +1666,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
return (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
(bio->bi_rw & REQ_FUA ? DP_FUA : 0) |
(bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) |
(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
else
return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
@@ -1639,6 +1680,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
struct drbd_device *device = peer_device->device;
struct drbd_socket *sock;
struct p_data *p;
struct p_wsame *wsame = NULL;
void *digest_out;
unsigned int dp_flags = 0;
int digest_size;
int err;
@@ -1674,12 +1717,29 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
goto out;
}
if (dp_flags & DP_WSAME) {
/* this will only work if DRBD_FF_WSAME is set AND the
* handshake agreed that all nodes and backend devices are
* WRITE_SAME capable and agree on logical_block_size */
wsame = (struct p_wsame*)p;
digest_out = wsame + 1;
wsame->size = cpu_to_be32(req->i.size);
} else
digest_out = p + 1;
/* our digest is still only over the payload.
* TRIM does not carry any payload. */
if (digest_size)
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size);
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
if (wsame) {
err =
__send_command(peer_device->connection, device->vnr, sock, P_WSAME,
sizeof(*wsame) + digest_size, NULL,
bio_iovec(req->master_bio).bv_len);
} else
err =
__send_command(peer_device->connection, device->vnr, sock, P_DATA,
sizeof(*p) + digest_size, NULL, req->i.size);
if (!err) {
/* For protocol A, we have to memcpy the payload into
* socket buffers, as we may complete right away
@@ -3660,6 +3720,8 @@ const char *cmdname(enum drbd_packet cmd)
* one PRO_VERSION */
static const char *cmdnames[] = {
[P_DATA] = "Data",
[P_WSAME] = "WriteSame",
[P_TRIM] = "Trim",
[P_DATA_REPLY] = "DataReply",
[P_RS_DATA_REPLY] = "RSDataReply",
[P_BARRIER] = "Barrier",