IB/iser: Support T10-PI operations

Add logic to initialize protection information entities.  Upon each
iSCSI task, we keep the scsi_cmnd in order to query the scsi
protection operations and reference to protection buffers.

Modify iser_fast_reg_mr to receive indication whether it is
registering the data or protection buffers.

In addition introduce iser_reg_sig_mr which performs fast registration
work-request for a signature enabled memory region
(IB_WR_REG_SIG_MR).  In this routine we set all the protection
relevants for the device to offload protection data-transfer and
verification.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Alex Tabachnik <alext@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Sagi Grimberg 2014-03-05 19:43:48 +02:00 committed by Roland Dreier
parent 6b5a8fb0d2
commit 177e31bd5a
4 changed files with 304 additions and 27 deletions

View file

@ -184,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
iser_task->command_sent = 0;
iser_task_rdma_init(iser_task);
iser_task->sc = task->sc;
return 0;
}

View file

@ -46,6 +46,8 @@
#include <linux/printk.h>
#include <scsi/libiscsi.h>
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
@ -289,6 +291,10 @@ struct iser_device {
enum iser_data_dir cmd_dir);
};
#define ISER_CHECK_GUARD 0xc0
#define ISER_CHECK_REFTAG 0x0f
#define ISER_CHECK_APPTAG 0x30
enum iser_reg_indicator {
ISER_DATA_KEY_VALID = 1 << 0,
ISER_PROT_KEY_VALID = 1 << 1,
@ -361,11 +367,14 @@ struct iscsi_iser_task {
struct iser_tx_desc desc;
struct iscsi_iser_conn *iser_conn;
enum iser_task_status status;
struct scsi_cmnd *sc;
int command_sent; /* set if command sent */
int dir[ISER_DIRS_NUM]; /* set if dir use*/
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */
struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */
};
struct iser_page_vec {

View file

@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
if (err)
return err;
if (scsi_prot_sg_count(iser_task->sc)) {
struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
pbuf_in,
ISER_DIR_IN,
DMA_FROM_DEVICE);
if (err)
return err;
}
if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
iser_err("Total data length: %ld, less than EDTL: "
"%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task,
if (err)
return err;
if (scsi_prot_sg_count(iser_task->sc)) {
struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
err = iser_dma_map_task_data(iser_task,
pbuf_out,
ISER_DIR_OUT,
DMA_TO_DEVICE);
if (err)
return err;
}
if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
iser_err("Total data length: %ld, less than EDTL: %d, "
"in WRITE cmd BHS itt: %d, conn: 0x%p\n",
@ -368,7 +390,7 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_iser_task *iser_task = task->dd_data;
unsigned long edtl;
int err;
struct iser_data_buf *data_buf;
struct iser_data_buf *data_buf, *prot_buf;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
@ -379,18 +401,26 @@ int iser_send_command(struct iscsi_conn *conn,
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
iser_create_send_desc(iser_conn->ib_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ)
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
data_buf = &iser_task->data[ISER_DIR_IN];
else
prot_buf = &iser_task->prot[ISER_DIR_IN];
} else {
data_buf = &iser_task->data[ISER_DIR_OUT];
prot_buf = &iser_task->prot[ISER_DIR_OUT];
}
if (scsi_sg_count(sc)) { /* using a scatter list */
data_buf->buf = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc);
}
data_buf->data_len = scsi_bufflen(sc);
if (scsi_prot_sg_count(sc)) {
prot_buf->buf = scsi_prot_sglist(sc);
prot_buf->size = scsi_prot_sg_count(sc);
prot_buf->data_len = sc->prot_sdb->length;
}
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
err = iser_prepare_read_cmd(task, edtl);
if (err)
@ -635,6 +665,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->data[ISER_DIR_IN].data_len = 0;
iser_task->data[ISER_DIR_OUT].data_len = 0;
iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0;
memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
sizeof(struct iser_regd_buf));
memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
@ -645,6 +678,8 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
struct iser_device *device = iser_task->iser_conn->ib_conn->device;
int is_rdma_data_aligned = 1;
int is_rdma_prot_aligned = 1;
int prot_count = scsi_prot_sg_count(iser_task->sc);
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@ -665,12 +700,30 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
ISER_DIR_OUT);
}
if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_IN],
&iser_task->prot_copy[ISER_DIR_IN],
ISER_DIR_IN);
}
if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_OUT],
&iser_task->prot_copy[ISER_DIR_OUT],
ISER_DIR_OUT);
}
if (iser_task->dir[ISER_DIR_IN]) {
device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
if (is_rdma_data_aligned)
iser_dma_unmap_task_data(iser_task,
&iser_task->data[ISER_DIR_IN]);
if (prot_count && is_rdma_prot_aligned)
iser_dma_unmap_task_data(iser_task,
&iser_task->prot[ISER_DIR_IN]);
}
if (iser_task->dir[ISER_DIR_OUT]) {

View file

@ -440,15 +440,180 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
static inline enum ib_t10_dif_type
scsi2ib_prot_type(unsigned char prot_type)
{
switch (prot_type) {
case SCSI_PROT_DIF_TYPE0:
return IB_T10DIF_NONE;
case SCSI_PROT_DIF_TYPE1:
return IB_T10DIF_TYPE1;
case SCSI_PROT_DIF_TYPE2:
return IB_T10DIF_TYPE2;
case SCSI_PROT_DIF_TYPE3:
return IB_T10DIF_TYPE3;
default:
return IB_T10DIF_NONE;
}
}
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
unsigned char scsi_ptype = scsi_get_prot_type(sc);
sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
switch (scsi_get_prot_op(sc)) {
case SCSI_PROT_WRITE_INSERT:
case SCSI_PROT_READ_STRIP:
sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
0xffffffff;
break;
case SCSI_PROT_READ_INSERT:
case SCSI_PROT_WRITE_STRIP:
sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
0xffffffff;
sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
0xffffffff;
sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
0xffffffff;
break;
default:
iser_err("Unsupported PI operation %d\n",
scsi_get_prot_op(sc));
return -EINVAL;
}
return 0;
}
static int
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
switch (scsi_get_prot_type(sc)) {
case SCSI_PROT_DIF_TYPE0:
*mask = 0x0;
break;
case SCSI_PROT_DIF_TYPE1:
case SCSI_PROT_DIF_TYPE2:
*mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
break;
case SCSI_PROT_DIF_TYPE3:
*mask = ISER_CHECK_GUARD;
break;
default:
iser_err("Unsupported protection type %d\n",
scsi_get_prot_type(sc));
return -EINVAL;
}
return 0;
}
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
struct iser_conn *iser_conn = iser_task->iser_conn->ib_conn;
struct iser_pi_context *pi_ctx = desc->pi_ctx;
struct ib_send_wr sig_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
struct ib_sig_attrs sig_attrs;
int ret;
u32 key;
memset(&sig_attrs, 0, sizeof(sig_attrs));
ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
if (ret)
goto err;
ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
if (ret)
goto err;
if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.wr_id = ISER_FASTREG_LI_WRID;
inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
wr = &inv_wr;
/* Bump the key */
key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
}
memset(&sig_wr, 0, sizeof(sig_wr));
sig_wr.opcode = IB_WR_REG_SIG_MR;
sig_wr.wr_id = ISER_FASTREG_LI_WRID;
sig_wr.sg_list = data_sge;
sig_wr.num_sge = 1;
sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
if (scsi_prot_sg_count(iser_task->sc))
sig_wr.wr.sig_handover.prot = prot_sge;
sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
if (!wr)
wr = &sig_wr;
else
wr->next = &sig_wr;
ret = ib_post_send(iser_conn->qp, wr, &bad_wr);
if (ret) {
iser_err("reg_sig_mr failed, ret:%d\n", ret);
goto err;
}
desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
sig_sge->lkey = pi_ctx->sig_mr->lkey;
sig_sge->addr = 0;
sig_sge->length = data_sge->length + prot_sge->length;
if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
sig_sge->length += (data_sge->length /
iser_task->sc->device->sector_size) * 8;
}
iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
sig_sge->addr, sig_sge->length,
sig_sge->lkey);
err:
return ret;
}
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_regd_buf *regd_buf,
struct iser_data_buf *mem,
enum iser_reg_indicator ind,
struct ib_sge *sge)
{
struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
u8 key;
@ -467,35 +632,42 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
return 0;
}
plen = iser_sg_to_page_vec(mem, device->ib_device,
desc->data_frpl->page_list,
if (ind == ISER_DATA_KEY_VALID) {
mr = desc->data_mr;
frpl = desc->data_frpl;
} else {
mr = desc->pi_ctx->prot_mr;
frpl = desc->pi_ctx->prot_frpl;
}
plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
&offset, &size);
if (plen * SIZE_4K < size) {
iser_err("fast reg page_list too short to hold this SG\n");
return -EINVAL;
}
if (!(desc->reg_indicators & ISER_DATA_KEY_VALID)) {
if (!(desc->reg_indicators & ind)) {
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = ISER_FASTREG_LI_WRID;
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
inv_wr.ex.invalidate_rkey = mr->rkey;
wr = &inv_wr;
/* Bump the key */
key = (u8)(desc->data_mr->rkey & 0x000000FF);
ib_update_fast_reg_key(desc->data_mr, ++key);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
}
/* Prepare FASTREG WR */
memset(&fastreg_wr, 0, sizeof(fastreg_wr));
fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
fastreg_wr.wr.fast_reg.page_list = frpl;
fastreg_wr.wr.fast_reg.page_list_len = plen;
fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
fastreg_wr.wr.fast_reg.length = size;
fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
fastreg_wr.wr.fast_reg.rkey = mr->rkey;
fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
@ -510,10 +682,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
iser_err("fast registration failed, ret:%d\n", ret);
return ret;
}
desc->reg_indicators &= ~ISER_DATA_KEY_VALID;
desc->reg_indicators &= ~ind;
sge->lkey = desc->data_mr->lkey;
sge->addr = desc->data_frpl->page_list[0] + offset;
sge->lkey = mr->lkey;
sge->addr = frpl->page_list[0] + offset;
sge->length = size;
return ret;
@ -550,7 +722,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
mem = &iser_task->data_copy[cmd_dir];
}
if (mem->dma_nents != 1) {
if (mem->dma_nents != 1 ||
scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
spin_lock_irqsave(&ib_conn->lock, flags);
desc = list_first_entry(&ib_conn->fastreg.pool,
struct fast_reg_descriptor, list);
@ -559,21 +732,61 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
regd_buf->reg.mem_h = desc;
}
err = iser_fast_reg_mr(iser_task, regd_buf, mem, &data_sge);
err = iser_fast_reg_mr(iser_task, regd_buf, mem,
ISER_DATA_KEY_VALID, &data_sge);
if (err)
goto err_reg;
if (desc) {
regd_buf->reg.rkey = desc->data_mr->rkey;
if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
struct ib_sge prot_sge, sig_sge;
memset(&prot_sge, 0, sizeof(prot_sge));
if (scsi_prot_sg_count(iser_task->sc)) {
mem = &iser_task->prot[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev, mem,
&iser_task->prot_copy[cmd_dir],
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
return err;
}
mem = &iser_task->prot_copy[cmd_dir];
}
err = iser_fast_reg_mr(iser_task, regd_buf, mem,
ISER_PROT_KEY_VALID, &prot_sge);
if (err)
goto err_reg;
}
err = iser_reg_sig_mr(iser_task, desc, &data_sge,
&prot_sge, &sig_sge);
if (err) {
iser_err("Failed to register signature mr\n");
return err;
}
desc->reg_indicators |= ISER_FASTREG_PROTECTED;
regd_buf->reg.lkey = sig_sge.lkey;
regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
regd_buf->reg.va = sig_sge.addr;
regd_buf->reg.len = sig_sge.length;
regd_buf->reg.is_mr = 1;
} else {
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.is_mr = 0;
}
if (desc) {
regd_buf->reg.rkey = desc->data_mr->rkey;
regd_buf->reg.is_mr = 1;
} else {
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.is_mr = 0;
}
regd_buf->reg.lkey = data_sge.lkey;
regd_buf->reg.va = data_sge.addr;
regd_buf->reg.len = data_sge.length;
regd_buf->reg.lkey = data_sge.lkey;
regd_buf->reg.va = data_sge.addr;
regd_buf->reg.len = data_sge.length;
}
return 0;
err_reg: