Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 1 | /* |
| 2 | * QEMU paravirtual RDMA - QP implementation |
| 3 | * |
| 4 | * Copyright (C) 2018 Oracle |
| 5 | * Copyright (C) 2018 Red Hat Inc |
| 6 | * |
| 7 | * Authors: |
| 8 | * Yuval Shaia <yuval.shaia@oracle.com> |
| 9 | * Marcel Apfelbaum <marcel@redhat.com> |
| 10 | * |
| 11 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| 12 | * See the COPYING file in the top-level directory. |
| 13 | * |
| 14 | */ |
| 15 | |
Michael S. Tsirkin | 0efc951 | 2018-03-21 17:22:07 +0200 | [diff] [blame] | 16 | #include "qemu/osdep.h" |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 17 | |
| 18 | #include "../rdma_utils.h" |
| 19 | #include "../rdma_rm.h" |
| 20 | #include "../rdma_backend.h" |
| 21 | |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 22 | #include "trace.h" |
| 23 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 24 | #include "pvrdma.h" |
Michael S. Tsirkin | 0efc951 | 2018-03-21 17:22:07 +0200 | [diff] [blame] | 25 | #include "standard-headers/rdma/vmw_pvrdma-abi.h" |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 26 | #include "pvrdma_qp_ops.h" |
| 27 | |
/*
 * Per-WQE context handed to the backend completion handler. Allocated in
 * pvrdma_qp_send/pvrdma_qp_recv/pvrdma_srq_recv and freed (g_free) in
 * pvrdma_qp_ops_comp_handler once the CQE has been posted to the guest.
 */
typedef struct CompHandlerCtx {
    PVRDMADev *dev;        /* device owning the CQ to complete on */
    uint32_t cq_handle;    /* resource-manager handle of the target CQ */
    struct pvrdma_cqe cqe; /* CQE template; wc fields merged in at completion */
} CompHandlerCtx;
| 33 | |
/* Send Queue WQE: fixed header followed by num_sge scatter/gather entries
 * (flexible array member, laid out in the guest-shared ring) */
typedef struct PvrdmaSqWqe {
    struct pvrdma_sq_wqe_hdr hdr;
    struct pvrdma_sge sge[];
} PvrdmaSqWqe;
| 39 | |
/* Recv Queue WQE: fixed header followed by num_sge scatter/gather entries
 * (flexible array member, laid out in the guest-shared ring) */
typedef struct PvrdmaRqWqe {
    struct pvrdma_rq_wqe_hdr hdr;
    struct pvrdma_sge sge[];
} PvrdmaRqWqe;
| 45 | |
| 46 | /* |
| 47 | * 1. Put CQE on send CQ ring |
| 48 | * 2. Put CQ number on dsr completion ring |
| 49 | * 3. Interrupt host |
| 50 | */ |
| 51 | static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, |
Yuval Shaia | eaac010 | 2018-12-21 16:40:30 +0200 | [diff] [blame] | 52 | struct pvrdma_cqe *cqe, struct ibv_wc *wc) |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 53 | { |
| 54 | struct pvrdma_cqe *cqe1; |
| 55 | struct pvrdma_cqne *cqne; |
| 56 | PvrdmaRing *ring; |
| 57 | RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle); |
| 58 | |
| 59 | if (unlikely(!cq)) { |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 60 | return -EINVAL; |
| 61 | } |
| 62 | |
| 63 | ring = (PvrdmaRing *)cq->opaque; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 64 | |
| 65 | /* Step #1: Put CQE on CQ ring */ |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 66 | cqe1 = pvrdma_ring_next_elem_write(ring); |
| 67 | if (unlikely(!cqe1)) { |
| 68 | return -EINVAL; |
| 69 | } |
| 70 | |
Yuval Shaia | eca0f2a | 2018-08-05 18:35:09 +0300 | [diff] [blame] | 71 | memset(cqe1, 0, sizeof(*cqe1)); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 72 | cqe1->wr_id = cqe->wr_id; |
Kamal Heib | 355b7cf | 2019-04-03 14:33:43 +0300 | [diff] [blame] | 73 | cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 74 | cqe1->opcode = cqe->opcode; |
Yuval Shaia | eaac010 | 2018-12-21 16:40:30 +0200 | [diff] [blame] | 75 | cqe1->status = wc->status; |
| 76 | cqe1->byte_len = wc->byte_len; |
| 77 | cqe1->src_qp = wc->src_qp; |
| 78 | cqe1->wc_flags = wc->wc_flags; |
| 79 | cqe1->vendor_err = wc->vendor_err; |
| 80 | |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 81 | trace_pvrdma_post_cqe(cq_handle, cq->notify, cqe1->wr_id, cqe1->qp, |
| 82 | cqe1->opcode, cqe1->status, cqe1->byte_len, |
| 83 | cqe1->src_qp, cqe1->wc_flags, cqe1->vendor_err); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 84 | |
| 85 | pvrdma_ring_write_inc(ring); |
| 86 | |
| 87 | /* Step #2: Put CQ number on dsr completion ring */ |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 88 | cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq); |
| 89 | if (unlikely(!cqne)) { |
| 90 | return -EINVAL; |
| 91 | } |
| 92 | |
| 93 | cqne->info = cq_handle; |
| 94 | pvrdma_ring_write_inc(&dev->dsr_info.cq); |
| 95 | |
Yuval Shaia | 4082e53 | 2018-12-21 16:40:16 +0200 | [diff] [blame] | 96 | if (cq->notify != CNT_CLEAR) { |
| 97 | if (cq->notify == CNT_ARM) { |
| 98 | cq->notify = CNT_CLEAR; |
| 99 | } |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 100 | post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q); |
| 101 | } |
| 102 | |
| 103 | return 0; |
| 104 | } |
| 105 | |
Yuval Shaia | eaac010 | 2018-12-21 16:40:30 +0200 | [diff] [blame] | 106 | static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc) |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 107 | { |
| 108 | CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx; |
| 109 | |
Yuval Shaia | eaac010 | 2018-12-21 16:40:30 +0200 | [diff] [blame] | 110 | pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc); |
| 111 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 112 | g_free(ctx); |
| 113 | } |
| 114 | |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 115 | static void complete_with_error(uint32_t vendor_err, void *ctx) |
| 116 | { |
Kamal Heib | a421c81 | 2019-03-14 17:30:30 +0200 | [diff] [blame] | 117 | struct ibv_wc wc = {}; |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 118 | |
| 119 | wc.status = IBV_WC_GENERAL_ERR; |
| 120 | wc.vendor_err = vendor_err; |
| 121 | |
| 122 | pvrdma_qp_ops_comp_handler(ctx, &wc); |
| 123 | } |
| 124 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 125 | void pvrdma_qp_ops_fini(void) |
| 126 | { |
| 127 | rdma_backend_unregister_comp_handler(); |
| 128 | } |
| 129 | |
/* Initialize QP ops: register the completion handler with the backend.
 * Always returns 0. */
int pvrdma_qp_ops_init(void)
{
    rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);

    return 0;
}
| 136 | |
Yuval Shaia | 5bb8b73 | 2019-01-09 22:21:40 +0200 | [diff] [blame] | 137 | void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 138 | { |
| 139 | RdmaRmQP *qp; |
| 140 | PvrdmaSqWqe *wqe; |
| 141 | PvrdmaRing *ring; |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 142 | int sgid_idx; |
| 143 | union ibv_gid *sgid; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 144 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 145 | qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); |
| 146 | if (unlikely(!qp)) { |
Yuval Shaia | 5bb8b73 | 2019-01-09 22:21:40 +0200 | [diff] [blame] | 147 | return; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | ring = (PvrdmaRing *)qp->opaque; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 151 | |
Markus Armbruster | 3d55833 | 2022-11-23 14:38:11 +0100 | [diff] [blame] | 152 | wqe = pvrdma_ring_next_elem_read(ring); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 153 | while (wqe) { |
| 154 | CompHandlerCtx *comp_ctx; |
| 155 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 156 | /* Prepare CQE */ |
Markus Armbruster | b21e238 | 2022-03-15 15:41:56 +0100 | [diff] [blame] | 157 | comp_ctx = g_new(CompHandlerCtx, 1); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 158 | comp_ctx->dev = dev; |
| 159 | comp_ctx->cq_handle = qp->send_cq_handle; |
| 160 | comp_ctx->cqe.wr_id = wqe->hdr.wr_id; |
| 161 | comp_ctx->cqe.qp = qp_handle; |
Yuval Shaia | 1625bb1 | 2018-12-21 16:40:23 +0200 | [diff] [blame] | 162 | comp_ctx->cqe.opcode = IBV_WC_SEND; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 163 | |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 164 | sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); |
| 165 | if (!sgid) { |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 166 | rdma_error_report("Failed to get gid for idx %d", |
| 167 | wqe->hdr.wr.ud.av.gid_index); |
Yuval Shaia | 26fd869 | 2019-01-09 22:15:59 +0200 | [diff] [blame] | 168 | complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); |
| 169 | continue; |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 170 | } |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 171 | |
| 172 | sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res, |
| 173 | &dev->backend_dev, |
| 174 | wqe->hdr.wr.ud.av.gid_index); |
| 175 | if (sgid_idx <= 0) { |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 176 | rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d", |
| 177 | wqe->hdr.wr.ud.av.gid_index); |
Yuval Shaia | 26fd869 | 2019-01-09 22:15:59 +0200 | [diff] [blame] | 178 | complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); |
| 179 | continue; |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 180 | } |
| 181 | |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 182 | if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 183 | rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, |
| 184 | dev->dev_attr.max_sge); |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 185 | complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); |
| 186 | continue; |
| 187 | } |
| 188 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 189 | rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type, |
| 190 | (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, |
Yuval Shaia | 2b05705 | 2018-12-21 16:40:25 +0200 | [diff] [blame] | 191 | sgid_idx, sgid, |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 192 | (union ibv_gid *)wqe->hdr.wr.ud.av.dgid, |
| 193 | wqe->hdr.wr.ud.remote_qpn, |
| 194 | wqe->hdr.wr.ud.remote_qkey, comp_ctx); |
| 195 | |
| 196 | pvrdma_ring_read_inc(ring); |
| 197 | |
| 198 | wqe = pvrdma_ring_next_elem_read(ring); |
| 199 | } |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 200 | } |
| 201 | |
Yuval Shaia | 5bb8b73 | 2019-01-09 22:21:40 +0200 | [diff] [blame] | 202 | void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 203 | { |
| 204 | RdmaRmQP *qp; |
| 205 | PvrdmaRqWqe *wqe; |
| 206 | PvrdmaRing *ring; |
| 207 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 208 | qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); |
| 209 | if (unlikely(!qp)) { |
Yuval Shaia | 5bb8b73 | 2019-01-09 22:21:40 +0200 | [diff] [blame] | 210 | return; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 211 | } |
| 212 | |
| 213 | ring = &((PvrdmaRing *)qp->opaque)[1]; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 214 | |
Markus Armbruster | 3d55833 | 2022-11-23 14:38:11 +0100 | [diff] [blame] | 215 | wqe = pvrdma_ring_next_elem_read(ring); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 216 | while (wqe) { |
| 217 | CompHandlerCtx *comp_ctx; |
| 218 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 219 | /* Prepare CQE */ |
Markus Armbruster | b21e238 | 2022-03-15 15:41:56 +0100 | [diff] [blame] | 220 | comp_ctx = g_new(CompHandlerCtx, 1); |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 221 | comp_ctx->dev = dev; |
| 222 | comp_ctx->cq_handle = qp->recv_cq_handle; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 223 | comp_ctx->cqe.wr_id = wqe->hdr.wr_id; |
Yuval Shaia | 2bff59e | 2018-12-21 16:40:22 +0200 | [diff] [blame] | 224 | comp_ctx->cqe.qp = qp_handle; |
| 225 | comp_ctx->cqe.opcode = IBV_WC_RECV; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 226 | |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 227 | if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { |
Yuval Shaia | 4d71b38 | 2019-03-11 03:29:05 -0700 | [diff] [blame] | 228 | rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, |
| 229 | dev->dev_attr.max_sge); |
Yuval Shaia | ffef477 | 2019-01-09 21:41:23 +0200 | [diff] [blame] | 230 | complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); |
| 231 | continue; |
| 232 | } |
| 233 | |
Yuval Shaia | 3c890bc | 2019-03-11 03:29:12 -0700 | [diff] [blame] | 234 | rdma_backend_post_recv(&dev->backend_dev, &qp->backend_qp, qp->qp_type, |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 235 | (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, |
| 236 | comp_ctx); |
| 237 | |
| 238 | pvrdma_ring_read_inc(ring); |
| 239 | |
| 240 | wqe = pvrdma_ring_next_elem_read(ring); |
| 241 | } |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 242 | } |
| 243 | |
Kamal Heib | 355b7cf | 2019-04-03 14:33:43 +0300 | [diff] [blame] | 244 | void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) |
| 245 | { |
| 246 | RdmaRmSRQ *srq; |
| 247 | PvrdmaRqWqe *wqe; |
| 248 | PvrdmaRing *ring; |
| 249 | |
| 250 | srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); |
| 251 | if (unlikely(!srq)) { |
| 252 | return; |
| 253 | } |
| 254 | |
| 255 | ring = (PvrdmaRing *)srq->opaque; |
| 256 | |
Markus Armbruster | 3d55833 | 2022-11-23 14:38:11 +0100 | [diff] [blame] | 257 | wqe = pvrdma_ring_next_elem_read(ring); |
Kamal Heib | 355b7cf | 2019-04-03 14:33:43 +0300 | [diff] [blame] | 258 | while (wqe) { |
| 259 | CompHandlerCtx *comp_ctx; |
| 260 | |
| 261 | /* Prepare CQE */ |
Markus Armbruster | b21e238 | 2022-03-15 15:41:56 +0100 | [diff] [blame] | 262 | comp_ctx = g_new(CompHandlerCtx, 1); |
Kamal Heib | 355b7cf | 2019-04-03 14:33:43 +0300 | [diff] [blame] | 263 | comp_ctx->dev = dev; |
| 264 | comp_ctx->cq_handle = srq->recv_cq_handle; |
| 265 | comp_ctx->cqe.wr_id = wqe->hdr.wr_id; |
| 266 | comp_ctx->cqe.qp = 0; |
| 267 | comp_ctx->cqe.opcode = IBV_WC_RECV; |
| 268 | |
| 269 | if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { |
| 270 | rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, |
| 271 | dev->dev_attr.max_sge); |
| 272 | complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); |
| 273 | continue; |
| 274 | } |
| 275 | |
| 276 | rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, |
| 277 | (struct ibv_sge *)&wqe->sge[0], |
| 278 | wqe->hdr.num_sge, |
| 279 | comp_ctx); |
| 280 | |
| 281 | pvrdma_ring_read_inc(ring); |
| 282 | |
| 283 | wqe = pvrdma_ring_next_elem_read(ring); |
| 284 | } |
| 285 | |
| 286 | } |
| 287 | |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 288 | void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) |
| 289 | { |
| 290 | RdmaRmCQ *cq; |
| 291 | |
| 292 | cq = rdma_rm_get_cq(dev_res, cq_handle); |
| 293 | if (!cq) { |
Marcel Apfelbaum | b0197cf | 2018-04-30 23:02:18 +0300 | [diff] [blame] | 294 | return; |
Yuval Shaia | 98d176f | 2018-02-09 15:39:19 +0200 | [diff] [blame] | 295 | } |
| 296 | |
| 297 | rdma_backend_poll_cq(dev_res, &cq->backend_cq); |
| 298 | } |