/*
 * QEMU paravirtual RDMA - QP implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"

#include "../rdma_utils.h"
#include "../rdma_rm.h"
#include "../rdma_backend.h"

#include "trace.h"

#include "pvrdma.h"
#include "standard-headers/rdma/vmw_pvrdma-abi.h"
#include "pvrdma_qp_ops.h"

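/*
 * Per-work-request context handed to the backend with each posted WQE;
 * the completion handler uses it to locate the target CQ and to seed
 * the guest-visible CQE.
 */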
typedef struct CompHandlerCtx {
    PVRDMADev *dev;
    uint32_t cq_handle;
    struct pvrdma_cqe cqe;
} CompHandlerCtx;

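/*
 * Work queue elements as laid out in the guest rings: each entry is a
 * fixed header followed by a flexible array of scatter/gather entries,
 * so a WQE with N SGEs occupies
 * sizeof(hdr) + N * sizeof(struct pvrdma_sge) bytes.
 */
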
/* Send Queue WQE */
typedef struct PvrdmaSqWqe {
    struct pvrdma_sq_wqe_hdr hdr;
    struct pvrdma_sge sge[];
} PvrdmaSqWqe;

/* Recv Queue WQE */
typedef struct PvrdmaRqWqe {
    struct pvrdma_rq_wqe_hdr hdr;
    struct pvrdma_sge sge[];
} PvrdmaRqWqe;

/*
 * 1. Put CQE on send CQ ring
 * 2. Put CQ number on dsr completion ring
 * 3. Interrupt host
 */
static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
                           struct pvrdma_cqe *cqe, struct ibv_wc *wc)
{
    struct pvrdma_cqe *cqe1;
    struct pvrdma_cqne *cqne;
    PvrdmaRing *ring;
    RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);

    if (unlikely(!cq)) {
        return -EINVAL;
    }

    ring = (PvrdmaRing *)cq->opaque;

    /* Step #1: Put CQE on CQ ring */
    cqe1 = pvrdma_ring_next_elem_write(ring);
    if (unlikely(!cqe1)) {
        return -EINVAL;
    }

    memset(cqe1, 0, sizeof(*cqe1));
    cqe1->wr_id = cqe->wr_id;
    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
    cqe1->opcode = cqe->opcode;
    cqe1->status = wc->status;
    cqe1->byte_len = wc->byte_len;
    cqe1->src_qp = wc->src_qp;
    cqe1->wc_flags = wc->wc_flags;
    cqe1->vendor_err = wc->vendor_err;

    trace_pvrdma_post_cqe(cq_handle, cq->notify, cqe1->wr_id, cqe1->qp,
                          cqe1->opcode, cqe1->status, cqe1->byte_len,
                          cqe1->src_qp, cqe1->wc_flags, cqe1->vendor_err);

    pvrdma_ring_write_inc(ring);

    /* Step #2: Put CQ number on dsr completion ring */
    cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
    if (unlikely(!cqne)) {
        return -EINVAL;
    }

    cqne->info = cq_handle;
    pvrdma_ring_write_inc(&dev->dsr_info.cq);

    /* Step #3: Interrupt host, unless notifications are suppressed */
    if (cq->notify != CNT_CLEAR) {
        if (cq->notify == CNT_ARM) {
            cq->notify = CNT_CLEAR;
        }
        post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
    }

    return 0;
}

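/*
 * Completion callback registered with the backend; translates the ibv
 * work completion into a guest CQE and releases the per-WR context.
 */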
static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
{
    CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;

    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);

    g_free(ctx);
}

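/*
 * Fail a work request without involving the backend: synthesize a
 * general-error work completion and push it through the normal
 * completion path (which also frees ctx).
 */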
static void complete_with_error(uint32_t vendor_err, void *ctx)
{
    struct ibv_wc wc = {};

    wc.status = IBV_WC_GENERAL_ERR;
    wc.vendor_err = vendor_err;

    pvrdma_qp_ops_comp_handler(ctx, &wc);
}

void pvrdma_qp_ops_fini(void)
{
    rdma_backend_unregister_comp_handler();
}

int pvrdma_qp_ops_init(void)
{
    rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);

    return 0;
}

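/*
 * Doorbell handler for the send queue: drain the guest SQ ring,
 * validate each WQE (source GID and SGE count) and hand it to the
 * backend; completions arrive later via pvrdma_qp_ops_comp_handler.
 * Malformed WQEs are completed immediately with a vendor error.
 */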
void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaSqWqe *wqe;
    PvrdmaRing *ring;
    int sgid_idx;
    union ibv_gid *sgid;

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return;
    }

    ring = (PvrdmaRing *)qp->opaque;

    while ((wqe = pvrdma_ring_next_elem_read(ring))) {
        CompHandlerCtx *comp_ctx;

        /* Prepare CQE */
        comp_ctx = g_new(CompHandlerCtx, 1);
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->send_cq_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.opcode = IBV_WC_SEND;

        sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
        if (!sgid) {
            rdma_error_report("Failed to get gid for idx %d",
                              wqe->hdr.wr.ud.av.gid_index);
            complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
            /* Consume the bad WQE so we don't spin on it forever */
            pvrdma_ring_read_inc(ring);
            continue;
        }

        sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
                                                 &dev->backend_dev,
                                                 wqe->hdr.wr.ud.av.gid_index);
        if (sgid_idx <= 0) {
            rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
                              wqe->hdr.wr.ud.av.gid_index);
            complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
            pvrdma_ring_read_inc(ring);
            continue;
        }

        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
                              dev->dev_attr.max_sge);
            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
            pvrdma_ring_read_inc(ring);
            continue;
        }

        rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
                               sgid_idx, sgid,
                               (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
                               wqe->hdr.wr.ud.remote_qpn,
                               wqe->hdr.wr.ud.remote_qkey, comp_ctx);

        pvrdma_ring_read_inc(ring);
    }
}

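/*
 * Doorbell handler for the receive queue: drain the guest RQ ring and
 * post each receive buffer to the backend QP.
 */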
void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRqWqe *wqe;
    PvrdmaRing *ring;

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return;
    }

    /* The second ring in the QP's ring pair is the recv ring */
    ring = &((PvrdmaRing *)qp->opaque)[1];

    while ((wqe = pvrdma_ring_next_elem_read(ring))) {
        CompHandlerCtx *comp_ctx;

        /* Prepare CQE */
        comp_ctx = g_new(CompHandlerCtx, 1);
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->recv_cq_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.opcode = IBV_WC_RECV;

        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
                              dev->dev_attr.max_sge);
            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
            /* Consume the bad WQE so we don't spin on it forever */
            pvrdma_ring_read_inc(ring);
            continue;
        }

        rdma_backend_post_recv(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
                               comp_ctx);

        pvrdma_ring_read_inc(ring);
    }
}

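/*
 * Doorbell handler for a shared receive queue: like pvrdma_qp_recv, but
 * the CQE's qp field is left 0 and filled in from the work completion,
 * since any QP attached to the SRQ may consume the buffer.
 */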
void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
{
    RdmaRmSRQ *srq;
    PvrdmaRqWqe *wqe;
    PvrdmaRing *ring;

    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
    if (unlikely(!srq)) {
        return;
    }

    ring = (PvrdmaRing *)srq->opaque;

    while ((wqe = pvrdma_ring_next_elem_read(ring))) {
        CompHandlerCtx *comp_ctx;

        /* Prepare CQE */
        comp_ctx = g_new(CompHandlerCtx, 1);
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = srq->recv_cq_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        /* The consuming QP number is known only at completion time */
        comp_ctx->cqe.qp = 0;
        comp_ctx->cqe.opcode = IBV_WC_RECV;

        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
                              dev->dev_attr.max_sge);
            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
            /* Consume the bad WQE so we don't spin on it forever */
            pvrdma_ring_read_inc(ring);
            continue;
        }

        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
                                   (struct ibv_sge *)&wqe->sge[0],
                                   wqe->hdr.num_sge,
                                   comp_ctx);

        pvrdma_ring_read_inc(ring);
    }
}

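/*
 * Explicit poll request from the guest: flush any completions pending
 * on the backend CQ into the guest-visible CQ ring.
 */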
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
}