blob: 247b8e7a044fc1116c435d956b8c1821793dd262 [file] [log] [blame]
/*
* Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_mi.h>
#include <ipxe/ib_pathrec.h>
#include <ipxe/ib_cm.h>
/**
* @file
*
* Infiniband communication management
*
*/
/**
 * List of connections
 *
 * Connections are added to this list by ib_create_conn(), removed by
 * ib_destroy_conn(), and searched by local communication ID in
 * ib_cm_find().
 */
static LIST_HEAD ( ib_cm_conns );
/**
 * Look up a connection by its local communication ID
 *
 * @v local_id		Local communication ID
 * @ret conn		Matching connection, or NULL if there is none
 */
static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
	struct ib_connection *candidate;
	struct ib_connection *match = NULL;

	/* Walk the connection list, stopping at the first match */
	list_for_each_entry ( candidate, &ib_cm_conns, list ) {
		if ( candidate->local_id == local_id ) {
			match = candidate;
			break;
		}
	}

	return match;
}
/**
 * Send "ready to use" (RTU) response
 *
 * @v ibdev		Infiniband device
 * @v mi		Management interface
 * @v tid		Transaction identifier (copied into the MAD header)
 * @v av		Address vector
 * @v local_id		Local communication ID
 * @v remote_id		Remote communication ID
 * @ret rc		Return status code
 */
static int ib_cm_send_rtu ( struct ib_device *ibdev,
			    struct ib_mad_interface *mi,
			    struct ib_mad_tid *tid,
			    struct ib_address_vector *av,
			    uint32_t local_id, uint32_t remote_id ) {
	union ib_mad mad;
	struct ib_cm_ready_to_use *rtu;
	int rc;

	/* Start from an all-zero MAD */
	memset ( &mad, 0, sizeof ( mad ) );

	/* Fill in the management header */
	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
	mad.hdr.class_version = IB_CM_CLASS_VERSION;
	mad.hdr.method = IB_MGMT_METHOD_SEND;
	memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
	mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );

	/* Fill in the "ready to use" payload */
	rtu = &mad.cm.cm_data.ready_to_use;
	rtu->local_id = htonl ( local_id );
	rtu->remote_id = htonl ( remote_id );

	/* Transmit, logging (but still reporting) any failure */
	rc = ib_mi_send ( ibdev, mi, &mad, av );
	if ( rc != 0 ) {
		DBGC ( local_id, "CM %08x could not send RTU: %s\n",
		       local_id, strerror ( rc ) );
	}

	return rc;
}
/**
 * Handle duplicate connection replies
 *
 * @v ibdev		Infiniband device
 * @v mi		Management interface
 * @v mad		Received MAD
 * @v av		Source address vector
 *
 * If a "ready to use" MAD is lost, the peer may resend the connection
 * reply.  We have to respond to these with duplicate "ready to use"
 * MADs, otherwise the peer may time out and drop the connection.
 *
 * Replies that do not match any known connection are logged and
 * otherwise ignored.
 */
static void ib_cm_recv_rep ( struct ib_device *ibdev,
			     struct ib_mad_interface *mi,
			     union ib_mad *mad,
			     struct ib_address_vector *av ) {
	struct ib_cm_connect_reply *reply = &mad->cm.cm_data.connect_reply;
	/* The peer's "remote" ID is our local ID */
	uint32_t local_id = ntohl ( reply->remote_id );
	struct ib_connection *conn;
	int rc;

	/* Identify connection */
	conn = ib_cm_find ( local_id );
	if ( ! conn ) {
		DBGC ( local_id, "CM %08x unexpected REP\n", local_id );
		return;
	}

	/* Resend "ready to use", using the IDs recorded for the connection */
	if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
				     conn->local_id,
				     conn->remote_id ) ) != 0 ) {
		/* Ignore errors; the remote end will retry */
	}
}
/**
 * Send reply to disconnection request (DREP)
 *
 * @v ibdev		Infiniband device
 * @v mi		Management interface
 * @v tid		Transaction identifier (copied into the MAD header)
 * @v av		Address vector
 * @v local_id		Local communication ID
 * @v remote_id		Remote communication ID
 * @ret rc		Return status code
 */
static int ib_cm_send_drep ( struct ib_device *ibdev,
			     struct ib_mad_interface *mi,
			     struct ib_mad_tid *tid,
			     struct ib_address_vector *av,
			     uint32_t local_id, uint32_t remote_id ) {
	union ib_mad mad;
	struct ib_cm_disconnect_reply *drep;
	int rc;

	/* Start from an all-zero MAD */
	memset ( &mad, 0, sizeof ( mad ) );

	/* Fill in the management header */
	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
	mad.hdr.class_version = IB_CM_CLASS_VERSION;
	mad.hdr.method = IB_MGMT_METHOD_SEND;
	memcpy ( &mad.hdr.tid, tid, sizeof ( mad.hdr.tid ) );
	mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );

	/* Fill in the disconnection reply payload */
	drep = &mad.cm.cm_data.disconnect_reply;
	drep->local_id = htonl ( local_id );
	drep->remote_id = htonl ( remote_id );

	/* Transmit, logging (but still reporting) any failure */
	rc = ib_mi_send ( ibdev, mi, &mad, av );
	if ( rc != 0 ) {
		DBGC ( local_id, "CM %08x could not send DREP: %s\n",
		       local_id, strerror ( rc ) );
	}

	return rc;
}
/**
 * Handle disconnection requests
 *
 * @v ibdev		Infiniband device
 * @v mi		Management interface
 * @v mad		Received MAD
 * @v av		Source address vector
 *
 * If the request matches a known connection, that connection's
 * owner is notified with -ENOTCONN and the request's private data.
 * A disconnection reply is sent whether or not a matching connection
 * was found.
 */
static void ib_cm_recv_dreq ( struct ib_device *ibdev,
			      struct ib_mad_interface *mi,
			      union ib_mad *mad,
			      struct ib_address_vector *av ) {
	struct ib_cm_disconnect_request *dreq =
		&mad->cm.cm_data.disconnect_request;
	struct ib_connection *conn;
	/* IDs are from the sender's point of view, so swap them */
	uint32_t local_id = ntohl ( dreq->remote_id );
	uint32_t remote_id = ntohl ( dreq->local_id );
	int rc;

	/* Identify connection */
	conn = ib_cm_find ( local_id );
	if ( conn ) {
		/* Notify upper layer */
		conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
				    &dreq->private_data,
				    sizeof ( dreq->private_data ) );
	} else {
		DBGC ( local_id, "CM %08x unexpected DREQ\n", local_id );
	}

	/* Send reply
	 *
	 * Only the IDs taken from the received MAD are used here; the
	 * connection itself is not touched again after the upper
	 * layer has been notified.
	 */
	if ( ( rc = ib_cm_send_drep ( ibdev, mi, &mad->hdr.tid, av, local_id,
				      remote_id ) ) != 0 ) {
		/* Ignore errors; the remote end will retry */
	}
}
/**
 * Communication management agents
 *
 * Each entry associates a communication management attribute ID with
 * the function in this file that handles it.
 */
struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
{
/* Connection replies are handled by ib_cm_recv_rep() */
.mgmt_class = IB_MGMT_CLASS_CM,
.class_version = IB_CM_CLASS_VERSION,
.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
.handle = ib_cm_recv_rep,
},
{
/* Disconnection requests are handled by ib_cm_recv_dreq() */
.mgmt_class = IB_MGMT_CLASS_CM,
.class_version = IB_CM_CLASS_VERSION,
.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
.handle = ib_cm_recv_dreq,
},
};
/**
 * Convert connection rejection reason to return status code
 *
 * @v reason		Rejection reason (in network byte order)
 * @ret rc		Return status code
 *
 * Reasons without a dedicated mapping are reported as -EPERM.
 */
static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
	int rc;

	/* Compare against byte-swapped constants rather than swapping
	 * the (network-order) reason itself.
	 */
	if ( reason == htons ( IB_CM_REJECT_BAD_SERVICE_ID ) ) {
		rc = -ENODEV;
	} else if ( reason == htons ( IB_CM_REJECT_STALE_CONN ) ) {
		rc = -EALREADY;
	} else if ( reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
		rc = -ENOTTY;
	} else {
		rc = -EPERM;
	}

	return rc;
}
/**
* Handle connection request transaction completion
*
* @v ibdev Infiniband device
* @v mi Management interface
* @v madx Management transaction
* @v rc Status code
* @v mad Received MAD (or NULL on error)
* @v av Source address vector (or NULL on error)
*/
static void ib_cm_req_complete ( struct ib_device *ibdev,
struct ib_mad_interface *mi,
struct ib_mad_transaction *madx,
int rc, union ib_mad *mad,
struct ib_address_vector *av ) {
struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
struct ib_queue_pair *qp = conn->qp;
struct ib_cm_common *common = &mad->cm.cm_data.common;
struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
uint32_t local_id = conn->local_id;
void *private_data = NULL;
size_t private_data_len = 0;
/* Report failures */
if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
rc = -EIO;
if ( rc != 0 ) {
DBGC ( local_id, "CM %08x connection request failed: %s\n",
local_id, strerror ( rc ) );
goto out;
}
/* Record remote communication ID */
conn->remote_id = ntohl ( common->local_id );
/* Handle response */
switch ( mad->hdr.attr_id ) {
case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
/* Extract fields */
qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
private_data = &rep->private_data;
private_data_len = sizeof ( rep->private_data );
DBGC ( local_id, "CM %08x connected to QPN %#lx PSN %#x\n",
local_id, qp->av.qpn, qp->send.psn );
/* Modify queue pair */
if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
DBGC ( local_id, "CM %08x could not modify queue "
"pair: %s\n", local_id, strerror ( rc ) );
goto out;
}
/* Send "ready to use" reply */
if ( ( rc = ib_cm_send_rtu ( ibdev, mi, &mad->hdr.tid, av,
conn->local_id,
conn->remote_id ) ) != 0 ) {
/* Treat as non-fatal */
rc = 0;
}
break;
case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
/* Extract fields */
DBGC ( local_id, "CM %08x connection rejected (reason %d)\n",
local_id, ntohs ( rej->reason ) );
/* Private data is valid only for a Consumer Reject */
if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
private_data = &rej->private_data;
private_data_len = sizeof ( rej->private_data );
}
rc = ib_cm_rejection_reason_to_rc ( rej->reason );
break;
default:
DBGC ( local_id, "CM %08x unexpected response (attribute "
"%04x)\n", local_id, ntohs ( mad->hdr.attr_id ) );
rc = -ENOTSUP;
break;
}
out:
/* Destroy the completed transaction */
ib_destroy_madx ( ibdev, ibdev->gsi, madx );
conn->madx = NULL;
/* Hand off to the upper completion handler */
conn->op->changed ( ibdev, qp, conn, rc, private_data,
private_data_len );
}
/**
 * Connection request operations
 *
 * Passed to ib_create_madx() when the connection request is sent, so
 * that completion of that transaction is handled by
 * ib_cm_req_complete().
 */
static struct ib_mad_transaction_operations ib_cm_req_op = {
.complete = ib_cm_req_complete,
};
/**
* Handle connection path transaction completion
*
* @v ibdev Infiniband device
* @v path Path
* @v rc Status code
* @v av Address vector, or NULL on error
*/
static void ib_cm_path_complete ( struct ib_device *ibdev,
struct ib_path *path, int rc,
struct ib_address_vector *av ) {
struct ib_connection *conn = ib_path_get_ownerdata ( path );
struct ib_queue_pair *qp = conn->qp;
union ib_mad mad;
struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
uint32_t local_id = conn->local_id;
size_t private_data_len;
/* Report failures */
if ( rc != 0 ) {
DBGC ( local_id, "CM %08x path lookup failed: %s\n",
local_id, strerror ( rc ) );
conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
goto out;
}
/* Update queue pair peer path */
memcpy ( &qp->av, av, sizeof ( qp->av ) );
/* Construct connection request */
memset ( &mad, 0, sizeof ( mad ) );
mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
mad.hdr.class_version = IB_CM_CLASS_VERSION;
mad.hdr.method = IB_MGMT_METHOD_SEND;
mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
req->local_id = htonl ( conn->local_id );
memcpy ( &req->service_id, &conn->service_id,
sizeof ( req->service_id ) );
memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
( 0 << 0 ) );
req->starting_psn__local_timeout__retry_count =
htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
( 0x07 << 0 ) );
req->pkey = htons ( ibdev->pkey );
req->payload_mtu__rdc_exists__rnr_retry =
( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
req->primary.local_lid = htons ( ibdev->lid );
req->primary.remote_lid = htons ( conn->qp->av.lid );
memcpy ( &req->primary.local_gid, &ibdev->gid,
sizeof ( req->primary.local_gid ) );
memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
sizeof ( req->primary.remote_gid ) );
req->primary.flow_label__rate =
htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
req->primary.hop_limit = 0;
req->primary.sl__subnet_local =
( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
req->primary.local_ack_timeout = ( 0x13 << 3 );
private_data_len = conn->private_data_len;
if ( private_data_len > sizeof ( req->private_data ) )
private_data_len = sizeof ( req->private_data );
memcpy ( &req->private_data, &conn->private_data, private_data_len );
/* Create connection request */
av->qpn = IB_QPN_GSI;
av->qkey = IB_QKEY_GSI;
conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
&ib_cm_req_op );
if ( ! conn->madx ) {
DBGC ( local_id, "CM %08x could not create connection "
"request\n", local_id );
conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
goto out;
}
ib_madx_set_ownerdata ( conn->madx, conn );
out:
/* Destroy the completed transaction */
ib_destroy_path ( ibdev, path );
conn->path = NULL;
}
/**
 * Connection path operations
 *
 * Passed to ib_create_path() by ib_create_conn(), so that completion
 * of the path lookup is handled by ib_cm_path_complete().
 */
static struct ib_path_operations ib_cm_path_op = {
.complete = ib_cm_path_complete,
};
/**
 * Create connection to remote QP
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v dgid		Target GID
 * @v service_id	Target service ID
 * @v private_data	Connection request private data
 * @v private_data_len	Length of connection request private data
 * @v op		Connection operations
 * @ret conn		Connection, or NULL on failure
 *
 * The queue pair's address vector is reset to contain only the target
 * GID, and a path lookup towards that GID is started.  The private
 * data is copied into the connection.
 */
struct ib_connection *
ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		 union ib_gid *dgid, union ib_guid *service_id,
		 void *private_data, size_t private_data_len,
		 struct ib_connection_operations *op ) {
	struct ib_connection *conn;
	uint32_t local_id;

	/* Allocate connection, with extra room for the private data */
	conn = zalloc ( sizeof ( *conn ) + private_data_len );
	if ( ! conn )
		return NULL;
	conn->ibdev = ibdev;
	conn->qp = qp;

	/* Point the queue pair at the target GID */
	memset ( &qp->av, 0, sizeof ( qp->av ) );
	qp->av.gid_present = 1;
	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );

	/* Choose a random local communication ID */
	local_id = random();
	conn->local_id = local_id;

	/* Record remaining connection parameters */
	memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
	conn->op = op;
	conn->private_data_len = private_data_len;
	memcpy ( &conn->private_data, private_data, private_data_len );

	/* Create path */
	conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
	if ( ! conn->path ) {
		free ( conn );
		return NULL;
	}
	ib_path_set_ownerdata ( conn->path, conn );

	/* Add to list of connections */
	list_add ( &conn->list, &ib_cm_conns );

	DBGC ( local_id, "CM %08x created for IBDEV %s QPN %#lx\n",
	       local_id, ibdev->name, qp->qpn );
	DBGC ( local_id, "CM %08x connecting to " IB_GID_FMT " "
	       IB_GUID_FMT "\n", local_id, IB_GID_ARGS ( dgid ),
	       IB_GUID_ARGS ( service_id ) );

	return conn;
}
/**
 * Destroy connection to remote QP
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair (unused)
 * @v conn		Connection
 *
 * Removes the connection from the list of connections, destroys any
 * connection request or path transaction still attached to it, and
 * frees it.
 */
void ib_destroy_conn ( struct ib_device *ibdev,
		       struct ib_queue_pair *qp __unused,
		       struct ib_connection *conn ) {

	/* Remove from list of connections */
	list_del ( &conn->list );

	/* Destroy any outstanding connection request transaction */
	if ( conn->madx != NULL ) {
		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
	}

	/* Destroy any outstanding path transaction */
	if ( conn->path != NULL ) {
		ib_destroy_path ( ibdev, conn->path );
	}

	/* Free connection */
	free ( conn );
}