blob: a6f400f59d34ed93640790468a3dd062f32954c1 [file] [log] [blame]
/*
* Copyright (c) 2021 Nutanix Inc. All rights reserved.
*
* Authors: Thanos Makatos <thanos@nutanix.com>
* Swapnil Ingle <swapnil.ingle@nutanix.com>
* Felipe Franciosi <felipe@nutanix.com>
* John Levon <john.levon@nutanix.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Nutanix nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
*/
/*
* Capability handling. We handle reads and writes to standard capabilities
* ourselves, and optionally for vendor capabilities too. For each access (via
* pci_config_space_access() -> pci_cap_access()), if we find that we're
* reading from a particular capability offset:
*
* - if VFU_CAP_FLAG_CALLBACK is set, we call the config space region callback
* given by the user
* - else we memcpy() the capability data back out to the client
*
* For writes:
*
* - if VFU_CAP_FLAG_READONLY is set, we fail the write
* - if VFU_CAP_FLAG_CALLBACK is set, we call the config space region callback
* given by the user
* - else we call the cap-specific callback to handle the write.
*
* Extended capabilities live in extended space (after the first 256 bytes), so
* can never clash with a standard capability. An empty capability list is
* signalled by a zeroed header at offset 256 (which the config space has by
* default).
*/
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include "common.h"
#include "libvfio-user.h"
#include "pci_caps.h"
#include "pci.h"
#include "private.h"
/*
 * All capabilities must be dword-aligned: automatically chosen offsets for
 * extended capabilities are rounded up to a multiple of this many bytes.
 */
#define CAP_ROUND (4)
/*
 * Returns a pointer to the first byte of @cap inside the device's config
 * space, i.e. the config space location at offset cap->off.
 */
static void *
cap_data(vfu_ctx_t *vfu_ctx, struct pci_cap *cap)
{
    void *start = (void *)pci_config_space_ptr(vfu_ctx, cap->off);

    return start;
}
/*
 * Returns the size in bytes of the capability whose header starts at @data.
 *
 * @extended selects which header layout @data is read with. Known fixed-size
 * capabilities map to a constant; vendor-specific capabilities carry their
 * own length in the header. An unknown ID is logged and the process aborts.
 */
static size_t
cap_size(vfu_ctx_t *vfu_ctx, void *data, bool extended)
{
    if (!extended) {
        uint8_t std_id = ((struct cap_hdr *)data)->id;

        if (std_id == PCI_CAP_ID_PM) {
            return PCI_PM_SIZEOF;
        }
        if (std_id == PCI_CAP_ID_EXP) {
            return VFIO_USER_PCI_CAP_EXP_SIZEOF;
        }
        if (std_id == PCI_CAP_ID_MSI) {
            return VFIO_USER_PCI_CAP_MSI_SIZEOF;
        }
        if (std_id == PCI_CAP_ID_MSIX) {
            return PCI_CAP_MSIX_SIZEOF;
        }
        if (std_id == PCI_CAP_ID_VNDR) {
            /* Vendor-specific: the length lives in the capability itself. */
            return ((struct vsc *)data)->size;
        }

        vfu_log(vfu_ctx, LOG_ERR, "invalid cap id %u", std_id);
        abort();
    }

    uint16_t ext_id = ((struct pcie_ext_cap_hdr *)data)->id;

    if (ext_id == PCI_EXT_CAP_ID_DSN) {
        return PCI_EXT_CAP_DSN_SIZEOF;
    }
    if (ext_id == PCI_EXT_CAP_ID_VNDR) {
        /* Vendor-specific: the length lives in the VSEC header. */
        return ((struct pcie_ext_cap_vsc_hdr *)data)->len;
    }

    vfu_log(vfu_ctx, LOG_ERR, "invalid cap id %u", ext_id);
    abort();
}
/*
 * Applies a write to the power management control/status register.
 *
 * Each field that differs from the currently stored value is logged at debug
 * level, then the whole register is replaced with *pmcs. Always returns 0.
 */
static ssize_t
handle_pmcs_write(vfu_ctx_t *vfu_ctx, struct pmcap *pm,
                  const struct pmcs *const pmcs)
{
    /* Snapshot of the register before it gets overwritten below. */
    const struct pmcs before = pm->pmcs;

    if (before.ps != pmcs->ps) {
        vfu_log(vfu_ctx, LOG_DEBUG, "power state set to %#x", pmcs->ps);
    }

    if (before.pmee != pmcs->pmee) {
        vfu_log(vfu_ctx, LOG_DEBUG, "PME enable set to %#x", pmcs->pmee);
    }

    if (before.dse != pmcs->dse) {
        vfu_log(vfu_ctx, LOG_DEBUG, "data select set to %#x", pmcs->dse);
    }

    if (before.pmes != pmcs->pmes) {
        vfu_log(vfu_ctx, LOG_DEBUG, "PME status set to %#x", pmcs->pmes);
    }

    pm->pmcs = *pmcs;

    return 0;
}
/*
 * Write handler for the Power Management capability.
 *
 * The write must start exactly at one of the capability's registers and cover
 * exactly that register, otherwise ERROR_INT(EINVAL) is returned. Only the
 * control/status register (pmcs) is actually writable: it returns the number
 * of bytes consumed. Writes to the other registers are logged and fail with
 * ERROR_INT(ENOTSUP).
 */
static ssize_t
cap_write_pm(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char * buf,
             size_t count, loff_t offset)
{
    struct pmcap *pm = cap_data(vfu_ctx, cap);
    /* Position of the write relative to the start of the capability. */
    loff_t rel = offset - cap->off;

    if (rel == (loff_t)offsetof(struct pmcap, pc)) {
        if (count != sizeof(struct pc)) {
            return ERROR_INT(EINVAL);
        }
        vfu_log(vfu_ctx, LOG_ERR, "FIXME: write to pmcap::pc unimplemented");
        return ERROR_INT(ENOTSUP);
    }

    if (rel == (loff_t)offsetof(struct pmcap, pmcs)) {
        if (count != sizeof(struct pmcs)) {
            return ERROR_INT(EINVAL);
        }
        handle_pmcs_write(vfu_ctx, pm, (struct pmcs *)buf);
        return sizeof(struct pmcs);
    }

    if (rel == (loff_t)offsetof(struct pmcap, pmcsr_bse)) {
        if (count != 1) {
            return ERROR_INT(EINVAL);
        }
        vfu_log(vfu_ctx, LOG_ERR,
                "FIXME: write to pmcap::pmcsr_bse unimplemented");
        return ERROR_INT(ENOTSUP);
    }

    if (rel == (loff_t)offsetof(struct pmcap, data)) {
        if (count != 1) {
            return ERROR_INT(EINVAL);
        }
        vfu_log(vfu_ctx, LOG_ERR, "FIXME: write to pmcap::data unimplemented");
        return ERROR_INT(ENOTSUP);
    }

    /* Not the start of any register we know about. */
    return ERROR_INT(EINVAL);
}
/*
 * Write handler for the MSI capability.
 *
 * The incoming bytes are first overlaid on a scratch copy of the capability;
 * each writable field of the scratch copy is then compared with the live
 * capability and, if different, committed and logged. Fields not handled here
 * are never copied back, so they keep their current value.
 *
 * A Multiple Message Enable value above 5, or above the advertised Multiple
 * Message Capable value, is rejected with ERROR_INT(EINVAL). Note that the
 * MSI enable bit is committed before MME is validated, so a rejected write may
 * already have changed it.
 *
 * Returns @count on success.
 */
static ssize_t
cap_write_msi(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf,
              size_t count, loff_t offset)
{
    struct msicap *live = cap_data(vfu_ctx, cap);
    struct msicap wanted = *live;

    /* Overlay the written bytes at their position within the capability. */
    memcpy((char *)&wanted + (offset - cap->off), buf, count);

    if (wanted.mc.msie != live->mc.msie) {
        live->mc.msie = wanted.mc.msie;
        vfu_log(vfu_ctx, LOG_DEBUG, "%s MSI",
                live->mc.msie ? "enable" : "disable");
    }

    if (wanted.mc.mme != live->mc.mme) {
        if (wanted.mc.mme > 5) {
            vfu_log(vfu_ctx, LOG_ERR,
                    "MSI cannot have more than 32 interrupt vectors");
            return ERROR_INT(EINVAL);
        }

        if (wanted.mc.mme > live->mc.mmc) {
            vfu_log(vfu_ctx, LOG_ERR,
                    "MSI cannot have more interrupt vectors"
                    " in MME than defined in MMC");
            return ERROR_INT(EINVAL);
        }

        live->mc.mme = wanted.mc.mme;
        vfu_log(vfu_ctx, LOG_DEBUG,
                "MSI Updated Multiple Message Enable count");
    }

    if (wanted.ma.addr != live->ma.addr) {
        live->ma.addr = wanted.ma.addr;
        vfu_log(vfu_ctx, LOG_DEBUG,
                "MSI Message Address set to %x", live->ma.addr << 2);
    }

    if (wanted.mua != live->mua) {
        live->mua = wanted.mua;
        vfu_log(vfu_ctx, LOG_DEBUG,
                "MSI Message Upper Address set to %x", live->mua);
    }

    if (wanted.md != live->md) {
        live->md = wanted.md;
        vfu_log(vfu_ctx, LOG_DEBUG,
                "MSI Message Data set to %x", live->md);
    }

    if (wanted.mmask != live->mmask) {
        live->mmask = wanted.mmask;
        vfu_log(vfu_ctx, LOG_DEBUG,
                "MSI Mask Bits set to %x", live->mmask);
    }

    return count;
}
/*
 * Write handler for the MSI-X capability.
 *
 * The written bytes are overlaid on a scratch copy of the capability, and
 * only the function mask (fm) and enable (mxe) bits of the message control
 * register are copied back to the live capability when they change. Every
 * other bit of the write is dropped.
 *
 * Returns @count.
 */
static ssize_t
cap_write_msix(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf,
               size_t count, loff_t offset)
{
    struct msixcap *live = cap_data(vfu_ctx, cap);
    struct msixcap wanted = *live;

    memcpy((char *)&wanted + (offset - cap->off), buf, count);

    /*
     * Equivalent to masking the write with
     * (PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE); done bit by bit so
     * that each change can be logged.
     */
    if (wanted.mxc.fm != live->mxc.fm) {
        live->mxc.fm = wanted.mxc.fm;
        if (!live->mxc.fm) {
            vfu_log(vfu_ctx, LOG_DEBUG,
                    "vector's mask bit determines whether vector is masked");
        } else {
            vfu_log(vfu_ctx, LOG_DEBUG, "all MSI-X vectors masked");
        }
    }

    if (wanted.mxc.mxe != live->mxc.mxe) {
        live->mxc.mxe = wanted.mxc.mxe;
        vfu_log(vfu_ctx, LOG_DEBUG, "%s MSI-X",
                live->mxc.mxe ? "enable" : "disable");
    }

    return count;
}
/*
 * Applies a write to the PCI Express Device Control register.
 *
 * Each field of *p that differs from the stored register is committed to
 * px->pxdc and logged. If the write sets the "initiate FLR" bit:
 *  - ERROR_INT(EINVAL) is returned when the device capabilities register does
 *    not advertise FLR;
 *  - otherwise, when a reset callback is registered, the result of
 *    call_reset_cb(VFU_RESET_PCI_FLR) is returned;
 *  - when no reset callback is registered, an error is logged and 0 returned.
 *
 * Returns 0 when no FLR was requested.
 */
static int
handle_px_pxdc_write(vfu_ctx_t *vfu_ctx, struct pxcap *px,
                     const union pxdc *const p)
{
    assert(px != NULL);
    assert(p != NULL);

    if (px->pxdc.cere != p->cere) {
        px->pxdc.cere = p->cere;
        vfu_log(vfu_ctx, LOG_DEBUG, "CERE %s", p->cere ? "enable" : "disable");
    }

    if (px->pxdc.nfere != p->nfere) {
        px->pxdc.nfere = p->nfere;
        vfu_log(vfu_ctx, LOG_DEBUG, "NFERE %s",
                p->nfere ? "enable" : "disable");
    }

    if (px->pxdc.fere != p->fere) {
        px->pxdc.fere = p->fere;
        vfu_log(vfu_ctx, LOG_DEBUG, "FERE %s", p->fere ? "enable" : "disable");
    }

    if (px->pxdc.urre != p->urre) {
        px->pxdc.urre = p->urre;
        vfu_log(vfu_ctx, LOG_DEBUG, "URRE %s", p->urre ? "enable" : "disable");
    }

    if (px->pxdc.ero != p->ero) {
        px->pxdc.ero = p->ero;
        vfu_log(vfu_ctx, LOG_DEBUG, "ERO %s", p->ero ? "enable" : "disable");
    }

    if (px->pxdc.mps != p->mps) {
        px->pxdc.mps = p->mps;
        vfu_log(vfu_ctx, LOG_DEBUG, "MPS set to %d", p->mps);
    }

    if (px->pxdc.ete != p->ete) {
        px->pxdc.ete = p->ete;
        vfu_log(vfu_ctx, LOG_DEBUG, "ETE %s", p->ete ? "enable" : "disable");
    }

    if (px->pxdc.pfe != p->pfe) {
        px->pxdc.pfe = p->pfe;
        vfu_log(vfu_ctx, LOG_DEBUG, "PFE %s", p->pfe ? "enable" : "disable");
    }

    if (px->pxdc.appme != p->appme) {
        px->pxdc.appme = p->appme;
        vfu_log(vfu_ctx, LOG_DEBUG, "APPME %s",
                p->appme ? "enable" : "disable");
    }

    if (px->pxdc.ens != p->ens) {
        px->pxdc.ens = p->ens;
        vfu_log(vfu_ctx, LOG_DEBUG, "ENS %s", p->ens ? "enable" : "disable");
    }

    if (px->pxdc.mrrs != p->mrrs) {
        px->pxdc.mrrs = p->mrrs;
        vfu_log(vfu_ctx, LOG_DEBUG, "MRRS set to %d", p->mrrs);
    }

    /* Everything below only concerns a function level reset request. */
    if (!p->iflr) {
        return 0;
    }

    if (px->pxdcap.flrc == 0) {
        vfu_log(vfu_ctx, LOG_ERR, "FLR capability is not supported");
        return ERROR_INT(EINVAL);
    }

    if (vfu_ctx->reset == NULL) {
        /* Logged, but deliberately not reported as a failed write. */
        vfu_log(vfu_ctx, LOG_ERR, "FLR callback is not implemented");
        return 0;
    }

    vfu_log(vfu_ctx, LOG_DEBUG, "initiate function level reset");
    return call_reset_cb(vfu_ctx, VFU_RESET_PCI_FLR);
}
/*
 * Handler for writes to the PCI Express Link Control register.
 *
 * TODO implement: currently a stub that ignores all of its arguments, leaves
 * the capability untouched and reports success (0).
 */
static int
handle_px_pxlc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED,
const union pxlc *const p UNUSED)
{
return 0;
}
/*
 * Handler for writes to the PCI Express Slot Control register.
 *
 * TODO implement: currently a stub that ignores all of its arguments, leaves
 * the capability untouched and reports success (0).
 */
static int
handle_px_pxsc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED,
const struct pxsc *const p UNUSED)
{
return 0;
}
/*
 * Handler for writes to the PCI Express Root Control register.
 *
 * TODO implement: currently a stub that ignores all of its arguments, leaves
 * the capability untouched and reports success (0).
 */
static int
handle_px_pxrc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED,
const struct pxrc *const p UNUSED)
{
return 0;
}
/*
 * Applies a write to the PCI Express Device Control 2 register: the new value
 * is logged if it differs from the stored one, and then stored unconditionally.
 * Always returns 0.
 */
static int
handle_px_pxdc2_write(vfu_ctx_t *vfu_ctx, struct pxcap *px,
                      const union pxdc2 *const p)
{
    bool changed;

    assert(px != NULL);
    assert(p != NULL);

    changed = (px->pxdc2.raw != p->raw);
    if (changed) {
        vfu_log(vfu_ctx, LOG_DEBUG, "Device Control 2 set to %#x", p->raw);
    }

    px->pxdc2 = *p;

    return 0;
}
/*
 * Applies a write to the PCI Express Link Control 2 register: the new value is
 * logged if it differs from the stored one, and then stored unconditionally.
 * Always returns 0.
 */
static int
handle_px_pxlc2_write(vfu_ctx_t *vfu_ctx, struct pxcap *px,
                      const struct pxlc2 *const p)
{
    bool changed;

    assert(px != NULL);
    assert(p != NULL);

    changed = (px->pxlc2.stuff != p->stuff);
    if (changed) {
        vfu_log(vfu_ctx, LOG_DEBUG, "Link Control 2 set to %#x", p->stuff);
    }

    px->pxlc2 = *p;

    return 0;
}
/*
 * Dispatches a two-byte write to the PCI Express capability.
 *
 * @off is the position of the write relative to the start of the capability.
 * It must match the start of one of the registers handled below, in which case
 * the register-specific handler's result is returned; any other position
 * yields ERROR_INT(EINVAL). Writes to Slot Control 2 are accepted and ignored.
 */
static int
handle_px_write_2_bytes(vfu_ctx_t *vfu_ctx, struct pxcap *px, char *buf,
                        loff_t off)
{
    if (off == (loff_t)offsetof(struct pxcap, pxdc)) {
        return handle_px_pxdc_write(vfu_ctx, px, (union pxdc *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxlc)) {
        return handle_px_pxlc_write(vfu_ctx, px, (union pxlc *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxsc)) {
        return handle_px_pxsc_write(vfu_ctx, px, (struct pxsc *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxrc)) {
        return handle_px_pxrc_write(vfu_ctx, px, (struct pxrc *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxdc2)) {
        return handle_px_pxdc2_write(vfu_ctx, px, (union pxdc2 *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxlc2)) {
        return handle_px_pxlc2_write(vfu_ctx, px, (struct pxlc2 *)buf);
    }
    if (off == (loff_t)offsetof(struct pxcap, pxsc2)) {
        /* RsvdZ */
        return 0;
    }

    return ERROR_INT(EINVAL);
}
/*
 * Write handler for the PCI Express capability.
 *
 * Only two-byte writes are supported; any other size fails with
 * ERROR_INT(EINVAL). Returns @count on success, or the non-zero result of the
 * register handler on failure.
 */
static ssize_t
cap_write_px(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf,
             size_t count, loff_t offset)
{
    struct pxcap *px = cap_data(vfu_ctx, cap);
    int err;

    if (count == 2) {
        err = handle_px_write_2_bytes(vfu_ctx, px, buf, offset - cap->off);
    } else {
        err = ERROR_INT(EINVAL);
    }

    return (err != 0) ? (ssize_t)err : (ssize_t)count;
}
/*
 * Write handler for the standard vendor-specific capability: the written bytes
 * are copied verbatim into config space at @offset. Returns @count.
 */
static ssize_t
cap_write_vendor(vfu_ctx_t *vfu_ctx, struct pci_cap *cap UNUSED, char *buf,
                 size_t count, loff_t offset)
{
    void *dst = pci_config_space_ptr(vfu_ctx, offset);

    memcpy(dst, buf, count);

    return count;
}
/*
 * Write handler for the Device Serial Number extended capability. Every write
 * is refused: an error naming the capability is logged and ERROR_INT(EPERM)
 * is returned.
 */
static ssize_t
ext_cap_write_dsn(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf UNUSED,
                  size_t count UNUSED, loff_t offset UNUSED)
{
    ssize_t denied;

    vfu_log(vfu_ctx, LOG_ERR, "%s capability is read-only", cap->name);

    denied = ERROR_INT(EPERM);
    return denied;
}
/*
 * Write handler for the vendor-specific extended capability: the written bytes
 * are copied verbatim into config space at @offset. Returns @count.
 */
static ssize_t
ext_cap_write_vendor(vfu_ctx_t *vfu_ctx, struct pci_cap *cap UNUSED, char *buf,
                     size_t count, loff_t offset)
{
    void *dst = pci_config_space_ptr(vfu_ctx, offset);

    memcpy(dst, buf, count);

    return count;
}
/*
 * Returns true if the half-open byte ranges [off1, off1 + size1) and
 * [off2, off2 + size2) share at least one byte.
 *
 * Two ranges intersect exactly when each one starts before the other one
 * ends. Ranges that merely touch (one ends where the other begins) do not
 * intersect, so a capability may be placed directly before or after another.
 */
static bool
ranges_intersect(size_t off1, size_t size1, size_t off2, size_t size2)
{
    return off1 < (off2 + size2) && off2 < (off1 + size1);
}
/*
 * Returns the first registered capability whose byte range intersects the
 * @count bytes starting at config space offset @offset, or NULL if there is
 * none. Standard capabilities are searched before extended ones, each in
 * registration order.
 */
struct pci_cap *
cap_find_by_offset(vfu_ctx_t *vfu_ctx, loff_t offset, size_t count)
{
    struct pci_cap *cur;
    struct pci_cap *end;

    cur = &vfu_ctx->pci.caps[0];
    end = cur + vfu_ctx->pci.nr_caps;
    while (cur < end) {
        if (ranges_intersect(offset, count, cur->off, cur->size)) {
            return cur;
        }
        cur++;
    }

    cur = &vfu_ctx->pci.ext_caps[0];
    end = cur + vfu_ctx->pci.nr_ext_caps;
    while (cur < end) {
        if (ranges_intersect(offset, count, cur->off, cur->size)) {
            return cur;
        }
        cur++;
    }

    return NULL;
}
/*
 * Performs a read or write of @count bytes at config space offset @offset,
 * which must fall entirely inside a registered capability (asserted).
 *
 * - Writes to a capability flagged VFU_CAP_FLAG_READONLY fail with
 *   ERROR_INT(EPERM).
 * - Capabilities flagged VFU_CAP_FLAG_CALLBACK are handed to
 *   pci_nonstd_access(), for both reads and writes.
 * - Other reads are served by copying from config space into @buf.
 * - Other writes are refused with ERROR_INT(EPERM) if they start inside the
 *   capability header, and otherwise passed to the capability's own write
 *   handler.
 *
 * Returns the number of bytes handled, or the callee's result.
 */
ssize_t
pci_cap_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset,
               bool is_write)
{
    struct pci_cap *cap = cap_find_by_offset(vfu_ctx, offset, count);

    assert(cap != NULL);
    assert((size_t)offset >= cap->off);
    assert(count <= cap->size);

    if (is_write) {
        if (cap->flags & VFU_CAP_FLAG_READONLY) {
            vfu_log(vfu_ctx, LOG_ERR, "write of %zu bytes to read-only capability "
                    "%u (%s)", count, cap->id, cap->name);
            return ERROR_INT(EPERM);
        }
    }

    if (cap->flags & VFU_CAP_FLAG_CALLBACK) {
        return pci_nonstd_access(vfu_ctx, buf, count, offset, is_write);
    }

    if (is_write) {
        /* The header (ID/next, plus length for vendor caps) is immutable. */
        if (offset - cap->off < cap->hdr_size) {
            vfu_log(vfu_ctx, LOG_ERR,
                    "disallowed write to header for cap %d (%s)",
                    cap->id, cap->name);
            return ERROR_INT(EPERM);
        }

        return cap->cb(vfu_ctx, cap, buf, count, offset);
    }

    memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count);

    return count;
}
/*
 * Place the new capability after the previous (or after the standard header if
 * this is the first capability).
 *
 * If cap->off is already provided, place it directly, but first check it
 * doesn't overlap an existing capability, or the PCI header. We still also need
 * to link it into the list. There's no guarantee that the list is ordered by
 * offset after doing so.
 *
 * Standard capabilities are chained through single-byte "next" pointers, so
 * the whole capability must sit below PCI_CFG_SPACE_SIZE even when the device
 * has a larger config space; otherwise the link written into the previous
 * capability would be silently truncated.
 *
 * On success the capability bytes in @data are copied into config space, the
 * previous list tail (or the header's capability pointer) is pointed at the
 * new capability, the new capability's own next pointer is zeroed, and 0 is
 * returned.
 *
 * On failure returns ERROR_INT(EINVAL) for an explicit offset that lies in the
 * standard header or overlaps an existing capability, or ERROR_INT(ENOSPC) if
 * the capability does not fit.
 */
static int
cap_place(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, void *data)
{
    vfu_pci_config_space_t *config_space;
    uint8_t *prevp = NULL;
    size_t limit;

    config_space = vfu_pci_get_config_space(vfu_ctx);

    /* Start from the header's capability pointer: the head of the list. */
    prevp = &config_space->hdr.cap;

    if (cap->off != 0) {
        if (cap->off < PCI_STD_HEADER_SIZEOF) {
            vfu_log(vfu_ctx, LOG_ERR, "invalid offset %zx for capability "
                    "%u (%s)", cap->off, cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        if (cap_find_by_offset(vfu_ctx, cap->off, cap->size) != NULL) {
            vfu_log(vfu_ctx, LOG_ERR, "overlap found for capability "
                    "%u (%s)", cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        /* Walk to the next pointer of the last capability in the list. */
        while (*prevp != 0) {
            prevp = pci_config_space_ptr(vfu_ctx, *prevp + PCI_CAP_LIST_NEXT);
        }
    } else if (*prevp == 0) {
        /* Empty list: the first capability directly follows the header. */
        cap->off = PCI_STD_HEADER_SIZEOF;
    } else {
        size_t offset = *prevp;
        size_t size;

        /* Find the last capability; prevp ends up at its next pointer. */
        for (;;) {
            prevp = pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT);
            if (*prevp == 0) {
                break;
            }
            offset = *prevp;
        }

        size = cap_size(vfu_ctx, pci_config_space_ptr(vfu_ctx, offset), false);
        cap->off = ROUND_UP(offset + size, CAP_ROUND);
    }

    /*
     * The capability must fit both in the device's config space and in the
     * range addressable by an 8-bit next pointer. The comparison is written
     * so that it cannot wrap around for huge offsets.
     */
    limit = pci_config_space_size(vfu_ctx);
    if (limit > PCI_CFG_SPACE_SIZE) {
        limit = PCI_CFG_SPACE_SIZE;
    }

    if (cap->size > limit || cap->off > limit - cap->size) {
        vfu_log(vfu_ctx, LOG_ERR, "no config space left for capability "
                "%u (%s) of size %zu bytes at offset %zx", cap->id,
                cap->name, cap->size, cap->off);
        return ERROR_INT(ENOSPC);
    }

    memcpy(cap_data(vfu_ctx, cap), data, cap->size);

    /* Make sure the previous cap's PCI_CAP_LIST_NEXT points to us. */
    *prevp = cap->off;

    /* Make sure our PCI_CAP_LIST_NEXT is zeroed. */
    *pci_config_space_ptr(vfu_ctx, cap->off + PCI_CAP_LIST_NEXT) = 0;

    return 0;
}
/*
 * Puts a new extended capability into config space and links it into the
 * extended capability list.
 *
 * Without an explicit cap->off, the capability goes at the start of extended
 * config space if the list is still empty (signalled by a zero ID there), and
 * otherwise right after the current last capability, rounded up to CAP_ROUND.
 *
 * With an explicit cap->off, that offset is used as long as it lies in
 * extended space, does not overlap a registered capability and, for the very
 * first extended capability, equals the start of extended space. Violations
 * yield ERROR_INT(EINVAL).
 *
 * ERROR_INT(ENOSPC) is returned if the capability would run past the end of
 * config space; 0 on success.
 */
static int
ext_cap_place(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, void *data)
{
    struct pcie_ext_cap_hdr *tail;
    bool caller_chose_offset = (cap->off != 0);

    /* Begin at the first extended capability header. */
    tail = (void *)pci_config_space_ptr(vfu_ctx, PCI_CFG_SPACE_SIZE);

    if (caller_chose_offset) {
        if (cap->off < PCI_CFG_SPACE_SIZE) {
            vfu_log(vfu_ctx, LOG_ERR, "invalid offset %zx for capability "
                    "%u (%s)", cap->off, cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        if (cap_find_by_offset(vfu_ctx, cap->off, cap->size) != NULL) {
            vfu_log(vfu_ctx, LOG_ERR, "overlap found for capability "
                    "%u (%s)", cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        if (tail->id == 0x0 && cap->off != PCI_CFG_SPACE_SIZE) {
            vfu_log(vfu_ctx, LOG_ERR, "first extended capability must be at "
                    "%#x", PCI_CFG_SPACE_SIZE);
            return ERROR_INT(EINVAL);
        }
    }

    if (!caller_chose_offset && tail->id == 0x0) {
        /* Empty list: nothing precedes us, so there is nothing to relink. */
        tail = NULL;
        cap->off = PCI_CFG_SPACE_SIZE;
    } else {
        /* Walk to the current last capability. */
        while (tail->next != 0) {
            tail = (void *)pci_config_space_ptr(vfu_ctx, tail->next);
        }

        if (!caller_chose_offset) {
            /* Offset of the first aligned byte past the last capability. */
            cap->off = ROUND_UP((uint8_t *)tail + cap_size(vfu_ctx, tail, true) -
                                pci_config_space_ptr(vfu_ctx, 0), CAP_ROUND);
        }
    }

    if (cap->off + cap->size > pci_config_space_size(vfu_ctx)) {
        vfu_log(vfu_ctx, LOG_ERR, "no config space left for capability "
                "%u (%s) of size %zu bytes at offset %zu", cap->id,
                cap->name, cap->size, cap->off);
        return ERROR_INT(ENOSPC);
    }

    memcpy(cap_data(vfu_ctx, cap), data, cap->size);

    /* Point the previous tail at the newly placed capability. */
    if (tail != NULL) {
        assert((cap->off & 0x3) == 0);
        tail->next = cap->off;
    }

    /* The new capability is now the end of the list. */
    tail = (void *)pci_config_space_ptr(vfu_ctx, cap->off);
    tail->next = 0;

    return 0;
}
/*
 * Adds a PCI capability (standard or extended) to the device's config space.
 *
 * @vfu_ctx: the libvfio-user context; must not be NULL (asserted)
 * @pos:     config space offset to place the capability at, or 0 to have the
 *           next free position chosen automatically
 * @flags:   any combination of VFU_CAP_FLAG_EXTENDED, VFU_CAP_FLAG_CALLBACK
 *           and VFU_CAP_FLAG_READONLY
 * @data:    the capability, starting with its header; the capability ID (and,
 *           for vendor-specific capabilities, the length) is read from it
 *
 * Returns the offset at which the capability was placed, or an error:
 *  - ERROR_INT(EINVAL) for unknown flags, for VFU_CAP_FLAG_CALLBACK without a
 *    config space region callback, for an extended capability on a device
 *    type without extended config space, or for a capability that does not
 *    fit inside config space at @pos;
 *  - ERROR_INT(ENOSPC) if VFU_MAX_CAPS capabilities of that kind already exist;
 *  - ERROR_INT(ENOTSUP) for a capability ID this library does not handle;
 *  - whatever the placement step reports.
 */
EXPORT ssize_t
vfu_pci_add_capability(vfu_ctx_t *vfu_ctx, size_t pos, int flags, void *data)
{
    bool extended = (flags & VFU_CAP_FLAG_EXTENDED);
    struct pci_cap cap = { 0 };
    size_t cfg_size;
    int ret;

    assert(vfu_ctx != NULL);

    if (flags & ~(VFU_CAP_FLAG_EXTENDED | VFU_CAP_FLAG_CALLBACK |
                  VFU_CAP_FLAG_READONLY)) {
        vfu_log(vfu_ctx, LOG_DEBUG, "bad flags %#x", flags);
        return ERROR_INT(EINVAL);
    }

    if ((flags & VFU_CAP_FLAG_CALLBACK) &&
        vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb == NULL) {
        vfu_log(vfu_ctx, LOG_DEBUG, "no callback");
        return ERROR_INT(EINVAL);
    }

    cap.off = pos;
    cap.flags = flags;
    cap.extended = extended;

    if (extended) {
        switch (vfu_ctx->pci.type) {
        case VFU_PCI_TYPE_PCI_X_2:
        case VFU_PCI_TYPE_EXPRESS:
            break;
        default:
            vfu_log(vfu_ctx, LOG_DEBUG, "bad PCI type %#x", vfu_ctx->pci.type);
            return ERROR_INT(EINVAL);
        }

        if (vfu_ctx->pci.nr_ext_caps == VFU_MAX_CAPS) {
            return ERROR_INT(ENOSPC);
        }

        cap.id = ((struct pcie_ext_cap_hdr *)data)->id;
        cap.hdr_size = sizeof(struct pcie_ext_cap_hdr);

        switch (cap.id) {
        case PCI_EXT_CAP_ID_DSN:
            cap.name = "Device Serial Number";
            cap.cb = ext_cap_write_dsn;
            break;
        case PCI_EXT_CAP_ID_VNDR:
            cap.name = "Vendor-Specific";
            cap.cb = ext_cap_write_vendor;
            cap.hdr_size = sizeof(struct pcie_ext_cap_vsc_hdr);
            break;
        default:
            vfu_log(vfu_ctx, LOG_ERR, "unsupported capability %#x", cap.id);
            return ERROR_INT(ENOTSUP);
        }

        cap.size = cap_size(vfu_ctx, data, extended);
        cfg_size = pci_config_space_size(vfu_ctx);

        /*
         * A capability may end exactly at the end of config space. The test
         * is phrased as a subtraction so a huge @pos cannot wrap the sum.
         */
        if (cap.size > cfg_size || cap.off > cfg_size - cap.size) {
            vfu_log(vfu_ctx, LOG_DEBUG, "bad PCIe capability offset");
            return ERROR_INT(EINVAL);
        }

        ret = ext_cap_place(vfu_ctx, &cap, data);
    } else {
        if (vfu_ctx->pci.nr_caps == VFU_MAX_CAPS) {
            return ERROR_INT(ENOSPC);
        }

        cap.id = ((struct cap_hdr *)data)->id;
        cap.hdr_size = sizeof(struct cap_hdr);

        switch (cap.id) {
        case PCI_CAP_ID_PM:
            cap.name = "Power Management";
            cap.cb = cap_write_pm;
            break;
        case PCI_CAP_ID_EXP:
            cap.name = "PCI Express";
            cap.cb = cap_write_px;
            break;
        case PCI_CAP_ID_MSI:
            cap.name = "MSI";
            cap.cb = cap_write_msi;
            break;
        case PCI_CAP_ID_MSIX:
            cap.name = "MSI-X";
            cap.cb = cap_write_msix;
            break;
        case PCI_CAP_ID_VNDR:
            cap.name = "Vendor-Specific";
            cap.cb = cap_write_vendor;
            cap.hdr_size = sizeof(struct vsc);
            break;
        default:
            vfu_log(vfu_ctx, LOG_ERR, "unsupported capability %#x", cap.id);
            return ERROR_INT(ENOTSUP);
        }

        cap.size = cap_size(vfu_ctx, data, extended);
        cfg_size = pci_config_space_size(vfu_ctx);

        /* Same exact, wrap-safe bound as for extended capabilities. */
        if (cap.size > cfg_size || cap.off > cfg_size - cap.size) {
            vfu_log(vfu_ctx, LOG_DEBUG,
                    "PCI capability past end of config space, %zx + %zx > %zx",
                    cap.off, cap.size, cfg_size);
            return ERROR_INT(EINVAL);
        }

        ret = cap_place(vfu_ctx, &cap, data);
    }

    if (ret != 0) {
        return ret;
    }

    vfu_log(vfu_ctx, LOG_DEBUG, "added PCI cap \"%s\" size=%#zx offset=%#zx",
            cap.name, cap.size, cap.off);

    /* Record the capability so later accesses can be routed to it. */
    if (extended) {
        memcpy(&vfu_ctx->pci.ext_caps[vfu_ctx->pci.nr_ext_caps],
               &cap, sizeof(cap));
        vfu_ctx->pci.nr_ext_caps++;
    } else {
        memcpy(&vfu_ctx->pci.caps[vfu_ctx->pci.nr_caps], &cap, sizeof(cap));
        vfu_ctx->pci.nr_caps++;
    }

    /* Remember where the PCI Express capability lives. */
    if (cap.id == PCI_CAP_ID_EXP) {
        vfu_ctx->pci_cap_exp_off = cap.off;
    }

    return cap.off;
}
/*
 * Finds the next extended capability with ID @cap_id.
 *
 * If @offset is 0 the search starts at the first extended capability (at
 * PCI_CFG_SPACE_SIZE); otherwise @offset is taken to be the offset of an
 * extended capability and the search starts at the one following it.
 *
 * Returns the config space offset of the match. Returns 0 with errno set to
 * ENOENT if there is no (further) such capability, or to EINVAL if @offset or
 * a list entry does not leave room for a header inside config space.
 */
static size_t
vfu_pci_find_next_ext_capability(vfu_ctx_t *vfu_ctx, size_t offset, int cap_id)
{
    struct pcie_ext_cap_hdr *hdr = NULL;

    if (offset + sizeof(*hdr) >= pci_config_space_size(vfu_ctx)) {
        errno = EINVAL;
        return 0;
    }

    if (offset == 0) {
        offset = PCI_CFG_SPACE_SIZE;
        hdr = (void *)pci_config_space_ptr(vfu_ctx, offset);
    } else {
        hdr = (void *)pci_config_space_ptr(vfu_ctx, offset);

        /*
         * A zero next pointer terminates the list: there is nothing after
         * the given capability. Following it would wrongly interpret the
         * standard PCI header at offset 0 as an extended capability.
         */
        if (hdr->next == 0) {
            errno = ENOENT;
            return 0;
        }

        hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
    }

    for (;;) {
        offset = (uint8_t *)hdr - pci_config_space_ptr(vfu_ctx, 0);

        /* Sanity check before reading the header at this offset. */
        if (offset + sizeof(*hdr) >= pci_config_space_size(vfu_ctx)) {
            errno = EINVAL;
            return 0;
        }

        if (hdr->id == cap_id) {
            return offset;
        }

        if (hdr->next == 0) {
            break;
        }

        hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
    }

    errno = ENOENT;
    return 0;
}
/*
 * Finds the next capability with ID @cap_id.
 *
 * @extended selects the extended capability list instead of the standard one.
 * If @offset is 0 the search starts at the head of the list; otherwise @offset
 * is taken to be the offset of a capability and the search starts at the one
 * it links to.
 *
 * Returns the config space offset of the match. Returns 0 with errno set to
 * ENOENT if there is no (further) such capability, or to EINVAL if @offset or
 * a list entry points too close to the end of config space.
 */
EXPORT size_t
vfu_pci_find_next_capability(vfu_ctx_t *vfu_ctx, bool extended,
                             size_t offset, int cap_id)
{
    assert(vfu_ctx != NULL);

    if (extended) {
        return vfu_pci_find_next_ext_capability(vfu_ctx, offset, cap_id);
    }

    if (offset + PCI_CAP_LIST_NEXT >= pci_config_space_size(vfu_ctx)) {
        errno = EINVAL;
        return 0;
    }

    /* Pick the first candidate: list head, or successor of @offset. */
    offset = (offset == 0)
           ? vfu_pci_get_config_space(vfu_ctx)->hdr.cap
           : *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT);

    /* A zero offset marks the end of the list. */
    while (offset != 0) {
        uint8_t id;

        /* Sanity check. */
        if (offset + PCI_CAP_LIST_NEXT >= pci_config_space_size(vfu_ctx)) {
            errno = EINVAL;
            return 0;
        }

        id = *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_ID);
        if (id == cap_id) {
            return offset;
        }

        offset = *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT);
    }

    errno = ENOENT;
    return 0;
}
/*
 * Finds the first capability with ID @cap_id, searching the extended list if
 * @extended is set and the standard list otherwise. Same return convention as
 * vfu_pci_find_next_capability(), which this calls with a start offset of 0.
 */
EXPORT size_t
vfu_pci_find_capability(vfu_ctx_t *vfu_ctx, bool extended, int cap_id)
{
    const size_t from_list_head = 0;

    return vfu_pci_find_next_capability(vfu_ctx, extended, from_list_head,
                                        cap_id);
}
/*
 * Returns true if an access to region @region_index at @offset targets the
 * Device Control register of the PCI Express capability, i.e. the config space
 * region at the recorded PCI Express capability offset plus the position of
 * pxdc inside struct pxcap.
 *
 * NOTE(review): the recorded offset is used as-is; what it holds when no PCI
 * Express capability was added is not visible here -- confirm.
 */
bool
access_is_pci_cap_exp(const vfu_ctx_t *vfu_ctx, size_t region_index,
                      uint64_t offset)
{
    size_t pxdc_off;

    if (region_index != VFU_PCI_DEV_CFG_REGION_IDX) {
        return false;
    }

    pxdc_off = vfu_ctx->pci_cap_exp_off + offsetof(struct pxcap, pxdc);

    return offset == pxdc_off;
}
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */