| /* |
| * Copyright (c) 2019 Nutanix Inc. All rights reserved. |
| * |
| * Authors: Thanos Makatos <thanos@nutanix.com> |
| * Swapnil Ingle <swapnil.ingle@nutanix.com> |
| * Felipe Franciosi <felipe@nutanix.com> |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of Nutanix nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| * |
| */ |
| |
| #include <assert.h> |
| #include <errno.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/param.h> |
| |
| #include "pci_caps.h" |
| #include "common.h" |
| #include "libvfio-user.h" |
| #include "pci.h" |
| #include "private.h" |
| |
| static inline void |
| pci_hdr_write_bar(vfu_ctx_t *vfu_ctx, uint16_t bar_index, const char *buf) |
| { |
| uint32_t cfg_addr; |
| unsigned long mask; |
| vfu_pci_hdr_t *hdr; |
| |
| assert(vfu_ctx != NULL); |
| |
| if (vfu_ctx->reg_info[bar_index].size == 0) { |
| return; |
| } |
| |
| hdr = &vfu_pci_get_config_space(vfu_ctx)->hdr; |
| |
| cfg_addr = *(uint32_t *) buf; |
| |
| vfu_log(vfu_ctx, LOG_DEBUG, "BAR%d addr 0x%x", bar_index, cfg_addr); |
| |
| if (cfg_addr == 0xffffffff) { |
| cfg_addr = ~(vfu_ctx->reg_info[bar_index].size) + 1; |
| } |
| |
| if ((vfu_ctx->reg_info[bar_index].flags & VFU_REGION_FLAG_MEM)) { |
| mask = PCI_BASE_ADDRESS_MEM_MASK; |
| } else { |
| mask = PCI_BASE_ADDRESS_IO_MASK; |
| } |
| cfg_addr |= (hdr->bars[bar_index].raw & ~mask); |
| |
| hdr->bars[bar_index].raw = htole32(cfg_addr); |
| } |
| |
/* Maps a config space offset of a BAR register to the index of that BAR. */
#define BAR_INDEX(offset) (((offset) - PCI_BASE_ADDRESS_0) >> 2)
| |
| static int |
| handle_command_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, |
| const char *buf) |
| { |
| uint16_t v; |
| |
| assert(ctx != NULL); |
| |
| assert(pci != NULL); |
| assert(buf != NULL); |
| |
| v = *(uint16_t*)buf; |
| |
| if ((v & PCI_COMMAND_IO) == PCI_COMMAND_IO) { |
| if (!pci->hdr.cmd.iose) { |
| pci->hdr.cmd.iose = 0x1; |
| vfu_log(ctx, LOG_DEBUG, "I/O space enabled"); |
| } |
| v &= ~PCI_COMMAND_IO; |
| } else { |
| if (pci->hdr.cmd.iose) { |
| pci->hdr.cmd.iose = 0x0; |
| vfu_log(ctx, LOG_DEBUG, "I/O space disabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_MEMORY) == PCI_COMMAND_MEMORY) { |
| if (!pci->hdr.cmd.mse) { |
| pci->hdr.cmd.mse = 0x1; |
| vfu_log(ctx, LOG_DEBUG, "memory space enabled"); |
| } |
| v &= ~PCI_COMMAND_MEMORY; |
| } else { |
| if (pci->hdr.cmd.mse) { |
| pci->hdr.cmd.mse = 0x0; |
| vfu_log(ctx, LOG_DEBUG, "memory space disabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_MASTER) == PCI_COMMAND_MASTER) { |
| if (!pci->hdr.cmd.bme) { |
| pci->hdr.cmd.bme = 0x1; |
| vfu_log(ctx, LOG_DEBUG, "bus master enabled"); |
| } |
| v &= ~PCI_COMMAND_MASTER; |
| } else { |
| if (pci->hdr.cmd.bme) { |
| pci->hdr.cmd.bme = 0x0; |
| vfu_log(ctx, LOG_DEBUG, "bus master disabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_SERR) == PCI_COMMAND_SERR) { |
| if (!pci->hdr.cmd.see) { |
| pci->hdr.cmd.see = 0x1; |
| vfu_log(ctx, LOG_DEBUG, "SERR# enabled"); |
| } |
| v &= ~PCI_COMMAND_SERR; |
| } else { |
| if (pci->hdr.cmd.see) { |
| pci->hdr.cmd.see = 0x0; |
| vfu_log(ctx, LOG_DEBUG, "SERR# disabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_INTX_DISABLE) == PCI_COMMAND_INTX_DISABLE) { |
| if (!pci->hdr.cmd.id) { |
| pci->hdr.cmd.id = 0x1; |
| vfu_log(ctx, LOG_DEBUG, "INTx emulation disabled"); |
| } |
| v &= ~PCI_COMMAND_INTX_DISABLE; |
| } else { |
| if (pci->hdr.cmd.id) { |
| pci->hdr.cmd.id = 0x0; |
| vfu_log(ctx, LOG_DEBUG, "INTx emulation enabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_INVALIDATE) == PCI_COMMAND_INVALIDATE) { |
| if (!pci->hdr.cmd.mwie) { |
| pci->hdr.cmd.mwie = 1U; |
| vfu_log(ctx, LOG_DEBUG, "memory write and invalidate enabled"); |
| } |
| v &= ~PCI_COMMAND_INVALIDATE; |
| } else { |
| if (pci->hdr.cmd.mwie) { |
| pci->hdr.cmd.mwie = 0; |
| vfu_log(ctx, LOG_DEBUG, "memory write and invalidate disabled"); |
| } |
| } |
| |
| if ((v & PCI_COMMAND_VGA_PALETTE) == PCI_COMMAND_VGA_PALETTE) { |
| vfu_log(ctx, LOG_DEBUG, "enabling VGA palette snooping ignored"); |
| v &= ~PCI_COMMAND_VGA_PALETTE; |
| } |
| |
| if (v != 0) { |
| vfu_log(ctx, LOG_ERR, "unconsumed command flags %x", v); |
| return ERROR_INT(EINVAL); |
| } |
| |
| return 0; |
| } |
| |
| static int |
| handle_erom_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, |
| const char *buf) |
| { |
| uint32_t v; |
| |
| assert(ctx != NULL); |
| assert(pci != NULL); |
| |
| v = *(uint32_t*)buf; |
| |
| if (v == (uint32_t)PCI_ROM_ADDRESS_MASK) { |
| vfu_log(ctx, LOG_DEBUG, "write mask to EROM ignored"); |
| } else if (v == 0) { |
| vfu_log(ctx, LOG_DEBUG, "cleared EROM"); |
| pci->hdr.erom = 0; |
| } else if (v == (uint32_t)~PCI_ROM_ADDRESS_ENABLE) { |
| vfu_log(ctx, LOG_DEBUG, "EROM disable ignored"); |
| } else if (v == ~0U) { |
| vfu_log(ctx, LOG_INFO, "EROM not implemented"); |
| } else { |
| vfu_log(ctx, LOG_ERR, "bad write to EROM 0x%x bytes", v); |
| return ERROR_INT(EINVAL); |
| } |
| return 0; |
| } |
| |
| static int |
| pci_hdr_write(vfu_ctx_t *vfu_ctx, const char *buf, loff_t offset) |
| { |
| vfu_pci_config_space_t *cfg_space; |
| int ret = 0; |
| |
| assert(vfu_ctx != NULL); |
| assert(buf != NULL); |
| |
| cfg_space = vfu_pci_get_config_space(vfu_ctx); |
| |
| switch (offset) { |
| case PCI_COMMAND: |
| ret = handle_command_write(vfu_ctx, cfg_space, buf); |
| break; |
| case PCI_STATUS: |
| /* FIXME ignoring write completely is wrong as some bits are RW1C */ |
| vfu_log(vfu_ctx, LOG_INFO, "write to status ignored"); |
| break; |
| /* |
| * According to the PCI spec, writing to read-only registers must be |
| * ignored by the device. Some OSes tend to do this, e.g. FreeBSD. |
| */ |
| case offsetof(vfu_pci_hdr_t, rid): |
| case offsetof(vfu_pci_hdr_t, cc): |
| case offsetof(vfu_pci_hdr_t, intr.ipin): |
| case offsetof(vfu_pci_hdr_t, mgnt): |
| case offsetof(vfu_pci_hdr_t, mlat): |
| break; |
| case PCI_INTERRUPT_LINE: |
| cfg_space->hdr.intr.iline = buf[0]; |
| vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x", cfg_space->hdr.intr.iline); |
| break; |
| case PCI_CACHE_LINE_SIZE: |
| cfg_space->hdr.cls = (uint8_t)buf[0]; |
| vfu_log(vfu_ctx, LOG_DEBUG, "cache line size set to %#hhx", |
| cfg_space->hdr.cls); |
| break; |
| case PCI_LATENCY_TIMER: |
| cfg_space->hdr.mlt = (uint8_t)buf[0]; |
| vfu_log(vfu_ctx, LOG_DEBUG, "set to latency timer to %hhx", |
| cfg_space->hdr.mlt); |
| break; |
| case PCI_BASE_ADDRESS_0: |
| case PCI_BASE_ADDRESS_1: |
| case PCI_BASE_ADDRESS_2: |
| case PCI_BASE_ADDRESS_3: |
| case PCI_BASE_ADDRESS_4: |
| case PCI_BASE_ADDRESS_5: |
| pci_hdr_write_bar(vfu_ctx, BAR_INDEX(offset), buf); |
| break; |
| case PCI_ROM_ADDRESS: |
| ret = handle_erom_write(vfu_ctx, cfg_space, buf); |
| break; |
| default: |
| vfu_log(vfu_ctx, LOG_ERR, "PCI config write %#llx not handled", |
| (ull_t)offset); |
| ret = ERROR_INT(EINVAL); |
| } |
| |
| return ret; |
| } |
| |
| /* |
| * Access to the standard PCI header at the given offset. |
| */ |
| static ssize_t |
| pci_hdr_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, |
| loff_t offset, bool is_write) |
| { |
| ssize_t ret; |
| |
| assert(count <= PCI_STD_HEADER_SIZEOF); |
| |
| if (is_write) { |
| ret = pci_hdr_write(vfu_ctx, buf, offset); |
| if (ret < 0) { |
| vfu_log(vfu_ctx, LOG_ERR, "failed to write to PCI header: %m"); |
| } else { |
| ret = count; |
| } |
| } else { |
| memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count); |
| ret = count; |
| } |
| |
| return ret; |
| } |
| |
| /* |
| * Access to the PCI config space that isn't handled by pci_hdr_access() or a |
| * capability handler. |
| */ |
| ssize_t |
| pci_nonstd_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, |
| loff_t offset, bool is_write) |
| { |
| vfu_region_access_cb_t *cb = |
| vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb; |
| |
| if (cb != NULL) { |
| return cb(vfu_ctx, buf, count, offset, is_write); |
| } |
| |
| if (is_write) { |
| vfu_log(vfu_ctx, LOG_ERR, "no callback for write to config space " |
| "offset %#llx size %zu", (ull_t)offset, count); |
| return ERROR_INT(EINVAL); |
| } |
| |
| memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count); |
| return count; |
| } |
| |
| #define PCI_REG_SZ(reg) \ |
| [offsetof(vfu_pci_hdr_t, reg)] = sizeof(((vfu_pci_hdr_t *)0)->reg) |
| |
| static size_t |
| pci_config_space_size_for_reg(loff_t offset) |
| { |
| static const size_t off2sz[] = { |
| PCI_REG_SZ(id), |
| PCI_REG_SZ(cmd), |
| PCI_REG_SZ(sts), |
| PCI_REG_SZ(rid), |
| PCI_REG_SZ(cc), |
| PCI_REG_SZ(cls), |
| PCI_REG_SZ(mlt), |
| PCI_REG_SZ(htype), |
| PCI_REG_SZ(bist), |
| PCI_REG_SZ(bars[0]), |
| PCI_REG_SZ(bars[1]), |
| PCI_REG_SZ(bars[2]), |
| PCI_REG_SZ(bars[3]), |
| PCI_REG_SZ(bars[4]), |
| PCI_REG_SZ(bars[5]), |
| PCI_REG_SZ(ccptr), |
| PCI_REG_SZ(ss), |
| PCI_REG_SZ(erom), |
| PCI_REG_SZ(cap), |
| PCI_REG_SZ(intr.iline), |
| PCI_REG_SZ(intr.ipin), |
| PCI_REG_SZ(mgnt), |
| PCI_REG_SZ(mlat) |
| }; |
| assert(offset < PCI_STD_HEADER_SIZEOF); |
| return off2sz[offset]; |
| } |
| |
| /* |
| * Returns the size of the next segment to access, which may be less than |
| * @count: we might need to split up an access that straddles capabilities and |
| * normal config space, for example. |
| * |
| * @cb is set to the callback to use for accessing the segment. |
| */ |
| static size_t |
| pci_config_space_next_segment(vfu_ctx_t *ctx, size_t count, loff_t offset, |
| bool is_write, vfu_region_access_cb_t **cb) |
| { |
| struct pci_cap *cap; |
| |
| if (offset < PCI_STD_HEADER_SIZEOF) { |
| *cb = pci_hdr_access; |
| if (is_write) { |
| size_t reg_size = pci_config_space_size_for_reg(offset); |
| if (reg_size == 0) { |
| *cb = NULL; |
| return 0; |
| } |
| count = MIN(count, reg_size); |
| } else { |
| count = MIN(count, (size_t)(PCI_STD_HEADER_SIZEOF - offset)); |
| } |
| return count; |
| } |
| |
| cap = cap_find_by_offset(ctx, offset, count); |
| |
| if (cap == NULL) { |
| *cb = pci_nonstd_access; |
| return count; |
| } |
| |
| /* If we have config space before the capability. */ |
| if (offset < (loff_t)cap->off) { |
| *cb = pci_nonstd_access; |
| return cap->off - offset; |
| } |
| |
| *cb = pci_cap_access; |
| return MIN(count, cap->size); |
| } |
| |
| /* |
| * Special handler for config space: we handle all accesses to the standard PCI |
| * header, as well as to any capabilities. |
| * |
| * Outside of those areas, if a callback is specified for the region, we'll use |
| * that; otherwise, writes are not allowed, and reads are satisfied with |
| * memcpy(). |
| * |
| * Returns the number of bytes handled, or -1 and errno on error. |
| */ |
| ssize_t |
| pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, |
| loff_t offset, bool is_write) |
| { |
| loff_t start = offset; |
| ssize_t ret = 0; |
| |
| assert(vfu_ctx != NULL); |
| |
| while (count > 0) { |
| vfu_region_access_cb_t *cb; |
| size_t size; |
| |
| size = pci_config_space_next_segment(vfu_ctx, count, offset, is_write, |
| &cb); |
| if (cb == NULL) { |
| vfu_log(vfu_ctx, LOG_ERR, |
| "bad write to PCI config space %#llx-%#llx", |
| (ull_t)offset, |
| (ull_t)(offset + count - 1)); |
| return size; |
| } |
| |
| ret = cb(vfu_ctx, buf, size, offset, is_write); |
| |
| // FIXME: partial reads, still return an error? |
| if (ret < 0) { |
| return ret; |
| } |
| |
| offset += ret; |
| count -= ret; |
| buf += ret; |
| } |
| |
| return offset - start; |
| } |
| |
| EXPORT int |
| vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type, |
| int hdr_type, int revision UNUSED) |
| { |
| vfu_pci_config_space_t *cfg_space; |
| size_t size; |
| |
| assert(vfu_ctx != NULL); |
| |
| switch (pci_type) { |
| case VFU_PCI_TYPE_CONVENTIONAL: |
| case VFU_PCI_TYPE_PCI_X_1: |
| size = PCI_CFG_SPACE_SIZE; |
| break; |
| case VFU_PCI_TYPE_PCI_X_2: |
| case VFU_PCI_TYPE_EXPRESS: |
| size = PCI_CFG_SPACE_EXP_SIZE; |
| break; |
| default: |
| vfu_log(vfu_ctx, LOG_ERR, "invalid PCI type %u", pci_type); |
| return ERROR_INT(EINVAL); |
| } |
| |
| if (hdr_type != PCI_HEADER_TYPE_NORMAL) { |
| vfu_log(vfu_ctx, LOG_ERR, "invalid PCI header type %d", hdr_type); |
| return ERROR_INT(EINVAL); |
| } |
| |
| /* |
| * TODO there no real reason why we shouldn't allow this, we should just |
| * clean up and redo it. |
| */ |
| if (vfu_ctx->pci.config_space != NULL) { |
| vfu_log(vfu_ctx, LOG_ERR, |
| "PCI configuration space header already setup"); |
| return ERROR_INT(EEXIST); |
| } |
| |
| // Allocate a buffer for the config space. |
| cfg_space = calloc(1, size); |
| if (cfg_space == NULL) { |
| return ERROR_INT(ENOMEM); |
| } |
| |
| vfu_ctx->pci.type = pci_type; |
| vfu_ctx->pci.config_space = cfg_space; |
| vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size = size; |
| vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].flags = VFU_REGION_FLAG_RW; |
| |
| return 0; |
| } |
| |
| EXPORT void |
| vfu_pci_set_id(vfu_ctx_t *vfu_ctx, uint16_t vid, uint16_t did, |
| uint16_t ssvid, uint16_t ssid) |
| { |
| vfu_ctx->pci.config_space->hdr.id.vid = vid; |
| vfu_ctx->pci.config_space->hdr.id.did = did; |
| vfu_ctx->pci.config_space->hdr.ss.vid = ssvid; |
| vfu_ctx->pci.config_space->hdr.ss.sid = ssid; |
| } |
| |
| EXPORT void |
| vfu_pci_set_class(vfu_ctx_t *vfu_ctx, uint8_t base, uint8_t sub, uint8_t pi) |
| { |
| vfu_ctx->pci.config_space->hdr.cc.bcc = base; |
| vfu_ctx->pci.config_space->hdr.cc.scc = sub; |
| vfu_ctx->pci.config_space->hdr.cc.pi = pi; |
| } |
| |
| EXPORT vfu_pci_config_space_t * |
| vfu_pci_get_config_space(vfu_ctx_t *vfu_ctx) |
| { |
| assert(vfu_ctx != NULL); |
| return vfu_ctx->pci.config_space; |
| } |
| |
| /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |