| /* |
| * QEMU emulation of an RISC-V IOMMU |
| * |
| * Copyright (C) 2021-2023, Rivos Inc. |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms and conditions of the GNU General Public License, |
| * version 2 or later, as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with this program; if not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "qom/object.h" |
| #include "hw/pci/pci_bus.h" |
| #include "hw/pci/pci_device.h" |
| #include "hw/qdev-properties.h" |
| #include "hw/riscv/riscv_hart.h" |
| #include "migration/vmstate.h" |
| #include "qapi/error.h" |
| #include "qemu/timer.h" |
| |
| #include "cpu_bits.h" |
| #include "riscv-iommu.h" |
| #include "riscv-iommu-bits.h" |
| #include "trace.h" |
| |
| #define LIMIT_CACHE_CTX (1U << 7) |
| #define LIMIT_CACHE_IOT (1U << 20) |
| |
| /* Physical page number coversions */ |
| #define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS) |
| #define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS) |
| |
| typedef struct RISCVIOMMUContext RISCVIOMMUContext; |
| typedef struct RISCVIOMMUEntry RISCVIOMMUEntry; |
| |
| /* Device assigned I/O address space */ |
| struct RISCVIOMMUSpace { |
| IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */ |
| AddressSpace iova_as; /* IOVA address space for attached device */ |
| RISCVIOMMUState *iommu; /* Managing IOMMU device state */ |
| uint32_t devid; /* Requester identifier, AKA device_id */ |
| bool notifier; /* IOMMU unmap notifier enabled */ |
| QLIST_ENTRY(RISCVIOMMUSpace) list; |
| }; |
| |
| /* Device translation context state. */ |
| struct RISCVIOMMUContext { |
| uint64_t devid:24; /* Requester Id, AKA device_id */ |
| uint64_t process_id:20; /* Process ID. PASID for PCIe */ |
| uint64_t tc; /* Translation Control */ |
| uint64_t ta; /* Translation Attributes */ |
| uint64_t satp; /* S-Stage address translation and protection */ |
| uint64_t gatp; /* G-Stage address translation and protection */ |
| uint64_t msi_addr_mask; /* MSI filtering - address mask */ |
| uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ |
| uint64_t msiptp; /* MSI redirection page table pointer */ |
| }; |
| |
| /* IOMMU index for transactions without process_id specified. */ |
| #define RISCV_IOMMU_NOPROCID 0 |
| |
| static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) |
| { |
| switch (vec_type) { |
| case RISCV_IOMMU_INTR_CQ: |
| return icvec & RISCV_IOMMU_ICVEC_CIV; |
| case RISCV_IOMMU_INTR_FQ: |
| return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; |
| case RISCV_IOMMU_INTR_PM: |
| return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; |
| case RISCV_IOMMU_INTR_PQ: |
| return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) |
| { |
| const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL); |
| uint32_t ipsr, icvec, vector; |
| |
| if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) { |
| return; |
| } |
| |
| icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); |
| ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); |
| |
| if (!(ipsr & (1 << vec_type))) { |
| vector = riscv_iommu_get_icvec_vector(icvec, vec_type); |
| s->notify(s, vector); |
| trace_riscv_iommu_notify_int_vector(vec_type, vector); |
| } |
| } |
| |
| static void riscv_iommu_fault(RISCVIOMMUState *s, |
| struct riscv_iommu_fq_record *ev) |
| { |
| uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); |
| uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; |
| uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; |
| uint32_t next = (tail + 1) & s->fq_mask; |
| uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); |
| |
| trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), |
| PCI_FUNC(devid), ev->hdr, ev->iotval); |
| |
| if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || |
| !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { |
| return; |
| } |
| |
| if (head == next) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, |
| RISCV_IOMMU_FQCSR_FQOF, 0); |
| } else { |
| dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); |
| if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, |
| RISCV_IOMMU_FQCSR_FQMF, 0); |
| } else { |
| riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); |
| } |
| } |
| |
| if (ctrl & RISCV_IOMMU_FQCSR_FIE) { |
| riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); |
| } |
| } |
| |
| static void riscv_iommu_pri(RISCVIOMMUState *s, |
| struct riscv_iommu_pq_record *pr) |
| { |
| uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); |
| uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; |
| uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; |
| uint32_t next = (tail + 1) & s->pq_mask; |
| uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); |
| |
| trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), |
| PCI_FUNC(devid), pr->payload); |
| |
| if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || |
| !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { |
| return; |
| } |
| |
| if (head == next) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, |
| RISCV_IOMMU_PQCSR_PQOF, 0); |
| } else { |
| dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); |
| if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, |
| RISCV_IOMMU_PQCSR_PQMF, 0); |
| } else { |
| riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); |
| } |
| } |
| |
| if (ctrl & RISCV_IOMMU_PQCSR_PIE) { |
| riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); |
| } |
| } |
| |
| /* Portable implementation of pext_u64, bit-mask extraction. */ |
| static uint64_t _pext_u64(uint64_t val, uint64_t ext) |
| { |
| uint64_t ret = 0; |
| uint64_t rot = 1; |
| |
| while (ext) { |
| if (ext & 1) { |
| if (val & 1) { |
| ret |= rot; |
| } |
| rot <<= 1; |
| } |
| val >>= 1; |
| ext >>= 1; |
| } |
| |
| return ret; |
| } |
| |
| /* Check if GPA matches MSI/MRIF pattern. */ |
| static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, |
| dma_addr_t gpa) |
| { |
| if (!s->enable_msi) { |
| return false; |
| } |
| |
| if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != |
| RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { |
| return false; /* Invalid MSI/MRIF mode */ |
| } |
| |
| if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { |
| return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ |
| } |
| |
| return true; |
| } |
| |
| /* |
| * RISCV IOMMU Address Translation Lookup - Page Table Walk |
| * |
| * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c |
| * Both implementation can be merged into single helper function in future. |
| * Keeping them separate for now, as error reporting and flow specifics are |
| * sufficiently different for separate implementation. |
| * |
| * @s : IOMMU Device State |
| * @ctx : Translation context for device id and process address space id. |
| * @iotlb : translation data: physical address and access mode. |
| * @return : success or fault cause code. |
| */ |
| static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, |
| IOMMUTLBEntry *iotlb) |
| { |
| dma_addr_t addr, base; |
| uint64_t satp, gatp, pte; |
| bool en_s, en_g; |
| struct { |
| unsigned char step; |
| unsigned char levels; |
| unsigned char ptidxbits; |
| unsigned char ptesize; |
| } sc[2]; |
| /* Translation stage phase */ |
| enum { |
| S_STAGE = 0, |
| G_STAGE = 1, |
| } pass; |
| MemTxResult ret; |
| |
| satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); |
| gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); |
| |
| en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; |
| en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; |
| |
| /* |
| * Early check for MSI address match when IOVA == GPA. |
| * Note that the (!en_s) condition means that the MSI |
| * page table may only be used when guest pages are |
| * mapped using the g-stage page table, whether single- |
| * or two-stage paging is enabled. It's unavoidable though, |
| * because the spec mandates that we do a first-stage |
| * translation before we check the MSI page table, which |
| * means we can't do an early MSI check unless we have |
| * strictly !en_s. |
| */ |
| if (!en_s && (iotlb->perm & IOMMU_WO) && |
| riscv_iommu_msi_check(s, ctx, iotlb->iova)) { |
| iotlb->target_as = &s->trap_as; |
| iotlb->translated_addr = iotlb->iova; |
| iotlb->addr_mask = ~TARGET_PAGE_MASK; |
| return 0; |
| } |
| |
| /* Exit early for pass-through mode. */ |
| if (!(en_s || en_g)) { |
| iotlb->translated_addr = iotlb->iova; |
| iotlb->addr_mask = ~TARGET_PAGE_MASK; |
| /* Allow R/W in pass-through mode */ |
| iotlb->perm = IOMMU_RW; |
| return 0; |
| } |
| |
| /* S/G translation parameters. */ |
| for (pass = 0; pass < 2; pass++) { |
| uint32_t sv_mode; |
| |
| sc[pass].step = 0; |
| if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : |
| (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { |
| /* 32bit mode for GXL/SXL == 1 */ |
| switch (pass ? gatp : satp) { |
| case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: |
| sc[pass].levels = 0; |
| sc[pass].ptidxbits = 0; |
| sc[pass].ptesize = 0; |
| break; |
| case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: |
| sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; |
| if (!(s->cap & sv_mode)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| sc[pass].levels = 2; |
| sc[pass].ptidxbits = 10; |
| sc[pass].ptesize = 4; |
| break; |
| default: |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| } else { |
| /* 64bit mode for GXL/SXL == 0 */ |
| switch (pass ? gatp : satp) { |
| case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: |
| sc[pass].levels = 0; |
| sc[pass].ptidxbits = 0; |
| sc[pass].ptesize = 0; |
| break; |
| case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: |
| sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; |
| if (!(s->cap & sv_mode)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| sc[pass].levels = 3; |
| sc[pass].ptidxbits = 9; |
| sc[pass].ptesize = 8; |
| break; |
| case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: |
| sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; |
| if (!(s->cap & sv_mode)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| sc[pass].levels = 4; |
| sc[pass].ptidxbits = 9; |
| sc[pass].ptesize = 8; |
| break; |
| case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: |
| sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; |
| if (!(s->cap & sv_mode)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| sc[pass].levels = 5; |
| sc[pass].ptidxbits = 9; |
| sc[pass].ptesize = 8; |
| break; |
| default: |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| } |
| }; |
| |
| /* S/G stages translation tables root pointers */ |
| gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); |
| satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); |
| addr = (en_s && en_g) ? satp : iotlb->iova; |
| base = en_g ? gatp : satp; |
| pass = en_g ? G_STAGE : S_STAGE; |
| |
| do { |
| const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; |
| const unsigned va_bits = widened + sc[pass].ptidxbits; |
| const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * |
| (sc[pass].levels - 1 - sc[pass].step); |
| const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); |
| const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; |
| const bool ade = |
| ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); |
| |
| /* Address range check before first level lookup */ |
| if (!sc[pass].step) { |
| const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; |
| if ((addr & va_mask) != addr) { |
| return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; |
| } |
| } |
| |
| /* Read page table entry */ |
| if (sc[pass].ptesize == 4) { |
| uint32_t pte32 = 0; |
| ret = ldl_le_dma(s->target_as, pte_addr, &pte32, |
| MEMTXATTRS_UNSPECIFIED); |
| pte = pte32; |
| } else { |
| ret = ldq_le_dma(s->target_as, pte_addr, &pte, |
| MEMTXATTRS_UNSPECIFIED); |
| } |
| if (ret != MEMTX_OK) { |
| return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT |
| : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; |
| } |
| |
| sc[pass].step++; |
| hwaddr ppn = pte >> PTE_PPN_SHIFT; |
| |
| if (!(pte & PTE_V)) { |
| break; /* Invalid PTE */ |
| } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { |
| base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ |
| } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { |
| break; /* Reserved leaf PTE flags: PTE_W */ |
| } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { |
| break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ |
| } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { |
| break; /* Misaligned PPN */ |
| } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { |
| break; /* Read access check failed */ |
| } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { |
| break; /* Write access check failed */ |
| } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { |
| break; /* Access bit not set */ |
| } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { |
| break; /* Dirty bit not set */ |
| } else { |
| /* Leaf PTE, translation completed. */ |
| sc[pass].step = sc[pass].levels; |
| base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); |
| /* Update address mask based on smallest translation granularity */ |
| iotlb->addr_mask &= (1ULL << va_skip) - 1; |
| /* Continue with S-Stage translation? */ |
| if (pass && sc[0].step != sc[0].levels) { |
| pass = S_STAGE; |
| addr = iotlb->iova; |
| continue; |
| } |
| /* Translation phase completed (GPA or SPA) */ |
| iotlb->translated_addr = base; |
| iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) |
| : IOMMU_RO; |
| |
| /* Check MSI GPA address match */ |
| if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && |
| riscv_iommu_msi_check(s, ctx, base)) { |
| /* Trap MSI writes and return GPA address. */ |
| iotlb->target_as = &s->trap_as; |
| iotlb->addr_mask = ~TARGET_PAGE_MASK; |
| return 0; |
| } |
| |
| /* Continue with G-Stage translation? */ |
| if (!pass && en_g) { |
| pass = G_STAGE; |
| addr = base; |
| base = gatp; |
| sc[pass].step = 0; |
| continue; |
| } |
| |
| return 0; |
| } |
| |
| if (sc[pass].step == sc[pass].levels) { |
| break; /* Can't find leaf PTE */ |
| } |
| |
| /* Continue with G-Stage translation? */ |
| if (!pass && en_g) { |
| pass = G_STAGE; |
| addr = base; |
| base = gatp; |
| sc[pass].step = 0; |
| } |
| } while (1); |
| |
| return (iotlb->perm & IOMMU_WO) ? |
| (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : |
| RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : |
| (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : |
| RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); |
| } |
| |
| static void riscv_iommu_report_fault(RISCVIOMMUState *s, |
| RISCVIOMMUContext *ctx, |
| uint32_t fault_type, uint32_t cause, |
| bool pv, |
| uint64_t iotval, uint64_t iotval2) |
| { |
| struct riscv_iommu_fq_record ev = { 0 }; |
| |
| if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { |
| switch (cause) { |
| case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: |
| case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: |
| case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: |
| case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: |
| case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: |
| case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: |
| case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: |
| break; |
| default: |
| /* DTF prevents reporting a fault for this given cause */ |
| return; |
| } |
| } |
| |
| ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); |
| ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); |
| ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); |
| ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); |
| |
| if (pv) { |
| ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); |
| } |
| |
| ev.iotval = iotval; |
| ev.iotval2 = iotval2; |
| |
| riscv_iommu_fault(s, &ev); |
| } |
| |
| /* Redirect MSI write for given GPA. */ |
| static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, |
| RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, |
| unsigned size, MemTxAttrs attrs) |
| { |
| MemTxResult res; |
| dma_addr_t addr; |
| uint64_t intn; |
| uint32_t n190; |
| uint64_t pte[2]; |
| int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; |
| int cause; |
| |
| /* Interrupt File Number */ |
| intn = _pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); |
| if (intn >= 256) { |
| /* Interrupt file number out of range */ |
| res = MEMTX_ACCESS_ERROR; |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; |
| goto err; |
| } |
| |
| /* fetch MSI PTE */ |
| addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); |
| addr = addr | (intn * sizeof(pte)); |
| res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), |
| MEMTXATTRS_UNSPECIFIED); |
| if (res != MEMTX_OK) { |
| if (res == MEMTX_DECODE_ERROR) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; |
| } else { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; |
| } |
| goto err; |
| } |
| |
| le64_to_cpus(&pte[0]); |
| le64_to_cpus(&pte[1]); |
| |
| if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { |
| /* |
| * The spec mentions that: "If msipte.C == 1, then further |
| * processing to interpret the PTE is implementation |
| * defined.". We'll abort with cause = 262 for this |
| * case too. |
| */ |
| res = MEMTX_ACCESS_ERROR; |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; |
| goto err; |
| } |
| |
| switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { |
| case RISCV_IOMMU_MSI_PTE_M_BASIC: |
| /* MSI Pass-through mode */ |
| addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); |
| |
| trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), |
| PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), |
| gpa, addr); |
| |
| res = dma_memory_write(s->target_as, addr, &data, size, attrs); |
| if (res != MEMTX_OK) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; |
| goto err; |
| } |
| |
| return MEMTX_OK; |
| case RISCV_IOMMU_MSI_PTE_M_MRIF: |
| /* MRIF mode, continue. */ |
| break; |
| default: |
| res = MEMTX_ACCESS_ERROR; |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; |
| goto err; |
| } |
| |
| /* |
| * Report an error for interrupt identities exceeding the maximum allowed |
| * for an IMSIC interrupt file (2047) or destination address is not 32-bit |
| * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. |
| */ |
| if ((data > 2047) || (gpa & 3)) { |
| res = MEMTX_ACCESS_ERROR; |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; |
| goto err; |
| } |
| |
| /* MSI MRIF mode, non atomic pending bit update */ |
| |
| /* MRIF pending bit address */ |
| addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; |
| addr = addr | ((data & 0x7c0) >> 3); |
| |
| trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), |
| PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), |
| gpa, addr); |
| |
| /* MRIF pending bit mask */ |
| data = 1ULL << (data & 0x03f); |
| res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); |
| if (res != MEMTX_OK) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; |
| goto err; |
| } |
| |
| intn = intn | data; |
| res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); |
| if (res != MEMTX_OK) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; |
| goto err; |
| } |
| |
| /* Get MRIF enable bits */ |
| addr = addr + sizeof(intn); |
| res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); |
| if (res != MEMTX_OK) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; |
| goto err; |
| } |
| |
| if (!(intn & data)) { |
| /* notification disabled, MRIF update completed. */ |
| return MEMTX_OK; |
| } |
| |
| /* Send notification message */ |
| addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); |
| n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | |
| (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); |
| |
| res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); |
| if (res != MEMTX_OK) { |
| cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; |
| goto err; |
| } |
| |
| trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); |
| |
| return MEMTX_OK; |
| |
| err: |
| riscv_iommu_report_fault(s, ctx, fault_type, cause, |
| !!ctx->process_id, 0, 0); |
| return res; |
| } |
| |
| /* |
| * Check device context configuration as described by the |
| * riscv-iommu spec section "Device-context configuration |
| * checks". |
| */ |
| static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, |
| RISCVIOMMUContext *ctx) |
| { |
| uint32_t fsc_mode, msi_mode; |
| |
| if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && |
| ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { |
| return false; |
| } |
| |
| if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && |
| ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { |
| return false; |
| } |
| |
| if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { |
| msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); |
| |
| if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && |
| msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { |
| return false; |
| } |
| } |
| |
| fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); |
| |
| if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { |
| switch (fsc_mode) { |
| case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: |
| if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: |
| if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: |
| if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { |
| return false; |
| } |
| break; |
| } |
| } else { |
| /* DC.tc.PDTV is 0 */ |
| if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { |
| return false; |
| } |
| |
| if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { |
| if (fsc_mode == RISCV_IOMMU_CAP_SV32 && |
| !(s->cap & RISCV_IOMMU_CAP_SV32)) { |
| return false; |
| } |
| } else { |
| switch (fsc_mode) { |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { |
| return false; |
| } |
| break; |
| } |
| } |
| } |
| |
| /* |
| * CAP_END is always zero (only one endianess). FCTL_BE is |
| * always zero (little-endian accesses). Thus TC_SBE must |
| * always be LE, i.e. zero. |
| */ |
| if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* |
| * Validate process context (PC) according to section |
| * "Process-context configuration checks". |
| */ |
| static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, |
| RISCVIOMMUContext *ctx) |
| { |
| uint32_t mode; |
| |
| if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { |
| return false; |
| } |
| |
| if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { |
| return false; |
| } |
| |
| mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); |
| switch (mode) { |
| case RISCV_IOMMU_DC_FSC_MODE_BARE: |
| /* sv39 and sv32 modes have the same value (8) */ |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: |
| break; |
| default: |
| return false; |
| } |
| |
| if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { |
| if (mode == RISCV_IOMMU_CAP_SV32 && |
| !(s->cap & RISCV_IOMMU_CAP_SV32)) { |
| return false; |
| } |
| } else { |
| switch (mode) { |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { |
| return false; |
| } |
| break; |
| case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: |
| if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { |
| return false; |
| } |
| break; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* |
| * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk |
| * |
| * @s : IOMMU Device State |
| * @ctx : Device Translation Context with devid and process_id set. |
| * @return : success or fault code. |
| */ |
| static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) |
| { |
| const uint64_t ddtp = s->ddtp; |
| unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); |
| dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); |
| struct riscv_iommu_dc dc; |
| /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */ |
| const int dc_fmt = !s->enable_msi; |
| const size_t dc_len = sizeof(dc) >> dc_fmt; |
| unsigned depth; |
| uint64_t de; |
| |
| switch (mode) { |
| case RISCV_IOMMU_DDTP_MODE_OFF: |
| return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; |
| |
| case RISCV_IOMMU_DDTP_MODE_BARE: |
| /* mock up pass-through translation context */ |
| ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, |
| RISCV_IOMMU_DC_IOHGATP_MODE_BARE); |
| ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, |
| RISCV_IOMMU_DC_FSC_MODE_BARE); |
| ctx->tc = RISCV_IOMMU_DC_TC_V; |
| ctx->ta = 0; |
| ctx->msiptp = 0; |
| return 0; |
| |
| case RISCV_IOMMU_DDTP_MODE_1LVL: |
| depth = 0; |
| break; |
| |
| case RISCV_IOMMU_DDTP_MODE_2LVL: |
| depth = 1; |
| break; |
| |
| case RISCV_IOMMU_DDTP_MODE_3LVL: |
| depth = 2; |
| break; |
| |
| default: |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| |
| /* |
| * Check supported device id width (in bits). |
| * See IOMMU Specification, Chapter 6. Software guidelines. |
| * - if extended device-context format is used: |
| * 1LVL: 6, 2LVL: 15, 3LVL: 24 |
| * - if base device-context format is used: |
| * 1LVL: 7, 2LVL: 16, 3LVL: 24 |
| */ |
| if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { |
| return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; |
| } |
| |
| /* Device directory tree walk */ |
| for (; depth-- > 0; ) { |
| /* |
| * Select device id index bits based on device directory tree level |
| * and device context format. |
| * See IOMMU Specification, Chapter 2. Data Structures. |
| * - if extended device-context format is used: |
| * device index: [23:15][14:6][5:0] |
| * - if base device-context format is used: |
| * device index: [23:16][15:7][6:0] |
| */ |
| const int split = depth * 9 + 6 + dc_fmt; |
| addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; |
| if (dma_memory_read(s->target_as, addr, &de, sizeof(de), |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; |
| } |
| le64_to_cpus(&de); |
| if (!(de & RISCV_IOMMU_DDTE_VALID)) { |
| /* invalid directory entry */ |
| return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; |
| } |
| if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { |
| /* reserved bits set */ |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); |
| } |
| |
| /* index into device context entry page */ |
| addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; |
| |
| memset(&dc, 0, sizeof(dc)); |
| if (dma_memory_read(s->target_as, addr, &dc, dc_len, |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; |
| } |
| |
| /* Set translation context. */ |
| ctx->tc = le64_to_cpu(dc.tc); |
| ctx->gatp = le64_to_cpu(dc.iohgatp); |
| ctx->satp = le64_to_cpu(dc.fsc); |
| ctx->ta = le64_to_cpu(dc.ta); |
| ctx->msiptp = le64_to_cpu(dc.msiptp); |
| ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); |
| ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); |
| |
| if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; |
| } |
| |
| if (!riscv_iommu_validate_device_ctx(s, ctx)) { |
| return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; |
| } |
| |
| /* FSC field checks */ |
| mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); |
| addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); |
| |
| if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { |
| if (ctx->process_id != RISCV_IOMMU_NOPROCID) { |
| /* PID is disabled */ |
| return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; |
| } |
| if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { |
| /* Invalid translation mode */ |
| return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; |
| } |
| return 0; |
| } |
| |
| if (ctx->process_id == RISCV_IOMMU_NOPROCID) { |
| if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { |
| /* No default process_id enabled, set BARE mode */ |
| ctx->satp = 0ULL; |
| return 0; |
| } else { |
| /* Use default process_id #0 */ |
| ctx->process_id = 0; |
| } |
| } |
| |
| if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { |
| /* No S-Stage translation, done. */ |
| return 0; |
| } |
| |
| /* FSC.TC.PDTV enabled */ |
| if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { |
| /* Invalid PDTP.MODE */ |
| return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; |
| } |
| |
| for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { |
| /* |
| * Select process id index bits based on process directory tree |
| * level. See IOMMU Specification, 2.2. Process-Directory-Table. |
| */ |
| const int split = depth * 9 + 8; |
| addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; |
| if (dma_memory_read(s->target_as, addr, &de, sizeof(de), |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; |
| } |
| le64_to_cpus(&de); |
| if (!(de & RISCV_IOMMU_PC_TA_V)) { |
| return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; |
| } |
| addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN)); |
| } |
| |
| /* Leaf entry in PDT */ |
| addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; |
| if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, |
| MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { |
| return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; |
| } |
| |
| /* Use FSC and TA from process directory entry. */ |
| ctx->ta = le64_to_cpu(dc.ta); |
| ctx->satp = le64_to_cpu(dc.fsc); |
| |
| if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { |
| return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; |
| } |
| |
| if (!riscv_iommu_validate_process_ctx(s, ctx)) { |
| return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; |
| } |
| |
| return 0; |
| } |
| |
| /* Translation Context cache support */ |
| static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2) |
| { |
| RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; |
| RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; |
| return c1->devid == c2->devid && |
| c1->process_id == c2->process_id; |
| } |
| |
| static guint riscv_iommu_ctx_hash(gconstpointer v) |
| { |
| RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; |
| /* |
| * Generate simple hash of (process_id, devid) |
| * assuming 24-bit wide devid. |
| */ |
| return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); |
| } |
| |
| static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, |
| gpointer data) |
| { |
| RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; |
| RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; |
| if (ctx->tc & RISCV_IOMMU_DC_TC_V && |
| ctx->devid == arg->devid && |
| ctx->process_id == arg->process_id) { |
| ctx->tc &= ~RISCV_IOMMU_DC_TC_V; |
| } |
| } |
| |
| static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, |
| gpointer data) |
| { |
| RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; |
| RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; |
| if (ctx->tc & RISCV_IOMMU_DC_TC_V && |
| ctx->devid == arg->devid) { |
| ctx->tc &= ~RISCV_IOMMU_DC_TC_V; |
| } |
| } |
| |
| static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, |
| gpointer data) |
| { |
| RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; |
| if (ctx->tc & RISCV_IOMMU_DC_TC_V) { |
| ctx->tc &= ~RISCV_IOMMU_DC_TC_V; |
| } |
| } |
| |
| static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, |
| uint32_t devid, uint32_t process_id) |
| { |
| GHashTable *ctx_cache; |
| RISCVIOMMUContext key = { |
| .devid = devid, |
| .process_id = process_id, |
| }; |
| ctx_cache = g_hash_table_ref(s->ctx_cache); |
| g_hash_table_foreach(ctx_cache, func, &key); |
| g_hash_table_unref(ctx_cache); |
| } |
| |
| /* Find or allocate translation context for a given {device_id, process_id} */ |
| static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, |
| unsigned devid, unsigned process_id, |
| void **ref) |
| { |
| GHashTable *ctx_cache; |
| RISCVIOMMUContext *ctx; |
| RISCVIOMMUContext key = { |
| .devid = devid, |
| .process_id = process_id, |
| }; |
| |
| ctx_cache = g_hash_table_ref(s->ctx_cache); |
| ctx = g_hash_table_lookup(ctx_cache, &key); |
| |
| if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { |
| *ref = ctx_cache; |
| return ctx; |
| } |
| |
| ctx = g_new0(RISCVIOMMUContext, 1); |
| ctx->devid = devid; |
| ctx->process_id = process_id; |
| |
| int fault = riscv_iommu_ctx_fetch(s, ctx); |
| if (!fault) { |
| if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { |
| g_hash_table_unref(ctx_cache); |
| ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, |
| riscv_iommu_ctx_equal, |
| g_free, NULL); |
| g_hash_table_ref(ctx_cache); |
| g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); |
| } |
| g_hash_table_add(ctx_cache, ctx); |
| *ref = ctx_cache; |
| return ctx; |
| } |
| |
| g_hash_table_unref(ctx_cache); |
| *ref = NULL; |
| |
| riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, |
| fault, !!process_id, 0, 0); |
| |
| g_free(ctx); |
| return NULL; |
| } |
| |
| static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) |
| { |
| if (ref) { |
| g_hash_table_unref((GHashTable *)ref); |
| } |
| } |
| |
| /* Find or allocate address space for a given device */ |
| static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) |
| { |
| RISCVIOMMUSpace *as; |
| |
| /* FIXME: PCIe bus remapping for attached endpoints. */ |
| devid |= s->bus << 8; |
| |
| QLIST_FOREACH(as, &s->spaces, list) { |
| if (as->devid == devid) { |
| break; |
| } |
| } |
| |
| if (as == NULL) { |
| char name[64]; |
| as = g_new0(RISCVIOMMUSpace, 1); |
| |
| as->iommu = s; |
| as->devid = devid; |
| |
| snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", |
| PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); |
| |
| /* IOVA address space, untranslated addresses */ |
| memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), |
| TYPE_RISCV_IOMMU_MEMORY_REGION, |
| OBJECT(as), "riscv_iommu", UINT64_MAX); |
| address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); |
| |
| QLIST_INSERT_HEAD(&s->spaces, as, list); |
| |
| trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), |
| PCI_SLOT(as->devid), PCI_FUNC(as->devid)); |
| } |
| return &as->iova_as; |
| } |
| |
| static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, |
| IOMMUTLBEntry *iotlb) |
| { |
| bool enable_pid; |
| bool enable_pri; |
| int fault; |
| |
| /* |
| * TC[32] is reserved for custom extensions, used here to temporarily |
| * enable automatic page-request generation for ATS queries. |
| */ |
| enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); |
| enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); |
| |
| /* Translate using device directory / page table information. */ |
| fault = riscv_iommu_spa_fetch(s, ctx, iotlb); |
| |
| if (enable_pri && fault) { |
| struct riscv_iommu_pq_record pr = {0}; |
| if (enable_pid) { |
| pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, |
| RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); |
| } |
| pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); |
| pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | |
| RISCV_IOMMU_PREQ_PAYLOAD_M; |
| riscv_iommu_pri(s, &pr); |
| return fault; |
| } |
| |
| if (fault) { |
| unsigned ttype; |
| |
| if (iotlb->perm & IOMMU_RW) { |
| ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; |
| } else { |
| ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; |
| } |
| |
| riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, |
| iotlb->iova, iotlb->translated_addr); |
| return fault; |
| } |
| |
| return 0; |
| } |
| |
| /* IOMMU Command Interface */ |
| static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, |
| uint64_t addr, uint32_t data) |
| { |
| /* |
| * ATS processing in this implementation of the IOMMU is synchronous, |
| * no need to wait for completions here. |
| */ |
| if (!notify) { |
| return MEMTX_OK; |
| } |
| |
| return dma_memory_write(s->target_as, addr, &data, sizeof(data), |
| MEMTXATTRS_UNSPECIFIED); |
| } |
| |
| static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) |
| { |
| uint64_t old_ddtp = s->ddtp; |
| uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); |
| unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); |
| unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); |
| bool ok = false; |
| |
| /* |
| * Check for allowed DDTP.MODE transitions: |
| * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} |
| * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} |
| */ |
| if (new_mode == old_mode || |
| new_mode == RISCV_IOMMU_DDTP_MODE_OFF || |
| new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { |
| ok = true; |
| } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || |
| new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || |
| new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { |
| ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || |
| old_mode == RISCV_IOMMU_DDTP_MODE_BARE; |
| } |
| |
| if (ok) { |
| /* clear reserved and busy bits, report back sanitized version */ |
| new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, |
| RISCV_IOMMU_DDTP_MODE, new_mode); |
| } else { |
| new_ddtp = old_ddtp; |
| } |
| s->ddtp = new_ddtp; |
| |
| riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); |
| } |
| |
| /* Command function and opcode field. */ |
| #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) |
| |
| static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) |
| { |
| struct riscv_iommu_command cmd; |
| MemTxResult res; |
| dma_addr_t addr; |
| uint32_t tail, head, ctrl; |
| uint64_t cmd_opcode; |
| GHFunc func; |
| |
| ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); |
| tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; |
| head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; |
| |
| /* Check for pending error or queue processing disabled */ |
| if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || |
| !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { |
| return; |
| } |
| |
| while (tail != head) { |
| addr = s->cq_addr + head * sizeof(cmd); |
| res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), |
| MEMTXATTRS_UNSPECIFIED); |
| |
| if (res != MEMTX_OK) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, |
| RISCV_IOMMU_CQCSR_CQMF, 0); |
| goto fault; |
| } |
| |
| trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); |
| |
| cmd_opcode = get_field(cmd.dword0, |
| RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); |
| |
| switch (cmd_opcode) { |
| case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, |
| RISCV_IOMMU_CMD_IOFENCE_OPCODE): |
| res = riscv_iommu_iofence(s, |
| cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, |
| get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); |
| |
| if (res != MEMTX_OK) { |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, |
| RISCV_IOMMU_CQCSR_CQMF, 0); |
| goto fault; |
| } |
| break; |
| |
| case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, |
| RISCV_IOMMU_CMD_IOTINVAL_OPCODE): |
| if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) { |
| /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ |
| goto cmd_ill; |
| } |
| /* translation cache not implemented yet */ |
| break; |
| |
| case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, |
| RISCV_IOMMU_CMD_IOTINVAL_OPCODE): |
| /* translation cache not implemented yet */ |
| break; |
| |
| case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, |
| RISCV_IOMMU_CMD_IODIR_OPCODE): |
| if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { |
| /* invalidate all device context cache mappings */ |
| func = riscv_iommu_ctx_inval_all; |
| } else { |
| /* invalidate all device context matching DID */ |
| func = riscv_iommu_ctx_inval_devid; |
| } |
| riscv_iommu_ctx_inval(s, func, |
| get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); |
| break; |
| |
| case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, |
| RISCV_IOMMU_CMD_IODIR_OPCODE): |
| if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { |
| /* illegal command arguments IODIR_PDT & DV == 0 */ |
| goto cmd_ill; |
| } else { |
| func = riscv_iommu_ctx_inval_devid_procid; |
| } |
| riscv_iommu_ctx_inval(s, func, |
| get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), |
| get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); |
| break; |
| |
| default: |
| cmd_ill: |
| /* Invalid instruction, do not advance instruction index. */ |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, |
| RISCV_IOMMU_CQCSR_CMD_ILL, 0); |
| goto fault; |
| } |
| |
| /* Advance and update head pointer after command completes. */ |
| head = (head + 1) & s->cq_mask; |
| riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); |
| } |
| return; |
| |
| fault: |
| if (ctrl & RISCV_IOMMU_CQCSR_CIE) { |
| riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); |
| } |
| } |
| |
| static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) |
| { |
| uint64_t base; |
| uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); |
| uint32_t ctrl_clr; |
| bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); |
| bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); |
| |
| if (enable && !active) { |
| base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); |
| s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; |
| s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); |
| ctrl_set = RISCV_IOMMU_CQCSR_CQON; |
| ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | |
| RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | |
| RISCV_IOMMU_CQCSR_FENCE_W_IP; |
| } else if (!enable && active) { |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; |
| } else { |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; |
| } |
| |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); |
| } |
| |
| static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) |
| { |
| uint64_t base; |
| uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); |
| uint32_t ctrl_clr; |
| bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); |
| bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); |
| |
| if (enable && !active) { |
| base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); |
| s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; |
| s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); |
| ctrl_set = RISCV_IOMMU_FQCSR_FQON; |
| ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | |
| RISCV_IOMMU_FQCSR_FQOF; |
| } else if (!enable && active) { |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; |
| } else { |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; |
| } |
| |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); |
| } |
| |
| static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) |
| { |
| uint64_t base; |
| uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); |
| uint32_t ctrl_clr; |
| bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); |
| bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); |
| |
| if (enable && !active) { |
| base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); |
| s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; |
| s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); |
| stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); |
| ctrl_set = RISCV_IOMMU_PQCSR_PQON; |
| ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | |
| RISCV_IOMMU_PQCSR_PQOF; |
| } else if (!enable && active) { |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; |
| } else { |
| ctrl_set = 0; |
| ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; |
| } |
| |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); |
| } |
| |
| typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); |
| |
| static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) |
| { |
| uint64_t icvec = 0; |
| |
| icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, |
| s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); |
| |
| icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, |
| s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); |
| |
| icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, |
| s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); |
| |
| icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, |
| s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV); |
| |
| trace_riscv_iommu_icvec_write(data, icvec); |
| |
| riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); |
| } |
| |
| static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) |
| { |
| uint32_t cqcsr, fqcsr, pqcsr; |
| uint32_t ipsr_set = 0; |
| uint32_t ipsr_clr = 0; |
| |
| if (data & RISCV_IOMMU_IPSR_CIP) { |
| cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); |
| |
| if (cqcsr & RISCV_IOMMU_CQCSR_CIE && |
| (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || |
| cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || |
| cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || |
| cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { |
| ipsr_set |= RISCV_IOMMU_IPSR_CIP; |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_CIP; |
| } |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_CIP; |
| } |
| |
| if (data & RISCV_IOMMU_IPSR_FIP) { |
| fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); |
| |
| if (fqcsr & RISCV_IOMMU_FQCSR_FIE && |
| (fqcsr & RISCV_IOMMU_FQCSR_FQOF || |
| fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { |
| ipsr_set |= RISCV_IOMMU_IPSR_FIP; |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_FIP; |
| } |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_FIP; |
| } |
| |
| if (data & RISCV_IOMMU_IPSR_PIP) { |
| pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); |
| |
| if (pqcsr & RISCV_IOMMU_PQCSR_PIE && |
| (pqcsr & RISCV_IOMMU_PQCSR_PQOF || |
| pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { |
| ipsr_set |= RISCV_IOMMU_IPSR_PIP; |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_PIP; |
| } |
| } else { |
| ipsr_clr |= RISCV_IOMMU_IPSR_PIP; |
| } |
| |
| riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); |
| } |
| |
| /* |
| * Write the resulting value of 'data' for the reg specified |
| * by 'reg_addr', after considering read-only/read-write/write-clear |
| * bits, in the pointer 'dest'. |
| * |
| * The result is written in little-endian. |
| */ |
| static void riscv_iommu_write_reg_val(RISCVIOMMUState *s, |
| void *dest, hwaddr reg_addr, |
| int size, uint64_t data) |
| { |
| uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size); |
| uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size); |
| uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size); |
| |
| stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc)); |
| } |
| |
| static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, |
| uint64_t data, unsigned size, |
| MemTxAttrs attrs) |
| { |
| riscv_iommu_process_fn *process_fn = NULL; |
| RISCVIOMMUState *s = opaque; |
| uint32_t regb = addr & ~3; |
| uint32_t busy = 0; |
| uint64_t val = 0; |
| |
| if ((addr & (size - 1)) != 0) { |
| /* Unsupported MMIO alignment or access size */ |
| return MEMTX_ERROR; |
| } |
| |
| if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { |
| /* Unsupported MMIO access location. */ |
| return MEMTX_ACCESS_ERROR; |
| } |
| |
| /* Track actionable MMIO write. */ |
| switch (regb) { |
| case RISCV_IOMMU_REG_DDTP: |
| case RISCV_IOMMU_REG_DDTP + 4: |
| process_fn = riscv_iommu_process_ddtp; |
| regb = RISCV_IOMMU_REG_DDTP; |
| busy = RISCV_IOMMU_DDTP_BUSY; |
| break; |
| |
| case RISCV_IOMMU_REG_CQT: |
| process_fn = riscv_iommu_process_cq_tail; |
| break; |
| |
| case RISCV_IOMMU_REG_CQCSR: |
| process_fn = riscv_iommu_process_cq_control; |
| busy = RISCV_IOMMU_CQCSR_BUSY; |
| break; |
| |
| case RISCV_IOMMU_REG_FQCSR: |
| process_fn = riscv_iommu_process_fq_control; |
| busy = RISCV_IOMMU_FQCSR_BUSY; |
| break; |
| |
| case RISCV_IOMMU_REG_PQCSR: |
| process_fn = riscv_iommu_process_pq_control; |
| busy = RISCV_IOMMU_PQCSR_BUSY; |
| break; |
| |
| case RISCV_IOMMU_REG_ICVEC: |
| case RISCV_IOMMU_REG_IPSR: |
| /* |
| * ICVEC and IPSR have special read/write procedures. We'll |
| * call their respective helpers and exit. |
| */ |
| riscv_iommu_write_reg_val(s, &val, addr, size, data); |
| |
| /* |
| * 'val' is stored as LE. Switch to host endianess |
| * before using it. |
| */ |
| val = le64_to_cpu(val); |
| |
| if (regb == RISCV_IOMMU_REG_ICVEC) { |
| riscv_iommu_update_icvec(s, val); |
| } else { |
| riscv_iommu_update_ipsr(s, val); |
| } |
| |
| return MEMTX_OK; |
| |
| default: |
| break; |
| } |
| |
| /* |
| * Registers update might be not synchronized with core logic. |
| * If system software updates register when relevant BUSY bit |
| * is set IOMMU behavior of additional writes to the register |
| * is UNSPECIFIED. |
| */ |
| riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data); |
| |
| /* Busy flag update, MSB 4-byte register. */ |
| if (busy) { |
| uint32_t rw = ldl_le_p(&s->regs_rw[regb]); |
| stl_le_p(&s->regs_rw[regb], rw | busy); |
| } |
| |
| if (process_fn) { |
| process_fn(s); |
| } |
| |
| return MEMTX_OK; |
| } |
| |
| static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, |
| uint64_t *data, unsigned size, MemTxAttrs attrs) |
| { |
| RISCVIOMMUState *s = opaque; |
| uint64_t val = -1; |
| uint8_t *ptr; |
| |
| if ((addr & (size - 1)) != 0) { |
| /* Unsupported MMIO alignment. */ |
| return MEMTX_ERROR; |
| } |
| |
| if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { |
| return MEMTX_ACCESS_ERROR; |
| } |
| |
| ptr = &s->regs_rw[addr]; |
| val = ldn_le_p(ptr, size); |
| |
| *data = val; |
| |
| return MEMTX_OK; |
| } |
| |
| static const MemoryRegionOps riscv_iommu_mmio_ops = { |
| .read_with_attrs = riscv_iommu_mmio_read, |
| .write_with_attrs = riscv_iommu_mmio_write, |
| .endianness = DEVICE_NATIVE_ENDIAN, |
| .impl = { |
| .min_access_size = 4, |
| .max_access_size = 8, |
| .unaligned = false, |
| }, |
| .valid = { |
| .min_access_size = 4, |
| .max_access_size = 8, |
| } |
| }; |
| |
| /* |
| * Translations matching MSI pattern check are redirected to "riscv-iommu-trap" |
| * memory region as untranslated address, for additional MSI/MRIF interception |
| * by IOMMU interrupt remapping implementation. |
| * Note: Device emulation code generating an MSI is expected to provide a valid |
| * memory transaction attributes with requested_id set. |
| */ |
| static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr, |
| uint64_t data, unsigned size, MemTxAttrs attrs) |
| { |
| RISCVIOMMUState* s = (RISCVIOMMUState *)opaque; |
| RISCVIOMMUContext *ctx; |
| MemTxResult res; |
| void *ref; |
| uint32_t devid = attrs.requester_id; |
| |
| if (attrs.unspecified) { |
| return MEMTX_ACCESS_ERROR; |
| } |
| |
| /* FIXME: PCIe bus remapping for attached endpoints. */ |
| devid |= s->bus << 8; |
| |
| ctx = riscv_iommu_ctx(s, devid, 0, &ref); |
| if (ctx == NULL) { |
| res = MEMTX_ACCESS_ERROR; |
| } else { |
| res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs); |
| } |
| riscv_iommu_ctx_put(s, ref); |
| return res; |
| } |
| |
| static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr, |
| uint64_t *data, unsigned size, MemTxAttrs attrs) |
| { |
| return MEMTX_ACCESS_ERROR; |
| } |
| |
| static const MemoryRegionOps riscv_iommu_trap_ops = { |
| .read_with_attrs = riscv_iommu_trap_read, |
| .write_with_attrs = riscv_iommu_trap_write, |
| .endianness = DEVICE_LITTLE_ENDIAN, |
| .impl = { |
| .min_access_size = 4, |
| .max_access_size = 8, |
| .unaligned = true, |
| }, |
| .valid = { |
| .min_access_size = 4, |
| .max_access_size = 8, |
| } |
| }; |
| |
| static void riscv_iommu_realize(DeviceState *dev, Error **errp) |
| { |
| RISCVIOMMUState *s = RISCV_IOMMU(dev); |
| |
| s->cap = s->version & RISCV_IOMMU_CAP_VERSION; |
| if (s->enable_msi) { |
| s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF; |
| } |
| if (s->enable_s_stage) { |
| s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 | |
| RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57; |
| } |
| if (s->enable_g_stage) { |
| s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | |
| RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; |
| } |
| /* Report QEMU target physical address space limits */ |
| s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS, |
| TARGET_PHYS_ADDR_SPACE_BITS); |
| |
| /* TODO: method to report supported PID bits */ |
| s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */ |
| s->cap |= RISCV_IOMMU_CAP_PD8; |
| |
| /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */ |
| s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ? |
| RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE); |
| |
| /* register storage */ |
| s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); |
| s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); |
| s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); |
| |
| /* Mark all registers read-only */ |
| memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE); |
| |
| /* |
| * Register complete MMIO space, including MSI/PBA registers. |
| * Note, PCIDevice implementation will add overlapping MR for MSI/PBA, |
| * managed directly by the PCIDevice implementation. |
| */ |
| memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s, |
| "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); |
| |
| /* Set power-on register state */ |
| stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); |
| stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); |
| stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], |
| ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); |
| stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], |
| ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); |
| stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], |
| ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); |
| stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], |
| ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); |
| stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], |
| ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); |
| stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF | |
| RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON | |
| RISCV_IOMMU_CQCSR_BUSY); |
| stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF | |
| RISCV_IOMMU_FQCSR_FQOF); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON | |
| RISCV_IOMMU_FQCSR_BUSY); |
| stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF | |
| RISCV_IOMMU_PQCSR_PQOF); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | |
| RISCV_IOMMU_PQCSR_BUSY); |
| stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); |
| stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); |
| stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); |
| |
| /* Memory region for downstream access, if specified. */ |
| if (s->target_mr) { |
| s->target_as = g_new0(AddressSpace, 1); |
| address_space_init(s->target_as, s->target_mr, |
| "riscv-iommu-downstream"); |
| } else { |
| /* Fallback to global system memory. */ |
| s->target_as = &address_space_memory; |
| } |
| |
| /* Memory region for untranslated MRIF/MSI writes */ |
| memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, |
| "riscv-iommu-trap", ~0ULL); |
| address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); |
| |
| /* Device translation context cache */ |
| s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, |
| riscv_iommu_ctx_equal, |
| g_free, NULL); |
| |
| s->iommus.le_next = NULL; |
| s->iommus.le_prev = NULL; |
| QLIST_INIT(&s->spaces); |
| } |
| |
| static void riscv_iommu_unrealize(DeviceState *dev) |
| { |
| RISCVIOMMUState *s = RISCV_IOMMU(dev); |
| |
| g_hash_table_unref(s->ctx_cache); |
| } |
| |
| static Property riscv_iommu_properties[] = { |
| DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, |
| RISCV_IOMMU_SPEC_DOT_VER), |
| DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), |
| DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), |
| DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), |
| DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), |
| DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), |
| DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, |
| TYPE_MEMORY_REGION, MemoryRegion *), |
| DEFINE_PROP_END_OF_LIST(), |
| }; |
| |
| static void riscv_iommu_class_init(ObjectClass *klass, void* data) |
| { |
| DeviceClass *dc = DEVICE_CLASS(klass); |
| |
| /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ |
| dc->user_creatable = false; |
| dc->realize = riscv_iommu_realize; |
| dc->unrealize = riscv_iommu_unrealize; |
| device_class_set_props(dc, riscv_iommu_properties); |
| } |
| |
| static const TypeInfo riscv_iommu_info = { |
| .name = TYPE_RISCV_IOMMU, |
| .parent = TYPE_DEVICE, |
| .instance_size = sizeof(RISCVIOMMUState), |
| .class_init = riscv_iommu_class_init, |
| }; |
| |
| static const char *IOMMU_FLAG_STR[] = { |
| "NA", |
| "RO", |
| "WR", |
| "RW", |
| }; |
| |
| /* RISC-V IOMMU Memory Region - Address Translation Space */ |
| static IOMMUTLBEntry riscv_iommu_memory_region_translate( |
| IOMMUMemoryRegion *iommu_mr, hwaddr addr, |
| IOMMUAccessFlags flag, int iommu_idx) |
| { |
| RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); |
| RISCVIOMMUContext *ctx; |
| void *ref; |
| IOMMUTLBEntry iotlb = { |
| .iova = addr, |
| .target_as = as->iommu->target_as, |
| .addr_mask = ~0ULL, |
| .perm = flag, |
| }; |
| |
| ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); |
| if (ctx == NULL) { |
| /* Translation disabled or invalid. */ |
| iotlb.addr_mask = 0; |
| iotlb.perm = IOMMU_NONE; |
| } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb)) { |
| /* Translation disabled or fault reported. */ |
| iotlb.addr_mask = 0; |
| iotlb.perm = IOMMU_NONE; |
| } |
| |
| /* Trace all dma translations with original access flags. */ |
| trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), |
| PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, |
| IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, |
| iotlb.translated_addr); |
| |
| riscv_iommu_ctx_put(as->iommu, ref); |
| |
| return iotlb; |
| } |
| |
| static int riscv_iommu_memory_region_notify( |
| IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, |
| IOMMUNotifierFlag new, Error **errp) |
| { |
| RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); |
| |
| if (old == IOMMU_NOTIFIER_NONE) { |
| as->notifier = true; |
| trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); |
| } else if (new == IOMMU_NOTIFIER_NONE) { |
| as->notifier = false; |
| trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); |
| } |
| |
| return 0; |
| } |
| |
| static inline bool pci_is_iommu(PCIDevice *pdev) |
| { |
| return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; |
| } |
| |
| static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) |
| { |
| RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; |
| PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); |
| AddressSpace *as = NULL; |
| |
| if (pdev && pci_is_iommu(pdev)) { |
| return s->target_as; |
| } |
| |
| /* Find first registered IOMMU device */ |
| while (s->iommus.le_prev) { |
| s = *(s->iommus.le_prev); |
| } |
| |
| /* Find first matching IOMMU */ |
| while (s != NULL && as == NULL) { |
| as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn)); |
| s = s->iommus.le_next; |
| } |
| |
| return as ? as : &address_space_memory; |
| } |
| |
| static const PCIIOMMUOps riscv_iommu_ops = { |
| .get_address_space = riscv_iommu_find_as, |
| }; |
| |
| void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, |
| Error **errp) |
| { |
| if (bus->iommu_ops && |
| bus->iommu_ops->get_address_space == riscv_iommu_find_as) { |
| /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ |
| RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; |
| QLIST_INSERT_AFTER(last, iommu, iommus); |
| } else if (!bus->iommu_ops && !bus->iommu_opaque) { |
| pci_setup_iommu(bus, &riscv_iommu_ops, iommu); |
| } else { |
| error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", |
| pci_bus_num(bus)); |
| } |
| } |
| |
| static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, |
| MemTxAttrs attrs) |
| { |
| return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid; |
| } |
| |
| static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) |
| { |
| RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); |
| return 1 << as->iommu->pid_bits; |
| } |
| |
| static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data) |
| { |
| IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); |
| |
| imrc->translate = riscv_iommu_memory_region_translate; |
| imrc->notify_flag_changed = riscv_iommu_memory_region_notify; |
| imrc->attrs_to_index = riscv_iommu_memory_region_index; |
| imrc->num_indexes = riscv_iommu_memory_region_index_len; |
| } |
| |
| static const TypeInfo riscv_iommu_memory_region_info = { |
| .parent = TYPE_IOMMU_MEMORY_REGION, |
| .name = TYPE_RISCV_IOMMU_MEMORY_REGION, |
| .class_init = riscv_iommu_memory_region_init, |
| }; |
| |
| static void riscv_iommu_register_mr_types(void) |
| { |
| type_register_static(&riscv_iommu_memory_region_info); |
| type_register_static(&riscv_iommu_info); |
| } |
| |
| type_init(riscv_iommu_register_mr_types); |