| /* |
| * DMA helper functions |
| * |
| * Copyright (c) 2009,2020 Red Hat |
| * |
| * This work is licensed under the terms of the GNU General Public License |
| * (GNU GPL), version 2 or later. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "sysemu/block-backend.h" |
| #include "sysemu/dma.h" |
| #include "trace/trace-root.h" |
| #include "qemu/thread.h" |
| #include "qemu/main-loop.h" |
| #include "sysemu/cpu-timers.h" |
| #include "qemu/range.h" |
| |
| /* #define DEBUG_IOMMU */ |
| |
| MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, |
| uint8_t c, dma_addr_t len, MemTxAttrs attrs) |
| { |
| dma_barrier(as, DMA_DIRECTION_FROM_DEVICE); |
| |
| return address_space_set(as, addr, c, len, attrs); |
| } |
| |
| void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, |
| AddressSpace *as) |
| { |
| qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry)); |
| qsg->nsg = 0; |
| qsg->nalloc = alloc_hint; |
| qsg->size = 0; |
| qsg->as = as; |
| qsg->dev = dev; |
| object_ref(OBJECT(dev)); |
| } |
| |
| void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) |
| { |
| if (qsg->nsg == qsg->nalloc) { |
| qsg->nalloc = 2 * qsg->nalloc + 1; |
| qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry)); |
| } |
| qsg->sg[qsg->nsg].base = base; |
| qsg->sg[qsg->nsg].len = len; |
| qsg->size += len; |
| ++qsg->nsg; |
| } |
| |
| void qemu_sglist_destroy(QEMUSGList *qsg) |
| { |
| object_unref(OBJECT(qsg->dev)); |
| g_free(qsg->sg); |
| memset(qsg, 0, sizeof(*qsg)); |
| } |
| |
| typedef struct { |
| BlockAIOCB common; |
| AioContext *ctx; |
| BlockAIOCB *acb; |
| QEMUSGList *sg; |
| uint32_t align; |
| uint64_t offset; |
| DMADirection dir; |
| int sg_cur_index; |
| dma_addr_t sg_cur_byte; |
| QEMUIOVector iov; |
| QEMUBH *bh; |
| DMAIOFunc *io_func; |
| void *io_func_opaque; |
| } DMAAIOCB; |
| |
| static void dma_blk_cb(void *opaque, int ret); |
| |
| static void reschedule_dma(void *opaque) |
| { |
| DMAAIOCB *dbs = (DMAAIOCB *)opaque; |
| |
| assert(!dbs->acb && dbs->bh); |
| qemu_bh_delete(dbs->bh); |
| dbs->bh = NULL; |
| dma_blk_cb(dbs, 0); |
| } |
| |
| static void dma_blk_unmap(DMAAIOCB *dbs) |
| { |
| int i; |
| |
| for (i = 0; i < dbs->iov.niov; ++i) { |
| dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base, |
| dbs->iov.iov[i].iov_len, dbs->dir, |
| dbs->iov.iov[i].iov_len); |
| } |
| qemu_iovec_reset(&dbs->iov); |
| } |
| |
| static void dma_complete(DMAAIOCB *dbs, int ret) |
| { |
| trace_dma_complete(dbs, ret, dbs->common.cb); |
| |
| assert(!dbs->acb && !dbs->bh); |
| dma_blk_unmap(dbs); |
| if (dbs->common.cb) { |
| dbs->common.cb(dbs->common.opaque, ret); |
| } |
| qemu_iovec_destroy(&dbs->iov); |
| qemu_aio_unref(dbs); |
| } |
| |
| static void dma_blk_cb(void *opaque, int ret) |
| { |
| DMAAIOCB *dbs = (DMAAIOCB *)opaque; |
| dma_addr_t cur_addr, cur_len; |
| void *mem; |
| |
| trace_dma_blk_cb(dbs, ret); |
| |
| dbs->acb = NULL; |
| dbs->offset += dbs->iov.size; |
| |
| if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { |
| dma_complete(dbs, ret); |
| return; |
| } |
| dma_blk_unmap(dbs); |
| |
| while (dbs->sg_cur_index < dbs->sg->nsg) { |
| cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; |
| cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; |
| mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir, |
| MEMTXATTRS_UNSPECIFIED); |
| /* |
| * Make reads deterministic in icount mode. Windows sometimes issues |
| * disk read requests with overlapping SGs. It leads |
| * to non-determinism, because resulting buffer contents may be mixed |
| * from several sectors. This code splits all SGs into several |
| * groups. SGs in every group do not overlap. |
| */ |
| if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { |
| int i; |
| for (i = 0 ; i < dbs->iov.niov ; ++i) { |
| if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, |
| dbs->iov.iov[i].iov_len, (intptr_t)mem, |
| cur_len)) { |
| dma_memory_unmap(dbs->sg->as, mem, cur_len, |
| dbs->dir, cur_len); |
| mem = NULL; |
| break; |
| } |
| } |
| } |
| if (!mem) |
| break; |
| qemu_iovec_add(&dbs->iov, mem, cur_len); |
| dbs->sg_cur_byte += cur_len; |
| if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) { |
| dbs->sg_cur_byte = 0; |
| ++dbs->sg_cur_index; |
| } |
| } |
| |
| if (dbs->iov.size == 0) { |
| trace_dma_map_wait(dbs); |
| dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); |
| cpu_register_map_client(dbs->bh); |
| return; |
| } |
| |
| if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { |
| qemu_iovec_discard_back(&dbs->iov, |
| QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); |
| } |
| |
| aio_context_acquire(dbs->ctx); |
| dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, |
| dma_blk_cb, dbs, dbs->io_func_opaque); |
| aio_context_release(dbs->ctx); |
| assert(dbs->acb); |
| } |
| |
| static void dma_aio_cancel(BlockAIOCB *acb) |
| { |
| DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); |
| |
| trace_dma_aio_cancel(dbs); |
| |
| assert(!(dbs->acb && dbs->bh)); |
| if (dbs->acb) { |
| /* This will invoke dma_blk_cb. */ |
| blk_aio_cancel_async(dbs->acb); |
| return; |
| } |
| |
| if (dbs->bh) { |
| cpu_unregister_map_client(dbs->bh); |
| qemu_bh_delete(dbs->bh); |
| dbs->bh = NULL; |
| } |
| if (dbs->common.cb) { |
| dbs->common.cb(dbs->common.opaque, -ECANCELED); |
| } |
| } |
| |
| static AioContext *dma_get_aio_context(BlockAIOCB *acb) |
| { |
| DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); |
| |
| return dbs->ctx; |
| } |
| |
| static const AIOCBInfo dma_aiocb_info = { |
| .aiocb_size = sizeof(DMAAIOCB), |
| .cancel_async = dma_aio_cancel, |
| .get_aio_context = dma_get_aio_context, |
| }; |
| |
| BlockAIOCB *dma_blk_io(AioContext *ctx, |
| QEMUSGList *sg, uint64_t offset, uint32_t align, |
| DMAIOFunc *io_func, void *io_func_opaque, |
| BlockCompletionFunc *cb, |
| void *opaque, DMADirection dir) |
| { |
| DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); |
| |
| trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); |
| |
| dbs->acb = NULL; |
| dbs->sg = sg; |
| dbs->ctx = ctx; |
| dbs->offset = offset; |
| dbs->align = align; |
| dbs->sg_cur_index = 0; |
| dbs->sg_cur_byte = 0; |
| dbs->dir = dir; |
| dbs->io_func = io_func; |
| dbs->io_func_opaque = io_func_opaque; |
| dbs->bh = NULL; |
| qemu_iovec_init(&dbs->iov, sg->nsg); |
| dma_blk_cb(dbs, 0); |
| return &dbs->common; |
| } |
| |
| |
| static |
| BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, |
| BlockCompletionFunc *cb, void *cb_opaque, |
| void *opaque) |
| { |
| BlockBackend *blk = opaque; |
| return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); |
| } |
| |
| BlockAIOCB *dma_blk_read(BlockBackend *blk, |
| QEMUSGList *sg, uint64_t offset, uint32_t align, |
| void (*cb)(void *opaque, int ret), void *opaque) |
| { |
| return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, |
| dma_blk_read_io_func, blk, cb, opaque, |
| DMA_DIRECTION_FROM_DEVICE); |
| } |
| |
| static |
| BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, |
| BlockCompletionFunc *cb, void *cb_opaque, |
| void *opaque) |
| { |
| BlockBackend *blk = opaque; |
| return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); |
| } |
| |
| BlockAIOCB *dma_blk_write(BlockBackend *blk, |
| QEMUSGList *sg, uint64_t offset, uint32_t align, |
| void (*cb)(void *opaque, int ret), void *opaque) |
| { |
| return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, |
| dma_blk_write_io_func, blk, cb, opaque, |
| DMA_DIRECTION_TO_DEVICE); |
| } |
| |
| |
| static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, |
| QEMUSGList *sg, DMADirection dir, |
| MemTxAttrs attrs) |
| { |
| uint8_t *ptr = buf; |
| dma_addr_t xresidual; |
| int sg_cur_index; |
| MemTxResult res = MEMTX_OK; |
| |
| xresidual = sg->size; |
| sg_cur_index = 0; |
| len = MIN(len, xresidual); |
| while (len > 0) { |
| ScatterGatherEntry entry = sg->sg[sg_cur_index++]; |
| dma_addr_t xfer = MIN(len, entry.len); |
| res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs); |
| ptr += xfer; |
| len -= xfer; |
| xresidual -= xfer; |
| } |
| |
| if (residual) { |
| *residual = xresidual; |
| } |
| return res; |
| } |
| |
| MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, |
| QEMUSGList *sg, MemTxAttrs attrs) |
| { |
| return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs); |
| } |
| |
| MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, |
| QEMUSGList *sg, MemTxAttrs attrs) |
| { |
| return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs); |
| } |
| |
| void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, |
| QEMUSGList *sg, enum BlockAcctType type) |
| { |
| block_acct_start(blk_get_stats(blk), cookie, sg->size, type); |
| } |
| |
/*
 * Compute a power-of-two-minus-one mask for a region beginning at @start,
 * given the inclusive range [@start, @end] and an address width of
 * @max_addr_bits bits.
 *
 * The mask is limited by the natural alignment of @start, by the span
 * @end - @start and by the address width.  When the alignment of @start is
 * the tighter limit its mask is returned; otherwise the result is derived
 * from the span.
 */
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    const uint64_t span = end - start;
    uint64_t limit, align_mask, span_mask;

    /* A 64-bit shift would be undefined, so handle the full width apart. */
    limit = (max_addr_bits == 64) ? UINT64_MAX
                                  : (1ULL << max_addr_bits) - 1;

    if (start != 0) {
        /* Lowest set bit of start, minus one. */
        align_mask = (start & -start) - 1;
    } else {
        align_mask = limit;
    }
    align_mask = MIN(align_mask, limit);
    span_mask = MIN(span, limit);

    if (align_mask <= span_mask) {
        /* Increase the alignment of start */
        return align_mask;
    }

    /* Find the largest page mask from size */
    if (span == UINT64_MAX) {
        return UINT64_MAX;
    }
    return (1ULL << (63 - clz64(span + 1))) - 1;
}
| |