|  | /* | 
|  | * DMA helper functions | 
|  | * | 
|  | * Copyright (c) 2009,2020 Red Hat | 
|  | * | 
|  | * This work is licensed under the terms of the GNU General Public License | 
|  | * (GNU GPL), version 2 or later. | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "sysemu/block-backend.h" | 
|  | #include "sysemu/dma.h" | 
|  | #include "trace/trace-root.h" | 
|  | #include "qemu/thread.h" | 
|  | #include "qemu/main-loop.h" | 
|  | #include "sysemu/cpu-timers.h" | 
|  | #include "qemu/range.h" | 
|  |  | 
|  | /* #define DEBUG_IOMMU */ | 
|  |  | 
|  | MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, | 
|  | uint8_t c, dma_addr_t len, MemTxAttrs attrs) | 
|  | { | 
|  | dma_barrier(as, DMA_DIRECTION_FROM_DEVICE); | 
|  |  | 
|  | return address_space_set(as, addr, c, len, attrs); | 
|  | } | 
|  |  | 
|  | void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, | 
|  | AddressSpace *as) | 
|  | { | 
|  | qsg->sg = g_new(ScatterGatherEntry, alloc_hint); | 
|  | qsg->nsg = 0; | 
|  | qsg->nalloc = alloc_hint; | 
|  | qsg->size = 0; | 
|  | qsg->as = as; | 
|  | qsg->dev = dev; | 
|  | object_ref(OBJECT(dev)); | 
|  | } | 
|  |  | 
|  | void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) | 
|  | { | 
|  | if (qsg->nsg == qsg->nalloc) { | 
|  | qsg->nalloc = 2 * qsg->nalloc + 1; | 
|  | qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc); | 
|  | } | 
|  | qsg->sg[qsg->nsg].base = base; | 
|  | qsg->sg[qsg->nsg].len = len; | 
|  | qsg->size += len; | 
|  | ++qsg->nsg; | 
|  | } | 
|  |  | 
|  | void qemu_sglist_destroy(QEMUSGList *qsg) | 
|  | { | 
|  | object_unref(OBJECT(qsg->dev)); | 
|  | g_free(qsg->sg); | 
|  | memset(qsg, 0, sizeof(*qsg)); | 
|  | } | 
|  |  | 
|  | typedef struct { | 
|  | BlockAIOCB common; | 
|  | AioContext *ctx; | 
|  | BlockAIOCB *acb; | 
|  | QEMUSGList *sg; | 
|  | uint32_t align; | 
|  | uint64_t offset; | 
|  | DMADirection dir; | 
|  | int sg_cur_index; | 
|  | dma_addr_t sg_cur_byte; | 
|  | QEMUIOVector iov; | 
|  | QEMUBH *bh; | 
|  | DMAIOFunc *io_func; | 
|  | void *io_func_opaque; | 
|  | } DMAAIOCB; | 
|  |  | 
|  | static void dma_blk_cb(void *opaque, int ret); | 
|  |  | 
|  | static void reschedule_dma(void *opaque) | 
|  | { | 
|  | DMAAIOCB *dbs = (DMAAIOCB *)opaque; | 
|  |  | 
|  | assert(!dbs->acb && dbs->bh); | 
|  | qemu_bh_delete(dbs->bh); | 
|  | dbs->bh = NULL; | 
|  | dma_blk_cb(dbs, 0); | 
|  | } | 
|  |  | 
|  | static void dma_blk_unmap(DMAAIOCB *dbs) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | for (i = 0; i < dbs->iov.niov; ++i) { | 
|  | dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base, | 
|  | dbs->iov.iov[i].iov_len, dbs->dir, | 
|  | dbs->iov.iov[i].iov_len); | 
|  | } | 
|  | qemu_iovec_reset(&dbs->iov); | 
|  | } | 
|  |  | 
|  | static void dma_complete(DMAAIOCB *dbs, int ret) | 
|  | { | 
|  | trace_dma_complete(dbs, ret, dbs->common.cb); | 
|  |  | 
|  | assert(!dbs->acb && !dbs->bh); | 
|  | dma_blk_unmap(dbs); | 
|  | if (dbs->common.cb) { | 
|  | dbs->common.cb(dbs->common.opaque, ret); | 
|  | } | 
|  | qemu_iovec_destroy(&dbs->iov); | 
|  | qemu_aio_unref(dbs); | 
|  | } | 
|  |  | 
|  | static void dma_blk_cb(void *opaque, int ret) | 
|  | { | 
|  | DMAAIOCB *dbs = (DMAAIOCB *)opaque; | 
|  | AioContext *ctx = dbs->ctx; | 
|  | dma_addr_t cur_addr, cur_len; | 
|  | void *mem; | 
|  |  | 
|  | trace_dma_blk_cb(dbs, ret); | 
|  |  | 
|  | /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */ | 
|  | assert(ctx == qemu_get_current_aio_context()); | 
|  |  | 
|  | dbs->acb = NULL; | 
|  | dbs->offset += dbs->iov.size; | 
|  |  | 
|  | if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { | 
|  | dma_complete(dbs, ret); | 
|  | return; | 
|  | } | 
|  | dma_blk_unmap(dbs); | 
|  |  | 
|  | while (dbs->sg_cur_index < dbs->sg->nsg) { | 
|  | cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; | 
|  | cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; | 
|  | mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir, | 
|  | MEMTXATTRS_UNSPECIFIED); | 
|  | /* | 
|  | * Make reads deterministic in icount mode. Windows sometimes issues | 
|  | * disk read requests with overlapping SGs. It leads | 
|  | * to non-determinism, because resulting buffer contents may be mixed | 
|  | * from several sectors. This code splits all SGs into several | 
|  | * groups. SGs in every group do not overlap. | 
|  | */ | 
|  | if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { | 
|  | int i; | 
|  | for (i = 0 ; i < dbs->iov.niov ; ++i) { | 
|  | if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, | 
|  | dbs->iov.iov[i].iov_len, (intptr_t)mem, | 
|  | cur_len)) { | 
|  | dma_memory_unmap(dbs->sg->as, mem, cur_len, | 
|  | dbs->dir, cur_len); | 
|  | mem = NULL; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | if (!mem) | 
|  | break; | 
|  | qemu_iovec_add(&dbs->iov, mem, cur_len); | 
|  | dbs->sg_cur_byte += cur_len; | 
|  | if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) { | 
|  | dbs->sg_cur_byte = 0; | 
|  | ++dbs->sg_cur_index; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (dbs->iov.size == 0) { | 
|  | trace_dma_map_wait(dbs); | 
|  | dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); | 
|  | address_space_register_map_client(dbs->sg->as, dbs->bh); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { | 
|  | qemu_iovec_discard_back(&dbs->iov, | 
|  | QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); | 
|  | } | 
|  |  | 
|  | dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, | 
|  | dma_blk_cb, dbs, dbs->io_func_opaque); | 
|  | assert(dbs->acb); | 
|  | } | 
|  |  | 
|  | static void dma_aio_cancel(BlockAIOCB *acb) | 
|  | { | 
|  | DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); | 
|  |  | 
|  | trace_dma_aio_cancel(dbs); | 
|  |  | 
|  | assert(!(dbs->acb && dbs->bh)); | 
|  | if (dbs->acb) { | 
|  | /* This will invoke dma_blk_cb.  */ | 
|  | blk_aio_cancel_async(dbs->acb); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (dbs->bh) { | 
|  | address_space_unregister_map_client(dbs->sg->as, dbs->bh); | 
|  | qemu_bh_delete(dbs->bh); | 
|  | dbs->bh = NULL; | 
|  | } | 
|  | if (dbs->common.cb) { | 
|  | dbs->common.cb(dbs->common.opaque, -ECANCELED); | 
|  | } | 
|  | } | 
|  |  | 
|  | static const AIOCBInfo dma_aiocb_info = { | 
|  | .aiocb_size         = sizeof(DMAAIOCB), | 
|  | .cancel_async       = dma_aio_cancel, | 
|  | }; | 
|  |  | 
|  | BlockAIOCB *dma_blk_io(AioContext *ctx, | 
|  | QEMUSGList *sg, uint64_t offset, uint32_t align, | 
|  | DMAIOFunc *io_func, void *io_func_opaque, | 
|  | BlockCompletionFunc *cb, | 
|  | void *opaque, DMADirection dir) | 
|  | { | 
|  | DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); | 
|  |  | 
|  | trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); | 
|  |  | 
|  | dbs->acb = NULL; | 
|  | dbs->sg = sg; | 
|  | dbs->ctx = ctx; | 
|  | dbs->offset = offset; | 
|  | dbs->align = align; | 
|  | dbs->sg_cur_index = 0; | 
|  | dbs->sg_cur_byte = 0; | 
|  | dbs->dir = dir; | 
|  | dbs->io_func = io_func; | 
|  | dbs->io_func_opaque = io_func_opaque; | 
|  | dbs->bh = NULL; | 
|  | qemu_iovec_init(&dbs->iov, sg->nsg); | 
|  | dma_blk_cb(dbs, 0); | 
|  | return &dbs->common; | 
|  | } | 
|  |  | 
|  |  | 
|  | static | 
|  | BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, | 
|  | BlockCompletionFunc *cb, void *cb_opaque, | 
|  | void *opaque) | 
|  | { | 
|  | BlockBackend *blk = opaque; | 
|  | return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); | 
|  | } | 
|  |  | 
|  | BlockAIOCB *dma_blk_read(BlockBackend *blk, | 
|  | QEMUSGList *sg, uint64_t offset, uint32_t align, | 
|  | void (*cb)(void *opaque, int ret), void *opaque) | 
|  | { | 
|  | return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, | 
|  | dma_blk_read_io_func, blk, cb, opaque, | 
|  | DMA_DIRECTION_FROM_DEVICE); | 
|  | } | 
|  |  | 
|  | static | 
|  | BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, | 
|  | BlockCompletionFunc *cb, void *cb_opaque, | 
|  | void *opaque) | 
|  | { | 
|  | BlockBackend *blk = opaque; | 
|  | return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); | 
|  | } | 
|  |  | 
|  | BlockAIOCB *dma_blk_write(BlockBackend *blk, | 
|  | QEMUSGList *sg, uint64_t offset, uint32_t align, | 
|  | void (*cb)(void *opaque, int ret), void *opaque) | 
|  | { | 
|  | return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, | 
|  | dma_blk_write_io_func, blk, cb, opaque, | 
|  | DMA_DIRECTION_TO_DEVICE); | 
|  | } | 
|  |  | 
|  |  | 
|  | static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, | 
|  | QEMUSGList *sg, DMADirection dir, | 
|  | MemTxAttrs attrs) | 
|  | { | 
|  | uint8_t *ptr = buf; | 
|  | dma_addr_t xresidual; | 
|  | int sg_cur_index; | 
|  | MemTxResult res = MEMTX_OK; | 
|  |  | 
|  | xresidual = sg->size; | 
|  | sg_cur_index = 0; | 
|  | len = MIN(len, xresidual); | 
|  | while (len > 0) { | 
|  | ScatterGatherEntry entry = sg->sg[sg_cur_index++]; | 
|  | dma_addr_t xfer = MIN(len, entry.len); | 
|  | res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs); | 
|  | ptr += xfer; | 
|  | len -= xfer; | 
|  | xresidual -= xfer; | 
|  | } | 
|  |  | 
|  | if (residual) { | 
|  | *residual = xresidual; | 
|  | } | 
|  | return res; | 
|  | } | 
|  |  | 
|  | MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, | 
|  | QEMUSGList *sg, MemTxAttrs attrs) | 
|  | { | 
|  | return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs); | 
|  | } | 
|  |  | 
|  | MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, | 
|  | QEMUSGList *sg, MemTxAttrs attrs) | 
|  | { | 
|  | return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs); | 
|  | } | 
|  |  | 
|  | void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, | 
|  | QEMUSGList *sg, enum BlockAcctType type) | 
|  | { | 
|  | block_acct_start(blk_get_stats(blk), cookie, sg->size, type); | 
|  | } | 
|  |  | 
/*
 * Compute a mask of the form 2^k - 1 for a range beginning at @start.
 *
 * @start:         first address of the range
 * @end:           end - start is used as the size mask of the range
 * @max_addr_bits: address width; masks are capped at 2^max_addr_bits - 1
 *                 (no cap is applied when this is 64)
 *
 * Two candidates are considered, both capped as described above:
 *  - the alignment mask of @start: its lowest set bit minus one, or the
 *    cap itself when @start is 0;
 *  - the size mask end - start.
 *
 * If the alignment mask does not exceed the size mask it is returned.
 * Otherwise the result is the largest 2^k - 1 with 2^k <= end - start + 1.
 */
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    const uint64_t addr_mask = end - start;
    uint64_t max_mask, alignment_mask, size_mask;

    max_mask = (max_addr_bits == 64) ? UINT64_MAX
                                     : (1ULL << max_addr_bits) - 1;

    if (start) {
        /* start & -start isolates the lowest set bit of start. */
        alignment_mask = (start & -start) - 1;
    } else {
        alignment_mask = max_mask;
    }
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    }

    /* Find the largest page mask from size */
    if (addr_mask == UINT64_MAX) {
        /* addr_mask + 1 would wrap to 0 below. */
        return UINT64_MAX;
    }
    return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
}
|  |  |