blob: ec9da7bd4621a03ca83dd1a53eca70a29f1f5761 [file] [log] [blame]
#
# Copyright (c) 2021 Nutanix Inc. All rights reserved.
#
# Authors: John Levon <john.levon@nutanix.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Nutanix nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
#
# Note that we don't use enum here, as class.value is a little verbose
#
from types import SimpleNamespace
import ctypes as c
import array
import errno
import json
import mmap
import os
import socket
import struct
import syslog
import copy
import tempfile
UINT64_MAX = 18446744073709551615
# from linux/pci_regs.h and linux/pci_defs.h
PCI_HEADER_TYPE_NORMAL = 0
PCI_STD_HEADER_SIZEOF = 64
PCI_BARS_NR = 6
PCI_PM_SIZEOF = 8
PCI_CFG_SPACE_SIZE = 256
PCI_CFG_SPACE_EXP_SIZE = 4096
PCI_CAP_LIST_NEXT = 1
PCI_CAP_ID_PM = 0x1
PCI_CAP_ID_VNDR = 0x9
PCI_CAP_ID_MSIX = 0x11
PCI_CAP_ID_EXP = 0x10
PCI_EXP_DEVCTL2 = 40
PCI_EXP_LNKCTL2 = 48
PCI_EXT_CAP_ID_DSN = 0x03
PCI_EXT_CAP_ID_VNDR = 0x0b
PCI_EXT_CAP_DSN_SIZEOF = 12
PCI_EXT_CAP_VNDR_HDR_SIZEOF = 8
# MSI-X registers
PCI_MSIX_FLAGS = 2 # Message Control
PCI_MSIX_TABLE = 4 # Table offset
PCI_MSIX_FLAGS_MASKALL = 0x4000 # Mask all vectors for this function
PCI_MSIX_FLAGS_ENABLE = 0x8000 # MSI-X enable
PCI_CAP_MSIX_SIZEOF = 12 # size of MSIX registers
# from linux/vfio.h
VFIO_DEVICE_FLAGS_RESET = (1 << 0)
VFIO_DEVICE_FLAGS_PCI = (1 << 1)
VFIO_REGION_INFO_FLAG_READ = (1 << 0)
VFIO_REGION_INFO_FLAG_WRITE = (1 << 1)
VFIO_REGION_INFO_FLAG_MMAP = (1 << 2)
VFIO_REGION_INFO_FLAG_CAPS = (1 << 3)
VFIO_REGION_TYPE_MIGRATION = 3
VFIO_REGION_SUBTYPE_MIGRATION = 1
VFIO_REGION_INFO_CAP_SPARSE_MMAP = 1
VFIO_REGION_INFO_CAP_TYPE = 2
VFIO_IRQ_INFO_EVENTFD = (1 << 0)
VFIO_IRQ_SET_DATA_NONE = (1 << 0)
VFIO_IRQ_SET_DATA_BOOL = (1 << 1)
VFIO_IRQ_SET_DATA_EVENTFD = (1 << 2)
VFIO_IRQ_SET_ACTION_MASK = (1 << 3)
VFIO_IRQ_SET_ACTION_UNMASK = (1 << 4)
VFIO_IRQ_SET_ACTION_TRIGGER = (1 << 5)
VFIO_DMA_UNMAP_FLAG_ALL = (1 << 1)
VFIO_DEVICE_STATE_V1_STOP = (0)
VFIO_DEVICE_STATE_V1_RUNNING = (1 << 0)
VFIO_DEVICE_STATE_V1_SAVING = (1 << 1)
VFIO_DEVICE_STATE_V1_RESUMING = (1 << 2)
VFIO_DEVICE_STATE_MASK = ((1 << 3) - 1)
# libvfio-user defines
VFU_TRANS_SOCK = 0
VFU_TRANS_PIPE = 1
VFU_TRANS_MAX = 2
LIBVFIO_USER_FLAG_ATTACH_NB = (1 << 0)
VFU_DEV_TYPE_PCI = 0
LIBVFIO_USER_MAJOR = 0
LIBVFIO_USER_MINOR = 1
VFIO_USER_CLIENT_MAX_FDS_LIMIT = 1024
SERVER_MAX_FDS = 8
ONE_TB = (1024 * 1024 * 1024 * 1024)
VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE = (1024 * 1024)
SERVER_MAX_DATA_XFER_SIZE = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE
SERVER_MAX_MSG_SIZE = SERVER_MAX_DATA_XFER_SIZE + 16 + 16
MAX_DMA_REGIONS = 16
MAX_DMA_SIZE = (8 * ONE_TB)
# enum vfio_user_command
VFIO_USER_VERSION = 1
VFIO_USER_DMA_MAP = 2
VFIO_USER_DMA_UNMAP = 3
VFIO_USER_DEVICE_GET_INFO = 4
VFIO_USER_DEVICE_GET_REGION_INFO = 5
VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6
VFIO_USER_DEVICE_GET_IRQ_INFO = 7
VFIO_USER_DEVICE_SET_IRQS = 8
VFIO_USER_REGION_READ = 9
VFIO_USER_REGION_WRITE = 10
VFIO_USER_DMA_READ = 11
VFIO_USER_DMA_WRITE = 12
VFIO_USER_DEVICE_RESET = 13
VFIO_USER_DIRTY_PAGES = 14
VFIO_USER_MAX = 15
VFIO_USER_F_TYPE_COMMAND = 0
VFIO_USER_F_TYPE_REPLY = 1
SIZEOF_VFIO_USER_HEADER = 16
VFU_PCI_DEV_BAR0_REGION_IDX = 0
VFU_PCI_DEV_BAR1_REGION_IDX = 1
VFU_PCI_DEV_BAR2_REGION_IDX = 2
VFU_PCI_DEV_BAR3_REGION_IDX = 3
VFU_PCI_DEV_BAR4_REGION_IDX = 4
VFU_PCI_DEV_BAR5_REGION_IDX = 5
VFU_PCI_DEV_ROM_REGION_IDX = 6
VFU_PCI_DEV_CFG_REGION_IDX = 7
VFU_PCI_DEV_VGA_REGION_IDX = 8
VFU_PCI_DEV_MIGR_REGION_IDX = 9
VFU_PCI_DEV_NUM_REGIONS = 10
VFU_REGION_FLAG_READ = 1
VFU_REGION_FLAG_WRITE = 2
VFU_REGION_FLAG_RW = (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE)
VFU_REGION_FLAG_MEM = 4
VFU_REGION_FLAG_ALWAYS_CB = 8
VFIO_USER_F_DMA_REGION_READ = (1 << 0)
VFIO_USER_F_DMA_REGION_WRITE = (1 << 1)
VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP = (1 << 0)
VFIO_IOMMU_DIRTY_PAGES_FLAG_START = (1 << 0)
VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP = (1 << 1)
VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP = (1 << 2)
VFIO_USER_IO_FD_TYPE_IOEVENTFD = 0
VFIO_USER_IO_FD_TYPE_IOREGIONFD = 1
# enum vfu_dev_irq_type
VFU_DEV_INTX_IRQ = 0
VFU_DEV_MSI_IRQ = 1
VFU_DEV_MSIX_IRQ = 2
VFU_DEV_ERR_IRQ = 3
VFU_DEV_REQ_IRQ = 4
VFU_DEV_NUM_IRQS = 5
# enum vfu_reset_type
VFU_RESET_DEVICE = 0
VFU_RESET_LOST_CONN = 1
VFU_RESET_PCI_FLR = 2
# vfu_pci_type_t
VFU_PCI_TYPE_CONVENTIONAL = 0
VFU_PCI_TYPE_PCI_X_1 = 1
VFU_PCI_TYPE_PCI_X_2 = 2
VFU_PCI_TYPE_EXPRESS = 3
VFU_CAP_FLAG_EXTENDED = (1 << 0)
VFU_CAP_FLAG_CALLBACK = (1 << 1)
VFU_CAP_FLAG_READONLY = (1 << 2)
VFU_MIGR_CALLBACKS_VERS = 1
SOCK_PATH = b"/tmp/vfio-user.sock.%d" % os.getpid()
topdir = os.path.realpath(os.path.dirname(__file__) + "/../..")
libname = os.path.join(os.getenv("LIBVFIO_SO_DIR"), "libvfio-user.so")
lib = c.CDLL(libname, use_errno=True)
libc = c.CDLL("libc.so.6", use_errno=True)
#
# Structures
#
class Structure(c.Structure):
def __len__(self):
"""Handy method to return length in bytes."""
return len(bytes(self))
@classmethod
def pop_from_buffer(cls, buf):
""""Pop a new object from the given bytes buffer."""
obj = cls.from_buffer_copy(buf)
return obj, buf[c.sizeof(obj):]
class vfu_bar_t(c.Union):
_pack_ = 1
_fields_ = [
("mem", c.c_int32),
("io", c.c_int32)
]
class vfu_pci_hdr_intr_t(Structure):
_pack_ = 1
_fields_ = [
("iline", c.c_byte),
("ipin", c.c_byte)
]
class vfu_pci_hdr_t(Structure):
_pack_ = 1
_fields_ = [
("id", c.c_int32),
("cmd", c.c_uint16),
("sts", c.c_uint16),
("rid", c.c_byte),
("cc_pi", c.c_byte),
("cc_scc", c.c_byte),
("cc_bcc", c.c_byte),
("cls", c.c_byte),
("mlt", c.c_byte),
("htype", c.c_byte),
("bist", c.c_byte),
("bars", vfu_bar_t * PCI_BARS_NR),
("ccptr", c.c_int32),
("ss", c.c_int32),
("erom", c.c_int32),
("cap", c.c_byte),
("res1", c.c_byte * 7),
("intr", vfu_pci_hdr_intr_t),
("mgnt", c.c_byte),
("mlat", c.c_byte)
]
class iovec_t(Structure):
_fields_ = [
("iov_base", c.c_void_p),
("iov_len", c.c_int32)
]
def __eq__(self, other):
if type(self) != type(other):
return False
return self.iov_base == other.iov_base \
and self.iov_len == other.iov_len
def __str__(self):
return "%s-%s" % \
(hex(self.iov_base or 0), hex((self.iov_base or 0) + self.iov_len))
def __copy__(self):
cls = self.__class__
result = cls.__new__(cls)
result.iov_base = self.iov_base
result.iov_len = self.iov_len
return result
class vfio_irq_info(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("index", c.c_uint32),
("count", c.c_uint32),
]
class vfio_irq_set(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("index", c.c_uint32),
("start", c.c_uint32),
("count", c.c_uint32),
]
class vfio_user_device_info(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("num_regions", c.c_uint32),
("num_irqs", c.c_uint32),
]
class vfio_region_info(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("index", c.c_uint32),
("cap_offset", c.c_uint32),
("size", c.c_uint64),
("offset", c.c_uint64),
]
class vfio_region_info_cap_type(Structure):
_pack_ = 1
_fields_ = [
("id", c.c_uint16),
("version", c.c_uint16),
("next", c.c_uint32),
("type", c.c_uint32),
("subtype", c.c_uint32),
]
class vfio_region_info_cap_sparse_mmap(Structure):
_pack_ = 1
_fields_ = [
("id", c.c_uint16),
("version", c.c_uint16),
("next", c.c_uint32),
("nr_areas", c.c_uint32),
("reserved", c.c_uint32),
]
class vfio_region_sparse_mmap_area(Structure):
_pack_ = 1
_fields_ = [
("offset", c.c_uint64),
("size", c.c_uint64),
]
class vfio_user_region_io_fds_request(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("index", c.c_uint32),
("count", c.c_uint32)
]
class vfio_user_sub_region_ioeventfd(Structure):
_pack_ = 1
_fields_ = [
("offset", c.c_uint64),
("size", c.c_uint64),
("fd_index", c.c_uint32),
("type", c.c_uint32),
("flags", c.c_uint32),
("padding", c.c_uint32),
("datamatch", c.c_uint64)
]
class vfio_user_sub_region_ioregionfd(Structure):
_pack_ = 1
_fields_ = [
("offset", c.c_uint64),
("size", c.c_uint64),
("fd_index", c.c_uint32),
("type", c.c_uint32),
("flags", c.c_uint32),
("padding", c.c_uint32),
("user_data", c.c_uint64)
]
class vfio_user_sub_region_io_fd(c.Union):
_pack_ = 1
_fields_ = [
("sub_region_ioeventfd", vfio_user_sub_region_ioeventfd),
("sub_region_ioregionfd", vfio_user_sub_region_ioregionfd)
]
class vfio_user_region_io_fds_reply(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("index", c.c_uint32),
("count", c.c_uint32)
]
class vfio_user_dma_map(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("offset", c.c_uint64),
("addr", c.c_uint64),
("size", c.c_uint64),
]
class vfio_user_dma_unmap(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32),
("addr", c.c_uint64),
("size", c.c_uint64),
]
class vfu_dma_info_t(Structure):
_fields_ = [
("iova", iovec_t),
("vaddr", c.c_void_p),
("mapping", iovec_t),
("page_size", c.c_size_t),
("prot", c.c_uint32)
]
def __eq__(self, other):
if type(self) != type(other):
return False
return self.iova == other.iova \
and self.vaddr == other.vaddr \
and self.mapping == other.mapping \
and self.page_size == other.page_size \
and self.prot == other.prot
def __str__(self):
return "IOVA=%s vaddr=%s mapping=%s page_size=%s prot=%s" % \
(self.iova, self.vaddr, self.mapping, hex(self.page_size),
bin(self.prot))
def __copy__(self):
cls = self.__class__
result = cls.__new__(cls)
result.iova = self.iova
result.vaddr = self.vaddr
result.mapping = self.mapping
result.page_size = self.page_size
result.prot = self.prot
return result
class vfio_user_dirty_pages(Structure):
_pack_ = 1
_fields_ = [
("argsz", c.c_uint32),
("flags", c.c_uint32)
]
class vfio_user_bitmap(Structure):
_pack_ = 1
_fields_ = [
("pgsize", c.c_uint64),
("size", c.c_uint64)
]
class vfio_user_bitmap_range(Structure):
_pack_ = 1
_fields_ = [
("iova", c.c_uint64),
("size", c.c_uint64),
("bitmap", vfio_user_bitmap)
]
transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int, use_errno=True)
get_pending_bytes_cb_t = c.CFUNCTYPE(c.c_uint64, c.c_void_p)
prepare_data_cb_t = c.CFUNCTYPE(c.c_void_p, c.POINTER(c.c_uint64),
c.POINTER(c.c_uint64))
read_data_cb_t = c.CFUNCTYPE(c.c_ssize_t, c.c_void_p, c.c_void_p,
c.c_uint64, c.c_uint64)
write_data_cb_t = c.CFUNCTYPE(c.c_ssize_t, c.c_void_p, c.c_uint64)
data_written_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_uint64)
class vfu_migration_callbacks_t(Structure):
_fields_ = [
("version", c.c_int),
("transition", transition_cb_t),
("get_pending_bytes", get_pending_bytes_cb_t),
("prepare_data", prepare_data_cb_t),
("read_data", read_data_cb_t),
("write_data", write_data_cb_t),
("data_written", data_written_cb_t),
]
class dma_sg_t(Structure):
_fields_ = [
("dma_addr", c.c_void_p),
("region", c.c_int),
("length", c.c_uint64),
("offset", c.c_uint64),
("writeable", c.c_bool),
("le_next", c.c_void_p),
("le_prev", c.c_void_p),
]
def __str__(self):
return "DMA addr=%s, region index=%s, length=%s, offset=%s, RW=%s" % \
(hex(self.dma_addr), self.region, hex(self.length),
hex(self.offset), self.writeable)
class vfio_user_migration_info(Structure):
_pack_ = 1
_fields_ = [
("device_state", c.c_uint32),
("reserved", c.c_uint32),
("pending_bytes", c.c_uint64),
("data_offset", c.c_uint64),
("data_size", c.c_uint64),
]
#
# Util functions
#
lib.vfu_create_ctx.argtypes = (c.c_int, c.c_char_p, c.c_int,
c.c_void_p, c.c_int)
lib.vfu_create_ctx.restype = (c.c_void_p)
lib.vfu_setup_log.argtypes = (c.c_void_p, c.c_void_p, c.c_int)
lib.vfu_realize_ctx.argtypes = (c.c_void_p,)
lib.vfu_attach_ctx.argtypes = (c.c_void_p,)
lib.vfu_run_ctx.argtypes = (c.c_void_p,)
lib.vfu_destroy_ctx.argtypes = (c.c_void_p,)
vfu_region_access_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.POINTER(c.c_char),
c.c_ulong, c.c_long, c.c_bool)
lib.vfu_setup_region.argtypes = (c.c_void_p, c.c_int, c.c_ulong,
vfu_region_access_cb_t, c.c_int, c.c_void_p,
c.c_uint32, c.c_int, c.c_ulong)
vfu_reset_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int)
lib.vfu_setup_device_reset_cb.argtypes = (c.c_void_p, vfu_reset_cb_t)
lib.vfu_pci_get_config_space.argtypes = (c.c_void_p,)
lib.vfu_pci_get_config_space.restype = (c.c_void_p)
lib.vfu_setup_device_nr_irqs.argtypes = (c.c_void_p, c.c_int, c.c_uint32)
lib.vfu_pci_init.argtypes = (c.c_void_p, c.c_int, c.c_int, c.c_int)
lib.vfu_pci_add_capability.argtypes = (c.c_void_p, c.c_ulong, c.c_int,
c.POINTER(c.c_byte))
lib.vfu_pci_find_capability.argtypes = (c.c_void_p, c.c_bool, c.c_int)
lib.vfu_pci_find_capability.restype = (c.c_ulong)
lib.vfu_pci_find_next_capability.argtypes = (c.c_void_p, c.c_bool, c.c_ulong,
c.c_int)
lib.vfu_pci_find_next_capability.restype = (c.c_ulong)
lib.vfu_irq_trigger.argtypes = (c.c_void_p, c.c_uint)
vfu_device_quiesce_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, use_errno=True)
lib.vfu_setup_device_quiesce_cb.argtypes = (c.c_void_p,
vfu_device_quiesce_cb_t)
vfu_dma_register_cb_t = c.CFUNCTYPE(None, c.c_void_p,
c.POINTER(vfu_dma_info_t), use_errno=True)
vfu_dma_unregister_cb_t = c.CFUNCTYPE(None, c.c_void_p,
c.POINTER(vfu_dma_info_t),
use_errno=True)
lib.vfu_setup_device_dma.argtypes = (c.c_void_p, vfu_dma_register_cb_t,
vfu_dma_unregister_cb_t)
lib.vfu_setup_device_migration_callbacks.argtypes = (c.c_void_p,
c.POINTER(vfu_migration_callbacks_t), c.c_uint64)
lib.dma_sg_size.restype = (c.c_size_t)
lib.vfu_addr_to_sg.argtypes = (c.c_void_p, c.c_void_p, c.c_size_t,
c.POINTER(dma_sg_t), c.c_int, c.c_int)
lib.vfu_map_sg.argtypes = (c.c_void_p, c.POINTER(dma_sg_t), c.POINTER(iovec_t),
c.c_int, c.c_int)
lib.vfu_unmap_sg.argtypes = (c.c_void_p, c.POINTER(dma_sg_t),
c.POINTER(iovec_t), c.c_int)
lib.vfu_create_ioeventfd.argtypes = (c.c_void_p, c.c_uint32, c.c_int,
c.c_size_t, c.c_uint32, c.c_uint32,
c.c_uint64)
lib.vfu_device_quiesced.argtypes = (c.c_void_p, c.c_int)
def to_byte(val):
"""Cast an int to a byte value."""
return val.to_bytes(1, 'little')
def skip(fmt, buf):
"""Return the data remaining after skipping the given elements."""
return buf[struct.calcsize(fmt):]
def parse_json(json_str):
"""Parse JSON into an object with attributes (instead of using a dict)."""
return json.loads(json_str, object_hook=lambda d: SimpleNamespace(**d))
def eventfd(initval=0, flags=0):
libc.eventfd.argtypes = (c.c_uint, c.c_int)
return libc.eventfd(initval, flags)
def connect_sock():
sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
sock.connect(SOCK_PATH)
return sock
def connect_client(ctx):
sock = connect_sock()
json = b'{ "capabilities": { "max_msg_fds": 8 } }'
# struct vfio_user_version
payload = struct.pack("HH%dsc" % len(json), LIBVFIO_USER_MAJOR,
LIBVFIO_USER_MINOR, json, b'\0')
hdr = vfio_user_header(VFIO_USER_VERSION, size=len(payload))
sock.send(hdr + payload)
vfu_attach_ctx(ctx, expect=0)
payload = get_reply(sock, expect=0)
return sock
def disconnect_client(ctx, sock):
sock.close()
# notice client closed connection
vfu_run_ctx(ctx, errno.ENOTCONN)
def get_reply(sock, expect=0):
buf = sock.recv(4096)
(msg_id, cmd, msg_size, flags, errno) = struct.unpack("HHIII", buf[0:16])
assert (flags & VFIO_USER_F_TYPE_REPLY) != 0
assert errno == expect
return buf[16:]
def msg(ctx, sock, cmd, payload=bytearray(), expect=0, fds=None,
rsp=True, busy=False):
"""
Round trip a request and reply to the server. vfu_run_ctx will be
called once for the server to process the incoming message,
If a response is not expected then @rsp must be set to False, otherwise
this function will block indefinitely.
If busy is True, then we expect the server to have returned EBUSY from a
quiesce callback, and hence vfu_run_ctx(); in this case, there will be no
response: it can later be retrieved, post vfu_device_quiesced(), with
get_reply().
"""
hdr = vfio_user_header(cmd, size=len(payload))
if fds:
sock.sendmsg([hdr + payload], [(socket.SOL_SOCKET, socket.SCM_RIGHTS,
struct.pack("I" * len(fds), *fds))])
else:
sock.send(hdr + payload)
if busy:
vfu_run_ctx(ctx, errno.EBUSY)
rsp = False
else:
vfu_run_ctx(ctx)
if not rsp:
return
return get_reply(sock, expect=expect)
def get_reply_fds(sock, expect=0):
"""Receives a message from a socket and pulls the returned file descriptors
out of the message."""
fds = array.array("i")
data, ancillary, flags, addr = sock.recvmsg(4096,
socket.CMSG_LEN(64 * fds.itemsize))
(msg_id, cmd, msg_size, msg_flags, errno) = struct.unpack("HHIII",
data[0:16])
assert errno == expect
cmsg_level, cmsg_type, packed_fd = ancillary[0] if len(ancillary) != 0 \
else (0, 0, [])
unpacked_fds = []
for i in range(0, len(packed_fd), 4):
[unpacked_fd] = struct.unpack_from("i", packed_fd, offset=i)
unpacked_fds.append(unpacked_fd)
assert len(packed_fd)/4 == len(unpacked_fds)
assert (msg_flags & VFIO_USER_F_TYPE_REPLY) != 0
return (unpacked_fds, data[16:])
def msg_fds(ctx, sock, cmd, payload, expect=0, fds=None):
"""Round trip a request and reply to the server. With the server returning
new fds"""
hdr = vfio_user_header(cmd, size=len(payload))
if fds:
sock.sendmsg([hdr + payload], [(socket.SOL_SOCKET, socket.SCM_RIGHTS,
struct.pack("I" * len(fds), *fds))])
else:
sock.send(hdr + payload)
vfu_run_ctx(ctx)
return get_reply_fds(sock, expect=expect)
def get_pci_header(ctx):
ptr = lib.vfu_pci_get_config_space(ctx)
return c.cast(ptr, c.POINTER(vfu_pci_hdr_t)).contents
def get_pci_cfg_space(ctx):
ptr = lib.vfu_pci_get_config_space(ctx)
return c.cast(ptr, c.POINTER(c.c_char))[0:PCI_CFG_SPACE_SIZE]
def get_pci_ext_cfg_space(ctx):
ptr = lib.vfu_pci_get_config_space(ctx)
return c.cast(ptr, c.POINTER(c.c_char))[0:PCI_CFG_SPACE_EXP_SIZE]
def read_pci_cfg_space(ctx, buf, count, offset, extended=False):
space = get_pci_ext_cfg_space(ctx) if extended else get_pci_cfg_space(ctx)
for i in range(count):
buf[i] = space[offset+i]
return count
def write_pci_cfg_space(ctx, buf, count, offset, extended=False):
max_offset = PCI_CFG_SPACE_EXP_SIZE if extended else PCI_CFG_SPACE_SIZE
assert offset + count <= max_offset
space = c.cast(lib.vfu_pci_get_config_space(ctx), c.POINTER(c.c_char))
for i in range(count):
space[offset+i] = buf[i]
return count
def access_region(ctx, sock, is_write, region, offset, count,
data=None, expect=0, rsp=True, busy=False):
# struct vfio_user_region_access
payload = struct.pack("QII", offset, region, count)
if is_write:
payload += data
cmd = VFIO_USER_REGION_WRITE if is_write else VFIO_USER_REGION_READ
result = msg(ctx, sock, cmd, payload, expect=expect, rsp=rsp, busy=busy)
if is_write:
return None
if rsp:
return skip("QII", result)
def write_region(ctx, sock, region, offset, count, data, expect=0, rsp=True,
busy=False):
access_region(ctx, sock, True, region, offset, count, data, expect=expect,
rsp=rsp, busy=busy)
def read_region(ctx, sock, region, offset, count, expect=0, rsp=True,
busy=False):
return access_region(ctx, sock, False, region, offset, count,
expect=expect, rsp=rsp, busy=busy)
def ext_cap_hdr(buf, offset):
"""Read an extended cap header."""
# struct pcie_ext_cap_hdr
cap_id, cap_next = struct.unpack_from('HH', buf, offset)
cap_next >>= 4
return cap_id, cap_next
def dma_register(ctx, info):
pass
@vfu_dma_register_cb_t
def __dma_register(ctx, info):
# The copy is required because in case of deliberate failure (e.g.
# test_dma_map_busy_reply_fail) the memory gets deallocated and mock only
# records the pointer, so the contents are all null/zero.
dma_register(ctx, copy.copy(info.contents))
def dma_unregister(ctx, info):
pass
@vfu_dma_unregister_cb_t
def __dma_unregister(ctx, info):
dma_unregister(ctx, copy.copy(info.contents))
def quiesce_cb(ctx):
return 0
@vfu_device_quiesce_cb_t
def _quiesce_cb(ctx):
return quiesce_cb(ctx)
def vfu_setup_device_quiesce_cb(ctx, quiesce_cb=_quiesce_cb):
assert ctx is not None
lib.vfu_setup_device_quiesce_cb(ctx,
c.cast(quiesce_cb,
vfu_device_quiesce_cb_t))
def reset_cb(ctx, reset_type):
return 0
@vfu_reset_cb_t
def _reset_cb(ctx, reset_type):
return reset_cb(ctx, reset_type)
def vfu_setup_device_reset_cb(ctx, cb=_reset_cb):
assert ctx is not None
return lib.vfu_setup_device_reset_cb(ctx, c.cast(cb, vfu_reset_cb_t))
def prepare_ctx_for_dma(dma_register=__dma_register,
dma_unregister=__dma_unregister, quiesce=_quiesce_cb,
reset=_reset_cb, migration_callbacks=False):
ctx = vfu_create_ctx(flags=LIBVFIO_USER_FLAG_ATTACH_NB)
assert ctx is not None
ret = vfu_pci_init(ctx)
assert ret == 0
ret = vfu_setup_device_dma(ctx, dma_register, dma_unregister)
assert ret == 0
if quiesce is not None:
vfu_setup_device_quiesce_cb(ctx, quiesce)
if reset is not None:
ret = vfu_setup_device_reset_cb(ctx, reset)
assert ret == 0
f = tempfile.TemporaryFile()
f.truncate(0x2000)
mmap_areas = [(0x1000, 0x1000)]
ret = vfu_setup_region(ctx, index=VFU_PCI_DEV_MIGR_REGION_IDX, size=0x2000,
flags=VFU_REGION_FLAG_RW, mmap_areas=mmap_areas,
fd=f.fileno())
assert ret == 0
if migration_callbacks:
ret = vfu_setup_device_migration_callbacks(ctx)
assert ret == 0
ret = vfu_realize_ctx(ctx)
assert ret == 0
return ctx
#
# Library wrappers
#
msg_id = 1
@c.CFUNCTYPE(None, c.c_void_p, c.c_int, c.c_char_p)
def log(ctx, level, msg):
lvl2str = {syslog.LOG_EMERG: "EMERGENCY",
syslog.LOG_ALERT: "ALERT",
syslog.LOG_CRIT: "CRITICAL",
syslog.LOG_ERR: "ERROR",
syslog.LOG_WARNING: "WANRING",
syslog.LOG_NOTICE: "NOTICE",
syslog.LOG_INFO: "INFO",
syslog.LOG_DEBUG: "DEBUG"}
print(lvl2str[level] + ": " + msg.decode("utf-8"))
def vfio_user_header(cmd, size, no_reply=False, error=False, error_no=0):
global msg_id
buf = struct.pack("HHIII", msg_id, cmd, SIZEOF_VFIO_USER_HEADER + size,
VFIO_USER_F_TYPE_COMMAND, error_no)
msg_id += 1
return buf
def vfu_create_ctx(trans=VFU_TRANS_SOCK, sock_path=SOCK_PATH, flags=0,
private=None, dev_type=VFU_DEV_TYPE_PCI):
if os.path.exists(sock_path):
os.remove(sock_path)
ctx = lib.vfu_create_ctx(trans, sock_path, flags, private, dev_type)
if ctx:
lib.vfu_setup_log(ctx, log, syslog.LOG_DEBUG)
return ctx
def vfu_realize_ctx(ctx):
return lib.vfu_realize_ctx(ctx)
def vfu_attach_ctx(ctx, expect=0):
ret = lib.vfu_attach_ctx(ctx)
if expect == 0:
assert ret == 0, "failed to attach: %s" % os.strerror(c.get_errno())
else:
assert ret == -1
assert c.get_errno() == expect
return ret
def vfu_run_ctx(ctx, expect=0):
ret = lib.vfu_run_ctx(ctx)
if expect == 0:
assert ret >= 0, "vfu_run_ctx(): %s" % os.strerror(c.get_errno())
else:
assert ret == -1
assert c.get_errno() == expect
return ret
def vfu_destroy_ctx(ctx):
lib.vfu_destroy_ctx(ctx)
ctx = None
if os.path.exists(SOCK_PATH):
os.remove(SOCK_PATH)
def pci_region_cb(ctx, buf, count, offset, is_write):
pass
@vfu_region_access_cb_t
def __pci_region_cb(ctx, buf, count, offset, is_write):
return pci_region_cb(ctx, buf, count, offset, is_write)
def vfu_setup_region(ctx, index, size, cb=__pci_region_cb, flags=0,
mmap_areas=None, nr_mmap_areas=None, fd=-1, offset=0):
assert ctx is not None
c_mmap_areas = None
if mmap_areas:
c_mmap_areas = (iovec_t * len(mmap_areas))(*mmap_areas)
if nr_mmap_areas is None:
if mmap_areas:
nr_mmap_areas = len(mmap_areas)
else:
nr_mmap_areas = 0
# We're sending a file descriptor to ourselves; to pretend the server is
# separate, we need to dup() here.
if fd != -1:
fd = os.dup(fd)
ret = lib.vfu_setup_region(ctx, index, size,
c.cast(cb, vfu_region_access_cb_t),
flags, c_mmap_areas, nr_mmap_areas, fd, offset)
if fd != -1 and ret != 0:
os.close(fd)
return ret
def vfu_setup_device_nr_irqs(ctx, irqtype, count):
assert ctx is not None
return lib.vfu_setup_device_nr_irqs(ctx, irqtype, count)
def vfu_pci_init(ctx, pci_type=VFU_PCI_TYPE_EXPRESS,
hdr_type=PCI_HEADER_TYPE_NORMAL):
assert ctx is not None
return lib.vfu_pci_init(ctx, pci_type, hdr_type, 0)
def vfu_pci_add_capability(ctx, pos, flags, data):
assert ctx is not None
databuf = (c.c_byte * len(data)).from_buffer(bytearray(data))
return lib.vfu_pci_add_capability(ctx, pos, flags, databuf)
def vfu_pci_find_capability(ctx, extended, cap_id):
assert ctx is not None
return lib.vfu_pci_find_capability(ctx, extended, cap_id)
def vfu_pci_find_next_capability(ctx, extended, offset, cap_id):
assert ctx is not None
return lib.vfu_pci_find_next_capability(ctx, extended, offset, cap_id)
def vfu_irq_trigger(ctx, subindex):
assert ctx is not None
return lib.vfu_irq_trigger(ctx, subindex)
def vfu_setup_device_dma(ctx, register_cb=None, unregister_cb=None):
assert ctx is not None
return lib.vfu_setup_device_dma(ctx, c.cast(register_cb,
vfu_dma_register_cb_t),
c.cast(unregister_cb,
vfu_dma_unregister_cb_t))
# FIXME some of the migration arguments are probably wrong as in the C version
# they're pointer. Check how we handle the read/write region callbacks.
def migr_trans_cb(ctx, state):
pass
@transition_cb_t
def __migr_trans_cb(ctx, state):
return migr_trans_cb(ctx, state)
def migr_get_pending_bytes_cb(ctx):
pass
@get_pending_bytes_cb_t
def __migr_get_pending_bytes_cb(ctx):
return migr_get_pending_bytes_cb(ctx)
def migr_prepare_data_cb(ctx, offset, size):
pass
@prepare_data_cb_t
def __migr_prepare_data_cb(ctx, offset, size):
return migr_prepare_data_cb(ctx, offset, size)
def migr_read_data_cb(ctx, buf, count, offset):
pass
@read_data_cb_t
def __migr_read_data_cb(ctx, buf, count, offset):
return migr_read_data_cb(ctx, buf, count, offset)
def migr_write_data_cb(ctx, buf, count, offset):
pass
@write_data_cb_t
def __migr_write_data_cb(ctx, buf, count, offset):
return migr_write_data_cb(ctx, buf, count, offset)
def migr_data_written_cb(ctx, count):
pass
@data_written_cb_t
def __migr_data_written_cb(ctx, count):
return migr_data_written_cb(ctx, count)
def vfu_setup_device_migration_callbacks(ctx, cbs=None, offset=0x4000):
assert ctx is not None
if not cbs:
cbs = vfu_migration_callbacks_t()
cbs.version = VFU_MIGR_CALLBACKS_VERS
cbs.transition = __migr_trans_cb
cbs.get_pending_bytes = __migr_get_pending_bytes_cb
cbs.prepare_data = __migr_prepare_data_cb
cbs.read_data = __migr_read_data_cb
cbs.write_data = __migr_write_data_cb
cbs.data_written = __migr_data_written_cb
return lib.vfu_setup_device_migration_callbacks(ctx, cbs, offset)
def dma_sg_size():
return lib.dma_sg_size()
def vfu_addr_to_sg(ctx, dma_addr, length, max_sg=1,
prot=(mmap.PROT_READ | mmap.PROT_WRITE)):
assert ctx is not None
sg = (dma_sg_t * max_sg)()
return (lib.vfu_addr_to_sg(ctx, dma_addr, length, sg, max_sg, prot), sg)
def vfu_map_sg(ctx, sg, iovec, cnt=1, flags=0):
return lib.vfu_map_sg(ctx, sg, iovec, cnt, flags)
def vfu_unmap_sg(ctx, sg, iovec, cnt=1):
return lib.vfu_unmap_sg(ctx, sg, iovec, cnt)
def vfu_create_ioeventfd(ctx, region_idx, fd, offset, size, flags, datamatch):
assert ctx is not None
return lib.vfu_create_ioeventfd(ctx, region_idx, fd, offset, size,
flags, datamatch)
def vfu_device_quiesced(ctx, err):
return lib.vfu_device_quiesced(ctx, err)
def fail_with_errno(err):
def side_effect(args, *kwargs):
c.set_errno(err)
return -1
return side_effect
# ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: #