Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* Granite Rapids CPU model
* Miscellaneous bugfixes

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmSn7uYUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroPi1gf+MJNyMneyyEZgBwlwgs2NYjz+cKwW
# KxtCOHDfew5S1qpq+gyvUnq5K0JJBGZKoFMwS6JwOpHASGx1o6mlF06CgLAk7wKh
# yCf1kzvRA4y3tYbSwvxD5iKV3YSsayIHuJ8q2GslVXBtAZ0xC2cREQLzKLNuEV6M
# rO4bj6QUV2fRc9u9TlurXijsdalUAEjmkIeZhtghhkD+lJo44yzcF7qAROaE3pFa
# IYEp8pTgcbJeiI0BUNFTRk0OlE5f7MT3GIQwTC34WWPO+r/uBXL5FXNqN38svugh
# 7hjOliIMU4I6jpL1t7v2+9Vs38gAEPchJ0Nly4TV+dydh7l1pIn9G7ssoA==
# =OBRZ
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 07 Jul 2023 11:54:30 AM BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  target/i386: Add new CPU model GraniteRapids
  target/i386: Add few security fix bits in ARCH_CAPABILITIES into SapphireRapids CPU model
  target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES
  target/i386: Allow MCDT_NO if host supports
  target/i386: Add support for MCDT_NO in CPUID enumeration
  target/i386: Adjust feature level according to FEAT_7_1_EDX
  qemu_cleanup: begin drained section after vm_shutdown()
  meson.build: Remove the logic to link C code with the C++ linker
  python: bump minimum requirements so they are compatible with 3.12

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index aae5f0b..093c2d4 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -888,7 +888,6 @@
     uint16_t vlan_special = 0;
     uint8_t vlan_status = 0;
     uint8_t min_buf[ETH_ZLEN];
-    struct iovec min_iov;
     uint8_t *filter_buf = iov->iov_base;
     size_t size = iov_size(iov, iovcnt);
     size_t iov_ofs = 0;
@@ -905,15 +904,7 @@
         return 0;
     }
 
-    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        iov_to_buf(iov, iovcnt, 0, min_buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
-        min_iov.iov_base = filter_buf = min_buf;
-        min_iov.iov_len = size = sizeof(min_buf);
-        iovcnt = 1;
-        iov = &min_iov;
-    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
+    if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
         /* This is very unlikely, but may happen. */
         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
         filter_buf = min_buf;
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 9f185d0..f8aeafa 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2604,12 +2604,38 @@
         e1000e_lower_interrupts(core, ICR, 0xffffffff);
     }
 
-    if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
-        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
-        trace_e1000e_irq_icr_clear_iame();
-        e1000e_lower_interrupts(core, ICR, 0xffffffff);
-        trace_e1000e_irq_icr_process_iame();
-        e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
+    if (core->mac[ICR] & E1000_ICR_ASSERTED) {
+        if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME) {
+            trace_e1000e_irq_icr_clear_iame();
+            e1000e_lower_interrupts(core, ICR, 0xffffffff);
+            trace_e1000e_irq_icr_process_iame();
+            e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
+        }
+
+        /*
+         * The datasheet does not say what happens when interrupt was asserted
+         * (ICR.INT_ASSERT=1) and auto mask is *not* active.
+         * However, section of 13.3.27 the PCIe* GbE Controllers Open Source
+         * Software Developer’s Manual, which were written for older devices,
+         * namely 631xESB/632xESB, 82563EB/82564EB, 82571EB/82572EI &
+         * 82573E/82573V/82573L, does say:
+         * > If IMS = 0b, then the ICR register is always clear-on-read. If IMS
+         * > is not 0b, but some ICR bit is set where the corresponding IMS bit
+         * > is not set, then a read does not clear the ICR register. For
+         * > example, if IMS = 10101010b and ICR = 01010101b, then a read to the
+         * > ICR register does not clear it. If IMS = 10101010b and
+         * > ICR = 0101011b, then a read to the ICR register clears it entirely
+         * > (ICR.INT_ASSERTED = 1b).
+         *
+         * Linux does no longer activate auto mask since commit
+         * 0a8047ac68e50e4ccbadcfc6b6b070805b976885 and the real hardware
+         * clears ICR even in such a case so we also should do so.
+         */
+        if (core->mac[ICR] & core->mac[IMS]) {
+            trace_e1000e_irq_icr_clear_icr_bit_ims(core->mac[ICR],
+                                                   core->mac[IMS]);
+            e1000e_lower_interrupts(core, ICR, 0xffffffff);
+        }
     }
 
     return ret;
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index d3bf14b..702b001 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -968,14 +968,6 @@
         return -1;
     }
 
-    /* TODO : Pad to minimum Ethernet frame length */
-    /* handle small packets.  */
-    if (size < 10) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped frame of %zd bytes\n",
-                      __func__, size);
-        return size;
-    }
-
     if (!ftgmac100_filter(s, buf, size)) {
         return size;
     }
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
index ec21e26..ab26f8b 100644
--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@@ -72,10 +72,6 @@
 #define I596_EOF        0x8000
 #define SIZE_MASK       0x3fff
 
-#define ETHER_TYPE_LEN 2
-#define VLAN_TCI_LEN 2
-#define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
-
 /* various flags in the chip config registers */
 #define I596_PREFETCH   (s->config[0] & 0x80)
 #define I596_PROMISC    (s->config[8] & 0x01)
@@ -488,8 +484,6 @@
     return true;
 }
 
-#define MIN_BUF_SIZE 60
-
 ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
 {
     I82596State *s = qemu_get_nic_opaque(nc);
@@ -500,7 +494,6 @@
     size_t bufsz = sz; /* length of data in buf */
     uint32_t crc;
     uint8_t *crc_ptr;
-    uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN];
     static const uint8_t broadcast_macaddr[6] = {
                 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -583,17 +576,6 @@
         }
     }
 
-    /* if too small buffer, then expand it */
-    if (len < MIN_BUF_SIZE + VLAN_HLEN) {
-        memcpy(buf1, buf, len);
-        memset(buf1 + len, 0, MIN_BUF_SIZE + VLAN_HLEN - len);
-        buf = buf1;
-        if (len < MIN_BUF_SIZE) {
-            len = MIN_BUF_SIZE;
-        }
-        bufsz = len;
-    }
-
     /* Calculate the ethernet checksum (4 bytes) */
     len += 4;
     crc = cpu_to_be32(crc32(~0, buf, sz));
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index d00b1ca..8b6b75c 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -2678,12 +2678,7 @@
         res |= E1000_STATUS_IOV_MODE;
     }
 
-    /*
-     * Windows driver 12.18.9.23 resets if E1000_STATUS_GIO_MASTER_ENABLE is
-     * left set after E1000_CTRL_LRST is set.
-     */
-    if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE) &&
-        !(core->mac[CTRL] & E1000_CTRL_LRST)) {
+    if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) {
         res |= E1000_STATUS_GIO_MASTER_ENABLE;
     }
 
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 3f31d04..d79c884 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -167,15 +167,12 @@
     return 0;
 }
 
-#define MIN_BUF_SIZE 60
-
 ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     NE2000State *s = qemu_get_nic_opaque(nc);
     size_t size = size_;
     uint8_t *p;
     unsigned int total_len, next, avail, len, index, mcast_idx;
-    uint8_t buf1[60];
     static const uint8_t broadcast_macaddr[6] =
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -213,15 +210,6 @@
         }
     }
 
-
-    /* if too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     index = s->curpag << 8;
     if (index >= NE2000_PMEM_END) {
         index = s->start;
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
index d456094..02828ae 100644
--- a/hw/net/pcnet.c
+++ b/hw/net/pcnet.c
@@ -987,7 +987,6 @@
 {
     PCNetState *s = qemu_get_nic_opaque(nc);
     int is_padr = 0, is_bcast = 0, is_ladr = 0;
-    uint8_t buf1[60];
     int remaining;
     int crc_err = 0;
     size_t size = size_;
@@ -1000,14 +999,6 @@
     printf("pcnet_receive size=%zu\n", size);
 #endif
 
-    /* if too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     if (CSR_PROM(s)
         || (is_padr=padr_match(s, buf, size))
         || (is_bcast=padr_bcast(s, buf, size))
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 5f1a4d3..b4df75b 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -826,7 +826,6 @@
 
     uint32_t packet_header = 0;
 
-    uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN];
     static const uint8_t broadcast_macaddr[6] =
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -938,17 +937,6 @@
         }
     }
 
-    /* if too small buffer, then expand it
-     * Include some tailroom in case a vlan tag is later removed. */
-    if (size < MIN_BUF_SIZE + VLAN_HLEN) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE + VLAN_HLEN - size);
-        buf = buf1;
-        if (size < MIN_BUF_SIZE) {
-            size = MIN_BUF_SIZE;
-        }
-    }
-
     if (rtl8139_cp_receiver_enabled(s))
     {
         if (!rtl8139_cp_rx_valid(s)) {
diff --git a/hw/net/sungem.c b/hw/net/sungem.c
index eb01520..103376c 100644
--- a/hw/net/sungem.c
+++ b/hw/net/sungem.c
@@ -550,7 +550,6 @@
     PCIDevice *d = PCI_DEVICE(s);
     uint32_t mac_crc, done, kick, max_fsize;
     uint32_t fcs_size, ints, rxdma_cfg, rxmac_cfg, csum, coff;
-    uint8_t smallbuf[60];
     struct gem_rxd desc;
     uint64_t dbase, baddr;
     unsigned int rx_cond;
@@ -584,19 +583,6 @@
         return size;
     }
 
-    /* We don't drop too small frames since we get them in qemu, we pad
-     * them instead. We should probably use the min frame size register
-     * but I don't want to use a variable size staging buffer and I
-     * know both MacOS and Linux use the default 64 anyway. We use 60
-     * here to account for the non-existent FCS.
-     */
-    if (size < 60) {
-        memcpy(smallbuf, buf, size);
-        memset(&smallbuf[size], 0, 60 - size);
-        buf = smallbuf;
-        size = 60;
-    }
-
     /* Get MAC crc */
     mac_crc = net_crc32_le(buf, ETH_ALEN);
 
diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c
index 1f3d801..391d26f 100644
--- a/hw/net/sunhme.c
+++ b/hw/net/sunhme.c
@@ -714,8 +714,6 @@
     s->erxregs[HME_ERXI_RING >> 2] = ring;
 }
 
-#define MIN_BUF_SIZE 60
-
 static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
                               size_t size)
 {
@@ -724,7 +722,6 @@
     dma_addr_t rb, addr;
     uint32_t intstatus, status, buffer, buffersize, sum;
     uint16_t csum;
-    uint8_t buf1[60];
     int nr, cr, len, rxoffset, csum_offset;
 
     trace_sunhme_rx_incoming(size);
@@ -775,14 +772,6 @@
 
     trace_sunhme_rx_filter_accept();
 
-    /* If too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     rb = s->erxregs[HME_ERXI_RING >> 2] & HME_ERXI_RING_ADDR;
     nr = sunhme_get_rx_ring_count(s);
     cr = sunhme_get_rx_ring_nr(s);
diff --git a/hw/net/trace-events b/hw/net/trace-events
index e4a98b2..3eeacc5 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -217,6 +217,7 @@
 e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int"
 e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
 e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
+e1000e_irq_icr_clear_icr_bit_ims(uint32_t icr, uint32_t ims) "Clearing ICR on read due corresponding IMS bit: 0x%x & 0x%x"
 e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
 e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X"
 e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index aa421a9..04783f5 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3630,12 +3630,12 @@
     }
 
     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
-        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
         !is_power_of_2(n->net_conf.tx_queue_size)) {
         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                    "must be a power of 2 between %d and %d",
                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
-                   VIRTQUEUE_MAX_SIZE);
+                   virtio_net_max_tx_queue_size(n));
         virtio_cleanup(vdev);
         return;
     }
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 18b9edf..5dfacb1 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -40,7 +40,6 @@
 
 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
 #define VMXNET3_MSIX_BAR_SIZE 0x2000
-#define MIN_BUF_SIZE 60
 
 /* Compatibility flags for migration */
 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
@@ -1977,7 +1976,6 @@
 {
     VMXNET3State *s = qemu_get_nic_opaque(nc);
     size_t bytes_indicated;
-    uint8_t min_buf[MIN_BUF_SIZE];
 
     if (!vmxnet3_can_receive(nc)) {
         VMW_PKPRN("Cannot receive now");
@@ -1990,14 +1988,6 @@
         size -= sizeof(struct virtio_net_hdr);
     }
 
-    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        memcpy(min_buf, buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
-        buf = min_buf;
-        size = sizeof(min_buf);
-    }
-
     net_rx_pkt_set_packet_type(s->rx_pkt,
         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
 
diff --git a/net/socket.c b/net/socket.c
index ba6e5b0..8e3702e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -446,31 +446,21 @@
     return s;
 }
 
-static NetSocketState *net_socket_fd_init(NetClientState *peer,
-                                          const char *model, const char *name,
-                                          int fd, int is_connected,
-                                          const char *mc, Error **errp)
+static int net_socket_fd_check(int fd, Error **errp)
 {
-    int so_type = -1, optlen=sizeof(so_type);
+    int so_type, optlen = sizeof(so_type);
 
-    if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type,
-        (socklen_t *)&optlen)< 0) {
+    if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type,
+        (socklen_t *)&optlen) < 0) {
         error_setg(errp, "can't get socket option SO_TYPE");
-        close(fd);
-        return NULL;
+        return -1;
     }
-    switch(so_type) {
-    case SOCK_DGRAM:
-        return net_socket_fd_init_dgram(peer, model, name, fd, is_connected,
-                                        mc, errp);
-    case SOCK_STREAM:
-        return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
-    default:
+    if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) {
         error_setg(errp, "socket type=%d for fd=%d must be either"
                    " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
-        close(fd);
+        return -1;
     }
-    return NULL;
+    return so_type;
 }
 
 static void net_socket_accept(void *opaque)
@@ -587,7 +577,7 @@
             break;
         }
     }
-    s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp);
+    s = net_socket_fd_init_stream(peer, model, name, fd, connected);
     if (!s) {
         return -1;
     }
@@ -629,7 +619,7 @@
         return -1;
     }
 
-    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
+    s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp);
     if (!s) {
         return -1;
     }
@@ -683,7 +673,7 @@
     }
     qemu_socket_set_nonblock(fd);
 
-    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
+    s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp);
     if (!s) {
         return -1;
     }
@@ -716,21 +706,34 @@
     }
 
     if (sock->fd) {
-        int fd, ret;
+        int fd, ret, so_type;
 
         fd = monitor_fd_param(monitor_cur(), sock->fd, errp);
         if (fd == -1) {
             return -1;
         }
+        so_type = net_socket_fd_check(fd, errp);
+        if (so_type < 0) {
+            return -1;
+        }
         ret = qemu_socket_try_set_nonblock(fd);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
                              name, fd);
             return -1;
         }
-        if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast,
-                                errp)) {
-            return -1;
+        switch (so_type) {
+        case SOCK_DGRAM:
+            if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1,
+                                          sock->mcast, errp)) {
+                return -1;
+            }
+            break;
+        case SOCK_STREAM:
+            if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) {
+                return -1;
+            }
+            break;
         }
         return 0;
     }