Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- Fix VHDX image corruption bug
- Fix for performance regression: Remove bdrv_co_get_geometry coroutines
  from I/O hot path

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmQ1dDARHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9ZM8BAArqnJjr2iAVT/yYHZKO8GUyrt3Ndq9aAb
# hlAoMud0Xkof588I1W4AelOBYz/Cm4OEeFNAYxFbWif6t1iSB/J3FG6EQMCRqOnV
# 1GHIrJO9tolhjGx9GcjbYjXVJDyIsKDhcNCFJ9gke7+zVZLT8bLA5ibdZ2xYEcAp
# DfH27pBa6dlLd2CnDfkatpUwqqUDju+iXLaB4kGN/AG4Xv61Jk9ZqpRIyl1lToXO
# C9HDbHh3U/7fT2q9lMUXecOQnRFhXhvYSyiU+vcCFJPdijYPacC/HqJo200fG67y
# NDw/xviip3nFQWpxB06qx5A/H3UtmacGRSeckPvN7ZuEG4qFJSgYFsJL2+Rd11gu
# y2it06WWpYz+CFtlbfTkDuKj35F9VGFcmdfwnWxcmpMYDBWLbCJuzMpZJkJj5ahm
# QT6cv138nSvhvMpXLLZXER9opdGqqTU7LS2NqSTDFDKlPOnhofl1+FK0dhjrecEf
# A3bVfY8z8j+R2CYRzFINf2FUJA91XJjbv2kaJkV6Jq3x1usmgsm+QmCEefPpYF2l
# nlx5wFewxlqg8skMKDrKPXpB7d3KiKHy829HRJJtgg9RBoI9yST9kSRQ/o1IXlnP
# xCPG23Trik0dj9W178MDrBwf9ug0EKg2a4Ny3ohLq48sJP9pzjL1bR6j0Zww+tcz
# XMvgFSKspeY=
# =4z1y
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 11 Apr 2023 15:52:32 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  block, block-backend: write some hot coroutine wrappers by hand
  block-backend: ignore inserted state in blk_co_nb_sectors
  block-backend: inline bdrv_co_get_geometry
  migration/block: replace uses of blk_nb_sectors that do not check result
  block: remove has_variable_length from BlockDriver
  block: refresh bs->total_sectors on reopen
  block: remove has_variable_length from filters
  block: move has_variable_length to BlockLimits
  iotests: Regression test for vhdx log corruption
  block/vhdx: fix dynamic VHDX BAT corruption

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index e0c6c64..d79a52c 100644
--- a/block.c
+++ b/block.c
@@ -4918,6 +4918,7 @@
     qdict_del(bs->options, "backing");
 
     bdrv_refresh_limits(bs, NULL, NULL);
+    bdrv_refresh_total_sectors(bs, bs->total_sectors);
 }
 
 /*
@@ -5849,7 +5850,7 @@
     if (!drv)
         return -ENOMEDIUM;
 
-    if (drv->has_variable_length) {
+    if (bs->bl.has_variable_length) {
         int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
         if (ret < 0) {
             return ret;
@@ -5858,6 +5859,28 @@
     return bs->total_sectors;
 }
 
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path,
+ * via blk_get_geometry.
+ */
+int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    IO_CODE();
+
+    if (!drv)
+        return -ENOMEDIUM;
+
+    if (bs->bl.has_variable_length) {
+        int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return bs->total_sectors;
+}
+
 /**
  * Return length in bytes on success, -errno on error.
  * The length is always a multiple of BDRV_SECTOR_SIZE.
@@ -5878,16 +5901,6 @@
     return ret * BDRV_SECTOR_SIZE;
 }
 
-/* return 0 as number of sectors if no device present or error */
-void coroutine_fn bdrv_co_get_geometry(BlockDriverState *bs,
-                                       uint64_t *nb_sectors_ptr)
-{
-    int64_t nb_sectors = bdrv_co_nb_sectors(bs);
-    IO_CODE();
-
-    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
-}
-
 bool bdrv_is_sg(BlockDriverState *bs)
 {
     IO_CODE();
diff --git a/block/block-backend.c b/block/block-backend.c
index 2ee3922..55efc73 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1615,29 +1615,53 @@
     return bdrv_co_getlength(blk_bs(blk));
 }
 
-void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
-                                      uint64_t *nb_sectors_ptr)
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
 {
+    BlockDriverState *bs = blk_bs(blk);
+
     IO_CODE();
     GRAPH_RDLOCK_GUARD();
 
-    if (!blk_bs(blk)) {
-        *nb_sectors_ptr = 0;
+    if (!bs) {
+        return -ENOMEDIUM;
     } else {
-        bdrv_co_get_geometry(blk_bs(blk), nb_sectors_ptr);
+        return bdrv_co_nb_sectors(bs);
     }
 }
 
-int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path,
+ * via blk_get_geometry.
+ */
+int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
 {
+    BlockDriverState *bs = blk_bs(blk);
+
     IO_CODE();
-    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_co_is_available(blk)) {
+    if (!bs) {
         return -ENOMEDIUM;
+    } else {
+        return bdrv_nb_sectors(bs);
     }
+}
 
-    return bdrv_co_nb_sectors(blk_bs(blk));
+/* return 0 as number of sectors if no device present or error */
+void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
+                                      uint64_t *nb_sectors_ptr)
+{
+    int64_t ret = blk_co_nb_sectors(blk);
+    *nb_sectors_ptr = ret < 0 ? 0 : ret;
+}
+
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path.
+ */
+void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
+                                         uint64_t *nb_sectors_ptr)
+{
+    int64_t ret = blk_nb_sectors(blk);
+    *nb_sectors_ptr = ret < 0 ? 0 : ret;
 }
 
 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cc0f848..b4d6b7e 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -259,7 +259,6 @@
     .bdrv_co_eject                      = cor_co_eject,
     .bdrv_co_lock_medium                = cor_co_lock_medium,
 
-    .has_variable_length                = true,
     .is_filter                          = true,
 };
 
diff --git a/block/file-posix.c b/block/file-posix.c
index 5760cf2..c2dee3f 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3743,6 +3743,12 @@
 {
     bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
 }
+
+static void cdrom_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.has_variable_length = true;
+    raw_refresh_limits(bs, errp);
+}
 #endif
 
 #ifdef __linux__
@@ -3838,14 +3844,13 @@
     .bdrv_co_preadv         = raw_co_preadv,
     .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_refresh_limits    = cdrom_refresh_limits,
     .bdrv_co_io_plug        = raw_co_io_plug,
     .bdrv_co_io_unplug      = raw_co_io_unplug,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
     .bdrv_co_truncate                   = raw_co_truncate,
     .bdrv_co_getlength                  = raw_co_getlength,
-    .has_variable_length                = true,
     .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
 
     /* removable device support */
@@ -3967,14 +3972,13 @@
     .bdrv_co_preadv         = raw_co_preadv,
     .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-    .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_refresh_limits    = cdrom_refresh_limits,
     .bdrv_co_io_plug        = raw_co_io_plug,
     .bdrv_co_io_unplug      = raw_co_io_unplug,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
     .bdrv_co_truncate                   = raw_co_truncate,
     .bdrv_co_getlength                  = raw_co_getlength,
-    .has_variable_length                = true,
     .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
 
     /* removable device support */
diff --git a/block/file-win32.c b/block/file-win32.c
index c7d0b85..1763b86 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -838,6 +838,7 @@
 {
     /* XXX Does Windows support AIO on less than 512-byte alignment? */
     bs->bl.request_alignment = 512;
+    bs->bl.has_variable_length = true;
 }
 
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
@@ -933,7 +934,6 @@
     .bdrv_attach_aio_context = raw_attach_aio_context,
 
     .bdrv_co_getlength                = raw_co_getlength,
-    .has_variable_length              = true,
     .bdrv_co_get_allocated_file_size  = raw_co_get_allocated_file_size,
 };
 
diff --git a/block/filter-compress.c b/block/filter-compress.c
index ac285f4..320d957 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -146,7 +146,6 @@
     .bdrv_co_eject                      = compress_co_eject,
     .bdrv_co_lock_medium                = compress_co_lock_medium,
 
-    .has_variable_length                = true,
     .is_filter                          = true,
 };
 
diff --git a/block/io.c b/block/io.c
index 8974d46..2e267a8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -190,6 +190,10 @@
             bdrv_merge_limits(&bs->bl, &c->bs->bl);
             have_limits = true;
         }
+
+        if (c->role & BDRV_CHILD_FILTERED) {
+            bs->bl.has_variable_length |= c->bs->bl.has_variable_length;
+        }
     }
 
     if (!have_limits) {
diff --git a/block/preallocate.c b/block/preallocate.c
index 71c3601..4d82125 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -558,7 +558,6 @@
     .bdrv_set_perm = preallocate_set_perm,
     .bdrv_child_perm = preallocate_child_perm,
 
-    .has_variable_length = true,
     .is_filter = true,
 };
 
diff --git a/block/raw-format.c b/block/raw-format.c
index 66783ed..06b8030 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -377,6 +377,8 @@
 
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+    bs->bl.has_variable_length = bs->file->bs->bl.has_variable_length;
+
     if (bs->probed) {
         /* To make it easier to protect the first sector, any probed
          * image is restricted to read-modify-write on sub-sector
@@ -623,7 +625,6 @@
     .bdrv_co_truncate     = &raw_co_truncate,
     .bdrv_co_getlength    = &raw_co_getlength,
     .is_format            = true,
-    .has_variable_length  = true,
     .bdrv_measure         = &raw_measure,
     .bdrv_co_get_info     = &raw_co_get_info,
     .bdrv_refresh_limits  = &raw_refresh_limits,
diff --git a/block/replication.c b/block/replication.c
index de01f96..ea4bf1a 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -762,7 +762,6 @@
 
     .is_filter                  = true,
 
-    .has_variable_length        = true,
     .strong_runtime_opts        = replication_strong_runtime_opts,
 };
 
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index c48cf65..38148f1 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -981,7 +981,7 @@
             sector_write = merged_sector;
         } else if (i == sectors - 1 && trailing_length) {
             /* partial sector at the end of the buffer */
-            ret = bdrv_pread(bs->file, file_offset,
+            ret = bdrv_pread(bs->file, file_offset + trailing_length,
                              VHDX_LOG_SECTOR_SIZE - trailing_length,
                              merged_sector + trailing_length, 0);
             if (ret < 0) {
diff --git a/include/block/block-io.h b/include/block/block-io.h
index dbc034b..5dab885 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -79,7 +79,7 @@
                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
 
 int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs);
-int64_t co_wrapper_mixed_bdrv_rdlock bdrv_nb_sectors(BlockDriverState *bs);
+int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs);
 
 int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);
 int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
@@ -90,9 +90,6 @@
 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
                                BlockDriverState *in_bs, Error **errp);
 
-void coroutine_fn GRAPH_RDLOCK
-bdrv_co_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-
 int coroutine_fn GRAPH_RDLOCK
 bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
 
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index d419017..f01bb8b 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -158,8 +158,6 @@
      */
     bool supports_backing;
 
-    bool has_variable_length;
-
     /*
      * Drivers setting this field must be able to work with just a plain
      * filename with '<protocol_name>:' as a prefix, and no other options.
@@ -855,6 +853,14 @@
 
     /* maximum number of iovec elements */
     int max_iov;
+
+    /*
+     * true if the length of the underlying file can change, and QEMU
+     * is expected to adjust automatically.  Mostly for CD-ROM drives,
+     * whose length is zero when the tray is empty (they don't need
+     * an explicit monitor command to load the disk inside the guest).
+     */
+    bool has_variable_length;
 } BlockLimits;
 
 typedef struct BdrvOpBlocker BdrvOpBlocker;
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
index c672b77..bb25493 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -72,11 +72,10 @@
 
 void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
                                       uint64_t *nb_sectors_ptr);
-void co_wrapper_mixed blk_get_geometry(BlockBackend *blk,
-                                       uint64_t *nb_sectors_ptr);
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
 
 int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk);
-int64_t co_wrapper_mixed blk_nb_sectors(BlockBackend *blk);
+int64_t blk_nb_sectors(BlockBackend *blk);
 
 void *blk_try_blockalign(BlockBackend *blk, size_t size);
 void *blk_blockalign(BlockBackend *blk, size_t size);
diff --git a/migration/block.c b/migration/block.c
index 426a25b..b2497bb 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -195,7 +195,7 @@
 {
     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
 
-    if (sector < blk_nb_sectors(bmds->blk)) {
+    if (sector < bmds->total_sectors) {
         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
             (1UL << (chunk % (sizeof(unsigned long) * 8))));
     } else {
@@ -229,10 +229,9 @@
 
 static void alloc_aio_bitmap(BlkMigDevState *bmds)
 {
-    BlockBackend *bb = bmds->blk;
     int64_t bitmap_size;
 
-    bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size = bmds->total_sectors + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
 
     bmds->aio_bitmap = g_malloc0(bitmap_size);
diff --git a/tests/qemu-iotests/tests/regression-vhdx-log b/tests/qemu-iotests/tests/regression-vhdx-log
new file mode 100755
index 0000000..ca264e9
--- /dev/null
+++ b/tests/qemu-iotests/tests/regression-vhdx-log
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# group: rw auto quick
+#
+# vhdx regression test: Updating the first entry of a BAT sector corrupted the
+# following entries.
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+cd ..
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_unsupported_imgopts "subformat=streamOptimized"
+
+size=64M
+_make_test_img $size
+
+echo
+echo "creating pattern"
+$QEMU_IO -c "write -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -P 2 0 4k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "checking image for errors"
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/regression-vhdx-log.out b/tests/qemu-iotests/tests/regression-vhdx-log.out
new file mode 100644
index 0000000..350c257
--- /dev/null
+++ b/tests/qemu-iotests/tests/regression-vhdx-log.out
@@ -0,0 +1,14 @@
+QA output created by regression-vhdx-log
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+
+creating pattern
+wrote 4096/4096 bytes at offset 33554432
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 33554432
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+checking image for errors
+No errors were found on the image.
+*** done