| #!/usr/bin/env bash |
| # |
| # Test case for image corruption (overlapping data structures) in qcow2 |
| # |
| # Copyright (C) 2013 Red Hat, Inc. |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| # |
| |
| # creator |
| owner=mreitz@redhat.com |
| |
| # Name of this test, taken from the script's own file name and printed as the |
| # first line of output. $0 is quoted so that a path containing whitespace or |
| # glob characters reaches basename as one single argument. |
| seq="$(basename "$0")" |
| echo "QA output created by $seq" |
| |
| # Exit status used by the exit trap; it stays 1 unless the script reaches its |
| # last line, which sets it to 0. |
| status=1 # failure is the default! |
| |
| # Cleanup hook run by the trap below. It only delegates to _cleanup_test_img, |
| # which is not defined in this file (presumably provided by the sourced |
| # common.rc -- confirm there). |
| _cleanup() { _cleanup_test_img; } |
| # Run the cleanup on normal exit and on SIGHUP/SIGINT/SIGQUIT/SIGTERM, then |
| # exit with whatever $status holds at that moment (single quotes defer the |
| # expansion of $status until the trap fires). |
| trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM |
| |
| # Sometimes the error line might be dumped before/after an event |
| # randomly. Mask it out for specific test that may trigger this |
| # uncertainty for current test for now. |
| _filter_io_error() |
| { |
| sed '/Input\/output error/d' |
| } |
| |
| # Load the shared test environment, output filters and requirement checks |
| source ./common.rc |
| source ./common.filter |
| |
| # Only qcow2-specific low-level functionality is exercised here |
| _supported_fmt qcow2 |
| _supported_proto file |
| _supported_os Linux |
| # The scenarios below need compat=1.1 images with refcount_bits=16, so any |
| # other setting of these image options is declared unsupported |
| _unsupported_imgopts 'compat=0.10' 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' |
| |
| # Repairing will create a large file, so verify up front that one can exist |
| _require_large_file 64G |
| |
| # Assumed byte offsets of the metadata structures poked at by the scenarios |
| # below (XXX: these are just assumptions). Judging by their use further down, |
| # rt = refcount table, rb = refcount block, l1/l2 = L1/L2 table. |
| rt_offset=$((0x10000)) # 65536 |
| rb_offset=$((0x20000)) # 131072 |
| l1_offset=$((0x30000)) # 196608 |
| l2_offset=$((0x40000)) # 262144 |
| l2_offset_after_snapshot=$((0x80000)) # 524288 |
| |
| # qemu-io "open" command line: read-write with every overlap check enabled |
| OPEN_RW="open -o overlap-check=all $TEST_IMG" |
| # Overlap checks only run ahead of write operations, so for a read-only open |
| # the overlap-check option would be irrelevant and is left out |
| OPEN_RO="open -r $TEST_IMG" |
| |
| echo |
| echo "=== Testing L2 reference into L1 ===" |
| echo |
| # Scenario: the first L1 entry is made to point at the L1 table itself. The |
| # image is then checked, written to with overlap-check=all, and the header's |
| # incompatible_features line is printed before and after that write. |
| _make_test_img 64M |
| # Link first L1 entry (first L2 table) onto itself |
| # (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any |
| # later write will result in a COW operation, effectively ruining this attempt |
| # at image corruption) |
| # The eight bytes read as 0x8000000000030000: MSb set plus 0x30000, which is |
| # the assumed $l1_offset. |
| poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00" |
| _check_test_img |
| |
| # The corrupt bit should not be set anyway |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| # Try to write something, thereby forcing the corrupt bit to be set |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io |
| |
| # The corrupt bit must now be set |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| # This information should be available through qemu-img info |
| _img_info --format-specific |
| |
| # Try to open the image R/W (which should fail) |
| # stderr is merged into stdout here so the failure message is filtered too |
| $QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \ |
| | _filter_testdir \ |
| | _filter_imgfmt |
| |
| # Try to open it RO (which should succeed) |
| $QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io |
| |
| # We could now try to fix the image, but this would probably fail (how should an |
| # L2 table linked onto the L1 table be fixed?) |
| |
| echo |
| echo "=== Testing cluster data reference into refcount block ===" |
| echo |
| # Scenario: an L2 entry is hand-crafted so that guest cluster 0 maps onto the |
| # refcount block; a write must then be caught, and a repair must fix the image. |
| _make_test_img 64M |
| # Allocate L2 table |
| # (set the file size to $l2_offset + 64 KiB and point the first L1 entry, with |
| # its MSb set, at 0x40000, the assumed $l2_offset) |
| truncate -s "$(($l2_offset+65536))" "$TEST_IMG" |
| poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00" |
| # Mark cluster as used |
| # (writes 0x0001 eight bytes into the refcount block; with 16-bit refcounts |
| # that would be entry 4, presumably the cluster at 0x40000 -- TODO confirm) |
| poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01" |
| # Redirect new data cluster onto refcount block |
| # (first L2 entry := MSb set plus 0x20000, the assumed $rb_offset) |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00" |
| _check_test_img |
| # Print incompatible_features before and after the write attempt |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| # Try to fix it |
| _check_test_img -r all |
| |
| # The corrupt bit should be cleared |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| # Check that it is really fixed: the same write is repeated and the header |
| # field printed once more |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| echo |
| echo "=== Testing cluster data reference into inactive L2 table ===" |
| echo |
| # Scenario: after taking a snapshot, an entry of the active L2 table is pointed |
| # at the L2 table that now belongs to the snapshot. Distinct byte patterns |
| # (1..4) are written so the final reads can tell which data survived. |
| _make_test_img 64M |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io |
| $QEMU_IMG snapshot -c foo "$TEST_IMG" |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io |
| # The inactive L2 table remains at its old offset |
| # (so the entry written at $l2_offset_after_snapshot references 0x40000, the |
| # assumed $l2_offset, with the MSb set) |
| poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \ |
| "\x80\x00\x00\x00\x00\x04\x00\x00" |
| _check_test_img |
| # Header field before the write, after the write, after repair, and after |
| # one more write to the repaired image |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| _check_test_img -r all |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| $QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io |
| $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features |
| |
| # Check data: pattern 4 in the active state, pattern 1 once snapshot foo has |
| # been applied again |
| $QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io |
| $QEMU_IMG snapshot -a foo "$TEST_IMG" |
| _check_test_img |
| $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing overlap while COW is in flight ===" |
| echo |
| # Scenario: a copy-on-write request is paused via blkdebug while a second write |
| # trips the corruption check; resuming the first request must fail cleanly. |
| # The backing file is created by overriding TEST_IMG for that one invocation. |
| BACKING_IMG=$TEST_IMG.base |
| TEST_IMG=$BACKING_IMG _make_test_img 1G |
| |
| $QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io |
| |
| # compat=0.10 is required in order to make the following discard actually |
| # unallocate the sector rather than make it a zero sector - we want COW, after |
| # all. |
| _make_test_img -o 'compat=0.10' -b "$BACKING_IMG" 1G |
| # Write two clusters, the second one enforces creation of an L2 table after |
| # the first data cluster. |
| $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io |
| # Discard the first cluster. This cluster will soon enough be reallocated and |
| # used for COW. |
| $QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io |
| # Now, corrupt the image by marking the second L2 table cluster as free. |
| # (131084 is the assumed $rb_offset plus 12; with 16-bit refcounts that would |
| # be refcount entry 6 -- TODO confirm against the actual image layout) |
| poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c |
| # Start a write operation requiring COW on the image stopping it right before |
| # doing the read; then, trigger the corruption prevention by writing anything to |
| # any unallocated cluster, leading to an attempt to overwrite the second L2 |
| # table. Finally, resume the COW write and see it fail (but not crash). |
| # The command list is fed to qemu-io on stdin, one command per line. |
| echo "open -o file.driver=blkdebug $TEST_IMG |
| break cow_read 0 |
| aio_write 0k 1k |
| wait_break 0 |
| write 64k 64k |
| resume 0" | $QEMU_IO | _filter_qemu_io |
| |
| echo |
| echo "=== Testing unallocated image header ===" |
| echo |
| # Scenario: the first two bytes of the refcount block are zeroed (presumably |
| # the refcount of the cluster holding the image header -- TODO confirm), then |
| # a write that needs a new cluster is issued. |
| _make_test_img 64M |
| # Create L1/L2 |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$rb_offset" "\x00\x00" |
| $QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing unaligned L1 entry ===" |
| echo |
| # Scenario: the first L1 entry is rewritten to reference 0x42a00 (MSb set), |
| # an offset that is not a multiple of the 64 KiB used elsewhere in this test. |
| # It is exercised first by a read and then, on a fresh image, by a downgrade. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| # This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are |
| # aligned or not does not matter |
| poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00" |
| $QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| # Test how well zero cluster expansion can cope with this |
| # (same corruption on a new image, then amend to compat=0.10) |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00" |
| $QEMU_IMG amend -o compat=0.10 "$TEST_IMG" |
| |
| echo |
| echo "=== Testing unaligned L2 entry ===" |
| echo |
| # Scenario: after one cluster has been written, the first L2 entry is replaced |
| # by 0x8000000000052a00 (MSb set, offset 0x52a00, not 64 KiB aligned) and the |
| # cluster is read back. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" |
| $QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing unaligned pre-allocated zero cluster ===" |
| echo |
| # Scenario: same unaligned offset 0x52a00 as in the unaligned-L2-entry case, |
| # but the lowest bit of the entry is set as well (presumably the flag that |
| # marks a zero cluster -- TODO confirm against the qcow2 specification). |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01" |
| # zero cluster expansion |
| # (triggered by amending the image down to compat=0.10) |
| $QEMU_IMG amend -o compat=0.10 "$TEST_IMG" |
| |
| echo |
| echo "=== Testing unaligned reftable entry ===" |
| echo |
| # Scenario: the first refcount table entry is overwritten with 0x22a00 (not |
| # 64 KiB aligned) on a fresh image, followed by a write of one cluster. |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00" |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing non-fatal corruption on freeing ===" |
| echo |
| # Scenario: the first L2 entry is made unaligned (offset 0x52a00, MSb set) |
| # and the affected cluster is then discarded, i.e. the bad entry is met while |
| # freeing rather than while reading or writing data. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" |
| $QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing read-only corruption report ===" |
| echo |
| # Scenario: an unaligned first L2 entry (offset 0x52a00, MSb set) is read |
| # twice within one read-only qemu-io session. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" |
| # Should only emit a single error message |
| $QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing non-fatal and then fatal corruption report ===" |
| echo |
| # Scenario: two clusters are written and both of their L2 entries (at |
| # $l2_offset and $l2_offset + 8) are made unaligned: 0x52a00 and 0x62a00. |
| # A discard hits the first entry, a read hits the second. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" |
| poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00" |
| # Should emit two error messages |
| $QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing empty refcount table ===" |
| echo |
| # Scenario: the first refcount table entry is zeroed on a fresh image, one |
| # cluster is written, and the image is then repaired. |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| # Repair the image |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing empty refcount table with valid L1 and L2 tables ===" |
| echo |
| # Scenario: unlike the plain empty-refcount-table case, a cluster is written |
| # before the first refcount table entry is zeroed, so L1/L2 already exist |
| # when the second write runs. |
| _make_test_img 64M |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| # Since the first data cluster is already allocated this triggers an |
| # allocation with an explicit offset (using qcow2_alloc_clusters_at()) |
| # causing a refcount block to be allocated at offset 0 |
| $QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io |
| # Repair the image |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing empty refcount block ===" |
| echo |
| # Scenario: the first eight bytes of the refcount block are zeroed on a fresh |
| # image (with 16-bit refcounts that would be its first four entries -- TODO |
| # confirm), one cluster is written, and the image is then repaired. |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| # Repair the image |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing empty refcount block with compressed write ===" |
| echo |
| # Scenario: like the empty-refcount-block case, but a normal write comes |
| # first and the write issued after the corruption is a compressed one |
| # ("write -c"). |
| _make_test_img 64M |
| $QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io |
| poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| # The previous write already allocated an L2 table, so now this new |
| # write will try to allocate a compressed data cluster at offset 0. |
| $QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io |
| # Repair the image |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing zero refcount table size ===" |
| echo |
| # Scenario: four zero bytes are written at byte 56 of the image (going by the |
| # section title, the header field holding the refcount table size -- TODO |
| # confirm against the qcow2 header layout). |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00" |
| # stderr is captured too; only the test directory and image format are masked |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt |
| # Repair the image |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing incorrect refcount table offset ===" |
| echo |
| # Scenario: eight zero bytes are written at byte 48 of the image (going by |
| # the section title, the header field holding the refcount table offset -- |
| # TODO confirm), then one cluster is written. No repair is attempted here. |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo |
| echo "=== Testing dirty corrupt image ===" |
| echo |
| |
| # Scenario: an image that is both corrupt (unaligned refblock) and flagged |
| # dirty is opened, then explicitly repaired. |
| _make_test_img 64M |
| |
| # Let the refblock appear unaligned |
| # (first reftable entry := 0xffff2a00) |
| poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00" |
| # Mark the image dirty, thus forcing an automatic check when opening it |
| # (writes the 8-byte value 1 at byte 72 of the image) |
| poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01" |
| # Open the image (qemu should refuse to do so) |
| $QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt |
| |
| echo '--- Repairing ---' |
| |
| # The actual repair should have happened (because of the dirty bit), |
| # but some cleanup may have failed (like freeing the old reftable) |
| # because the image was already marked corrupt by that point |
| _check_test_img -r all |
| |
| echo |
| echo "=== Writing to an unaligned preallocated zero cluster ===" |
| echo |
| |
| # Scenario: an L2 entry claims a zero cluster at offset 0x2a00, and that |
| # cluster is then written to; afterwards the image is repaired. |
| _make_test_img 64M |
| |
| # Allocate the L2 table |
| $QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io |
| # Pretend there is a preallocated zero cluster somewhere inside the |
| # image header |
| # (entry value 0x8000000000002a01: MSb set, offset 0x2a00, lowest bit set) |
| poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01" |
| # Let's write to it! |
| $QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io |
| |
| echo '--- Repairing ---' |
| _check_test_img -r all |
| |
| echo |
| echo '=== Discarding with an unaligned refblock ===' |
| echo |
| |
| # Scenario: with the first reftable entry pointing at the unaligned offset |
| # 0x2a00, a discard of 64 KiB + 1 byte is issued, then the image is repaired |
| # in two passes. |
| _make_test_img 64M |
| |
| $QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io |
| # Make our refblock unaligned |
| poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00" |
| # Now try to discard something that will be submitted as two requests |
| # (main part + tail) |
| # NOTE: unlike most qemu-io calls in this file, this output is not piped |
| # through _filter_qemu_io. |
| $QEMU_IO -c "discard 0 65537" "$TEST_IMG" |
| |
| echo '--- Repairing ---' |
| # Fails the first repair because the corruption prevents the check |
| # function from double-checking |
| # (Using -q for the first invocation, because otherwise the |
| # double-check error message appears above the summary for some |
| # reason -- so let's just hide the summary) |
| _check_test_img -q -r all |
| _check_test_img -r all |
| |
| echo |
| echo "=== Discarding an out-of-bounds refblock ===" |
| echo |
| |
| # Scenario: the second reftable entry ($rt_offset + 8) is set to the very high |
| # offset 0x00ffffff00000000 and the image is then shrunk. |
| _make_test_img 64M |
| |
| # Pretend there's a refblock really up high |
| poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00" |
| # Let's try to shrink the qcow2 image so that the block driver tries |
| # to discard that refblock (and see what happens!) |
| $QEMU_IMG resize --shrink "$TEST_IMG" 32M |
| |
| echo '--- Checking and retrying ---' |
| # Image should not be resized |
| _img_info | grep 'virtual size' |
| # But it should pass this check, because the "partial" resize has |
| # already overwritten refblocks past the end |
| _check_test_img -r all |
| # So let's try again |
| # (and print the virtual size once more after the second attempt) |
| $QEMU_IMG resize --shrink "$TEST_IMG" 32M |
| _img_info | grep 'virtual size' |
| |
| echo |
| echo "=== Discarding a non-covered in-bounds refblock ===" |
| echo |
| |
| # Scenario: same shrink test as for the out-of-bounds refblock, but on an |
| # image created with refcount_bits=1 and with the second reftable entry |
| # pointing at 64G. |
| _make_test_img -o 'refcount_bits=1' 64M |
| |
| # Pretend there's a refblock somewhere where there is no refblock to |
| # cover it (but the covering refblock has a valid index in the |
| # reftable) |
| # Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point |
| # to 0x10_0000_0000 (64G) to point to the third refblock |
| poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00" |
| $QEMU_IMG resize --shrink "$TEST_IMG" 32M |
| |
| echo '--- Checking and retrying ---' |
| # Image should not be resized |
| _img_info | grep 'virtual size' |
| # But it should pass this check, because the "partial" resize has |
| # already overwritten refblocks past the end |
| _check_test_img -r all |
| # So let's try again |
| # (and print the virtual size once more after the second attempt) |
| $QEMU_IMG resize --shrink "$TEST_IMG" 32M |
| _img_info | grep 'virtual size' |
| |
| echo |
| echo "=== Discarding a refblock covered by an unaligned refblock ===" |
| echo |
| |
| # Scenario: the second reftable entry points at 64G (on a refcount_bits=1 |
| # image), and the third reftable entry, whose refblock would cover that |
| # offset, is itself set to the unaligned offset 0x200. |
| _make_test_img -o 'refcount_bits=1' 64M |
| |
| # Same as in the non-covered in-bounds refblock case: second reftable entry |
| # := 0x10_0000_0000 (64G) |
| poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00" |
| # But now we actually "create" an unaligned third refblock |
| poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00" |
| $QEMU_IMG resize --shrink "$TEST_IMG" 32M |
| |
| echo '--- Repairing ---' |
| # Fails the first repair because the corruption prevents the check |
| # function from double-checking |
| # (Using -q for the first invocation, because otherwise the |
| # double-check error message appears above the summary for some |
| # reason -- so let's just hide the summary) |
| _check_test_img -q -r all |
| _check_test_img -r all |
| |
| echo |
| echo "=== Testing the QEMU shutdown with a corrupted image ===" |
| echo |
| # Scenario: the first reftable entry is zeroed, then a full QEMU process is |
| # started with the image attached as node "drive". Over QMP on stdio it is |
| # told to write 512 bytes via the HMP qemu-io command and then to quit. |
| _make_test_img 64M |
| poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00" |
| echo "{'execute': 'qmp_capabilities'} |
| {'execute': 'human-monitor-command', |
| 'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}} |
| {'execute': 'quit'}" \ |
| | $QEMU -qmp stdio -nographic -nodefaults \ |
| -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \ |
| | _filter_qmp | _filter_qemu_io |
| |
| echo |
| echo "=== Testing incoming inactive corrupted image ===" |
| echo |
| |
| # Scenario: QEMU is started with -incoming, and a read is issued through QMP |
| # against an image whose first L1 entry is unaligned; afterwards the image |
| # must not carry the corrupt flag. |
| _make_test_img 64M |
| # Create an unaligned L1 entry, so qemu will signal a corruption when |
| # reading from the covered area |
| # (entry value 0x2a2a2a2a; the MSb is left clear here) |
| poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a" |
| |
| # Inactive images are effectively read-only images, so this should be a |
| # non-fatal corruption (which does not modify the image) |
| # stderr is merged into the pipeline, and _filter_io_error removes the |
| # "Input/output error" line from the result. |
| echo "{'execute': 'qmp_capabilities'} |
| {'execute': 'human-monitor-command', |
| 'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}} |
| {'execute': 'quit'}" \ |
| | $QEMU -qmp stdio -nographic -nodefaults \ |
| -blockdev "{'node-name': 'drive', |
| 'driver': 'qcow2', |
| 'file': { |
| 'driver': 'file', |
| 'filename': '$TEST_IMG' |
| }}" \ |
| -incoming exec:'cat /dev/null' \ |
| 2>&1 \ |
| | _filter_qmp | _filter_qemu_io | _filter_io_error |
| |
| echo |
| # Image should not have been marked corrupt |
| _img_info --format-specific | grep 'corrupt:' |
| |
| # success, all done |
| echo "*** done" |
| # Remove this test's "<seq>.full" file. The operand is quoted so a script |
| # name containing whitespace or glob characters cannot be split or expanded |
| # into other paths. |
| rm -f "$seq.full" |
| # Make the exit trap report success |
| status=0 |