/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <zlib.h>

#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "trace.h"

int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_l1_size2, ret, i;
    uint64_t *new_l1_table;
    int64_t old_l1_table_offset, old_l1_size;
    int64_t new_l1_table_offset, new_l1_size;
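    /* Scratch buffer for the header update: the 4-byte big-endian l1_size
     * immediately followed by the 8-byte big-endian l1_table_offset */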
    uint8_t data[12];

    if (min_size <= s->l1_size)
        return 0;

    /* Do a sanity check on min_size before trying to calculate new_l1_size
     * (this prevents overflows during the while loop for the calculation of
     * new_l1_size) */
    if (min_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

    if (exact_size) {
        new_l1_size = min_size;
    } else {
        /* Bump size up to reduce the number of times we have to grow */
        new_l1_size = s->l1_size;
        if (new_l1_size == 0) {
            new_l1_size = 1;
        }
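        /* Grow in steps of roughly 1.5x until min_size is covered */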
        while (min_size > new_l1_size) {
            new_l1_size = (new_l1_size * 3 + 1) / 2;
        }
    }

    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
            s->l1_size, new_l1_size);
#endif

    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
        g_free(new_l1_table);
        return new_l1_table_offset;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* the L1 position has not yet been updated, so these clusters must
     * indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
                                        new_l1_size2);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
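    /* L1 entries are stored big-endian on disk: convert in place for the
     * write and convert back afterwards */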
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
    if (ret < 0)
        goto fail;
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);

    /* set new table */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    cpu_to_be32w((uint32_t*)data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data, sizeof(data));
    if (ret < 0) {
        goto fail;
    }
    g_free(s->l1_table);
    old_l1_table_offset = s->l1_table_offset;
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    old_l1_size = s->l1_size;
    s->l1_size = new_l1_size;
    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);
    return 0;
fail:
    g_free(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                        QCOW2_DISCARD_OTHER);
    return ret;
}

/*
 * l2_load
 *
 * Loads an L2 table into memory. If the table is in the cache, the cache
 * is used; otherwise the L2 table is loaded from the image file.
 *
 * Returns 0 on success, -errno on failure. On success, *l2_table points to
 * the L2 table.
 */

static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
                   uint64_t **l2_table)
{
    BDRVQcowState *s = bs->opaque;
    int ret;

    ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);

    return ret;
}

/*
 * Writes one sector of the L1 table to the disk (can't update single entries
 * and we really don't want bdrv_pread to perform a read-modify-write)
 */
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t buf[L1_ENTRIES_PER_SECTOR];
    int l1_start_index;
    int i, ret;

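    /* Round l1_index down to a 64-entry (one 512-byte sector) boundary so
     * that a whole sector of the in-memory L1 table is written */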
    l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
    for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
        buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    return 0;
}

/*
 * l2_allocate
 *
 * Allocate a new l2 entry in the file. If l1_index points to an already
 * used entry in the L1 table (i.e. we are doing a copy on write for the L2
 * table), copy the contents of the old L2 table into the newly allocated
 * one. Otherwise the new table is initialized with zeros.
 */

static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t old_l2_offset;
    uint64_t *l2_table = NULL;
    int64_t l2_offset;
    int ret;

    old_l2_offset = s->l1_table[l1_index];

    trace_qcow2_l2_allocate(bs, l1_index);

    /* allocate a new l2 entry */

    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
    if (l2_offset < 0) {
        ret = l2_offset;
        goto fail;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* allocate a new entry in the l2 cache */

    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
    if (ret < 0) {
        goto fail;
    }

    l2_table = *table;

    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
        /* if there was no old l2 table, clear the new table */
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
    } else {
        uint64_t* old_table;

        /* if there was an old l2 table, read it from the disk */
        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
        ret = qcow2_cache_get(bs, s->l2_table_cache,
            old_l2_offset & L1E_OFFSET_MASK,
            (void**) &old_table);
        if (ret < 0) {
            goto fail;
        }

        memcpy(l2_table, old_table, s->cluster_size);

        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
        if (ret < 0) {
            goto fail;
        }
    }

    /* write the l2 table to the file */
    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    /* update the L1 entry */
    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
    ret = qcow2_write_l1_entry(bs, l1_index);
    if (ret < 0) {
        goto fail;
    }

    *table = l2_table;
    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
    return 0;

fail:
    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
    if (l2_table != NULL) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
    }
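    /* Roll back the in-memory L1 entry so it stays consistent with the
     * on-disk state */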
    s->l1_table[l1_index] = old_l2_offset;
    if (l2_offset > 0) {
        qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}

/*
 * Checks how many clusters in a given L2 table are contiguous in the image
 * file. As soon as one of the flags in the bitmask stop_flags changes compared
 * to the first cluster, the search is stopped and the cluster is not counted
 * as contiguous. (This allows it, for example, to stop at the first compressed
 * cluster, which may require different handling.)
 */
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t stop_flags)
{
    int i;
    uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
    uint64_t first_entry = be64_to_cpu(l2_table[0]);
    uint64_t offset = first_entry & mask;

    if (!offset)
        return 0;

    assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);

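    /* An entry counts as contiguous if its masked value (offset bits plus the
     * stop flags) is exactly i * cluster_size past the first entry's value */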
    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
        if (offset + (uint64_t) i * cluster_size != l2_entry) {
            break;
        }
    }

    return i;
}

static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));

        if (type != QCOW2_CLUSTER_UNALLOCATED) {
            break;
        }
    }

    return i;
}

/* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported */
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
                           uint8_t *out_buf, const uint8_t *in_buf,
                           int nb_sectors, int enc,
                           const AES_KEY *key)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;

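    /* The CBC initialisation vector is the 512-byte sector number in
     * little-endian, zero-padded to 16 bytes (cryptoloop compatible) */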
    for(i = 0; i < nb_sectors; i++) {
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
        AES_cbc_encrypt(in_buf, out_buf, 512, key,
                        ivec.b, enc);
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
}

static int coroutine_fn copy_sectors(BlockDriverState *bs,
                                     uint64_t start_sect,
                                     uint64_t cluster_offset,
                                     int n_start, int n_end)
{
    BDRVQcowState *s = bs->opaque;
    QEMUIOVector qiov;
    struct iovec iov;
    int n, ret;

    n = n_end - n_start;
    if (n <= 0) {
        return 0;
    }

    iov.iov_len = n * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_blockalign(bs, iov.iov_len);

    qemu_iovec_init_external(&qiov, &iov, 1);

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /* Call .bdrv_co_readv() directly instead of using the public block-layer
     * interface. This avoids double I/O throttling and request tracking,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
    if (ret < 0) {
        goto out;
    }

    if (s->crypt_method) {
        qcow2_encrypt_sectors(s, start_sect + n_start,
                iov.iov_base, iov.iov_base, n, 1,
                &s->aes_encrypt_key);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
            cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
    if (ret < 0) {
        goto out;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
    if (ret < 0) {
        goto out;
    }

    ret = 0;
out:
    qemu_vfree(iov.iov_base);
    return ret;
}


/*
 * get_cluster_offset
 *
 * For a given offset of the disk image, find the cluster offset in
 * qcow2 file. The offset is stored in *cluster_offset.
 *
 * on entry, *num is the number of contiguous sectors we'd like to
 * access following offset.
 *
 * on exit, *num is the number of contiguous sectors we can read.
 *
 * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
 * cases.
 */
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             int *num, uint64_t *cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset, *l2_table;
    int l1_bits, c;
    unsigned int index_in_cluster, nb_clusters;
    uint64_t nb_available, nb_needed;
    int ret;

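    /* index_in_cluster, nb_needed and nb_available are all counted in
     * 512-byte sectors */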
    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
    nb_needed = *num + index_in_cluster;

    l1_bits = s->l2_bits + s->cluster_bits;

    /* compute how many bytes there are between the offset and
     * the end of the l1 entry
     */

    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));

    /* compute the number of available sectors */

    nb_available = (nb_available >> 9) + index_in_cluster;

    if (nb_needed > nb_available) {
        nb_needed = nb_available;
    }

    *cluster_offset = 0;

    /* seek the l2 offset in the l1 table */

    l1_index = offset >> l1_bits;
    if (l1_index >= s->l1_size) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        ret = QCOW2_CLUSTER_UNALLOCATED;
        goto out;
    }

    /* load the l2 table in memory */

    ret = l2_load(bs, l2_offset, &l2_table);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
    nb_clusters = size_to_clusters(s, nb_needed << 9);

    ret = qcow2_get_cluster_type(*cluster_offset);
    switch (ret) {
    case QCOW2_CLUSTER_COMPRESSED:
        /* Compressed clusters can only be processed one by one */
        c = 1;
        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
        break;
    case QCOW2_CLUSTER_ZERO:
        if (s->qcow_version < 3) {
            qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
            return -EIO;
        }
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_UNALLOCATED:
        /* how many empty clusters ? */
        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
        *cluster_offset = 0;
        break;
    case QCOW2_CLUSTER_NORMAL:
        /* how many allocated clusters ? */
        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
                &l2_table[l2_index], QCOW_OFLAG_ZERO);
        *cluster_offset &= L2E_OFFSET_MASK;
        break;
    default:
        abort();
    }

    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);

    nb_available = (c * s->cluster_sectors);

out:
    if (nb_available > nb_needed)
        nb_available = nb_needed;

    *num = nb_available - index_in_cluster;

    return ret;
}

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the l2 table.
 *
 * the l2 table offset in the qcow2 file and the cluster index
 * in the l2 table are given to the caller.
 *
 * Returns 0 on success, -errno in failure case
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_table,
                             int *new_l2_index)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset;
    uint64_t *l2_table = NULL;
    int ret;

    /* seek the l2 offset in the l1 table */

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    if (l1_index >= s->l1_size) {
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;

    /* seek the l2 table of the given l2 offset */

    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
        /* load the l2 table in memory */
        ret = l2_load(bs, l2_offset, &l2_table);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index, &l2_table);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                                QCOW2_DISCARD_OTHER);
        }
    }

    /* find the cluster offset for the given disk offset */

    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);

    *new_l2_table = l2_table;
    *new_l2_index = l2_index;

    return 0;
}

/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset of the disk image, return the cluster offset in the
 * qcow2 file.
 *
 * If the offset is not found, allocate a new compressed cluster.
 *
 * Return the cluster offset if successful; return 0 otherwise.
 */

uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                               uint64_t offset,
                                               int compressed_size)
{
    BDRVQcowState *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_table;
    int64_t cluster_offset;
    int nb_csectors;

    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
    if (ret < 0) {
        return 0;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
        return 0;
    }

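    /* Number of 512-byte sectors covered by the compressed data; it is
     * encoded into the upper bits of the L2 entry via csize_shift */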
    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
                  (cluster_offset >> 9);

    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
    l2_table[l2_index] = cpu_to_be64(cluster_offset);
    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        return 0;
    }

    return cluster_offset;
}

static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
    BDRVQcowState *s = bs->opaque;
    int ret;

    if (r->nb_sectors == 0) {
        return 0;
    }

    qemu_co_mutex_unlock(&s->lock);
    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
                       r->offset / BDRV_SECTOR_SIZE,
                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
    qemu_co_mutex_lock(&s->lock);

    if (ret < 0) {
        return ret;
    }

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    qcow2_cache_depends_on_flush(s->l2_table_cache);

    return 0;
}

int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcowState *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_table;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m, &m->cow_start);
    if (ret < 0) {
        goto err;
    }

    ret = perform_cow(bs, m, &m->cow_end);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);

    assert(l2_index + m->nb_clusters <= s->l2_size);
    for (i = 0; i < m->nb_clusters; i++) {
        /* If two concurrent writes happen to the same unallocated cluster,
         * each write allocates a separate cluster and writes data
         * concurrently. The first one to complete updates the L2 table with a
         * pointer to its cluster; the second one has to do RMW (done above by
         * copy_sectors()), update the L2 table with its own cluster pointer,
         * and free the old cluster. This is what this loop does. */
        if(l2_table[l2_index + i] != 0)
            old_cluster[j++] = l2_table[l2_index + i];

        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
    }


    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
    if (ret < 0) {
        goto err;
    }

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     * Also flush bs->file to get the right order for L2 and refcount update.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                    QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}

/*
 * Returns the number of contiguous clusters that can be used for an allocating
 * write, but require COW to be performed (this includes yet unallocated space,
 * which must copy from the backing file)
 */
static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
    uint64_t *l2_table, int l2_index)
{
    int i;

    for (i = 0; i < nb_clusters; i++) {
        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
        int cluster_type = qcow2_get_cluster_type(l2_entry);

        switch(cluster_type) {
        case QCOW2_CLUSTER_NORMAL:
            if (l2_entry & QCOW_OFLAG_COPIED) {
                goto out;
            }
            break;
        case QCOW2_CLUSTER_UNALLOCATED:
        case QCOW2_CLUSTER_COMPRESSED:
        case QCOW2_CLUSTER_ZERO:
            break;
        default:
            abort();
        }
    }

out:
    assert(i <= nb_clusters);
    return i;
}

/*
 * Check if there already is an AIO write request in flight which allocates
 * the same cluster. In this case we need to wait until the previous
 * request has completed and updated the L2 table accordingly.
 *
 * Returns:
 *   0       if there was no dependency. *cur_bytes indicates the number of
 *           bytes from guest_offset that can be read before the next
 *           dependency must be processed (or the request is complete)
 *
 *   -EAGAIN if we had to wait for another request, previously gathered
 *           information on cluster allocation may be invalid now. The caller
 *           must start over anyway, so consider *cur_bytes undefined.
 */
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *cur_bytes, QCowL2Meta **m)
{
    BDRVQcowState *s = bs->opaque;
    QCowL2Meta *old_alloc;
    uint64_t bytes = *cur_bytes;

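    /* Scan all in-flight allocations whose COW region overlaps the range
     * [guest_offset, guest_offset + bytes) */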
    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t start = guest_offset;
        uint64_t end = start + bytes;
        uint64_t old_start = l2meta_cow_start(old_alloc);
        uint64_t old_end = l2meta_cow_end(old_alloc);

        if (end <= old_start || start >= old_end) {
            /* No intersection */
        } else {
            if (start < old_start) {
                /* Stop at the start of a running allocation */
                bytes = old_start - start;
            } else {
                bytes = 0;
            }

            /* Stop if an l2meta already exists. After yielding, it wouldn't
             * be valid any more, so we'd have to clean up the old L2Metas
             * and deal with requests depending on them before starting to
             * gather new ones. Not worth the trouble. */
            if (bytes == 0 && *m) {
                *cur_bytes = 0;
                return 0;
            }

            if (bytes == 0) {
                /* Wait for the dependency to complete. We need to recheck
                 * the free/allocated clusters when we continue. */
                qemu_co_mutex_unlock(&s->lock);
                qemu_co_queue_wait(&old_alloc->dependent_requests);
                qemu_co_mutex_lock(&s->lock);
                return -EAGAIN;
            }
        }
    }

    /* Make sure that existing clusters and new allocations are only used up to
     * the next dependency if we shortened the request above */
    *cur_bytes = bytes;

    return 0;
}

/*
 * Checks how many already allocated clusters that don't require a copy on
 * write exist at the given guest_offset (up to *bytes). If *host_offset is
 * not zero, only physically contiguous clusters beginning at this host
 * offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no allocated clusters are available at the given offset.
 *          *bytes is normally unchanged. It is set to 0 if the cluster
 *          is allocated and doesn't need COW, but doesn't have the right
 *          physical offset.
 *
 *   1:     if allocated clusters that don't require a COW are available at
 *          the requested offset. *bytes may have decreased and describes
 *          the length of the area that can be written to.
 *
 *   -errno: in error cases
 */
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
    BDRVQcowState *s = bs->opaque;
    int l2_index;
    uint64_t cluster_offset;
    uint64_t *l2_table;
    unsigned int nb_clusters;
    unsigned int keep_clusters;
    int ret, pret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);

    assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
                                == offset_into_cluster(s, *host_offset));

    /*
     * Calculate the number of clusters to look for. We stop at L2 table
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
    if (ret < 0) {
        return ret;
    }

    cluster_offset = be64_to_cpu(l2_table[l2_index]);

    /* Check how many clusters are already allocated and don't need COW */
    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
        && (cluster_offset & QCOW_OFLAG_COPIED))
    {
        /* If a specific host_offset is required, check it */
        bool offset_matches =
            (cluster_offset & L2E_OFFSET_MASK) == *host_offset;

        if (*host_offset != 0 && !offset_matches) {
            *bytes = 0;
            ret = 0;
            goto out;
        }

        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters =
            count_contiguous_clusters(nb_clusters, s->cluster_size,
                                      &l2_table[l2_index],
                                      QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
        assert(keep_clusters <= nb_clusters);

        *bytes = MIN(*bytes,
                     keep_clusters * s->cluster_size
                     - offset_into_cluster(s, guest_offset));

        ret = 1;
    } else {
        ret = 0;
    }

    /* Cleanup */
out:
Kevin Wolf | 0af729e | 2013-03-26 17:50:05 +0100 | [diff] [blame] | 961 | pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); |
| 962 | if (pret < 0) { |
| 963 | return pret; |
| 964 | } |
| 965 | |
Kevin Wolf | e62daaf | 2013-03-26 17:50:08 +0100 | [diff] [blame] | 966 | /* Only return a host offset if we actually made progress. Otherwise we |
| 967 | * would impose requirements on handle_alloc() that it can't fulfill */ |
| 968 | if (ret) { |
Kevin Wolf | 411d62b | 2013-03-26 17:50:09 +0100 | [diff] [blame] | 969 | *host_offset = (cluster_offset & L2E_OFFSET_MASK) |
| 970 | + offset_into_cluster(s, guest_offset); |
Kevin Wolf | e62daaf | 2013-03-26 17:50:08 +0100 | [diff] [blame] | 971 | } |
| 972 | |
Kevin Wolf | 0af729e | 2013-03-26 17:50:05 +0100 | [diff] [blame] | 973 | return ret; |
| 974 | } |
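/*
 * Worked example (editor's illustration, numbers are made up): with 64 KiB
 * clusters, guest_offset = 1 MiB + 4 KiB and *bytes = 200 KiB, the search
 * covers size_to_clusters(4 KiB + 200 KiB) = 4 clusters.  If only the first
 * two L2 entries are contiguous QCOW2_CLUSTER_NORMAL entries with
 * QCOW_OFLAG_COPIED set, then keep_clusters = 2, *bytes is shortened to
 * 2 * 64 KiB - 4 KiB = 124 KiB, *host_offset is set to the host cluster
 * offset plus the 4 KiB offset into the cluster, and the function returns 1.
 */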
| 975 | |
| 976 | /* |
Kevin Wolf | 226c3c2 | 2012-12-07 18:08:49 +0100 | [diff] [blame] | 977 | * Allocates new clusters for the given guest_offset. |
| 978 | * |
| 979 | * At most *nb_clusters are allocated, and on return *nb_clusters is updated to |
| 980 | * contain the number of clusters that have been allocated and are contiguous |
| 981 | * in the image file. |
| 982 | * |
| 983 | * If *host_offset is non-zero, it specifies the offset in the image file at |
| 984 | * which the new clusters must start. *nb_clusters can be 0 on return in this |
| 985 | * case if the cluster at host_offset is already in use. If *host_offset is |
| 986 | * zero, the clusters can be allocated anywhere in the image file. |
| 987 | * |
| 988 | * *host_offset is updated to contain the offset into the image file at which |
| 989 | * the first allocated cluster starts. |
| 990 | * |
| 991 | * Return 0 on success and -errno in error cases. -EAGAIN means that the |
| 992 | * function has been waiting for another request and the allocation must be |
| 993 | * restarted, but the whole request should not be failed. |
| 994 | */ |
| 995 | static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, |
| 996 | uint64_t *host_offset, unsigned int *nb_clusters) |
| 997 | { |
| 998 | BDRVQcowState *s = bs->opaque; |
Kevin Wolf | 226c3c2 | 2012-12-07 18:08:49 +0100 | [diff] [blame] | 999 | |
| 1000 | trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, |
| 1001 | *host_offset, *nb_clusters); |
| 1002 | |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1003 | /* Allocate new clusters */ |
| 1004 | trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); |
| 1005 | if (*host_offset == 0) { |
Kevin Wolf | df02179 | 2012-05-24 12:56:32 +0200 | [diff] [blame] | 1006 | int64_t cluster_offset = |
| 1007 | qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); |
| 1008 | if (cluster_offset < 0) { |
| 1009 | return cluster_offset; |
| 1010 | } |
| 1011 | *host_offset = cluster_offset; |
| 1012 | return 0; |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1013 | } else { |
Kevin Wolf | 17a71e5 | 2013-03-26 17:49:56 +0100 | [diff] [blame] | 1014 | int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); |
Kevin Wolf | df02179 | 2012-05-24 12:56:32 +0200 | [diff] [blame] | 1015 | if (ret < 0) { |
| 1016 | return ret; |
| 1017 | } |
| 1018 | *nb_clusters = ret; |
| 1019 | return 0; |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1020 | } |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1021 | } |
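/*
 * Editor's note (not part of the original source): with *host_offset == 0
 * the clusters may land anywhere, so the offset returned by
 * qcow2_alloc_clusters() is simply passed back.  With a fixed *host_offset,
 * qcow2_alloc_clusters_at() may succeed only partially: a request for four
 * clusters at a given offset can come back with *nb_clusters == 2, or even
 * 0 if the cluster at that offset is already in use.
 */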
| 1022 | |
| 1023 | /* |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1024 | * Allocates new clusters for an area that is either not yet allocated or needs a |
| 1025 | * copy on write. If *host_offset is non-zero, clusters are only allocated if |
| 1026 | * the new allocation can match the specified host offset. |
| 1027 | * |
Kevin Wolf | 411d62b | 2013-03-26 17:50:09 +0100 | [diff] [blame] | 1028 | * Note that guest_offset may not be cluster aligned. In this case, the |
| 1029 | * returned *host_offset points to the exact byte referenced by guest_offset |
| 1030 | * and therefore isn't cluster aligned either. |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1031 | * |
| 1032 | * Returns: |
| 1033 | * 0: if no clusters could be allocated. *bytes is set to 0, |
| 1034 | * *host_offset is left unchanged. |
| 1035 | * |
| 1036 | * 1: if new clusters were allocated. *bytes may be decreased if the |
| 1037 | * new allocation doesn't cover all of the requested area. |
| 1038 | * *host_offset is updated to contain the host offset of the first |
| 1039 | * newly allocated cluster. |
| 1040 | * |
| 1041 | * -errno: in error cases |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1042 | */ |
| 1043 | static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, |
Kevin Wolf | c37f4cd | 2013-03-26 17:50:03 +0100 | [diff] [blame] | 1044 | uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1045 | { |
| 1046 | BDRVQcowState *s = bs->opaque; |
| 1047 | int l2_index; |
| 1048 | uint64_t *l2_table; |
| 1049 | uint64_t entry; |
Kevin Wolf | f5bc635 | 2013-03-26 17:50:01 +0100 | [diff] [blame] | 1050 | unsigned int nb_clusters; |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1051 | int ret; |
| 1052 | |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1053 | uint64_t alloc_cluster_offset; |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1054 | |
| 1055 | trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, |
| 1056 | *bytes); |
| 1057 | assert(*bytes > 0); |
| 1058 | |
Kevin Wolf | f5bc635 | 2013-03-26 17:50:01 +0100 | [diff] [blame] | 1059 | /* |
| 1060 | * Calculate the number of clusters to look for. We stop at L2 table |
| 1061 | * boundaries to keep things simple. |
| 1062 | */ |
Kevin Wolf | c37f4cd | 2013-03-26 17:50:03 +0100 | [diff] [blame] | 1063 | nb_clusters = |
| 1064 | size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); |
| 1065 | |
Kevin Wolf | f5bc635 | 2013-03-26 17:50:01 +0100 | [diff] [blame] | 1066 | l2_index = offset_to_l2_index(s, guest_offset); |
Kevin Wolf | c37f4cd | 2013-03-26 17:50:03 +0100 | [diff] [blame] | 1067 | nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); |
Kevin Wolf | f5bc635 | 2013-03-26 17:50:01 +0100 | [diff] [blame] | 1068 | |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1069 | /* Find L2 entry for the first involved cluster */ |
| 1070 | ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); |
| 1071 | if (ret < 0) { |
| 1072 | return ret; |
| 1073 | } |
| 1074 | |
Kevin Wolf | 3b8e2e2 | 2013-03-26 17:50:02 +0100 | [diff] [blame] | 1075 | entry = be64_to_cpu(l2_table[l2_index]); |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1076 | |
| 1077 | /* For the moment, overwrite compressed clusters one by one */ |
| 1078 | if (entry & QCOW_OFLAG_COMPRESSED) { |
| 1079 | nb_clusters = 1; |
| 1080 | } else { |
Kevin Wolf | 3b8e2e2 | 2013-03-26 17:50:02 +0100 | [diff] [blame] | 1081 | nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1082 | } |
| 1083 | |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1084 | /* This function is only called when there were no non-COW clusters, so if |
| 1085 | * we can't find any unallocated or COW clusters either, something is |
| 1086 | * wrong with our code. */ |
| 1087 | assert(nb_clusters > 0); |
| 1088 | |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1089 | ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); |
| 1090 | if (ret < 0) { |
| 1091 | return ret; |
| 1092 | } |
| 1093 | |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1094 | /* Allocate, if necessary at a given offset in the image file */ |
Kevin Wolf | 411d62b | 2013-03-26 17:50:09 +0100 | [diff] [blame] | 1095 | alloc_cluster_offset = start_of_cluster(s, *host_offset); |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1096 | ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1097 | &nb_clusters); |
| 1098 | if (ret < 0) { |
| 1099 | goto fail; |
| 1100 | } |
| 1101 | |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1102 | /* Can't extend contiguous allocation */ |
| 1103 | if (nb_clusters == 0) { |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1104 | *bytes = 0; |
| 1105 | return 0; |
| 1106 | } |
| 1107 | |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1108 | /* |
| 1109 | * Save info needed for the metadata update. |
| 1110 | * |
| 1111 | * requested_sectors: Number of sectors from the start of the first |
| 1112 | * newly allocated cluster to the end of the (possibly already |
| 1113 | * shortened) write request. |
| 1114 | * |
| 1115 | * avail_sectors: Number of sectors from the start of the first |
| 1116 | * newly allocated cluster to the end of the last newly allocated cluster. |
| 1117 | * |
| 1118 | * nb_sectors: The number of sectors from the start of the first |
| 1119 | * newly allocated cluster to the end of the area that the write |
| 1120 | * request actually writes to (excluding COW at the end) |
| 1121 | */ |
| 1122 | int requested_sectors = |
| 1123 | (*bytes + offset_into_cluster(s, guest_offset)) |
| 1124 | >> BDRV_SECTOR_BITS; |
| 1125 | int avail_sectors = nb_clusters |
| 1126 | << (s->cluster_bits - BDRV_SECTOR_BITS); |
| 1127 | int alloc_n_start = offset_into_cluster(s, guest_offset) |
| 1128 | >> BDRV_SECTOR_BITS; |
| 1129 | int nb_sectors = MIN(requested_sectors, avail_sectors); |
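    /*
     * Worked example (editor's illustration, numbers are made up): with
     * 64 KiB clusters and 512-byte sectors, assume
     * offset_into_cluster(s, guest_offset) = 4 KiB, *bytes = 100 KiB and
     * nb_clusters = 2.  Then:
     *   requested_sectors = (100 KiB + 4 KiB) / 512 = 208
     *   avail_sectors     = 2 * (64 KiB / 512)      = 256
     *   alloc_n_start     = 4 KiB / 512             = 8
     *   nb_sectors        = MIN(208, 256)           = 208
     * so cow_start covers sectors [0, 8) of the first new cluster and
     * cow_end covers sectors [208, 256), i.e. the tail of the last new
     * cluster that the guest write does not touch.
     */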
Kevin Wolf | 88c6588 | 2013-03-26 17:50:11 +0100 | [diff] [blame] | 1130 | QCowL2Meta *old_m = *m; |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1131 | |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1132 | *m = g_malloc0(sizeof(**m)); |
| 1133 | |
| 1134 | **m = (QCowL2Meta) { |
Kevin Wolf | 88c6588 | 2013-03-26 17:50:11 +0100 | [diff] [blame] | 1135 | .next = old_m, |
| 1136 | |
Kevin Wolf | 411d62b | 2013-03-26 17:50:09 +0100 | [diff] [blame] | 1137 | .alloc_offset = alloc_cluster_offset, |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1138 | .offset = start_of_cluster(s, guest_offset), |
| 1139 | .nb_clusters = nb_clusters, |
| 1140 | .nb_available = nb_sectors, |
| 1141 | |
| 1142 | .cow_start = { |
| 1143 | .offset = 0, |
| 1144 | .nb_sectors = alloc_n_start, |
| 1145 | }, |
| 1146 | .cow_end = { |
| 1147 | .offset = nb_sectors * BDRV_SECTOR_SIZE, |
| 1148 | .nb_sectors = avail_sectors - nb_sectors, |
| 1149 | }, |
| 1150 | }; |
| 1151 | qemu_co_queue_init(&(*m)->dependent_requests); |
| 1152 | QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); |
| 1153 | |
Kevin Wolf | 411d62b | 2013-03-26 17:50:09 +0100 | [diff] [blame] | 1154 | *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); |
Kevin Wolf | 83baa9a | 2013-03-26 17:50:04 +0100 | [diff] [blame] | 1155 | *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) |
| 1156 | - offset_into_cluster(s, guest_offset)); |
| 1157 | assert(*bytes != 0); |
| 1158 | |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1159 | return 1; |
| 1160 | |
| 1161 | fail: |
| 1162 | if (*m && (*m)->nb_clusters > 0) { |
| 1163 | QLIST_REMOVE(*m, next_in_flight); |
| 1164 | } |
| 1165 | return ret; |
| 1166 | } |
| 1167 | |
| 1168 | /* |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1169 | * alloc_cluster_offset |
| 1170 | * |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1171 | * For a given offset on the virtual disk, find the cluster offset in the qcow2 |
| 1172 | * file. If the offset is not found, allocate a new cluster. |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1173 | * |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1174 | * If the cluster was already allocated, m->nb_clusters is set to 0 and |
Frediano Ziglio | a791236 | 2011-09-07 16:19:17 +0200 | [diff] [blame] | 1175 | * other fields in m are meaningless. |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1176 | * |
Kevin Wolf | 148da7e | 2010-01-20 15:03:01 +0100 | [diff] [blame] | 1177 | * If the cluster is newly allocated, m->nb_clusters is set to the number of |
Kevin Wolf | 68d100e | 2011-06-30 17:42:09 +0200 | [diff] [blame] | 1178 | * contiguous clusters that have been allocated. In this case, the other |
| 1179 | * fields of m are valid and contain information about the first allocated |
| 1180 | * cluster. |
Kevin Wolf | 148da7e | 2010-01-20 15:03:01 +0100 | [diff] [blame] | 1181 | * |
Kevin Wolf | 68d100e | 2011-06-30 17:42:09 +0200 | [diff] [blame] | 1182 | * If the request conflicts with another write request in flight, the coroutine |
| 1183 | * is queued and will be reentered when the dependency has completed. |
Kevin Wolf | 148da7e | 2010-01-20 15:03:01 +0100 | [diff] [blame] | 1184 | * |
| 1185 | * Return 0 on success and -errno in error cases |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1186 | */ |
Kevin Wolf | f4f0d39 | 2010-02-02 15:20:57 +0100 | [diff] [blame] | 1187 | int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, |
Hu Tao | 16f0587 | 2014-01-26 11:12:37 +0800 | [diff] [blame] | 1188 | int *num, uint64_t *host_offset, QCowL2Meta **m) |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1189 | { |
| 1190 | BDRVQcowState *s = bs->opaque; |
Kevin Wolf | 710c249 | 2013-03-26 17:50:10 +0100 | [diff] [blame] | 1191 | uint64_t start, remaining; |
Kevin Wolf | 250196f | 2012-03-02 14:10:54 +0100 | [diff] [blame] | 1192 | uint64_t cluster_offset; |
Kevin Wolf | 65eb2e3 | 2013-03-26 17:49:58 +0100 | [diff] [blame] | 1193 | uint64_t cur_bytes; |
Kevin Wolf | 710c249 | 2013-03-26 17:50:10 +0100 | [diff] [blame] | 1194 | int ret; |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1195 | |
Hu Tao | 16f0587 | 2014-01-26 11:12:37 +0800 | [diff] [blame] | 1196 | trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num); |
Kevin Wolf | 3cce16f | 2012-03-01 18:36:21 +0100 | [diff] [blame] | 1197 | |
Hu Tao | 16f0587 | 2014-01-26 11:12:37 +0800 | [diff] [blame] | 1198 | assert((offset & ~BDRV_SECTOR_MASK) == 0); |
Kevin Wolf | 710c249 | 2013-03-26 17:50:10 +0100 | [diff] [blame] | 1199 | |
Kevin Wolf | 7242411 | 2012-04-24 16:10:56 +0200 | [diff] [blame] | 1200 | again: |
Hu Tao | 16f0587 | 2014-01-26 11:12:37 +0800 | [diff] [blame] | 1201 | start = offset; |
| 1202 | remaining = *num << BDRV_SECTOR_BITS; |
Kevin Wolf | 0af729e | 2013-03-26 17:50:05 +0100 | [diff] [blame] | 1203 | cluster_offset = 0; |
| 1204 | *host_offset = 0; |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1205 | cur_bytes = 0; |
| 1206 | *m = NULL; |
Kevin Wolf | 0af729e | 2013-03-26 17:50:05 +0100 | [diff] [blame] | 1207 | |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1208 | while (true) { |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1209 | |
| 1210 | if (!*host_offset) { |
| 1211 | *host_offset = start_of_cluster(s, cluster_offset); |
| 1212 | } |
| 1213 | |
| 1214 | assert(remaining >= cur_bytes); |
| 1215 | |
| 1216 | start += cur_bytes; |
| 1217 | remaining -= cur_bytes; |
| 1218 | cluster_offset += cur_bytes; |
| 1219 | |
| 1220 | if (remaining == 0) { |
| 1221 | break; |
| 1222 | } |
| 1223 | |
| 1224 | cur_bytes = remaining; |
| 1225 | |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1226 | /* |
| 1227 | * Now start gathering as many contiguous clusters as possible: |
| 1228 | * |
| 1229 | * 1. Check for overlaps with in-flight allocations |
| 1230 | * |
| 1231 | * a) Overlap not in the first cluster -> shorten this request and |
| 1232 | * let the caller handle the rest in its next loop iteration. |
| 1233 | * |
| 1234 | * b) Real overlaps of two requests. Yield and restart the search |
| 1235 | * for contiguous clusters (the situation could have changed |
| 1236 | * while we were sleeping) |
| 1237 | * |
| 1238 | * c) TODO: Request starts in the same cluster as the in-flight |
| 1239 | * allocation ends. Shorten the COW of the in-flight allocation, |
| 1240 | * set cluster_offset to write to the same cluster and set up |
| 1241 | * the right synchronisation between the in-flight request and |
| 1242 | * the new one. |
| 1243 | */ |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1244 | ret = handle_dependencies(bs, start, &cur_bytes, m); |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1245 | if (ret == -EAGAIN) { |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1246 | /* Currently handle_dependencies() doesn't yield if we already had |
| 1247 | * an allocation. If it did, we would have to clean up the L2Meta |
| 1248 | * structs before starting over. */ |
| 1249 | assert(*m == NULL); |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1250 | goto again; |
| 1251 | } else if (ret < 0) { |
| 1252 | return ret; |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1253 | } else if (cur_bytes == 0) { |
| 1254 | break; |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1255 | } else { |
| 1256 | /* handle_dependencies() may have decreased cur_bytes (shortened |
| 1257 | * the allocations below) so that the next dependency is processed |
| 1258 | * correctly during the next loop iteration. */ |
Kevin Wolf | 710c249 | 2013-03-26 17:50:10 +0100 | [diff] [blame] | 1259 | } |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1260 | |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1261 | /* |
| 1262 | * 2. Count contiguous COPIED clusters. |
| 1263 | */ |
| 1264 | ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); |
| 1265 | if (ret < 0) { |
| 1266 | return ret; |
| 1267 | } else if (ret) { |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1268 | continue; |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1269 | } else if (cur_bytes == 0) { |
| 1270 | break; |
| 1271 | } |
| 1272 | |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1273 | /* |
| 1274 | * 3. If the request still hasn't completed, allocate new clusters, |
| 1275 | * considering any cluster_offset of steps 1c or 2. |
| 1276 | */ |
| 1277 | ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); |
| 1278 | if (ret < 0) { |
| 1279 | return ret; |
| 1280 | } else if (ret) { |
Kevin Wolf | ecdd533 | 2013-03-27 11:43:49 +0100 | [diff] [blame] | 1281 | continue; |
Kevin Wolf | 2c3b32d | 2013-03-26 17:50:12 +0100 | [diff] [blame] | 1282 | } else { |
| 1283 | assert(cur_bytes == 0); |
| 1284 | break; |
| 1285 | } |
Kevin Wolf | f5bc635 | 2013-03-26 17:50:01 +0100 | [diff] [blame] | 1286 | } |
Kevin Wolf | 10f0ed8 | 2013-03-26 17:50:00 +0100 | [diff] [blame] | 1287 | |
Hu Tao | 16f0587 | 2014-01-26 11:12:37 +0800 | [diff] [blame] | 1288 | *num -= remaining >> BDRV_SECTOR_BITS; |
Kevin Wolf | 710c249 | 2013-03-26 17:50:10 +0100 | [diff] [blame] | 1289 | assert(*num > 0); |
| 1290 | assert(*host_offset != 0); |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1291 | |
Kevin Wolf | 148da7e | 2010-01-20 15:03:01 +0100 | [diff] [blame] | 1292 | return 0; |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1293 | } |
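/*
 * Hypothetical caller sketch (editor's illustration only; the real write
 * path lives elsewhere in the qcow2 driver):
 *
 *     int nb = 20;                      // sectors the guest wants to write
 *     uint64_t host_offset;
 *     QCowL2Meta *meta = NULL;
 *
 *     ret = qcow2_alloc_cluster_offset(bs, guest_offset, &nb,
 *                                      &host_offset, &meta);
 *     if (ret < 0) {
 *         return ret;
 *     }
 *
 * On return, nb may have been reduced; the caller writes nb sectors of
 * guest data at host_offset in the image file and then walks the
 * meta->next chain to carry out the cow_start/cow_end copies and to update
 * the L2 tables for the newly allocated clusters.
 */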
| 1294 | |
| 1295 | static int decompress_buffer(uint8_t *out_buf, int out_buf_size, |
| 1296 | const uint8_t *buf, int buf_size) |
| 1297 | { |
| 1298 | z_stream strm1, *strm = &strm1; |
| 1299 | int ret, out_len; |
| 1300 | |
| 1301 | memset(strm, 0, sizeof(*strm)); |
| 1302 | |
| 1303 | strm->next_in = (uint8_t *)buf; |
| 1304 | strm->avail_in = buf_size; |
| 1305 | strm->next_out = out_buf; |
| 1306 | strm->avail_out = out_buf_size; |
| 1307 | |
| 1308 | ret = inflateInit2(strm, -12); |
| 1309 | if (ret != Z_OK) |
| 1310 | return -1; |
| 1311 | ret = inflate(strm, Z_FINISH); |
| 1312 | out_len = strm->next_out - out_buf; |
| 1313 | if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || |
| 1314 | out_len != out_buf_size) { |
| 1315 | inflateEnd(strm); |
| 1316 | return -1; |
| 1317 | } |
| 1318 | inflateEnd(strm); |
| 1319 | return 0; |
| 1320 | } |
| 1321 | |
Kevin Wolf | 66f82ce | 2010-04-14 14:17:38 +0200 | [diff] [blame] | 1322 | int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1323 | { |
Kevin Wolf | 66f82ce | 2010-04-14 14:17:38 +0200 | [diff] [blame] | 1324 | BDRVQcowState *s = bs->opaque; |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1325 | int ret, csize, nb_csectors, sector_offset; |
| 1326 | uint64_t coffset; |
| 1327 | |
| 1328 | coffset = cluster_offset & s->cluster_offset_mask; |
| 1329 | if (s->cluster_cache_offset != coffset) { |
| 1330 | nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; |
| 1331 | sector_offset = coffset & 511; |
| 1332 | csize = nb_csectors * 512 - sector_offset; |
Kevin Wolf | 66f82ce | 2010-04-14 14:17:38 +0200 | [diff] [blame] | 1333 | BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); |
| 1334 | ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors); |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1335 | if (ret < 0) { |
Kevin Wolf | 8af3648 | 2011-02-09 10:26:06 +0100 | [diff] [blame] | 1336 | return ret; |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1337 | } |
| 1338 | if (decompress_buffer(s->cluster_cache, s->cluster_size, |
| 1339 | s->cluster_data + sector_offset, csize) < 0) { |
Kevin Wolf | 8af3648 | 2011-02-09 10:26:06 +0100 | [diff] [blame] | 1340 | return -EIO; |
Kevin Wolf | 45aba42 | 2009-05-28 16:07:05 +0200 | [diff] [blame] | 1341 | } |
| 1342 | s->cluster_cache_offset = coffset; |
| 1343 | } |
| 1344 | return 0; |
| 1345 | } |
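/*
 * Worked example (editor's illustration, numbers are made up): suppose the
 * compressed cluster descriptor yields coffset = 1 MiB + 300 and
 * nb_csectors = 3.  Then sector_offset = 300 and csize = 3 * 512 - 300 =
 * 1236, so three 512-byte sectors are read starting at sector
 * coffset / 512 = 2048 and the compressed stream occupies bytes
 * [300, 1536) of s->cluster_data before being inflated into
 * s->cluster_cache.
 */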
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1346 | |
| 1347 | /* |
| 1348 | * This discards as many clusters of nb_clusters as possible at once (i.e. |
| 1349 | * all clusters in the same L2 table) and returns the number of discarded |
| 1350 | * clusters. |
| 1351 | */ |
| 1352 | static int discard_single_l2(BlockDriverState *bs, uint64_t offset, |
Kevin Wolf | 670df5e | 2013-09-06 12:18:47 +0200 | [diff] [blame] | 1353 | unsigned int nb_clusters, enum qcow2_discard_type type) |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1354 | { |
| 1355 | BDRVQcowState *s = bs->opaque; |
Kevin Wolf | 3948d1d | 2012-03-12 17:46:51 +0100 | [diff] [blame] | 1356 | uint64_t *l2_table; |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1357 | int l2_index; |
| 1358 | int ret; |
| 1359 | int i; |
| 1360 | |
Kevin Wolf | 3948d1d | 2012-03-12 17:46:51 +0100 | [diff] [blame] | 1361 | ret = get_cluster_table(bs, offset, &l2_table, &l2_index); |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1362 | if (ret < 0) { |
| 1363 | return ret; |
| 1364 | } |
| 1365 | |
| 1366 | /* Limit nb_clusters to one L2 table */ |
| 1367 | nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); |
| 1368 | |
| 1369 | for (i = 0; i < nb_clusters; i++) { |
Max Reitz | c883db0 | 2014-04-29 16:12:30 +0200 | [diff] [blame] | 1370 | uint64_t old_l2_entry; |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1371 | |
Max Reitz | c883db0 | 2014-04-29 16:12:30 +0200 | [diff] [blame] | 1372 | old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); |
Kevin Wolf | a71835a | 2014-02-08 14:38:33 +0100 | [diff] [blame] | 1373 | |
| 1374 | /* |
| 1375 | * Make sure that a discarded area reads back as zeroes for v3 images |
| 1376 | * (we cannot do it for v2 without actually writing a zero-filled |
| 1377 | * buffer). We can skip the operation if the cluster is already marked |
| 1378 | * as zero, or if it's unallocated and we don't have a backing file. |
| 1379 | * |
| 1380 | * TODO We might want to use bdrv_get_block_status(bs) here, but we're |
| 1381 | * holding s->lock, so that doesn't work today. |
| 1382 | */ |
Max Reitz | c883db0 | 2014-04-29 16:12:30 +0200 | [diff] [blame] | 1383 | switch (qcow2_get_cluster_type(old_l2_entry)) { |
| 1384 | case QCOW2_CLUSTER_UNALLOCATED: |
| 1385 | if (!bs->backing_hd) { |
| 1386 | continue; |
| 1387 | } |
| 1388 | break; |
Kevin Wolf | a71835a | 2014-02-08 14:38:33 +0100 | [diff] [blame] | 1389 | |
Max Reitz | c883db0 | 2014-04-29 16:12:30 +0200 | [diff] [blame] | 1390 | case QCOW2_CLUSTER_ZERO: |
| 1391 | continue; |
| 1392 | |
| 1393 | case QCOW2_CLUSTER_NORMAL: |
| 1394 | case QCOW2_CLUSTER_COMPRESSED: |
| 1395 | break; |
| 1396 | |
| 1397 | default: |
| 1398 | abort(); |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1399 | } |
| 1400 | |
| 1401 | /* First remove L2 entries */ |
| 1402 | qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); |
Kevin Wolf | a71835a | 2014-02-08 14:38:33 +0100 | [diff] [blame] | 1403 | if (s->qcow_version >= 3) { |
| 1404 | l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); |
| 1405 | } else { |
| 1406 | l2_table[l2_index + i] = cpu_to_be64(0); |
| 1407 | } |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1408 | |
| 1409 | /* Then decrease the refcount */ |
Max Reitz | c883db0 | 2014-04-29 16:12:30 +0200 | [diff] [blame] | 1410 | qcow2_free_any_clusters(bs, old_l2_entry, 1, type); |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1411 | } |
| 1412 | |
| 1413 | ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); |
| 1414 | if (ret < 0) { |
| 1415 | return ret; |
| 1416 | } |
| 1417 | |
| 1418 | return nb_clusters; |
| 1419 | } |
| 1420 | |
| 1421 | int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, |
Kevin Wolf | 670df5e | 2013-09-06 12:18:47 +0200 | [diff] [blame] | 1422 | int nb_sectors, enum qcow2_discard_type type) |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1423 | { |
| 1424 | BDRVQcowState *s = bs->opaque; |
| 1425 | uint64_t end_offset; |
| 1426 | unsigned int nb_clusters; |
| 1427 | int ret; |
| 1428 | |
| 1429 | end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); |
| 1430 | |
| 1431 | /* Round start up and end down */ |
| 1432 | offset = align_offset(offset, s->cluster_size); |
Hu Tao | ac95acd | 2013-12-05 14:32:34 +0800 | [diff] [blame] | 1433 | end_offset = start_of_cluster(s, end_offset); |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1434 | |
| 1435 | if (offset > end_offset) { |
| 1436 | return 0; |
| 1437 | } |
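    /*
     * Illustrative example (editor's note, numbers are made up): with
     * 64 KiB clusters, a discard request covering [70 KiB, 200 KiB) is
     * rounded to [128 KiB, 192 KiB), so only the one cluster that is
     * completely covered gets discarded; the partially covered clusters at
     * both ends are left untouched.  A request smaller than one cluster can
     * round to an empty range, which is handled by the early return above.
     */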
| 1438 | |
| 1439 | nb_clusters = size_to_clusters(s, end_offset - offset); |
| 1440 | |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1441 | s->cache_discards = true; |
| 1442 | |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1443 | /* Each L2 table is handled by its own loop iteration */ |
| 1444 | while (nb_clusters > 0) { |
Kevin Wolf | 670df5e | 2013-09-06 12:18:47 +0200 | [diff] [blame] | 1445 | ret = discard_single_l2(bs, offset, nb_clusters, type); |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1446 | if (ret < 0) { |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1447 | goto fail; |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1448 | } |
| 1449 | |
| 1450 | nb_clusters -= ret; |
| 1451 | offset += (ret * s->cluster_size); |
| 1452 | } |
| 1453 | |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1454 | ret = 0; |
| 1455 | fail: |
| 1456 | s->cache_discards = false; |
| 1457 | qcow2_process_discards(bs, ret); |
| 1458 | |
| 1459 | return ret; |
Kevin Wolf | 5ea929e | 2011-01-26 16:56:48 +0100 | [diff] [blame] | 1460 | } |
Kevin Wolf | 621f058 | 2012-03-20 15:12:58 +0100 | [diff] [blame] | 1461 | |
| 1462 | /* |
| 1463 | * This zeroes as many clusters of nb_clusters as possible at once (i.e. |
| 1464 | * all clusters in the same L2 table) and returns the number of zeroed |
| 1465 | * clusters. |
| 1466 | */ |
| 1467 | static int zero_single_l2(BlockDriverState *bs, uint64_t offset, |
| 1468 | unsigned int nb_clusters) |
| 1469 | { |
| 1470 | BDRVQcowState *s = bs->opaque; |
| 1471 | uint64_t *l2_table; |
| 1472 | int l2_index; |
| 1473 | int ret; |
| 1474 | int i; |
| 1475 | |
| 1476 | ret = get_cluster_table(bs, offset, &l2_table, &l2_index); |
| 1477 | if (ret < 0) { |
| 1478 | return ret; |
| 1479 | } |
| 1480 | |
| 1481 | /* Limit nb_clusters to one L2 table */ |
| 1482 | nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); |
| 1483 | |
| 1484 | for (i = 0; i < nb_clusters; i++) { |
| 1485 | uint64_t old_offset; |
| 1486 | |
| 1487 | old_offset = be64_to_cpu(l2_table[l2_index + i]); |
| 1488 | |
| 1489 | /* Update L2 entries */ |
| 1490 | qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); |
| 1491 | if (old_offset & QCOW_OFLAG_COMPRESSED) { |
| 1492 | l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); |
Kevin Wolf | 6cfcb9b | 2013-06-19 13:44:18 +0200 | [diff] [blame] | 1493 | qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); |
Kevin Wolf | 621f058 | 2012-03-20 15:12:58 +0100 | [diff] [blame] | 1494 | } else { |
| 1495 | l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); |
| 1496 | } |
| 1497 | } |
| 1498 | |
| 1499 | ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); |
| 1500 | if (ret < 0) { |
| 1501 | return ret; |
| 1502 | } |
| 1503 | |
| 1504 | return nb_clusters; |
| 1505 | } |
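/*
 * Editor's note (not part of the original source): for a normal entry such
 * as (QCOW_OFLAG_COPIED | 0x50000) the host cluster is kept and only
 * QCOW_OFLAG_ZERO is ORed in, so a later write can reuse the allocation,
 * whereas a compressed entry is replaced entirely by QCOW_OFLAG_ZERO and
 * its backing storage is freed via qcow2_free_any_clusters().
 */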
| 1506 | |
| 1507 | int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) |
| 1508 | { |
| 1509 | BDRVQcowState *s = bs->opaque; |
| 1510 | unsigned int nb_clusters; |
| 1511 | int ret; |
| 1512 | |
| 1513 | /* The zero flag is only supported by version 3 and newer */ |
| 1514 | if (s->qcow_version < 3) { |
| 1515 | return -ENOTSUP; |
| 1516 | } |
| 1517 | |
| 1518 | /* Each L2 table is handled by its own loop iteration */ |
| 1519 | nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); |
| 1520 | |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1521 | s->cache_discards = true; |
| 1522 | |
Kevin Wolf | 621f058 | 2012-03-20 15:12:58 +0100 | [diff] [blame] | 1523 | while (nb_clusters > 0) { |
| 1524 | ret = zero_single_l2(bs, offset, nb_clusters); |
| 1525 | if (ret < 0) { |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1526 | goto fail; |
Kevin Wolf | 621f058 | 2012-03-20 15:12:58 +0100 | [diff] [blame] | 1527 | } |
| 1528 | |
| 1529 | nb_clusters -= ret; |
| 1530 | offset += (ret * s->cluster_size); |
| 1531 | } |
| 1532 | |
Kevin Wolf | 0b919fa | 2013-06-19 13:44:20 +0200 | [diff] [blame] | 1533 | ret = 0; |
| 1534 | fail: |
| 1535 | s->cache_discards = false; |
| 1536 | qcow2_process_discards(bs, ret); |
| 1537 | |
| 1538 | return ret; |
Kevin Wolf | 621f058 | 2012-03-20 15:12:58 +0100 | [diff] [blame] | 1539 | } |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1540 | |
| 1541 | /* |
| 1542 | * Expands all zero clusters in a specific L1 table (or deallocates them, for |
| 1543 | * non-backed non-pre-allocated zero clusters). |
| 1544 | * |
| 1545 | * expanded_clusters is a bitmap where every bit corresponds to one cluster in |
| 1546 | * the image file; a bit gets set if the corresponding cluster has been used for |
| 1547 | * zero expansion (i.e., has been filled with zeroes and is referenced from an |
| 1548 | * L2 table). nb_clusters contains the total cluster count of the image file, |
| 1549 | * i.e., the number of bits in expanded_clusters. |
| 1550 | */ |
| 1551 | static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1552 | int l1_size, uint8_t **expanded_clusters, |
| 1553 | uint64_t *nb_clusters) |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1554 | { |
| 1555 | BDRVQcowState *s = bs->opaque; |
| 1556 | bool is_active_l1 = (l1_table == s->l1_table); |
| 1557 | uint64_t *l2_table = NULL; |
| 1558 | int ret; |
| 1559 | int i, j; |
| 1560 | |
| 1561 | if (!is_active_l1) { |
| 1562 | /* inactive L2 tables require a buffer to be stored in when loading |
| 1563 | * them from disk */ |
| 1564 | l2_table = qemu_blockalign(bs, s->cluster_size); |
| 1565 | } |
| 1566 | |
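    /*
     * Editor's note (not part of the original source): expanded_clusters
     * holds one bit per host cluster, so cluster index 19, for example, is
     * tracked by bit (19 % 8) == 3 of byte (19 / 8) == 2, i.e. the test
     * further down reads (*expanded_clusters)[2] & (1 << 3).
     */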
| 1567 | for (i = 0; i < l1_size; i++) { |
| 1568 | uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; |
| 1569 | bool l2_dirty = false; |
| 1570 | |
| 1571 | if (!l2_offset) { |
| 1572 | /* unallocated */ |
| 1573 | continue; |
| 1574 | } |
| 1575 | |
| 1576 | if (is_active_l1) { |
| 1577 | /* get active L2 tables from cache */ |
| 1578 | ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, |
| 1579 | (void **)&l2_table); |
| 1580 | } else { |
| 1581 | /* load inactive L2 tables from disk */ |
| 1582 | ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, |
| 1583 | (void *)l2_table, s->cluster_sectors); |
| 1584 | } |
| 1585 | if (ret < 0) { |
| 1586 | goto fail; |
| 1587 | } |
| 1588 | |
| 1589 | for (j = 0; j < s->l2_size; j++) { |
| 1590 | uint64_t l2_entry = be64_to_cpu(l2_table[j]); |
| 1591 | int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index; |
| 1592 | int cluster_type = qcow2_get_cluster_type(l2_entry); |
Max Reitz | 320c706 | 2013-09-27 10:21:48 +0200 | [diff] [blame] | 1593 | bool preallocated = offset != 0; |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1594 | |
| 1595 | if (cluster_type == QCOW2_CLUSTER_NORMAL) { |
| 1596 | cluster_index = offset >> s->cluster_bits; |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1597 | assert((cluster_index >= 0) && (cluster_index < *nb_clusters)); |
| 1598 | if ((*expanded_clusters)[cluster_index / 8] & |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1599 | (1 << (cluster_index % 8))) { |
| 1600 | /* Probably a shared L2 table; this cluster was a zero |
| 1601 | * cluster which has been expanded, so its refcount |
| 1602 | * most likely requires an update. */ |
| 1603 | ret = qcow2_update_cluster_refcount(bs, cluster_index, 1, |
| 1604 | QCOW2_DISCARD_NEVER); |
| 1605 | if (ret < 0) { |
| 1606 | goto fail; |
| 1607 | } |
| 1608 | /* Since we just increased the refcount, the COPIED flag may |
| 1609 | * no longer be set. */ |
| 1610 | l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED); |
| 1611 | l2_dirty = true; |
| 1612 | } |
| 1613 | continue; |
| 1614 | } |
| 1615 | else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) { |
| 1616 | continue; |
| 1617 | } |
| 1618 | |
Max Reitz | 320c706 | 2013-09-27 10:21:48 +0200 | [diff] [blame] | 1619 | if (!preallocated) { |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1620 | if (!bs->backing_hd) { |
| 1621 | /* not backed; therefore we can simply deallocate the |
| 1622 | * cluster */ |
| 1623 | l2_table[j] = 0; |
| 1624 | l2_dirty = true; |
| 1625 | continue; |
| 1626 | } |
| 1627 | |
| 1628 | offset = qcow2_alloc_clusters(bs, s->cluster_size); |
| 1629 | if (offset < 0) { |
| 1630 | ret = offset; |
| 1631 | goto fail; |
| 1632 | } |
| 1633 | } |
| 1634 | |
Max Reitz | 231bb26 | 2013-10-10 11:09:23 +0200 | [diff] [blame] | 1635 | ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1636 | if (ret < 0) { |
Max Reitz | 320c706 | 2013-09-27 10:21:48 +0200 | [diff] [blame] | 1637 | if (!preallocated) { |
| 1638 | qcow2_free_clusters(bs, offset, s->cluster_size, |
| 1639 | QCOW2_DISCARD_ALWAYS); |
| 1640 | } |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1641 | goto fail; |
| 1642 | } |
| 1643 | |
| 1644 | ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE, |
Peter Lieven | aa7bfbf | 2013-10-24 12:06:51 +0200 | [diff] [blame] | 1645 | s->cluster_sectors, 0); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1646 | if (ret < 0) { |
Max Reitz | 320c706 | 2013-09-27 10:21:48 +0200 | [diff] [blame] | 1647 | if (!preallocated) { |
| 1648 | qcow2_free_clusters(bs, offset, s->cluster_size, |
| 1649 | QCOW2_DISCARD_ALWAYS); |
| 1650 | } |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1651 | goto fail; |
| 1652 | } |
| 1653 | |
| 1654 | l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); |
| 1655 | l2_dirty = true; |
| 1656 | |
| 1657 | cluster_index = offset >> s->cluster_bits; |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1658 | |
| 1659 | if (cluster_index >= *nb_clusters) { |
| 1660 | uint64_t old_bitmap_size = (*nb_clusters + 7) / 8; |
| 1661 | uint64_t new_bitmap_size; |
| 1662 | /* The offset may lie beyond the old end of the underlying image |
| 1663 | * file for growable files only */ |
| 1664 | assert(bs->file->growable); |
| 1665 | *nb_clusters = size_to_clusters(s, bs->file->total_sectors * |
| 1666 | BDRV_SECTOR_SIZE); |
| 1667 | new_bitmap_size = (*nb_clusters + 7) / 8; |
| 1668 | *expanded_clusters = g_realloc(*expanded_clusters, |
| 1669 | new_bitmap_size); |
| 1670 | /* clear the newly allocated space */ |
| 1671 | memset(&(*expanded_clusters)[old_bitmap_size], 0, |
| 1672 | new_bitmap_size - old_bitmap_size); |
| 1673 | } |
| 1674 | |
| 1675 | assert((cluster_index >= 0) && (cluster_index < *nb_clusters)); |
| 1676 | (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1677 | } |
| 1678 | |
| 1679 | if (is_active_l1) { |
| 1680 | if (l2_dirty) { |
| 1681 | qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); |
| 1682 | qcow2_cache_depends_on_flush(s->l2_table_cache); |
| 1683 | } |
| 1684 | ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); |
| 1685 | if (ret < 0) { |
| 1686 | l2_table = NULL; |
| 1687 | goto fail; |
| 1688 | } |
| 1689 | } else { |
| 1690 | if (l2_dirty) { |
Max Reitz | 231bb26 | 2013-10-10 11:09:23 +0200 | [diff] [blame] | 1691 | ret = qcow2_pre_write_overlap_check(bs, |
| 1692 | QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset, |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1693 | s->cluster_size); |
| 1694 | if (ret < 0) { |
| 1695 | goto fail; |
| 1696 | } |
| 1697 | |
| 1698 | ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, |
| 1699 | (void *)l2_table, s->cluster_sectors); |
| 1700 | if (ret < 0) { |
| 1701 | goto fail; |
| 1702 | } |
| 1703 | } |
| 1704 | } |
| 1705 | } |
| 1706 | |
| 1707 | ret = 0; |
| 1708 | |
| 1709 | fail: |
| 1710 | if (l2_table) { |
| 1711 | if (!is_active_l1) { |
| 1712 | qemu_vfree(l2_table); |
| 1713 | } else { |
| 1714 | if (ret < 0) { |
| 1715 | qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); |
| 1716 | } else { |
| 1717 | ret = qcow2_cache_put(bs, s->l2_table_cache, |
| 1718 | (void **)&l2_table); |
| 1719 | } |
| 1720 | } |
| 1721 | } |
| 1722 | return ret; |
| 1723 | } |
| 1724 | |
| 1725 | /* |
| 1726 | * For backed images, expands all zero clusters on the image. For non-backed |
| 1727 | * images, deallocates all non-pre-allocated zero clusters (and claims the |
| 1728 | * allocation for pre-allocated ones). This is important for downgrading to a |
| 1729 | * qcow2 version which doesn't yet support metadata zero clusters. |
| 1730 | */ |
| 1731 | int qcow2_expand_zero_clusters(BlockDriverState *bs) |
| 1732 | { |
| 1733 | BDRVQcowState *s = bs->opaque; |
| 1734 | uint64_t *l1_table = NULL; |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1735 | uint64_t nb_clusters; |
| 1736 | uint8_t *expanded_clusters; |
| 1737 | int ret; |
| 1738 | int i, j; |
| 1739 | |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1740 | nb_clusters = size_to_clusters(s, bs->file->total_sectors * |
| 1741 | BDRV_SECTOR_SIZE); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1742 | expanded_clusters = g_malloc0((nb_clusters + 7) / 8); |
| 1743 | |
| 1744 | ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1745 | &expanded_clusters, &nb_clusters); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1746 | if (ret < 0) { |
| 1747 | goto fail; |
| 1748 | } |
| 1749 | |
| 1750 | /* Inactive L1 tables may point to active L2 tables - therefore it is |
| 1751 | * necessary to flush the L2 table cache before trying to access the L2 |
| 1752 | * tables pointed to by inactive L1 entries (else we might try to expand |
| 1753 | * zero clusters that have already been expanded); furthermore, it is also |
| 1754 | * necessary to empty the L2 table cache, since it may contain tables which |
| 1755 | * are now going to be modified directly on disk, bypassing the cache. |
| 1756 | * qcow2_cache_empty() does both for us. */ |
| 1757 | ret = qcow2_cache_empty(bs, s->l2_table_cache); |
| 1758 | if (ret < 0) { |
| 1759 | goto fail; |
| 1760 | } |
| 1761 | |
| 1762 | for (i = 0; i < s->nb_snapshots; i++) { |
| 1763 | int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) + |
| 1764 | BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE; |
| 1765 | |
| 1766 | l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); |
| 1767 | |
| 1768 | ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset / |
| 1769 | BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors); |
| 1770 | if (ret < 0) { |
| 1771 | goto fail; |
| 1772 | } |
| 1773 | |
| 1774 | for (j = 0; j < s->snapshots[i].l1_size; j++) { |
| 1775 | be64_to_cpus(&l1_table[j]); |
| 1776 | } |
| 1777 | |
| 1778 | ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, |
Max Reitz | e390cf5 | 2013-09-25 12:07:22 +0200 | [diff] [blame] | 1779 | &expanded_clusters, &nb_clusters); |
Max Reitz | 32b6444 | 2013-09-03 10:09:52 +0200 | [diff] [blame] | 1780 | if (ret < 0) { |
| 1781 | goto fail; |
| 1782 | } |
| 1783 | } |
| 1784 | |
| 1785 | ret = 0; |
| 1786 | |
| 1787 | fail: |
| 1788 | g_free(expanded_clusters); |
| 1789 | g_free(l1_table); |
| 1790 | return ret; |
| 1791 | } |