/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                           int nr_sectors);
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                             int nr_sectors);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/O requests */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

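/* Disable I/O throttling on @bs: drain any queued throttled requests and
 * tear down the throttle state */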
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an I/O request wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already queued,
     * queue this I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

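/* Return the optimal memory alignment for I/O buffers used with @bs
 * (falls back to 4096 when no driver is attached) */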
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

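/* Build the full path of a backing file from the name of the backed image
 * (@backed) and the backing file name recorded in it (@backing) */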
void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    if (backing[0] == '\0' || path_has_protocol(backing) ||
        path_is_absolute(backing))
    {
        pstrcpy(dest, sz, backing);
    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
    } else {
        path_combine(dest, sz, backed, backing);
    }
}

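/* Build the full path of @bs's backing file into @dest (at most @sz bytes) */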
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}

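/* Add a block driver to the global driver list, installing emulation
 * wrappers for the coroutine/AIO callbacks it does not implement itself */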
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

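/* Create a new BlockDriverState and insert it into the list of top-level
 * (root) BDSes */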
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

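/* Allocate and initialize an anonymous BlockDriverState with one reference */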
BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

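/* Look up a registered block driver by format name; return NULL if none
 * matches */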
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

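/* Check whether @drv appears on the read-write whitelist or, if @read_only
 * is set, on the read-only whitelist as well */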
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

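/* Coroutine entry point for bdrv_create(): run the driver's bdrv_create
 * callback and store the result in the CreateCo passed as @opaque */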
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

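/* Create an image with the given driver and options.  The driver's
 * bdrv_create callback runs in a coroutine; when called outside coroutine
 * context, the main AioContext is polled until it completes. */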
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

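/* Create an image using the protocol driver inferred from @filename */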
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true, errp);
    if (drv == NULL) {
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/**
 * Try to get @bs's logical and physical block size.
 * On success, store them in @bsz struct and return 0.
 * On failure return -errno.
 * @bs must not be empty.
 */
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_blocksizes) {
        return drv->bdrv_probe_blocksizes(bs, bsz);
    }

    return -ENOTSUP;
}

/**
 * Try to get @bs's geometry (cyls, heads, sectors).
 * On success, store them in @geo struct and return 0.
 * On failure return -errno.
 * @bs must not be empty.
 */
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_geometry) {
        return drv->bdrv_probe_geometry(bs, geo);
    }

    return -ENOTSUP;
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return &bdrv_file;
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}

/*
 * Guess image format by probing its contents.
 * This is not a good idea when your image is raw (CVE-2008-2004), but
 * we do it anyway for backward compatibility.
 *
 * @buf contains the image's first @buf_size bytes.
 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 *           but can be smaller if the image file is smaller)
 * @filename is its filename.
 *
 * For all block drivers, call the bdrv_probe() method to get its
 * probing score.
 * Return the first block driver with the highest probing score.
 */
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe) {
            score = d->bdrv_probe(buf, buf_size, filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

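/* Probe the contents of @bs to find a format driver and store it in *pdrv;
 * scsi-generic devices and empty drives get the raw driver */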
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    BlockDriver *drv;
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        *pdrv = &bdrv_raw;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    drv = bdrv_probe_all(buf, ret, filename);
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

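/* Validate @node_name, reject names that clash with a device ID or an
 * existing node name, and attach it to @bs in the graph node list */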
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called directly with a protocol driver as drv. This
     * layer is already opened, so assign it to bs (while file becomes a
     * closed BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

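/* Parse a "json:" pseudo-protocol filename into a flattened options QDict */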
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

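/* Attach @backing_hd as the backing file of @bs (or detach it when
 * @backing_hd is NULL), updating backing filename/format and op blockers */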
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

Kevin Wolf31ca6d02013-03-28 15:29:24 +01001250/*
1251 * Opens the backing file for a BlockDriverState if not yet open
1252 *
1253 * options is a QDict of options to pass to the block drivers, or NULL for an
1254 * empty set of options. The reference to the QDict is transferred to this
1255 * function (even on failure), so if the caller intends to reuse the dictionary,
1256 * it needs to use QINCREF() before calling bdrv_file_open.
1257 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001258int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001259{
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001260 char *backing_filename = g_malloc0(PATH_MAX);
Kevin Wolf317fc442014-04-25 13:27:34 +02001261 int ret = 0;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001262 BlockDriverState *backing_hd;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001263 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001264
1265 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001266 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001267 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001268 }
1269
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001270 /* NULL means an empty set of options */
1271 if (options == NULL) {
1272 options = qdict_new();
1273 }
1274
Paolo Bonzini9156df12012-10-18 16:49:17 +02001275 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001276 if (qdict_haskey(options, "file.filename")) {
1277 backing_filename[0] = '\0';
1278 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001279 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001280 goto free_exit;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001281 } else {
Max Reitz9f074292014-11-26 17:20:26 +01001282 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1283 &local_err);
1284 if (local_err) {
1285 ret = -EINVAL;
1286 error_propagate(errp, local_err);
1287 QDECREF(options);
1288 goto free_exit;
1289 }
Paolo Bonzini9156df12012-10-18 16:49:17 +02001290 }
1291
Kevin Wolf8ee79e72014-06-04 15:09:35 +02001292 if (!bs->drv || !bs->drv->supports_backing) {
1293 ret = -EINVAL;
1294 error_setg(errp, "Driver doesn't support backing files");
1295 QDECREF(options);
1296 goto free_exit;
1297 }
1298
Markus Armbrustere4e99862014-10-07 13:59:03 +02001299 backing_hd = bdrv_new();
Fam Zheng8d24cce2014-05-23 21:29:45 +08001300
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001301 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1302 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
Paolo Bonzini9156df12012-10-18 16:49:17 +02001303 }
1304
Max Reitzf67503e2014-02-18 18:33:05 +01001305 assert(bs->backing_hd == NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001306 ret = bdrv_open(&backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001307 *backing_filename ? backing_filename : NULL, NULL, options,
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001308 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001309 if (ret < 0) {
Fam Zheng8d24cce2014-05-23 21:29:45 +08001310 bdrv_unref(backing_hd);
1311 backing_hd = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001312 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001313 error_setg(errp, "Could not open backing file: %s",
1314 error_get_pretty(local_err));
1315 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001316 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001317 }
Fam Zheng8d24cce2014-05-23 21:29:45 +08001318 bdrv_set_backing_hd(bs, backing_hd);
Peter Feinerd80ac652014-01-08 19:43:25 +00001319
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001320free_exit:
1321 g_free(backing_filename);
1322 return ret;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001323}
1324
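/*
 * Illustrative sketch (compiled out, not used by the driver): how a caller
 * might hand an options QDict to bdrv_open_backing_file().  The "qcow2"
 * driver value and the function name are assumptions made for the example;
 * the QINCREF()/QDECREF() pair mirrors the reference transfer described in
 * the function documentation above.
 */
#if 0
static int example_open_backing(BlockDriverState *bs)
{
    QDict *backing_opts = qdict_new();
    Error *local_err = NULL;
    int ret;

    /* Force a specific format for the backing file */
    qdict_put(backing_opts, "driver", qstring_from_str("qcow2"));

    /* The callee consumes one reference even on failure, so keep our own */
    QINCREF(backing_opts);
    ret = bdrv_open_backing_file(bs, backing_opts, &local_err);
    if (ret < 0) {
        error_free(local_err);
    }
    QDECREF(backing_opts);
    return ret;
}
#endif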
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001325/*
Max Reitzda557aa2013-12-20 19:28:11 +01001326 * Opens a disk image whose options are given as BlockdevRef in another block
1327 * device's options.
1328 *
Max Reitzda557aa2013-12-20 19:28:11 +01001329 * If allow_none is true, no image will be opened if filename is NULL and no
1330 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1331 *
1332 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1333 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1334 * itself, all options starting with "${bdref_key}." are considered part of the
1335 * BlockdevRef.
1336 *
1337 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001338 *
1339 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001340 */
1341int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1342 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001343 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001344{
1345 QDict *image_options;
1346 int ret;
1347 char *bdref_key_dot;
1348 const char *reference;
1349
Max Reitzf67503e2014-02-18 18:33:05 +01001350 assert(pbs);
1351 assert(*pbs == NULL);
1352
Max Reitzda557aa2013-12-20 19:28:11 +01001353 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1354 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1355 g_free(bdref_key_dot);
1356
1357 reference = qdict_get_try_str(options, bdref_key);
1358 if (!filename && !reference && !qdict_size(image_options)) {
1359 if (allow_none) {
1360 ret = 0;
1361 } else {
1362 error_setg(errp, "A block device must be specified for \"%s\"",
1363 bdref_key);
1364 ret = -EINVAL;
1365 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001366 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001367 goto done;
1368 }
1369
Max Reitzf7d9fd82014-02-18 18:33:12 +01001370 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001371
1372done:
1373 qdict_del(options, bdref_key);
1374 return ret;
1375}
1376
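/*
 * Illustrative sketch (compiled out, not used by the driver): opening a child
 * image through a flattened BlockdevRef, as bdrv_open() itself does for the
 * "file" child.  The option keys ("file.driver", "file.filename"), the flags
 * and the filename value are assumptions made for the example.
 */
#if 0
static int example_open_file_child(QDict *options, Error **errp)
{
    BlockDriverState *file = NULL;

    /* Options for the child are given as "file.*" and are extracted by
     * bdrv_open_image() before the remaining options are interpreted. */
    qdict_put(options, "file.driver", qstring_from_str("file"));
    qdict_put(options, "file.filename", qstring_from_str("/tmp/test.img"));

    return bdrv_open_image(&file, NULL, options, "file",
                           BDRV_O_RDWR | BDRV_O_CACHE_WB, false, errp);
}
#endif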
Chen Gang6b8aeca2014-06-23 23:28:23 +08001377int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001378{
1379 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001380 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001381 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001382 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001383 QDict *snapshot_options;
1384 BlockDriverState *bs_snapshot;
1385 Error *local_err;
1386 int ret;
1387
1388 /* if snapshot, we create a temporary backing file and open it
1389 instead of opening 'filename' directly */
1390
1391 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001392 total_size = bdrv_getlength(bs);
1393 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001394 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001395 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001396 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001397 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001398
1399 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001400 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001401 if (ret < 0) {
1402 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001403 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001404 }
1405
Max Reitzef810432014-12-02 18:32:42 +01001406 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001407 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001408 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001409 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001410 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001411 if (ret < 0) {
1412 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1413 "'%s': %s", tmp_filename,
1414 error_get_pretty(local_err));
1415 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001416 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001417 }
1418
1419 /* Prepare a new options QDict for the temporary file */
1420 snapshot_options = qdict_new();
1421 qdict_put(snapshot_options, "file.driver",
1422 qstring_from_str("file"));
1423 qdict_put(snapshot_options, "file.filename",
1424 qstring_from_str(tmp_filename));
1425
Markus Armbrustere4e99862014-10-07 13:59:03 +02001426 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001427
1428 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001429 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001430 if (ret < 0) {
1431 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001432 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001433 }
1434
1435 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001436
1437out:
1438 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001439 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001440}
1441
Max Reitzda557aa2013-12-20 19:28:11 +01001442/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001443 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001444 *
1445 * options is a QDict of options to pass to the block drivers, or NULL for an
1446 * empty set of options. The reference to the QDict belongs to the block layer
1447 * after the call (even on failure), so if the caller intends to reuse the
1448 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001449 *
1450 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1451 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001452 *
1453 * The reference parameter may be used to specify an existing block device which
1454 * should be opened. If specified, neither options nor a filename may be given,
1455 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001456 */
Max Reitzddf56362014-02-18 18:33:06 +01001457int bdrv_open(BlockDriverState **pbs, const char *filename,
1458 const char *reference, QDict *options, int flags,
1459 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001460{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001461 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001462 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001463 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001464 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001465 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001466
Max Reitzf67503e2014-02-18 18:33:05 +01001467 assert(pbs);
1468
Max Reitzddf56362014-02-18 18:33:06 +01001469 if (reference) {
1470 bool options_non_empty = options ? qdict_size(options) : false;
1471 QDECREF(options);
1472
1473 if (*pbs) {
1474 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1475 "another block device");
1476 return -EINVAL;
1477 }
1478
1479 if (filename || options_non_empty) {
1480 error_setg(errp, "Cannot reference an existing block device with "
1481 "additional options or a new filename");
1482 return -EINVAL;
1483 }
1484
1485 bs = bdrv_lookup_bs(reference, reference, errp);
1486 if (!bs) {
1487 return -ENODEV;
1488 }
1489 bdrv_ref(bs);
1490 *pbs = bs;
1491 return 0;
1492 }
1493
Max Reitzf67503e2014-02-18 18:33:05 +01001494 if (*pbs) {
1495 bs = *pbs;
1496 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001497 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001498 }
1499
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001500 /* NULL means an empty set of options */
1501 if (options == NULL) {
1502 options = qdict_new();
1503 }
1504
Kevin Wolf17b005f2014-05-27 10:50:29 +02001505 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001506 if (local_err) {
1507 goto fail;
1508 }
1509
Kevin Wolf76c591b2014-06-04 14:19:44 +02001510 /* Find the right image format driver */
1511 drv = NULL;
1512 drvname = qdict_get_try_str(options, "driver");
1513 if (drvname) {
1514 drv = bdrv_find_format(drvname);
1515 qdict_del(options, "driver");
1516 if (!drv) {
1517 error_setg(errp, "Unknown driver: '%s'", drvname);
1518 ret = -EINVAL;
1519 goto fail;
1520 }
1521 }
1522
1523 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1524 if (drv && !drv->bdrv_file_open) {
1525 /* If the user explicitly wants a format driver here, we'll need to add
1526 * another layer for the protocol in bs->file */
1527 flags &= ~BDRV_O_PROTOCOL;
1528 }
1529
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001530 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001531 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001532
Kevin Wolff4788ad2014-06-03 16:44:19 +02001533 /* Open image file without format layer */
1534 if ((flags & BDRV_O_PROTOCOL) == 0) {
1535 if (flags & BDRV_O_RDWR) {
1536 flags |= BDRV_O_ALLOW_RDWR;
1537 }
1538 if (flags & BDRV_O_SNAPSHOT) {
1539 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1540 flags = bdrv_backing_flags(flags);
1541 }
1542
1543 assert(file == NULL);
1544 ret = bdrv_open_image(&file, filename, options, "file",
1545 bdrv_inherited_flags(flags),
1546 true, &local_err);
1547 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001548 goto fail;
1549 }
1550 }
1551
Kevin Wolf76c591b2014-06-04 14:19:44 +02001552 /* Image format probing */
Kevin Wolf38f3ef52014-11-20 16:27:12 +01001553 bs->probed = !drv;
Kevin Wolf76c591b2014-06-04 14:19:44 +02001554 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001555 ret = find_image_format(file, filename, &drv, &local_err);
1556 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001557 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001558 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001559 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001560 error_setg(errp, "Must specify either driver or file");
1561 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001562 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001563 }
1564
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001565 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001566 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001567 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001568 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001569 }
1570
Max Reitz2a05cbe2013-12-20 19:28:10 +01001571 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001572 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001573 file = NULL;
1574 }
1575
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001576 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001577 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001578 QDict *backing_options;
1579
Benoît Canet5726d872013-09-25 13:30:01 +02001580 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001581 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001582 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001583 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001584 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001585 }
1586
Max Reitz91af7012014-07-18 20:24:56 +02001587 bdrv_refresh_filename(bs);
1588
Kevin Wolfb9988752014-04-03 12:09:34 +02001589 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1590 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001591 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001592 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001593 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001594 goto close_and_fail;
1595 }
1596 }
1597
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001598 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001599 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001600 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001601 if (flags & BDRV_O_PROTOCOL) {
1602 error_setg(errp, "Block protocol '%s' doesn't support the option "
1603 "'%s'", drv->format_name, entry->key);
1604 } else {
1605 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1606 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001607 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001608 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001609
1610 ret = -EINVAL;
1611 goto close_and_fail;
1612 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001613
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001614 if (!bdrv_key_required(bs)) {
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001615 if (bs->blk) {
1616 blk_dev_change_media_cb(bs->blk, true);
1617 }
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001618 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1619 && !runstate_check(RUN_STATE_INMIGRATE)
1620 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1621 error_setg(errp,
1622 "Guest must be stopped for opening of encrypted image");
1623 ret = -EBUSY;
1624 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001625 }
1626
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001627 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001628 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001629 return 0;
1630
Kevin Wolf8bfea152014-04-11 19:16:36 +02001631fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001632 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001633 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001634 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001635 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001636 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001637 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001638 if (!*pbs) {
1639 /* If *pbs is NULL, a new BDS has been created in this function and
1640 needs to be freed now. Otherwise, it does not need to be closed,
1641 since it has not really been opened yet. */
1642 bdrv_unref(bs);
1643 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001644 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001645 error_propagate(errp, local_err);
1646 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001647 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001648
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001649close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001650 /* See fail path, but now the BDS has to be always closed */
1651 if (*pbs) {
1652 bdrv_close(bs);
1653 } else {
1654 bdrv_unref(bs);
1655 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001656 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001657 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001658 error_propagate(errp, local_err);
1659 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001660 return ret;
1661}
1662
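/*
 * Illustrative sketch (compiled out, not used by the driver): a minimal
 * bdrv_open() call that lets format probing pick the driver.  The flags and
 * the variable names are assumptions made for the example; passing
 * *pbs == NULL requests a fresh BlockDriverState, as documented above.
 */
#if 0
static BlockDriverState *example_open_image(const char *filename, Error **errp)
{
    BlockDriverState *bs = NULL;
    int ret;

    /* NULL options means an empty option set; NULL drv enables probing */
    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, NULL, errp);
    if (ret < 0) {
        return NULL;
    }
    return bs;
}
#endif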
Jeff Codye971aa12012-09-20 15:13:19 -04001663typedef struct BlockReopenQueueEntry {
1664 bool prepared;
1665 BDRVReopenState state;
1666 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1667} BlockReopenQueueEntry;
1668
1669/*
1670 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1671 * reopen of multiple devices.
1672 *
1673 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
1674 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
1675 * be created and initialized. This newly created BlockReopenQueue should be
1676 * passed back in for subsequent calls that are intended to be of the same
1677 * atomic 'set'.
1678 *
1679 * bs is the BlockDriverState to add to the reopen queue.
1680 *
1681 * flags contains the open flags for the associated bs
1682 *
1683 * returns a pointer to bs_queue, which is either the newly allocated
1684 * bs_queue, or the existing bs_queue being used.
1685 *
1686 */
1687BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1688 BlockDriverState *bs, int flags)
1689{
1690 assert(bs != NULL);
1691
1692 BlockReopenQueueEntry *bs_entry;
1693 if (bs_queue == NULL) {
1694 bs_queue = g_new0(BlockReopenQueue, 1);
1695 QSIMPLEQ_INIT(bs_queue);
1696 }
1697
Kevin Wolff1f25a22014-04-25 19:04:55 +02001698 /* bdrv_open() masks this flag out */
1699 flags &= ~BDRV_O_PROTOCOL;
1700
Jeff Codye971aa12012-09-20 15:13:19 -04001701 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001702 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001703 }
1704
1705 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1706 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1707
1708 bs_entry->state.bs = bs;
1709 bs_entry->state.flags = flags;
1710
1711 return bs_queue;
1712}
1713
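/*
 * Illustrative sketch (compiled out, not used by the driver): building a
 * reopen queue for two hypothetical devices and applying it atomically with
 * bdrv_reopen_multiple() below.  Either both devices end up with the new
 * flags or neither does.
 */
#if 0
static int example_atomic_reopen(BlockDriverState *a, BlockDriverState *b,
                                 Error **errp)
{
    BlockReopenQueue *queue = NULL;

    /* Passing NULL the first time allocates and initializes the queue */
    queue = bdrv_reopen_queue(queue, a, a->open_flags & ~BDRV_O_RDWR);
    queue = bdrv_reopen_queue(queue, b, b->open_flags & ~BDRV_O_RDWR);

    /* Prepares every entry, then commits them all or aborts them all */
    return bdrv_reopen_multiple(queue, errp);
}
#endif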
1714/*
1715 * Reopen multiple BlockDriverStates atomically & transactionally.
1716 *
1717 * The queue passed in (bs_queue) must have been built up previously
1718 * via bdrv_reopen_queue().
1719 *
1720 * Reopens all BDS specified in the queue, with the appropriate
1721 * flags. All devices are prepared for reopen, and failure of any
1722 * device will cause all device changes to be abandoned, and intermediate
1723 * data cleaned up.
1724 *
1725 * If all devices prepare successfully, then the changes are committed
1726 * to all devices.
1727 *
1728 */
1729int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1730{
1731 int ret = -1;
1732 BlockReopenQueueEntry *bs_entry, *next;
1733 Error *local_err = NULL;
1734
1735 assert(bs_queue != NULL);
1736
1737 bdrv_drain_all();
1738
1739 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1740 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1741 error_propagate(errp, local_err);
1742 goto cleanup;
1743 }
1744 bs_entry->prepared = true;
1745 }
1746
1747 /* If we reach this point, we have success and just need to apply the
1748 * changes
1749 */
1750 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1751 bdrv_reopen_commit(&bs_entry->state);
1752 }
1753
1754 ret = 0;
1755
1756cleanup:
1757 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1758 if (ret && bs_entry->prepared) {
1759 bdrv_reopen_abort(&bs_entry->state);
1760 }
1761 g_free(bs_entry);
1762 }
1763 g_free(bs_queue);
1764 return ret;
1765}
1766
1767
1768/* Reopen a single BlockDriverState with the specified flags. */
1769int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1770{
1771 int ret = -1;
1772 Error *local_err = NULL;
1773 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1774
1775 ret = bdrv_reopen_multiple(queue, &local_err);
1776 if (local_err != NULL) {
1777 error_propagate(errp, local_err);
1778 }
1779 return ret;
1780}
1781
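/*
 * Illustrative sketch (compiled out, not used by the driver): temporarily
 * reopening a single device read-write, in the same way bdrv_commit() below
 * reopens a read-only backing file.  Error handling is reduced to the bare
 * minimum for the example.
 */
#if 0
static void example_toggle_rdwr(BlockDriverState *bs)
{
    int flags = bs->open_flags;

    if (bdrv_reopen(bs, flags | BDRV_O_RDWR, NULL) == 0) {
        /* ... write to bs ... */
        bdrv_reopen(bs, flags & ~BDRV_O_RDWR, NULL);    /* restore */
    }
}
#endif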
1782
1783/*
1784 * Prepares a BlockDriverState for reopen. All changes are staged in the
1785 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1786 * the block driver's .bdrv_reopen_prepare() callback.
1787 *
1788 * bs is the BlockDriverState to reopen
1789 * flags are the new open flags
1790 * queue is the reopen queue
1791 *
1792 * Returns 0 on success, non-zero on error. On error errp will be set
1793 * as well.
1794 *
1795 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1796 * It is the responsibility of the caller to then call bdrv_reopen_abort() or
1797 * bdrv_reopen_commit() for any other BDS that have been left in a prepare() state.
1798 *
1799 */
1800int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1801 Error **errp)
1802{
1803 int ret = -1;
1804 Error *local_err = NULL;
1805 BlockDriver *drv;
1806
1807 assert(reopen_state != NULL);
1808 assert(reopen_state->bs->drv != NULL);
1809 drv = reopen_state->bs->drv;
1810
1811 /* if we are to stay read-only, do not allow permission change
1812 * to r/w */
1813 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1814 reopen_state->flags & BDRV_O_RDWR) {
1815 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001816 bdrv_get_device_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001817 goto error;
1818 }
1819
1820
1821 ret = bdrv_flush(reopen_state->bs);
1822 if (ret) {
1823 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1824 strerror(-ret));
1825 goto error;
1826 }
1827
1828 if (drv->bdrv_reopen_prepare) {
1829 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1830 if (ret) {
1831 if (local_err != NULL) {
1832 error_propagate(errp, local_err);
1833 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001834 error_setg(errp, "failed while preparing to reopen image '%s'",
1835 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001836 }
1837 goto error;
1838 }
1839 } else {
1840 /* It is currently mandatory to have a bdrv_reopen_prepare()
1841 * handler for each supported drv. */
1842 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001843 drv->format_name, bdrv_get_device_name(reopen_state->bs),
Jeff Codye971aa12012-09-20 15:13:19 -04001844 "reopening of file");
1845 ret = -1;
1846 goto error;
1847 }
1848
1849 ret = 0;
1850
1851error:
1852 return ret;
1853}
1854
1855/*
1856 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1857 * makes them final by swapping the staging BlockDriverState contents into
1858 * the active BlockDriverState contents.
1859 */
1860void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1861{
1862 BlockDriver *drv;
1863
1864 assert(reopen_state != NULL);
1865 drv = reopen_state->bs->drv;
1866 assert(drv != NULL);
1867
1868 /* If there are any driver level actions to take */
1869 if (drv->bdrv_reopen_commit) {
1870 drv->bdrv_reopen_commit(reopen_state);
1871 }
1872
1873 /* set BDS specific flags now */
1874 reopen_state->bs->open_flags = reopen_state->flags;
1875 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1876 BDRV_O_CACHE_WB);
1877 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001878
Kevin Wolf3baca892014-07-16 17:48:16 +02001879 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001880}
1881
1882/*
1883 * Abort the reopen, and delete and free the staged changes in
1884 * reopen_state
1885 */
1886void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1887{
1888 BlockDriver *drv;
1889
1890 assert(reopen_state != NULL);
1891 drv = reopen_state->bs->drv;
1892 assert(drv != NULL);
1893
1894 if (drv->bdrv_reopen_abort) {
1895 drv->bdrv_reopen_abort(reopen_state);
1896 }
1897}
1898
1899
bellardfc01f7e2003-06-30 10:03:06 +00001900void bdrv_close(BlockDriverState *bs)
1901{
Max Reitz33384422014-06-20 21:57:33 +02001902 BdrvAioNotifier *ban, *ban_next;
1903
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001904 if (bs->job) {
1905 block_job_cancel_sync(bs->job);
1906 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001907 bdrv_drain_all(); /* complete I/O */
1908 bdrv_flush(bs);
1909 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001910 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001911
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001912 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001913 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001914 BlockDriverState *backing_hd = bs->backing_hd;
1915 bdrv_set_backing_hd(bs, NULL);
1916 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001917 }
bellardea2384d2004-08-01 21:59:26 +00001918 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001919 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001920 bs->opaque = NULL;
1921 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001922 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001923 bs->backing_file[0] = '\0';
1924 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001925 bs->total_sectors = 0;
1926 bs->encrypted = 0;
1927 bs->valid_key = 0;
1928 bs->sg = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001929 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001930 QDECREF(bs->options);
1931 bs->options = NULL;
Max Reitz91af7012014-07-18 20:24:56 +02001932 QDECREF(bs->full_open_options);
1933 bs->full_open_options = NULL;
bellardb3380822004-03-14 21:38:54 +00001934
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001935 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001936 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001937 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001938 }
bellardb3380822004-03-14 21:38:54 +00001939 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001940
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001941 if (bs->blk) {
1942 blk_dev_change_media_cb(bs->blk, false);
1943 }
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001944
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001945 /*throttling disk I/O limits*/
1946 if (bs->io_limits_enabled) {
1947 bdrv_io_limits_disable(bs);
1948 }
Max Reitz33384422014-06-20 21:57:33 +02001949
1950 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1951 g_free(ban);
1952 }
1953 QLIST_INIT(&bs->aio_notifiers);
bellardb3380822004-03-14 21:38:54 +00001954}
1955
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001956void bdrv_close_all(void)
1957{
1958 BlockDriverState *bs;
1959
Benoît Canetdc364f42014-01-23 21:31:32 +01001960 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001961 AioContext *aio_context = bdrv_get_aio_context(bs);
1962
1963 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001964 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001965 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001966 }
1967}
1968
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001969/* Check if any requests are in-flight (including throttled requests) */
1970static bool bdrv_requests_pending(BlockDriverState *bs)
1971{
1972 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1973 return true;
1974 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001975 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1976 return true;
1977 }
1978 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001979 return true;
1980 }
1981 if (bs->file && bdrv_requests_pending(bs->file)) {
1982 return true;
1983 }
1984 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1985 return true;
1986 }
1987 return false;
1988}
1989
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01001990static bool bdrv_drain_one(BlockDriverState *bs)
1991{
1992 bool bs_busy;
1993
1994 bdrv_flush_io_queue(bs);
1995 bdrv_start_throttled_reqs(bs);
1996 bs_busy = bdrv_requests_pending(bs);
1997 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
1998 return bs_busy;
1999}
2000
2001/*
2002 * Wait for pending requests to complete on a single BlockDriverState subtree
2003 *
2004 * See the warning in bdrv_drain_all(). This function can only be called if
2005 * you are sure nothing can generate I/O because you have op blockers
2006 * installed.
2007 *
2008 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2009 * AioContext.
2010 */
2011void bdrv_drain(BlockDriverState *bs)
2012{
2013 while (bdrv_drain_one(bs)) {
2014 /* Keep iterating */
2015 }
2016}
2017
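/*
 * Illustrative sketch (compiled out, not used by the driver): draining a
 * single device while holding its AioContext, as required by the comment
 * above.  The acquire/release pattern matches the one used by
 * bdrv_drain_all() below.
 */
#if 0
static void example_drain_one(BlockDriverState *bs)
{
    AioContext *aio_context = bdrv_get_aio_context(bs);

    aio_context_acquire(aio_context);
    bdrv_drain(bs);                 /* returns once no requests are pending */
    aio_context_release(aio_context);
}
#endif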
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002018/*
2019 * Wait for pending requests to complete across all BlockDriverStates
2020 *
2021 * This function does not flush data to disk, use bdrv_flush_all() for that
2022 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02002023 *
2024 * Note that completion of an asynchronous I/O operation can trigger any
2025 * number of other I/O operations on other devices---for example a coroutine
2026 * can be arbitrarily complex and a constant flow of I/O can come until the
2027 * coroutine is complete. Because of this, it is not possible to drain a
2028 * single device's I/O queue in isolation unless op blockers prevent new I/O
2029 * (see bdrv_drain() above).
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002029 */
2030void bdrv_drain_all(void)
2031{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002032 /* Always run first iteration so any pending completion BHs run */
2033 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002034 BlockDriverState *bs;
2035
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002036 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002037 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002038
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002039 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2040 AioContext *aio_context = bdrv_get_aio_context(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002041
2042 aio_context_acquire(aio_context);
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002043 busy |= bdrv_drain_one(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002044 aio_context_release(aio_context);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002045 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002046 }
2047}
2048
Benoît Canetdc364f42014-01-23 21:31:32 +01002049/* make a BlockDriverState anonymous by removing it from the bdrv_states and
 2050 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05002051 * Also, NULL-terminate the node_name to prevent double removal */
2052void bdrv_make_anon(BlockDriverState *bs)
2053{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002054 /*
2055 * Take care to remove bs from bdrv_states only when it's actually
2056 * in it. Note that bs->device_list.tqe_prev is initially null,
2057 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2058 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2059 * resetting it to null on remove.
2060 */
2061 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01002062 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002063 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05002064 }
Benoît Canetdc364f42014-01-23 21:31:32 +01002065 if (bs->node_name[0] != '\0') {
2066 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2067 }
2068 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05002069}
2070
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002071static void bdrv_rebind(BlockDriverState *bs)
2072{
2073 if (bs->drv && bs->drv->bdrv_rebind) {
2074 bs->drv->bdrv_rebind(bs);
2075 }
2076}
2077
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002078static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2079 BlockDriverState *bs_src)
2080{
2081 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002082
2083 /* dev info */
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002084 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002085 bs_dest->copy_on_read = bs_src->copy_on_read;
2086
2087 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2088
Benoît Canetcc0681c2013-09-02 14:14:39 +02002089 /* i/o throttled req */
2090 memcpy(&bs_dest->throttle_state,
2091 &bs_src->throttle_state,
2092 sizeof(ThrottleState));
2093 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2094 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002095 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
2096
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002097 /* r/w error */
2098 bs_dest->on_read_error = bs_src->on_read_error;
2099 bs_dest->on_write_error = bs_src->on_write_error;
2100
2101 /* i/o status */
2102 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2103 bs_dest->iostatus = bs_src->iostatus;
2104
2105 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002106 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002107
Fam Zheng9fcb0252013-08-23 09:14:46 +08002108 /* reference count */
2109 bs_dest->refcnt = bs_src->refcnt;
2110
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002111 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002112 bs_dest->job = bs_src->job;
2113
2114 /* keep the same entry in bdrv_states */
Benoît Canetdc364f42014-01-23 21:31:32 +01002115 bs_dest->device_list = bs_src->device_list;
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002116 bs_dest->blk = bs_src->blk;
2117
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002118 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2119 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002120}
2121
2122/*
2123 * Swap bs contents for two image chains while they are live,
2124 * while keeping required fields on the BlockDriverState that is
2125 * actually attached to a device.
2126 *
2127 * This will modify the BlockDriverState fields, and swap contents
2128 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2129 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002130 * bs_new must not be attached to a BlockBackend.
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002131 *
2132 * This function does not create any image files.
2133 */
2134void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2135{
2136 BlockDriverState tmp;
2137
Benoît Canet90ce8a02014-03-05 23:48:29 +01002138 /* The code needs to swap the node_name but simply swapping node_list won't
2139 * work, so first remove the nodes from the graph list, do the swap, then
2140 * insert them back if needed.
2141 */
2142 if (bs_new->node_name[0] != '\0') {
2143 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2144 }
2145 if (bs_old->node_name[0] != '\0') {
2146 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2147 }
2148
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002149 /* bs_new must be unattached and shouldn't have anything fancy enabled */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002150 assert(!bs_new->blk);
Fam Zhenge4654d22013-11-13 18:29:43 +08002151 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002152 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002153 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002154 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002155
2156 tmp = *bs_new;
2157 *bs_new = *bs_old;
2158 *bs_old = tmp;
2159
2160 /* there are some fields that should not be swapped, move them back */
2161 bdrv_move_feature_fields(&tmp, bs_old);
2162 bdrv_move_feature_fields(bs_old, bs_new);
2163 bdrv_move_feature_fields(bs_new, &tmp);
2164
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002165 /* bs_new must remain unattached */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002166 assert(!bs_new->blk);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002167
2168 /* Check a few fields that should remain attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002169 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002170 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002171 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002172
Benoît Canet90ce8a02014-03-05 23:48:29 +01002173 /* insert the nodes back into the graph node list if needed */
2174 if (bs_new->node_name[0] != '\0') {
2175 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2176 }
2177 if (bs_old->node_name[0] != '\0') {
2178 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2179 }
2180
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002181 bdrv_rebind(bs_new);
2182 bdrv_rebind(bs_old);
2183}
2184
Jeff Cody8802d1f2012-02-28 15:54:06 -05002185/*
2186 * Add new bs contents at the top of an image chain while the chain is
2187 * live, while keeping required fields on the top layer.
2188 *
2189 * This will modify the BlockDriverState fields, and swap contents
2190 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2191 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002192 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002193 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002194 * This function does not create any image files.
2195 */
2196void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2197{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002198 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002199
2200 /* bdrv_swap() has exchanged the contents of bs_new and bs_top, so bs_new
2201 * now holds the old top image; link it in as the backing file of bs_top. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002202 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002203}
2204
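/*
 * Illustrative sketch (compiled out, not used by the driver): the typical
 * live-snapshot pattern that bdrv_append_temp_snapshot() above follows:
 * open a fresh overlay and push it on top of the active image.  The option
 * values and the "qcow2" driver choice are assumptions made for the example.
 */
#if 0
static int example_push_overlay(BlockDriverState *active, const char *overlay,
                                Error **errp)
{
    BlockDriverState *new_top = bdrv_new();
    QDict *options = qdict_new();
    int ret;

    /* Open the overlay explicitly as qcow2 instead of probing it */
    qdict_put(options, "driver", qstring_from_str("qcow2"));
    qdict_put(options, "file.filename", qstring_from_str(overlay));

    ret = bdrv_open(&new_top, NULL, NULL, options, BDRV_O_RDWR, NULL, errp);
    if (ret < 0) {
        return ret;
    }

    /* new_top becomes the active layer; 'active' is now its backing file */
    bdrv_append(new_top, active);
    return 0;
}
#endif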
Fam Zheng4f6fd342013-08-23 09:14:47 +08002205static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002206{
Paolo Bonzini3e914652012-03-30 13:17:11 +02002207 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002208 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002209 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002210 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002211
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002212 bdrv_close(bs);
2213
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002214 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002215 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002216
Anthony Liguori7267c092011-08-20 22:09:37 -05002217 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002218}
2219
aliguorie97fc192009-04-21 23:11:50 +00002220/*
2221 * Run consistency checks on an image
2222 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002223 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002224 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002225 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002226 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002227int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002228{
Max Reitz908bcd52014-08-07 22:47:55 +02002229 if (bs->drv == NULL) {
2230 return -ENOMEDIUM;
2231 }
aliguorie97fc192009-04-21 23:11:50 +00002232 if (bs->drv->bdrv_check == NULL) {
2233 return -ENOTSUP;
2234 }
2235
Kevin Wolfe076f332010-06-29 11:43:13 +02002236 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002237 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002238}
2239
Kevin Wolf8a426612010-07-16 17:17:01 +02002240#define COMMIT_BUF_SECTORS 2048
2241
bellard33e39632003-07-06 17:15:21 +00002242/* commit the COW file into its backing image */
2243int bdrv_commit(BlockDriverState *bs)
2244{
bellard19cb3732006-08-19 11:45:59 +00002245 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002246 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002247 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002248 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002249 uint8_t *buf = NULL;
bellard33e39632003-07-06 17:15:21 +00002250
bellard19cb3732006-08-19 11:45:59 +00002251 if (!drv)
2252 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002253
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002254 if (!bs->backing_hd) {
2255 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002256 }
2257
Fam Zhengbb000212014-09-11 13:14:00 +08002258 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2259 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002260 return -EBUSY;
2261 }
2262
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002263 ro = bs->backing_hd->read_only;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002264 open_flags = bs->backing_hd->open_flags;
2265
2266 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002267 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2268 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002269 }
bellard33e39632003-07-06 17:15:21 +00002270 }
bellardea2384d2004-08-01 21:59:26 +00002271
Jeff Cody72706ea2014-01-24 09:02:35 -05002272 length = bdrv_getlength(bs);
2273 if (length < 0) {
2274 ret = length;
2275 goto ro_cleanup;
2276 }
2277
2278 backing_length = bdrv_getlength(bs->backing_hd);
2279 if (backing_length < 0) {
2280 ret = backing_length;
2281 goto ro_cleanup;
2282 }
2283
2284 /* If our top snapshot is larger than the backing file image,
2285 * grow the backing file image if possible. If not possible,
2286 * we must return an error */
2287 if (length > backing_length) {
2288 ret = bdrv_truncate(bs->backing_hd, length);
2289 if (ret < 0) {
2290 goto ro_cleanup;
2291 }
2292 }
2293
2294 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002295
2296 /* qemu_try_blockalign() for bs will choose an alignment that works for
2297 * bs->backing_hd as well, so no need to compare the alignment manually. */
2298 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2299 if (buf == NULL) {
2300 ret = -ENOMEM;
2301 goto ro_cleanup;
2302 }
bellardea2384d2004-08-01 21:59:26 +00002303
Kevin Wolf8a426612010-07-16 17:17:01 +02002304 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002305 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2306 if (ret < 0) {
2307 goto ro_cleanup;
2308 }
2309 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002310 ret = bdrv_read(bs, sector, buf, n);
2311 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002312 goto ro_cleanup;
2313 }
2314
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002315 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2316 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002317 goto ro_cleanup;
2318 }
bellardea2384d2004-08-01 21:59:26 +00002319 }
2320 }
bellard95389c82005-12-18 18:28:15 +00002321
Christoph Hellwig1d449522010-01-17 12:32:30 +01002322 if (drv->bdrv_make_empty) {
2323 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002324 if (ret < 0) {
2325 goto ro_cleanup;
2326 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002327 bdrv_flush(bs);
2328 }
bellard95389c82005-12-18 18:28:15 +00002329
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002330 /*
2331 * Make sure all data we wrote to the backing device is actually
2332 * stable on disk.
2333 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002334 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002335 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002336 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002337
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002338 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002339ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002340 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002341
2342 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002343 /* ignoring error return here */
2344 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002345 }
2346
Christoph Hellwig1d449522010-01-17 12:32:30 +01002347 return ret;
bellard33e39632003-07-06 17:15:21 +00002348}
2349
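/*
 * Illustrative sketch (compiled out, not used by the driver): committing a
 * COW overlay into its backing file after checking the op blockers, mirroring
 * the checks bdrv_commit() performs internally.
 */
#if 0
static int example_commit(BlockDriverState *bs, Error **errp)
{
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
        return -EBUSY;
    }

    /* Copies every allocated sector of bs into bs->backing_hd */
    return bdrv_commit(bs);
}
#endif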
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002350int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002351{
2352 BlockDriverState *bs;
2353
Benoît Canetdc364f42014-01-23 21:31:32 +01002354 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002355 AioContext *aio_context = bdrv_get_aio_context(bs);
2356
2357 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002358 if (bs->drv && bs->backing_hd) {
2359 int ret = bdrv_commit(bs);
2360 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002361 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002362 return ret;
2363 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002364 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002365 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002366 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002367 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002368}
2369
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002370/**
2371 * Remove an active request from the tracked requests list
2372 *
2373 * This function should be called when a tracked request is completing.
2374 */
2375static void tracked_request_end(BdrvTrackedRequest *req)
2376{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002377 if (req->serialising) {
2378 req->bs->serialising_in_flight--;
2379 }
2380
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002381 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002382 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002383}
2384
2385/**
2386 * Add an active request to the tracked requests list
2387 */
2388static void tracked_request_begin(BdrvTrackedRequest *req,
2389 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002390 int64_t offset,
2391 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002392{
2393 *req = (BdrvTrackedRequest){
2394 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002395 .offset = offset,
2396 .bytes = bytes,
2397 .is_write = is_write,
2398 .co = qemu_coroutine_self(),
2399 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002400 .overlap_offset = offset,
2401 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002402 };
2403
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002404 qemu_co_queue_init(&req->wait_queue);
2405
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002406 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2407}
2408
Kevin Wolfe96126f2014-02-08 10:42:18 +01002409static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002410{
Kevin Wolf73271452013-12-04 17:08:50 +01002411 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002412 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2413 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002414
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002415 if (!req->serialising) {
2416 req->bs->serialising_in_flight++;
2417 req->serialising = true;
2418 }
Kevin Wolf73271452013-12-04 17:08:50 +01002419
2420 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2421 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002422}
2423
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002424/**
2425 * Round a region to cluster boundaries
2426 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002427void bdrv_round_to_clusters(BlockDriverState *bs,
2428 int64_t sector_num, int nb_sectors,
2429 int64_t *cluster_sector_num,
2430 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002431{
2432 BlockDriverInfo bdi;
2433
2434 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2435 *cluster_sector_num = sector_num;
2436 *cluster_nb_sectors = nb_sectors;
2437 } else {
2438 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2439 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2440 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2441 nb_sectors, c);
2442 }
2443}
2444
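/*
 * Illustrative sketch (compiled out, not used by the driver): with a 64 KB
 * cluster size (128 sectors of 512 bytes), a one-sector request at sector 1
 * is widened to the whole first cluster, i.e. sectors [0, 128).
 */
#if 0
static void example_round(BlockDriverState *bs)
{
    int64_t cluster_sector_num;
    int cluster_nb_sectors;

    bdrv_round_to_clusters(bs, 1, 1, &cluster_sector_num, &cluster_nb_sectors);
    /* For a 64 KB cluster image: cluster_sector_num == 0,
     * cluster_nb_sectors == 128 */
}
#endif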
Kevin Wolf73271452013-12-04 17:08:50 +01002445static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002446{
2447 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002448 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002449
Kevin Wolf73271452013-12-04 17:08:50 +01002450 ret = bdrv_get_info(bs, &bdi);
2451 if (ret < 0 || bdi.cluster_size == 0) {
2452 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002453 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002454 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002455 }
2456}
2457
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002458static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002459 int64_t offset, unsigned int bytes)
2460{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002461 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002462 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002463 return false;
2464 }
2465 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002466 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002467 return false;
2468 }
2469 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002470}
2471
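/*
 * Illustrative sketch (compiled out, not used by the driver): the overlap
 * test is a plain half-open interval intersection on the aligned ranges.
 * The byte values below are assumptions chosen for the example.
 */
#if 0
static void example_overlap(void)
{
    BdrvTrackedRequest req = {
        .overlap_offset = 4096,
        .overlap_bytes  = 4096,        /* covers bytes [4096, 8192) */
    };

    assert(!tracked_request_overlaps(&req, 0, 4096));   /* adjacent ranges */
    assert(tracked_request_overlaps(&req, 0, 4097));    /* one byte overlap */
}
#endif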
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002472static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002473{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002474 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002475 BdrvTrackedRequest *req;
2476 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002477 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002478
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002479 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002480 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002481 }
2482
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002483 do {
2484 retry = false;
2485 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002486 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002487 continue;
2488 }
Kevin Wolf73271452013-12-04 17:08:50 +01002489 if (tracked_request_overlaps(req, self->overlap_offset,
2490 self->overlap_bytes))
2491 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002492 /* Hitting this means there was a reentrant request, for
2493 * example, a block driver issuing nested requests. This must
2494 * never happen since it means deadlock.
2495 */
2496 assert(qemu_coroutine_self() != req->co);
2497
Kevin Wolf64604402013-12-13 13:04:35 +01002498 /* If the request is already (indirectly) waiting for us, or
2499 * will wait for us as soon as it wakes up, then just go on
2500 * (instead of producing a deadlock in the former case). */
2501 if (!req->waiting_for) {
2502 self->waiting_for = req;
2503 qemu_co_queue_wait(&req->wait_queue);
2504 self->waiting_for = NULL;
2505 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002506 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002507 break;
2508 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002509 }
2510 }
2511 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002512
2513 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002514}
2515
Kevin Wolf756e6732010-01-12 12:55:17 +01002516/*
2517 * Return values:
2518 * 0 - success
2519 * -EINVAL - backing format specified, but no file
2520 * -ENOSPC - can't update the backing file because no space is left in the
2521 * image file header
2522 * -ENOTSUP - format driver doesn't support changing the backing file
2523 */
2524int bdrv_change_backing_file(BlockDriverState *bs,
2525 const char *backing_file, const char *backing_fmt)
2526{
2527 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002528 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002529
Paolo Bonzini5f377792012-04-12 14:01:01 +02002530 /* Backing file format doesn't make sense without a backing file */
2531 if (backing_fmt && !backing_file) {
2532 return -EINVAL;
2533 }
2534
Kevin Wolf756e6732010-01-12 12:55:17 +01002535 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002536 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002537 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002538 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002539 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002540
2541 if (ret == 0) {
2542 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2543 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2544 }
2545 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002546}
2547
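/*
 * Illustrative sketch (compiled out, not used by the driver): rewriting the
 * backing-file pointer in the image header, e.g. after a stream or commit
 * job.  The file name and format strings are assumptions made for the
 * example.
 */
#if 0
static int example_change_backing(BlockDriverState *bs)
{
    int ret = bdrv_change_backing_file(bs, "base.qcow2", "qcow2");

    if (ret == -ENOTSUP) {
        /* The format driver cannot rewrite its header */
    }
    return ret;
}
#endif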
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002548/*
2549 * Finds the image layer in the chain that has 'bs' as its backing file.
2550 *
2551 * active is the current topmost image.
2552 *
2553 * Returns NULL if bs is not found in active's image chain,
2554 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002555 *
2556 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002557 */
2558BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2559 BlockDriverState *bs)
2560{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002561 while (active && bs != active->backing_hd) {
2562 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002563 }
2564
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002565 return active;
2566}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002567
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002568/* Given a BDS, searches for the base layer. */
2569BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2570{
2571 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002572}
2573
2574typedef struct BlkIntermediateStates {
2575 BlockDriverState *bs;
2576 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2577} BlkIntermediateStates;
2578
2579
2580/*
2581 * Drops images above 'base' up to and including 'top', and sets the image
2582 * above 'top' to have base as its backing file.
2583 *
2584 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2585 * information of that overlay can be properly updated.
2586 *
2587 * E.g., this will convert the following chain:
2588 * bottom <- base <- intermediate <- top <- active
2589 *
2590 * to
2591 *
2592 * bottom <- base <- active
2593 *
2594 * It is allowed for bottom==base, in which case it converts:
2595 *
2596 * base <- intermediate <- top <- active
2597 *
2598 * to
2599 *
2600 * base <- active
2601 *
Jeff Cody54e26902014-06-25 15:40:10 -04002602 * If backing_file_str is non-NULL, it will be used when modifying top's
2603 * overlay image metadata.
2604 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002605 * Error conditions:
2606 * if active == top, that is considered an error
2607 *
2608 */
2609int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002610 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002611{
2612 BlockDriverState *intermediate;
2613 BlockDriverState *base_bs = NULL;
2614 BlockDriverState *new_top_bs = NULL;
2615 BlkIntermediateStates *intermediate_state, *next;
2616 int ret = -EIO;
2617
2618 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2619 QSIMPLEQ_INIT(&states_to_delete);
2620
2621 if (!top->drv || !base->drv) {
2622 goto exit;
2623 }
2624
2625 new_top_bs = bdrv_find_overlay(active, top);
2626
2627 if (new_top_bs == NULL) {
2628 /* we could not find the image above 'top', this is an error */
2629 goto exit;
2630 }
2631
2632 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2633 * to do, no intermediate images */
2634 if (new_top_bs->backing_hd == base) {
2635 ret = 0;
2636 goto exit;
2637 }
2638
2639 intermediate = top;
2640
2641 /* now we will go down through the list, and add each BDS we find
2642 * into our deletion queue, until we hit the 'base'
2643 */
2644 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002645 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002646 intermediate_state->bs = intermediate;
2647 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2648
2649 if (intermediate->backing_hd == base) {
2650 base_bs = intermediate->backing_hd;
2651 break;
2652 }
2653 intermediate = intermediate->backing_hd;
2654 }
2655 if (base_bs == NULL) {
2656     /* something went wrong: we did not end at the base.  Safely
2657      * unravel everything and exit with an error */
2658 goto exit;
2659 }
2660
2661 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002662 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2663 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002664 base_bs->drv ? base_bs->drv->format_name : "");
2665 if (ret) {
2666 goto exit;
2667 }
Fam Zheng920beae2014-05-23 21:29:46 +08002668 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002669
2670 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2671 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002672 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002673 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002674 }
2675 ret = 0;
2676
2677exit:
2678 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2679 g_free(intermediate_state);
2680 }
2681 return ret;
2682}
2683
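/* Illustrative sketch (not part of this file): collapsing a hypothetical chain
 * "base <- mid <- top <- active" down to "base <- active".  The variable names
 * are examples only.
 *
 *     BlockDriverState *base = bdrv_find_base(active);
 *     BlockDriverState *top  = active->backing_hd;
 *     int ret = bdrv_drop_intermediate(active, top, base, NULL);
 *     // passing NULL records base's filename as the new backing file string
 */
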
2684
aliguori71d07702009-03-03 17:37:16 +00002685static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2686 size_t size)
2687{
Peter Lieven75af1f32015-02-06 11:54:11 +01002688 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002689 return -EIO;
2690 }
2691
Max Reitzc0191e72015-02-05 13:58:24 -05002692 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002693 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002694 }
aliguori71d07702009-03-03 17:37:16 +00002695
Max Reitzc0191e72015-02-05 13:58:24 -05002696 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002697 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002698 }
aliguori71d07702009-03-03 17:37:16 +00002699
2700 return 0;
2701}
2702
2703static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2704 int nb_sectors)
2705{
Peter Lieven75af1f32015-02-06 11:54:11 +01002706 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002707 return -EIO;
2708 }
2709
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002710 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2711 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002712}
2713
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002714typedef struct RwCo {
2715 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002716 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002717 QEMUIOVector *qiov;
2718 bool is_write;
2719 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002720 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002721} RwCo;
2722
2723static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2724{
2725 RwCo *rwco = opaque;
2726
2727 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002728 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2729 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002730 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002731 } else {
2732 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2733 rwco->qiov->size, rwco->qiov,
2734 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002735 }
2736}
2737
2738/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002739 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002740 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002741static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2742 QEMUIOVector *qiov, bool is_write,
2743 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002744{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002745 Coroutine *co;
2746 RwCo rwco = {
2747 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002748 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002749 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002750 .is_write = is_write,
2751 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002752 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002753 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002754
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002755 /**
2756     * In a synchronous call context the vCPU is blocked, so this throttling
2757     * timer will never fire; therefore I/O throttling has to be disabled here
2758     * if it has been enabled.
2759 */
2760 if (bs->io_limits_enabled) {
2761 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2762 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2763 bdrv_io_limits_disable(bs);
2764 }
2765
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002766 if (qemu_in_coroutine()) {
2767 /* Fast-path if already in coroutine context */
2768 bdrv_rw_co_entry(&rwco);
2769 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002770 AioContext *aio_context = bdrv_get_aio_context(bs);
2771
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002772 co = qemu_coroutine_create(bdrv_rw_co_entry);
2773 qemu_coroutine_enter(co, &rwco);
2774 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002775 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002776 }
2777 }
2778 return rwco.ret;
2779}
2780
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002781/*
2782 * Process a synchronous request using coroutines
2783 */
2784static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002785 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002786{
2787 QEMUIOVector qiov;
2788 struct iovec iov = {
2789 .iov_base = (void *)buf,
2790 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2791 };
2792
Peter Lieven75af1f32015-02-06 11:54:11 +01002793 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002794 return -EINVAL;
2795 }
2796
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002797 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002798 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2799 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002800}
2801
bellard19cb3732006-08-19 11:45:59 +00002802/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002803int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002804 uint8_t *buf, int nb_sectors)
2805{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002806 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002807}
2808
Markus Armbruster07d27a42012-06-29 17:34:29 +02002809/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2810int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2811 uint8_t *buf, int nb_sectors)
2812{
2813 bool enabled;
2814 int ret;
2815
2816 enabled = bs->io_limits_enabled;
2817 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002818 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002819 bs->io_limits_enabled = enabled;
2820 return ret;
2821}
2822
ths5fafdf22007-09-16 21:08:06 +00002823/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002824 -EIO generic I/O error (may happen for all errors)
2825 -ENOMEDIUM No media inserted.
2826 -EINVAL Invalid sector number or nb_sectors
2827 -EACCES Trying to write a read-only device
2828*/
ths5fafdf22007-09-16 21:08:06 +00002829int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002830 const uint8_t *buf, int nb_sectors)
2831{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002832 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002833}
2834
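/* Illustrative sketch (not part of this file): a synchronous read-modify-write
 * of the first sector using the helpers above.  The sector number and the
 * modification are arbitrary examples.
 *
 *     uint8_t buf[BDRV_SECTOR_SIZE];
 *     if (bdrv_read(bs, 0, buf, 1) < 0) {
 *         // -ENOMEDIUM, -EINVAL, -EIO, ...
 *     }
 *     buf[0] ^= 0xff;
 *     if (bdrv_write(bs, 0, buf, 1) < 0) {
 *         // -EACCES if the device is read-only
 *     }
 */
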
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002835int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2836 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002837{
2838 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002839 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002840}
2841
Peter Lievend75cbb52013-10-24 12:07:03 +02002842/*
2843 * Completely zero out a block device with the help of bdrv_write_zeroes.
2844 * The operation is sped up by checking the block status and only writing
2845 * zeroes to the device if they currently do not return zeroes. Optional
2846 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2847 *
2848 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2849 */
2850int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2851{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002852 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002853 int n;
2854
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002855 target_sectors = bdrv_nb_sectors(bs);
2856 if (target_sectors < 0) {
2857 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002858 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002859
Peter Lievend75cbb52013-10-24 12:07:03 +02002860 for (;;) {
Peter Lieven75af1f32015-02-06 11:54:11 +01002861 nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
Peter Lievend75cbb52013-10-24 12:07:03 +02002862 if (nb_sectors <= 0) {
2863 return 0;
2864 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002865 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002866 if (ret < 0) {
2867 error_report("error getting block status at sector %" PRId64 ": %s",
2868 sector_num, strerror(-ret));
2869 return ret;
2870 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002871 if (ret & BDRV_BLOCK_ZERO) {
2872 sector_num += n;
2873 continue;
2874 }
2875 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2876 if (ret < 0) {
2877 error_report("error writing zeroes at sector %" PRId64 ": %s",
2878 sector_num, strerror(-ret));
2879 return ret;
2880 }
2881 sector_num += n;
2882 }
2883}
2884
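/* Illustrative sketch (not part of this file): zeroing a whole image while
 * letting the driver unmap blocks where it can.
 *
 *     int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         error_report("zeroing failed: %s", strerror(-ret));
 *     }
 */
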
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002885int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002886{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002887 QEMUIOVector qiov;
2888 struct iovec iov = {
2889 .iov_base = (void *)buf,
2890 .iov_len = bytes,
2891 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002892 int ret;
bellard83f64092006-08-01 16:21:11 +00002893
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002894 if (bytes < 0) {
2895 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002896 }
2897
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002898 qemu_iovec_init_external(&qiov, &iov, 1);
2899 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2900 if (ret < 0) {
2901 return ret;
bellard83f64092006-08-01 16:21:11 +00002902 }
2903
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002904 return bytes;
bellard83f64092006-08-01 16:21:11 +00002905}
2906
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002907int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002908{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002909 int ret;
bellard83f64092006-08-01 16:21:11 +00002910
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002911 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2912 if (ret < 0) {
2913 return ret;
bellard83f64092006-08-01 16:21:11 +00002914 }
2915
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002916 return qiov->size;
2917}
2918
2919int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002920 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002921{
2922 QEMUIOVector qiov;
2923 struct iovec iov = {
2924 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002925 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002926 };
2927
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002928 if (bytes < 0) {
2929 return -EINVAL;
2930 }
2931
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002932 qemu_iovec_init_external(&qiov, &iov, 1);
2933 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002934}
bellard83f64092006-08-01 16:21:11 +00002935
Kevin Wolff08145f2010-06-16 16:38:15 +02002936/*
2937 * Writes to the file and ensures that no writes are reordered across this
2938 * request (acts as a barrier)
2939 *
2940 * Returns 0 on success, -errno in error cases.
2941 */
2942int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2943 const void *buf, int count)
2944{
2945 int ret;
2946
2947 ret = bdrv_pwrite(bs, offset, buf, count);
2948 if (ret < 0) {
2949 return ret;
2950 }
2951
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002952 /* No flush needed for cache modes that already do it */
2953 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002954 bdrv_flush(bs);
2955 }
2956
2957 return 0;
2958}
2959
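/* Illustrative sketch (not part of this file): metadata writes that must not be
 * reordered against earlier data writes can use the barrier semantics above.
 * The offset and the header structure are hypothetical.
 *
 *     if (bdrv_pwrite_sync(bs, 0, &header, sizeof(header)) < 0) {
 *         // the header may not have reached stable storage
 *     }
 */
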
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002960static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002961 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2962{
2963 /* Perform I/O through a temporary buffer so that users who scribble over
2964 * their read buffer while the operation is in progress do not end up
2965 * modifying the image file. This is critical for zero-copy guest I/O
2966 * where anything might happen inside guest memory.
2967 */
2968 void *bounce_buffer;
2969
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002970 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002971 struct iovec iov;
2972 QEMUIOVector bounce_qiov;
2973 int64_t cluster_sector_num;
2974 int cluster_nb_sectors;
2975 size_t skip_bytes;
2976 int ret;
2977
2978 /* Cover entire cluster so no additional backing file I/O is required when
2979 * allocating cluster in the image file.
2980 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002981 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2982 &cluster_sector_num, &cluster_nb_sectors);
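    /* Worked example (illustrative): with a 64 KB cluster (128 sectors), a
     * request for sectors [130, 140) is widened to [128, 256); skip_bytes
     * below then becomes (130 - 128) * BDRV_SECTOR_SIZE so that only the
     * caller's part of the bounce buffer is copied back into qiov. */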
Stefan Hajnocziab185922011-11-17 13:40:31 +00002983
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002984 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2985 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002986
2987 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002988 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
2989 if (bounce_buffer == NULL) {
2990 ret = -ENOMEM;
2991 goto err;
2992 }
2993
Stefan Hajnocziab185922011-11-17 13:40:31 +00002994 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2995
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002996 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2997 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002998 if (ret < 0) {
2999 goto err;
3000 }
3001
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003002 if (drv->bdrv_co_write_zeroes &&
3003 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003004 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003005 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003006 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003007 /* This does not change the data on the disk, it is not necessary
3008 * to flush even in cache=writethrough mode.
3009 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003010 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003011 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003012 }
3013
Stefan Hajnocziab185922011-11-17 13:40:31 +00003014 if (ret < 0) {
3015 /* It might be okay to ignore write errors for guest requests. If this
3016 * is a deliberate copy-on-read then we don't want to ignore the error.
3017 * Simply report it in all cases.
3018 */
3019 goto err;
3020 }
3021
3022 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003023 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3024 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003025
3026err:
3027 qemu_vfree(bounce_buffer);
3028 return ret;
3029}
3030
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003031/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003032 * Forwards an already correctly aligned request to the BlockDriver. This
3033 * handles copy on read and zeroing after EOF; any other features must be
3034 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003035 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003036static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003037 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003038 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003039{
3040 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003041 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003042
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003043 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3044 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003045
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003046 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3047 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003048 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003049
3050 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003051 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003052 /* If we touch the same cluster it counts as an overlap. This
3053 * guarantees that allocating writes will be serialized and not race
3054 * with each other for the same cluster. For example, in copy-on-read
3055 * it ensures that the CoR read and write operations are atomic and
3056 * guest writes cannot interleave between them. */
3057 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003058 }
3059
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003060 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003061
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003062 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003063 int pnum;
3064
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003065 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003066 if (ret < 0) {
3067 goto out;
3068 }
3069
3070 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003071 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003072 goto out;
3073 }
3074 }
3075
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003076 /* Forward the request to the BlockDriver */
Max Reitzc0191e72015-02-05 13:58:24 -05003077 if (!bs->zero_beyond_eof) {
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003078 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3079 } else {
Max Reitzc0191e72015-02-05 13:58:24 -05003080 /* Read zeros after EOF */
Markus Armbruster40490822014-06-26 13:23:19 +02003081 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003082
Markus Armbruster40490822014-06-26 13:23:19 +02003083 total_sectors = bdrv_nb_sectors(bs);
3084 if (total_sectors < 0) {
3085 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003086 goto out;
3087 }
3088
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003089 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3090 align >> BDRV_SECTOR_BITS);
Paolo Bonzinie012b782014-12-17 16:09:59 +01003091 if (nb_sectors < max_nb_sectors) {
3092 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3093 } else if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003094 QEMUIOVector local_qiov;
Kevin Wolf33f461e2014-07-03 13:21:24 +02003095
3096 qemu_iovec_init(&local_qiov, qiov->niov);
3097 qemu_iovec_concat(&local_qiov, qiov, 0,
Paolo Bonzinie012b782014-12-17 16:09:59 +01003098 max_nb_sectors * BDRV_SECTOR_SIZE);
Kevin Wolf33f461e2014-07-03 13:21:24 +02003099
Paolo Bonzinie012b782014-12-17 16:09:59 +01003100 ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
Kevin Wolf33f461e2014-07-03 13:21:24 +02003101 &local_qiov);
3102
3103 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003104 } else {
3105 ret = 0;
3106 }
3107
3108 /* Reading beyond end of file is supposed to produce zeroes */
3109 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3110 uint64_t offset = MAX(0, total_sectors - sector_num);
3111 uint64_t bytes = (sector_num + nb_sectors - offset) *
3112 BDRV_SECTOR_SIZE;
3113 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3114 }
3115 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003116
3117out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003118 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003119}
3120
Fam Zhengfc3959e2015-03-24 09:23:49 +08003121static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3122{
3123 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3124 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3125}
3126
3127static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3128 int64_t offset, size_t bytes)
3129{
3130 int64_t align = bdrv_get_align(bs);
3131 return !(offset & (align - 1) || (bytes & (align - 1)));
3132}
3133
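/* Illustrative note (not part of this file): assuming bs->request_alignment is
 * 512, bdrv_get_align() returns BDRV_SECTOR_SIZE, so a 1024-byte request at
 * offset 4096 is aligned, while one at offset 4097 or with a length of 1000
 * bytes is not and takes the padding slow path below.
 */
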
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003134/*
3135 * Handle a read request in coroutine context
3136 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003137static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3138 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003139 BdrvRequestFlags flags)
3140{
3141 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003142 BdrvTrackedRequest req;
3143
Fam Zhengfc3959e2015-03-24 09:23:49 +08003144 uint64_t align = bdrv_get_align(bs);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003145 uint8_t *head_buf = NULL;
3146 uint8_t *tail_buf = NULL;
3147 QEMUIOVector local_qiov;
3148 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003149 int ret;
3150
3151 if (!drv) {
3152 return -ENOMEDIUM;
3153 }
Max Reitzb9c64942015-02-05 13:58:25 -05003154
3155 ret = bdrv_check_byte_request(bs, offset, bytes);
3156 if (ret < 0) {
3157 return ret;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003158 }
3159
3160 if (bs->copy_on_read) {
3161 flags |= BDRV_REQ_COPY_ON_READ;
3162 }
3163
3164 /* throttling disk I/O */
3165 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003166 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003167 }
3168
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003169 /* Align read if necessary by padding qiov */
3170 if (offset & (align - 1)) {
3171 head_buf = qemu_blockalign(bs, align);
3172 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3173 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3174 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3175 use_local_qiov = true;
3176
3177 bytes += offset & (align - 1);
3178 offset = offset & ~(align - 1);
3179 }
3180
3181 if ((offset + bytes) & (align - 1)) {
3182 if (!use_local_qiov) {
3183 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3184 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3185 use_local_qiov = true;
3186 }
3187 tail_buf = qemu_blockalign(bs, align);
3188 qemu_iovec_add(&local_qiov, tail_buf,
3189 align - ((offset + bytes) & (align - 1)));
3190
3191 bytes = ROUND_UP(bytes, align);
3192 }
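    /* Worked example (illustrative): with align = 4096, a request for
     * offset 5120, bytes 1024 gains a 1024-byte head pad (new offset 4096)
     * and a 2048-byte tail pad, and is submitted as one aligned 4096-byte
     * read; the caller's qiov still receives only the middle 1024 bytes. */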
3193
Kevin Wolf65afd212013-12-03 14:55:55 +01003194 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003195 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003196 use_local_qiov ? &local_qiov : qiov,
3197 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003198 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003199
3200 if (use_local_qiov) {
3201 qemu_iovec_destroy(&local_qiov);
3202 qemu_vfree(head_buf);
3203 qemu_vfree(tail_buf);
3204 }
3205
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003206 return ret;
3207}
3208
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003209static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3210 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3211 BdrvRequestFlags flags)
3212{
Peter Lieven75af1f32015-02-06 11:54:11 +01003213 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003214 return -EINVAL;
3215 }
3216
3217 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3218 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3219}
3220
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003221int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003222 int nb_sectors, QEMUIOVector *qiov)
3223{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003224 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003225
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003226 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3227}
3228
3229int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3230 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3231{
3232 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3233
3234 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3235 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003236}
3237
Peter Lieven98764152015-02-02 15:48:34 +01003238#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003239
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003240static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003241 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003242{
3243 BlockDriver *drv = bs->drv;
3244 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003245 struct iovec iov = {0};
3246 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003247
Peter Lieven75af1f32015-02-06 11:54:11 +01003248 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
3249 BDRV_REQUEST_MAX_SECTORS);
Kevin Wolf621f0582012-03-20 15:12:58 +01003250
Peter Lievenc31cb702013-10-24 12:06:58 +02003251 while (nb_sectors > 0 && !ret) {
3252 int num = nb_sectors;
3253
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003254 /* Align request. Block drivers can expect the "bulk" of the request
3255 * to be aligned.
3256 */
3257 if (bs->bl.write_zeroes_alignment
3258 && num > bs->bl.write_zeroes_alignment) {
3259 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3260 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003261 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003262 num -= sector_num % bs->bl.write_zeroes_alignment;
3263 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3264 /* Shorten the request to the last aligned sector. num cannot
3265 * underflow because num > bs->bl.write_zeroes_alignment.
3266 */
3267 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003268 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003269 }
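        /* Worked example (illustrative): with write_zeroes_alignment = 8,
         * a request for sectors [5, 25) is issued in three pieces: an
         * unaligned head of 3 sectors, an aligned run of 16 sectors and an
         * unaligned tail of 1 sector. */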
Peter Lievenc31cb702013-10-24 12:06:58 +02003270
3271 /* limit request size */
3272 if (num > max_write_zeroes) {
3273 num = max_write_zeroes;
3274 }
3275
3276 ret = -ENOTSUP;
3277 /* First try the efficient write zeroes operation */
3278 if (drv->bdrv_co_write_zeroes) {
3279 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3280 }
3281
3282 if (ret == -ENOTSUP) {
3283 /* Fall back to bounce buffer if write zeroes is unsupported */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003284 int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
Peter Lieven98764152015-02-02 15:48:34 +01003285 MAX_WRITE_ZEROES_BOUNCE_BUFFER);
Peter Lieven095e4fa2015-01-05 12:29:49 +01003286 num = MIN(num, max_xfer_len);
Peter Lievenc31cb702013-10-24 12:06:58 +02003287 iov.iov_len = num * BDRV_SECTOR_SIZE;
3288 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003289 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3290 if (iov.iov_base == NULL) {
3291 ret = -ENOMEM;
3292 goto fail;
3293 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003294 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003295 }
3296 qemu_iovec_init_external(&qiov, &iov, 1);
3297
3298 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003299
3300 /* Keep bounce buffer around if it is big enough for all
3301             * future requests.
3302 */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003303 if (num < max_xfer_len) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003304 qemu_vfree(iov.iov_base);
3305 iov.iov_base = NULL;
3306 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003307 }
3308
3309 sector_num += num;
3310 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003311 }
3312
Kevin Wolf857d4f42014-05-20 13:16:51 +02003313fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003314 qemu_vfree(iov.iov_base);
3315 return ret;
3316}
3317
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003318/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003319 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003320 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003321static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003322 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3323 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003324{
3325 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003326 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003327 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003328
Kevin Wolfb404f722013-12-03 14:02:23 +01003329 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3330 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003331
Kevin Wolfb404f722013-12-03 14:02:23 +01003332 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3333 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003334 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003335
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003336 waited = wait_serialising_requests(req);
3337 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003338 assert(req->overlap_offset <= offset);
3339 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003340
Kevin Wolf65afd212013-12-03 14:55:55 +01003341 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003342
Peter Lieven465bee12014-05-18 00:58:19 +02003343 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3344 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3345 qemu_iovec_is_zero(qiov)) {
3346 flags |= BDRV_REQ_ZERO_WRITE;
3347 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3348 flags |= BDRV_REQ_MAY_UNMAP;
3349 }
3350 }
3351
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003352 if (ret < 0) {
3353 /* Do nothing, write notifier decided to fail this request */
3354 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003355 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003356 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003357 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003358 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003359 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3360 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003361 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003362
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003363 if (ret == 0 && !bs->enable_write_cache) {
3364 ret = bdrv_co_flush(bs);
3365 }
3366
Fam Zhenge4654d22013-11-13 18:29:43 +08003367 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003368
Benoît Canet5366d0c2014-09-05 15:46:18 +02003369 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003370
Max Reitzc0191e72015-02-05 13:58:24 -05003371 if (ret >= 0) {
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003372 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3373 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003374
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003375 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003376}
3377
Kevin Wolfb404f722013-12-03 14:02:23 +01003378/*
3379 * Handle a write request in coroutine context
3380 */
Kevin Wolf66015532013-12-03 14:40:18 +01003381static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3382 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003383 BdrvRequestFlags flags)
3384{
Kevin Wolf65afd212013-12-03 14:55:55 +01003385 BdrvTrackedRequest req;
Fam Zhengfc3959e2015-03-24 09:23:49 +08003386 uint64_t align = bdrv_get_align(bs);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003387 uint8_t *head_buf = NULL;
3388 uint8_t *tail_buf = NULL;
3389 QEMUIOVector local_qiov;
3390 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003391 int ret;
3392
3393 if (!bs->drv) {
3394 return -ENOMEDIUM;
3395 }
3396 if (bs->read_only) {
3397 return -EACCES;
3398 }
Max Reitzb9c64942015-02-05 13:58:25 -05003399
3400 ret = bdrv_check_byte_request(bs, offset, bytes);
3401 if (ret < 0) {
3402 return ret;
Kevin Wolfb404f722013-12-03 14:02:23 +01003403 }
3404
Kevin Wolfb404f722013-12-03 14:02:23 +01003405 /* throttling disk I/O */
3406 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003407 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003408 }
3409
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003410 /*
3411 * Align write if necessary by performing a read-modify-write cycle.
3412 * Pad qiov with the read parts and be sure to have a tracked request not
3413 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3414 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003415 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003416
3417 if (offset & (align - 1)) {
3418 QEMUIOVector head_qiov;
3419 struct iovec head_iov;
3420
3421 mark_request_serialising(&req, align);
3422 wait_serialising_requests(&req);
3423
3424 head_buf = qemu_blockalign(bs, align);
3425 head_iov = (struct iovec) {
3426 .iov_base = head_buf,
3427 .iov_len = align,
3428 };
3429 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3430
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003431 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003432 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3433 align, &head_qiov, 0);
3434 if (ret < 0) {
3435 goto fail;
3436 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003437 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003438
3439 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3440 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3441 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3442 use_local_qiov = true;
3443
3444 bytes += offset & (align - 1);
3445 offset = offset & ~(align - 1);
3446 }
3447
3448 if ((offset + bytes) & (align - 1)) {
3449 QEMUIOVector tail_qiov;
3450 struct iovec tail_iov;
3451 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003452 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003453
3454 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003455 waited = wait_serialising_requests(&req);
3456 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003457
3458 tail_buf = qemu_blockalign(bs, align);
3459 tail_iov = (struct iovec) {
3460 .iov_base = tail_buf,
3461 .iov_len = align,
3462 };
3463 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3464
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003465 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003466 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3467 align, &tail_qiov, 0);
3468 if (ret < 0) {
3469 goto fail;
3470 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003471 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003472
3473 if (!use_local_qiov) {
3474 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3475 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3476 use_local_qiov = true;
3477 }
3478
3479 tail_bytes = (offset + bytes) & (align - 1);
3480 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3481
3482 bytes = ROUND_UP(bytes, align);
3483 }
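    /* Worked example (illustrative): with align = 4096, a 512-byte write at
     * offset 512 reads back the surrounding 4096-byte block to build the head
     * and tail padding, then submits a single aligned 4096-byte write at
     * offset 0 carrying the caller's 512 bytes at their original position. */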
3484
Fam Zhengfc3959e2015-03-24 09:23:49 +08003485 if (use_local_qiov) {
3486 /* Local buffer may have non-zero data. */
3487 flags &= ~BDRV_REQ_ZERO_WRITE;
3488 }
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003489 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3490 use_local_qiov ? &local_qiov : qiov,
3491 flags);
3492
3493fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003494 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003495
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003496 if (use_local_qiov) {
3497 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003498 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003499 qemu_vfree(head_buf);
3500 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003501
Kevin Wolfb404f722013-12-03 14:02:23 +01003502 return ret;
3503}
3504
Kevin Wolf66015532013-12-03 14:40:18 +01003505static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3506 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3507 BdrvRequestFlags flags)
3508{
Peter Lieven75af1f32015-02-06 11:54:11 +01003509 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003510 return -EINVAL;
3511 }
3512
3513 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3514 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3515}
3516
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003517int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3518 int nb_sectors, QEMUIOVector *qiov)
3519{
3520 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3521
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003522 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3523}
3524
3525int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003526 int64_t sector_num, int nb_sectors,
3527 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003528{
Fam Zhengfc3959e2015-03-24 09:23:49 +08003529 int ret;
3530
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003531 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003532
Peter Lievend32f35c2013-10-24 12:06:52 +02003533 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3534 flags &= ~BDRV_REQ_MAY_UNMAP;
3535 }
Fam Zhengfc3959e2015-03-24 09:23:49 +08003536 if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3537 nb_sectors << BDRV_SECTOR_BITS)) {
3538 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3539 BDRV_REQ_ZERO_WRITE | flags);
3540 } else {
3541 uint8_t *buf;
3542 QEMUIOVector local_qiov;
3543 size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
Peter Lievend32f35c2013-10-24 12:06:52 +02003544
Fam Zhengfc3959e2015-03-24 09:23:49 +08003545 buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3546 memset(buf, 0, bytes);
3547 qemu_iovec_init(&local_qiov, 1);
3548 qemu_iovec_add(&local_qiov, buf, bytes);
3549
3550 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3551 BDRV_REQ_ZERO_WRITE | flags);
3552 qemu_vfree(buf);
3553 }
3554 return ret;
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003555}
3556
bellard83f64092006-08-01 16:21:11 +00003557/**
bellard83f64092006-08-01 16:21:11 +00003558 * Truncate file to 'offset' bytes (needed only for file protocols)
3559 */
3560int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3561{
3562 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003563 int ret;
bellard83f64092006-08-01 16:21:11 +00003564 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003565 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003566 if (!drv->bdrv_truncate)
3567 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003568 if (bs->read_only)
3569 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003570
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003571 ret = drv->bdrv_truncate(bs, offset);
3572 if (ret == 0) {
3573 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003574 if (bs->blk) {
3575 blk_dev_resize_cb(bs->blk);
3576 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003577 }
3578 return ret;
bellard83f64092006-08-01 16:21:11 +00003579}
3580
3581/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003582 * Length of an allocated file in bytes. Sparse files are counted by actual
3583 * allocated space. Return < 0 if error or unknown.
3584 */
3585int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3586{
3587 BlockDriver *drv = bs->drv;
3588 if (!drv) {
3589 return -ENOMEDIUM;
3590 }
3591 if (drv->bdrv_get_allocated_file_size) {
3592 return drv->bdrv_get_allocated_file_size(bs);
3593 }
3594 if (bs->file) {
3595 return bdrv_get_allocated_file_size(bs->file);
3596 }
3597 return -ENOTSUP;
3598}
3599
3600/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003601 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003602 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003603int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003604{
3605 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003606
bellard83f64092006-08-01 16:21:11 +00003607 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003608 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003609
Kevin Wolfb94a2612013-10-29 12:18:58 +01003610 if (drv->has_variable_length) {
3611 int ret = refresh_total_sectors(bs, bs->total_sectors);
3612 if (ret < 0) {
3613 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003614 }
bellard83f64092006-08-01 16:21:11 +00003615 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003616 return bs->total_sectors;
3617}
3618
3619/**
3620 * Return length in bytes on success, -errno on error.
3621 * The length is always a multiple of BDRV_SECTOR_SIZE.
3622 */
3623int64_t bdrv_getlength(BlockDriverState *bs)
3624{
3625 int64_t ret = bdrv_nb_sectors(bs);
3626
3627 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003628}
3629
bellard19cb3732006-08-19 11:45:59 +00003630/* return 0 as the number of sectors if no device is present or on error */
ths96b8f132007-12-17 01:35:20 +00003631void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003632{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003633 int64_t nb_sectors = bdrv_nb_sectors(bs);
3634
3635 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003636}
bellardcf989512004-02-16 21:56:36 +00003637
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003638void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3639 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003640{
3641 bs->on_read_error = on_read_error;
3642 bs->on_write_error = on_write_error;
3643}
3644
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003645BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003646{
3647 return is_read ? bs->on_read_error : bs->on_write_error;
3648}
3649
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003650BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3651{
3652 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3653
3654 switch (on_err) {
3655 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003656 return (error == ENOSPC) ?
3657 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003658 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003659 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003660 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003661 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003662 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003663 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003664 default:
3665 abort();
3666 }
3667}
3668
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003669static void send_qmp_error_event(BlockDriverState *bs,
3670 BlockErrorAction action,
3671 bool is_read, int error)
3672{
Peter Maydell573742a2014-10-10 20:33:03 +01003673 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003674
Peter Maydell573742a2014-10-10 20:33:03 +01003675 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3676 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003677 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003678 error == ENOSPC, strerror(error),
3679 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003680}
3681
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003682/* This is done by device models because, while the block layer knows
3683 * about the error, it does not know whether an operation comes from
3684 * the device or the block layer (from a job, for example).
3685 */
3686void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3687 bool is_read, int error)
3688{
3689 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003690
Wenchao Xiaa5895692014-06-18 08:43:30 +02003691 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003692 /* First set the iostatus, so that "info block" returns an iostatus
3693 * that matches the events raised so far (an additional error iostatus
3694 * is fine, but not a lost one).
3695 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003696 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003697
3698 /* Then raise the request to stop the VM and the event.
3699 * qemu_system_vmstop_request_prepare has two effects. First,
3700 * it ensures that the STOP event always comes after the
3701 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3702 * can observe the STOP event and do a "cont" before the STOP
3703 * event is issued, the VM will not stop. In this case, vm_start()
3704 * also ensures that the STOP/RESUME pair of events is emitted.
3705 */
3706 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003707 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003708 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3709 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003710 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003711 }
3712}
3713
bellardb3380822004-03-14 21:38:54 +00003714int bdrv_is_read_only(BlockDriverState *bs)
3715{
3716 return bs->read_only;
3717}
3718
ths985a03b2007-12-24 16:10:43 +00003719int bdrv_is_sg(BlockDriverState *bs)
3720{
3721 return bs->sg;
3722}
3723
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003724int bdrv_enable_write_cache(BlockDriverState *bs)
3725{
3726 return bs->enable_write_cache;
3727}
3728
Paolo Bonzini425b0142012-06-06 00:04:52 +02003729void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3730{
3731 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003732
3733 /* so a reopen() will preserve wce */
3734 if (wce) {
3735 bs->open_flags |= BDRV_O_CACHE_WB;
3736 } else {
3737 bs->open_flags &= ~BDRV_O_CACHE_WB;
3738 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003739}
3740
bellardea2384d2004-08-01 21:59:26 +00003741int bdrv_is_encrypted(BlockDriverState *bs)
3742{
3743 if (bs->backing_hd && bs->backing_hd->encrypted)
3744 return 1;
3745 return bs->encrypted;
3746}
3747
aliguoric0f4ce72009-03-05 23:01:01 +00003748int bdrv_key_required(BlockDriverState *bs)
3749{
3750 BlockDriverState *backing_hd = bs->backing_hd;
3751
3752 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3753 return 1;
3754 return (bs->encrypted && !bs->valid_key);
3755}
3756
bellardea2384d2004-08-01 21:59:26 +00003757int bdrv_set_key(BlockDriverState *bs, const char *key)
3758{
3759 int ret;
3760 if (bs->backing_hd && bs->backing_hd->encrypted) {
3761 ret = bdrv_set_key(bs->backing_hd, key);
3762 if (ret < 0)
3763 return ret;
3764 if (!bs->encrypted)
3765 return 0;
3766 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003767 if (!bs->encrypted) {
3768 return -EINVAL;
3769 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3770 return -ENOMEDIUM;
3771 }
aliguoric0f4ce72009-03-05 23:01:01 +00003772 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003773 if (ret < 0) {
3774 bs->valid_key = 0;
3775 } else if (!bs->valid_key) {
3776 bs->valid_key = 1;
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003777 if (bs->blk) {
3778 /* call the change callback now, we skipped it on open */
3779 blk_dev_change_media_cb(bs->blk, true);
3780 }
aliguoribb5fc202009-03-05 23:01:15 +00003781 }
aliguoric0f4ce72009-03-05 23:01:01 +00003782 return ret;
bellardea2384d2004-08-01 21:59:26 +00003783}
3784
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003785/*
3786 * Provide an encryption key for @bs.
3787 * If @key is non-null:
3788 * If @bs is not encrypted, fail.
3789 * Else if the key is invalid, fail.
3790 * Else set @bs's key to @key, replacing the existing key, if any.
3791 * If @key is null:
3792 * If @bs is encrypted and still lacks a key, fail.
3793 * Else do nothing.
3794 * On failure, store an error object through @errp if non-null.
3795 */
3796void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3797{
3798 if (key) {
3799 if (!bdrv_is_encrypted(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003800 error_setg(errp, "Device '%s' is not encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003801 bdrv_get_device_name(bs));
3802 } else if (bdrv_set_key(bs, key) < 0) {
3803 error_set(errp, QERR_INVALID_PASSWORD);
3804 }
3805 } else {
3806 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003807 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3808 "'%s' (%s) is encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003809 bdrv_get_device_name(bs),
3810 bdrv_get_encrypted_filename(bs));
3811 }
3812 }
3813}
3814
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003815const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003816{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003817 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003818}
3819
Stefan Hajnocziada42402014-08-27 12:08:55 +01003820static int qsort_strcmp(const void *a, const void *b)
3821{
3822 return strcmp(a, b);
3823}
3824
ths5fafdf22007-09-16 21:08:06 +00003825void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003826 void *opaque)
3827{
3828 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003829 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003830 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003831 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003832
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003833 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003834 if (drv->format_name) {
3835 bool found = false;
3836 int i = count;
3837 while (formats && i && !found) {
3838 found = !strcmp(formats[--i], drv->format_name);
3839 }
3840
3841 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003842 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003843 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003844 }
3845 }
bellardea2384d2004-08-01 21:59:26 +00003846 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003847
3848 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3849
3850 for (i = 0; i < count; i++) {
3851 it(opaque, formats[i]);
3852 }
3853
Jeff Codye855e4f2014-04-28 18:29:54 -04003854 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003855}
3856
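/* Illustrative sketch (not part of this file): printing every registered format
 * name, e.g. for "format=?" style help output.
 *
 *     static void print_format(void *opaque, const char *name)
 *     {
 *         printf("%s\n", name);
 *     }
 *
 *     bdrv_iterate_format(print_format, NULL);
 */
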
Benoît Canetdc364f42014-01-23 21:31:32 +01003857/* This function is to find a node in the bs graph */
3858BlockDriverState *bdrv_find_node(const char *node_name)
3859{
3860 BlockDriverState *bs;
3861
3862 assert(node_name);
3863
3864 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3865 if (!strcmp(node_name, bs->node_name)) {
3866 return bs;
3867 }
3868 }
3869 return NULL;
3870}
3871
Benoît Canetc13163f2014-01-23 21:31:34 +01003872/* Put this QMP function here so it can access the static graph_bdrv_states. */
3873BlockDeviceInfoList *bdrv_named_nodes_list(void)
3874{
3875 BlockDeviceInfoList *list, *entry;
3876 BlockDriverState *bs;
3877
3878 list = NULL;
3879 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3880 entry = g_malloc0(sizeof(*entry));
3881 entry->value = bdrv_block_device_info(bs);
3882 entry->next = list;
3883 list = entry;
3884 }
3885
3886 return list;
3887}
3888
Benoît Canet12d3ba82014-01-23 21:31:35 +01003889BlockDriverState *bdrv_lookup_bs(const char *device,
3890 const char *node_name,
3891 Error **errp)
3892{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003893 BlockBackend *blk;
3894 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003895
Benoît Canet12d3ba82014-01-23 21:31:35 +01003896 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003897 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003898
Markus Armbruster7f06d472014-10-07 13:59:12 +02003899 if (blk) {
3900 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003901 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003902 }
3903
Benoît Canetdd67fa52014-02-12 17:15:06 +01003904 if (node_name) {
3905 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003906
Benoît Canetdd67fa52014-02-12 17:15:06 +01003907 if (bs) {
3908 return bs;
3909 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003910 }
3911
Benoît Canetdd67fa52014-02-12 17:15:06 +01003912 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3913 device ? device : "",
3914 node_name ? node_name : "");
3915 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003916}
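
/*
 * Example: resolving a user-supplied reference to a BlockDriverState either
 * by BlockBackend (device) name or by node name, the way QMP commands do.
 * "virtio0" and "node0" are placeholder names, not names guaranteed to exist.
 */
static void example_lookup(Error **errp)
{
    BlockDriverState *bs = bdrv_lookup_bs("virtio0", "node0", errp);

    if (!bs) {
        return; /* *errp has been set by bdrv_lookup_bs() */
    }
    printf("found node %s\n", bdrv_get_node_name(bs));
}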
3917
Jeff Cody5a6684d2014-06-25 15:40:09 -04003918/* If 'base' is in the same chain as 'top', return true. Otherwise,
3919 * return false. If either argument is NULL, return false. */
3920bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3921{
3922 while (top && top != base) {
3923 top = top->backing_hd;
3924 }
3925
3926 return top != NULL;
3927}
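
/*
 * Example: validating that a user-supplied 'base' really sits in 'top's
 * backing chain before starting an operation such as a commit.  This is a
 * sketch of a typical caller, not code taken from one.
 */
static int example_check_chain(BlockDriverState *top, BlockDriverState *base,
                               Error **errp)
{
    if (!bdrv_chain_contains(top, base)) {
        error_setg(errp, "'%s' is not in the backing chain of '%s'",
                   bdrv_get_device_name(base), bdrv_get_device_name(top));
        return -EINVAL;
    }
    return 0;
}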
3928
Fam Zheng04df7652014-10-31 11:32:54 +08003929BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3930{
3931 if (!bs) {
3932 return QTAILQ_FIRST(&graph_bdrv_states);
3933 }
3934 return QTAILQ_NEXT(bs, node_list);
3935}
3936
Markus Armbruster2f399b02010-06-02 18:55:20 +02003937BlockDriverState *bdrv_next(BlockDriverState *bs)
3938{
3939 if (!bs) {
3940 return QTAILQ_FIRST(&bdrv_states);
3941 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003942 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003943}
3944
Fam Zheng20a9e772014-10-31 11:32:55 +08003945const char *bdrv_get_node_name(const BlockDriverState *bs)
3946{
3947 return bs->node_name;
3948}
3949
Markus Armbruster7f06d472014-10-07 13:59:12 +02003950/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003951const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003952{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003953 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003954}
3955
Markus Armbrusterc8433282012-06-05 16:49:24 +02003956int bdrv_get_flags(BlockDriverState *bs)
3957{
3958 return bs->open_flags;
3959}
3960
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003961int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003962{
3963 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003964 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003965
Benoît Canetdc364f42014-01-23 21:31:32 +01003966 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003967 AioContext *aio_context = bdrv_get_aio_context(bs);
3968 int ret;
3969
3970 aio_context_acquire(aio_context);
3971 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003972 if (ret < 0 && !result) {
3973 result = ret;
3974 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003975 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003976 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003977
3978 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003979}
3980
Peter Lieven3ac21622013-06-28 12:47:42 +02003981int bdrv_has_zero_init_1(BlockDriverState *bs)
3982{
3983 return 1;
3984}
3985
Kevin Wolff2feebb2010-04-14 17:30:35 +02003986int bdrv_has_zero_init(BlockDriverState *bs)
3987{
3988 assert(bs->drv);
3989
Paolo Bonzini11212d82013-09-04 19:00:27 +02003990 /* If BS is a copy on write image, it is initialized to
3991 the contents of the base image, which may not be zeroes. */
3992 if (bs->backing_hd) {
3993 return 0;
3994 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003995 if (bs->drv->bdrv_has_zero_init) {
3996 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003997 }
3998
Peter Lieven3ac21622013-06-28 12:47:42 +02003999 /* safe default */
4000 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02004001}
4002
Peter Lieven4ce78692013-10-24 12:06:54 +02004003bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4004{
4005 BlockDriverInfo bdi;
4006
4007 if (bs->backing_hd) {
4008 return false;
4009 }
4010
4011 if (bdrv_get_info(bs, &bdi) == 0) {
4012 return bdi.unallocated_blocks_are_zero;
4013 }
4014
4015 return false;
4016}
4017
4018bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4019{
4020 BlockDriverInfo bdi;
4021
4022 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4023 return false;
4024 }
4025
4026 if (bdrv_get_info(bs, &bdi) == 0) {
4027 return bdi.can_write_zeroes_with_unmap;
4028 }
4029
4030 return false;
4031}
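
/*
 * Example: deciding whether a freshly created (still empty) target image
 * already reads as zeroes, using the two helpers above.  This mirrors the
 * kind of check block jobs make before copying; it is only a sketch and it
 * assumes the caller knows the image has just been created.
 */
static bool example_new_image_reads_as_zero(BlockDriverState *target)
{
    return bdrv_has_zero_init(target) ||
           bdrv_unallocated_blocks_are_zero(target);
}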
4032
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004033typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004034 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01004035 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004036 int64_t sector_num;
4037 int nb_sectors;
4038 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004039 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004040 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004041} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004042
thsf58c7b32008-06-05 21:53:49 +00004043/*
Fam Zheng705be722014-11-10 17:10:38 +08004044 * Returns the allocation status of the specified sectors.
4045 * Drivers not implementing the functionality are assumed to not support
4046 * backing files, hence all their sectors are reported as allocated.
thsf58c7b32008-06-05 21:53:49 +00004047 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004048 * If 'sector_num' is beyond the end of the disk image the return value is 0
4049 * and 'pnum' is set to 0.
4050 *
thsf58c7b32008-06-05 21:53:49 +00004051 * 'pnum' is set to the number of sectors (including and immediately following
4052 * the specified sector) that are known to be in the same
4053 * allocated/unallocated state.
4054 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004055 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4056 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00004057 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004058static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4059 int64_t sector_num,
4060 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00004061{
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004062 int64_t total_sectors;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004063 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004064 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004065
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004066 total_sectors = bdrv_nb_sectors(bs);
4067 if (total_sectors < 0) {
4068 return total_sectors;
Paolo Bonzini617ccb42013-09-04 19:00:23 +02004069 }
4070
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004071 if (sector_num >= total_sectors) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004072 *pnum = 0;
4073 return 0;
4074 }
4075
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004076 n = total_sectors - sector_num;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004077 if (n < nb_sectors) {
4078 nb_sectors = n;
4079 }
4080
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004081 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004082 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02004083 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02004084 if (bs->drv->protocol_name) {
4085 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4086 }
4087 return ret;
thsf58c7b32008-06-05 21:53:49 +00004088 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004089
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004090 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4091 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02004092 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004093 return ret;
4094 }
4095
Peter Lieven92bc50a2013-10-08 14:43:14 +02004096 if (ret & BDRV_BLOCK_RAW) {
4097 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4098 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4099 *pnum, pnum);
4100 }
4101
Kevin Wolfe88ae222014-05-06 15:25:36 +02004102 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4103 ret |= BDRV_BLOCK_ALLOCATED;
4104 }
4105
Peter Lievenc3d86882013-10-24 12:07:04 +02004106 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4107 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004108 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02004109 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004110 BlockDriverState *bs2 = bs->backing_hd;
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004111 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4112 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004113 ret |= BDRV_BLOCK_ZERO;
4114 }
4115 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004116 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004117
4118 if (bs->file &&
4119 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4120 (ret & BDRV_BLOCK_OFFSET_VALID)) {
Max Reitz59c9a952014-10-22 17:00:15 +02004121 int file_pnum;
4122
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004123 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
Max Reitz59c9a952014-10-22 17:00:15 +02004124 *pnum, &file_pnum);
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004125 if (ret2 >= 0) {
4126 /* Ignore errors. This is just providing extra information; it
4127 * is useful but not necessary.
4128 */
Max Reitz59c9a952014-10-22 17:00:15 +02004129 if (!file_pnum) {
4130 /* !file_pnum indicates an offset at or beyond the EOF; it is
4131 * perfectly valid for the format block driver to point to such
4132 * offsets, so catch it and mark everything as zero */
4133 ret |= BDRV_BLOCK_ZERO;
4134 } else {
4135 /* Limit request to the range reported by the protocol driver */
4136 *pnum = file_pnum;
4137 ret |= (ret2 & BDRV_BLOCK_ZERO);
4138 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004139 }
4140 }
4141
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004142 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004143}
4144
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004145/* Coroutine wrapper for bdrv_get_block_status() */
4146static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004147{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004148 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004149 BlockDriverState *bs = data->bs;
4150
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004151 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4152 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004153 data->done = true;
4154}
4155
4156/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004157 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004158 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004159 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004160 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004161int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4162 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004163{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004164 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004165 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004166 .bs = bs,
4167 .sector_num = sector_num,
4168 .nb_sectors = nb_sectors,
4169 .pnum = pnum,
4170 .done = false,
4171 };
4172
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004173 if (qemu_in_coroutine()) {
4174 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004175 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004176 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004177 AioContext *aio_context = bdrv_get_aio_context(bs);
4178
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004179 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004180 qemu_coroutine_enter(co, &data);
4181 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004182 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004183 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004184 }
4185 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004186}
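
/*
 * Example: walking an image and classifying each extent with
 * bdrv_get_block_status(), in the spirit of "qemu-img map".  This is only a
 * sketch: the output format is made up, and a real caller would also look at
 * BDRV_BLOCK_OFFSET_VALID / BDRV_BLOCK_OFFSET_MASK for raw mappings.
 */
static int example_dump_block_status(BlockDriverState *bs)
{
    int64_t total = bdrv_nb_sectors(bs);
    int64_t sector_num = 0;

    if (total < 0) {
        return total; /* bdrv_nb_sectors() reports a negative errno */
    }

    while (sector_num < total) {
        int nb_sectors = MIN(total - sector_num, INT_MAX);
        int pnum;
        int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &pnum);

        if (ret < 0) {
            return ret;
        }
        if (pnum == 0) {
            break; /* defensive; should not happen inside the image */
        }
        printf("sectors %" PRId64 "+%d: %s%s\n",
               sector_num, pnum,
               (ret & BDRV_BLOCK_ALLOCATED) ? "allocated" : "unallocated",
               (ret & BDRV_BLOCK_ZERO) ? ", reads as zero" : "");
        sector_num += pnum;
    }
    return 0;
}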
4187
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004188int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4189 int nb_sectors, int *pnum)
4190{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004191 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4192 if (ret < 0) {
4193 return ret;
4194 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004195 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004196}
4197
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004198/*
4199 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4200 *
4201 * Return true if the given sector is allocated in any image between
4202 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4203 * sector is allocated in any image of the chain. Return false otherwise.
4204 *
4205 * 'pnum' is set to the number of sectors (including and immediately following
4206 * the specified sector) that are known to be in the same
4207 * allocated/unallocated state.
4208 *
4209 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004210int bdrv_is_allocated_above(BlockDriverState *top,
4211 BlockDriverState *base,
4212 int64_t sector_num,
4213 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004214{
4215 BlockDriverState *intermediate;
4216 int ret, n = nb_sectors;
4217
4218 intermediate = top;
4219 while (intermediate && intermediate != base) {
4220 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004221 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4222 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004223 if (ret < 0) {
4224 return ret;
4225 } else if (ret) {
4226 *pnum = pnum_inter;
4227 return 1;
4228 }
4229
4230 /*
4231 * [sector_num, nb_sectors] is unallocated on top but intermediate
4232 * might have
4233 *
4234 * [sector_num+x, nr_sectors] allocated.
4235 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004236 if (n > pnum_inter &&
4237 (intermediate == top ||
4238 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004239 n = pnum_inter;
4240 }
4241
4242 intermediate = intermediate->backing_hd;
4243 }
4244
4245 *pnum = n;
4246 return 0;
4247}
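
/*
 * Example: asking how much of a range is visible in the images above 'base',
 * the query a stream or commit job relies on.  The offset and length here
 * are arbitrary illustration values.
 */
static void example_probe_above(BlockDriverState *top, BlockDriverState *base)
{
    int pnum;
    int ret = bdrv_is_allocated_above(top, base, 0, 1024, &pnum);

    if (ret < 0) {
        return;
    }
    printf("first %d sectors are %s the base\n", pnum,
           ret ? "allocated above" : "unallocated above");
}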
4248
aliguori045df332009-03-05 23:00:48 +00004249const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4250{
4251 if (bs->backing_hd && bs->backing_hd->encrypted)
4252 return bs->backing_file;
4253 else if (bs->encrypted)
4254 return bs->filename;
4255 else
4256 return NULL;
4257}
4258
ths5fafdf22007-09-16 21:08:06 +00004259void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004260 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004261{
Kevin Wolf3574c602011-10-26 11:02:11 +02004262 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004263}
4264
ths5fafdf22007-09-16 21:08:06 +00004265int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004266 const uint8_t *buf, int nb_sectors)
4267{
4268 BlockDriver *drv = bs->drv;
Max Reitzb9c64942015-02-05 13:58:25 -05004269 int ret;
4270
4271 if (!drv) {
bellard19cb3732006-08-19 11:45:59 +00004272 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05004273 }
4274 if (!drv->bdrv_write_compressed) {
bellardfaea38e2006-08-05 21:31:00 +00004275 return -ENOTSUP;
Max Reitzb9c64942015-02-05 13:58:25 -05004276 }
4277 ret = bdrv_check_request(bs, sector_num, nb_sectors);
4278 if (ret < 0) {
4279 return ret;
4280 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004281
Fam Zhenge4654d22013-11-13 18:29:43 +08004282 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004283
bellardfaea38e2006-08-05 21:31:00 +00004284 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4285}
ths3b46e622007-09-17 08:09:54 +00004286
bellardfaea38e2006-08-05 21:31:00 +00004287int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4288{
4289 BlockDriver *drv = bs->drv;
4290 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004291 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004292 if (!drv->bdrv_get_info)
4293 return -ENOTSUP;
4294 memset(bdi, 0, sizeof(*bdi));
4295 return drv->bdrv_get_info(bs, bdi);
4296}
4297
Max Reitzeae041f2013-10-09 10:46:16 +02004298ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4299{
4300 BlockDriver *drv = bs->drv;
4301 if (drv && drv->bdrv_get_specific_info) {
4302 return drv->bdrv_get_specific_info(bs);
4303 }
4304 return NULL;
4305}
4306
Christoph Hellwig45566e92009-07-10 23:11:57 +02004307int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4308 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004309{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004310 QEMUIOVector qiov;
4311 struct iovec iov = {
4312 .iov_base = (void *) buf,
4313 .iov_len = size,
4314 };
4315
4316 qemu_iovec_init_external(&qiov, &iov, 1);
4317 return bdrv_writev_vmstate(bs, &qiov, pos);
4318}
4319
4320int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4321{
aliguori178e08a2009-04-05 19:10:55 +00004322 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004323
4324 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004325 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004326 } else if (drv->bdrv_save_vmstate) {
4327 return drv->bdrv_save_vmstate(bs, qiov, pos);
4328 } else if (bs->file) {
4329 return bdrv_writev_vmstate(bs->file, qiov, pos);
4330 }
4331
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004332 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004333}
4334
Christoph Hellwig45566e92009-07-10 23:11:57 +02004335int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4336 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004337{
4338 BlockDriver *drv = bs->drv;
4339 if (!drv)
4340 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004341 if (drv->bdrv_load_vmstate)
4342 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4343 if (bs->file)
4344 return bdrv_load_vmstate(bs->file, buf, pos, size);
4345 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004346}
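
/*
 * Example: round-tripping a small blob through the vmstate area of an image,
 * as migration does on a much larger scale.  The buffer contents and offset 0
 * are arbitrary choices for illustration.
 */
static int example_vmstate_roundtrip(BlockDriverState *bs)
{
    uint8_t out[16] = "vmstate example";
    uint8_t in[16];
    int ret;

    ret = bdrv_save_vmstate(bs, out, 0, sizeof(out));
    if (ret < 0) {
        return ret;
    }
    ret = bdrv_load_vmstate(bs, in, 0, sizeof(in));
    if (ret < 0) {
        return ret;
    }
    return memcmp(in, out, sizeof(out)) == 0 ? 0 : -EIO;
}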
4347
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004348void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4349{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004350 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004351 return;
4352 }
4353
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004354 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004355}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004356
Kevin Wolf41c695c2012-12-06 14:32:58 +01004357int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4358 const char *tag)
4359{
4360 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4361 bs = bs->file;
4362 }
4363
4364 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4365 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4366 }
4367
4368 return -ENOTSUP;
4369}
4370
Fam Zheng4cc70e92013-11-20 10:01:54 +08004371int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4372{
4373 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4374 bs = bs->file;
4375 }
4376
4377 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4378 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4379 }
4380
4381 return -ENOTSUP;
4382}
4383
Kevin Wolf41c695c2012-12-06 14:32:58 +01004384int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4385{
Max Reitz938789e2014-03-10 23:44:08 +01004386 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004387 bs = bs->file;
4388 }
4389
4390 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4391 return bs->drv->bdrv_debug_resume(bs, tag);
4392 }
4393
4394 return -ENOTSUP;
4395}
4396
4397bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4398{
4399 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4400 bs = bs->file;
4401 }
4402
4403 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4404 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4405 }
4406
4407 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004408}
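
/*
 * Example: driving the breakpoint API above.  The event name "read_aio" and
 * the tag "tag0" are assumptions made for this sketch; the calls only succeed
 * when a blkdebug driver is somewhere in the bs chain, otherwise they return
 * -ENOTSUP.
 */
static void example_debug_breakpoint(BlockDriverState *bs)
{
    if (bdrv_debug_breakpoint(bs, "read_aio", "tag0") < 0) {
        return; /* no blkdebug driver in the chain */
    }
    /* ... issue some I/O here; a request hitting the event is suspended
     * until it is resumed by tag ... */
    if (bdrv_debug_is_suspended(bs, "tag0")) {
        bdrv_debug_resume(bs, "tag0");
    }
}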
4409
Blue Swirl199630b2010-07-25 20:49:34 +00004410int bdrv_is_snapshot(BlockDriverState *bs)
4411{
4412 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4413}
4414
Jeff Codyb1b1d782012-10-16 15:49:09 -04004415/* backing_file can be relative, absolute, or a protocol. If it is
4416 * relative, it must be relative to the chain. So, passing in bs->filename
4417 * from a BDS as backing_file should not be done, as that may be relative to
4418 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004419BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4420 const char *backing_file)
4421{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004422 char *filename_full = NULL;
4423 char *backing_file_full = NULL;
4424 char *filename_tmp = NULL;
4425 int is_protocol = 0;
4426 BlockDriverState *curr_bs = NULL;
4427 BlockDriverState *retval = NULL;
4428
4429 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004430 return NULL;
4431 }
4432
Jeff Codyb1b1d782012-10-16 15:49:09 -04004433 filename_full = g_malloc(PATH_MAX);
4434 backing_file_full = g_malloc(PATH_MAX);
4435 filename_tmp = g_malloc(PATH_MAX);
4436
4437 is_protocol = path_has_protocol(backing_file);
4438
4439 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4440
4441 /* If either of the filename paths is actually a protocol, then
4442 * compare unmodified paths; otherwise make paths relative */
4443 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4444 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4445 retval = curr_bs->backing_hd;
4446 break;
4447 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004448 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004449 /* If not an absolute filename path, make it relative to the current
4450 * image's filename path */
4451 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4452 backing_file);
4453
4454 /* We are going to compare absolute pathnames */
4455 if (!realpath(filename_tmp, filename_full)) {
4456 continue;
4457 }
4458
4459 /* We need to make sure the backing filename we are comparing against
4460 * is relative to the current image filename (or absolute) */
4461 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4462 curr_bs->backing_file);
4463
4464 if (!realpath(filename_tmp, backing_file_full)) {
4465 continue;
4466 }
4467
4468 if (strcmp(backing_file_full, filename_full) == 0) {
4469 retval = curr_bs->backing_hd;
4470 break;
4471 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004472 }
4473 }
4474
Jeff Codyb1b1d782012-10-16 15:49:09 -04004475 g_free(filename_full);
4476 g_free(backing_file_full);
4477 g_free(filename_tmp);
4478 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004479}
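
/*
 * Example: resolving a user-supplied backing file reference against an open
 * chain, the way block-commit or block-stream style commands need to.  The
 * file name "base.qcow2" is a placeholder.
 */
static BlockDriverState *example_resolve_base(BlockDriverState *top,
                                              Error **errp)
{
    BlockDriverState *base = bdrv_find_backing_image(top, "base.qcow2");

    if (!base) {
        error_setg(errp, "Backing file 'base.qcow2' not found in the chain");
    }
    return base;
}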
4480
Benoît Canetf198fd12012-08-02 10:22:47 +02004481int bdrv_get_backing_file_depth(BlockDriverState *bs)
4482{
4483 if (!bs->drv) {
4484 return 0;
4485 }
4486
4487 if (!bs->backing_hd) {
4488 return 0;
4489 }
4490
4491 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4492}
4493
bellard83f64092006-08-01 16:21:11 +00004494/**************************************************************/
4495/* async I/Os */
4496
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004497BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4498 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004499 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004500{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004501 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4502
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004503 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004504 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004505}
4506
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004507BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4508 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004509 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004510{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004511 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4512
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004513 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004514 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004515}
4516
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004517BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004518 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004519 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004520{
4521 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4522
4523 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4524 BDRV_REQ_ZERO_WRITE | flags,
4525 cb, opaque, true);
4526}
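
/*
 * Example: submitting an asynchronous read with bdrv_aio_readv().  The
 * sector offset, request size and context struct are made up for the sketch;
 * the point is that the qiov (and the data buffer) must stay alive until the
 * completion callback runs.
 */
typedef struct ExampleReadCtx {
    QEMUIOVector qiov;
    struct iovec iov;
} ExampleReadCtx;

static void example_read_done(void *opaque, int ret)
{
    ExampleReadCtx *ctx = opaque;

    /* ret is 0 on success, a negative errno on failure */
    g_free(ctx);
}

static BlockAIOCB *example_submit_read(BlockDriverState *bs, void *buf)
{
    ExampleReadCtx *ctx = g_new0(ExampleReadCtx, 1);

    ctx->iov.iov_base = buf;
    ctx->iov.iov_len  = 8 * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&ctx->qiov, &ctx->iov, 1);

    /* Read 8 sectors starting at sector 0; the callback frees the context. */
    return bdrv_aio_readv(bs, 0, &ctx->qiov, 8, example_read_done, ctx);
}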
4527
Kevin Wolf40b4f532009-09-09 17:53:37 +02004528
4529typedef struct MultiwriteCB {
4530 int error;
4531 int num_requests;
4532 int num_callbacks;
4533 struct {
Markus Armbruster097310b2014-10-07 13:59:15 +02004534 BlockCompletionFunc *cb;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004535 void *opaque;
4536 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004537 } callbacks[];
4538} MultiwriteCB;
4539
4540static void multiwrite_user_cb(MultiwriteCB *mcb)
4541{
4542 int i;
4543
4544 for (i = 0; i < mcb->num_callbacks; i++) {
4545 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004546 if (mcb->callbacks[i].free_qiov) {
4547 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4548 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004549 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004550 }
4551}
4552
4553static void multiwrite_cb(void *opaque, int ret)
4554{
4555 MultiwriteCB *mcb = opaque;
4556
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004557 trace_multiwrite_cb(mcb, ret);
4558
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004559 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004560 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004561 }
4562
4563 mcb->num_requests--;
4564 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004565 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004566 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004567 }
4568}
4569
4570static int multiwrite_req_compare(const void *a, const void *b)
4571{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004572 const BlockRequest *req1 = a, *req2 = b;
4573
4574 /*
4575 * Note that we can't simply subtract req2->sector from req1->sector
4576 * here as that could overflow the return value.
4577 */
4578 if (req1->sector > req2->sector) {
4579 return 1;
4580 } else if (req1->sector < req2->sector) {
4581 return -1;
4582 } else {
4583 return 0;
4584 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004585}
4586
4587/*
4588 * Takes a bunch of requests and tries to merge them. Returns the number of
4589 * requests that remain after merging.
4590 */
4591static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4592 int num_reqs, MultiwriteCB *mcb)
4593{
4594 int i, outidx;
4595
4596 // Sort requests by start sector
4597 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4598
4599 // Check if adjacent requests overlap or are exactly sequential. If so,
4600 // combine them; requests separated by a gap are never merged.
4601 outidx = 0;
4602 for (i = 1; i < num_reqs; i++) {
4603 int merge = 0;
4604 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4605
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004606 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004607 if (reqs[i].sector <= oldreq_last) {
4608 merge = 1;
4609 }
4610
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004611 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4612 merge = 0;
4613 }
4614
Peter Lieven6c5a42a2014-10-27 10:18:46 +01004615 if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
4616 reqs[i].nb_sectors > bs->bl.max_transfer_length) {
4617 merge = 0;
4618 }
4619
Kevin Wolf40b4f532009-09-09 17:53:37 +02004620 if (merge) {
4621 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004622 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004623 qemu_iovec_init(qiov,
4624 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4625
4626 // Add the first request to the merged one. If the requests are
4627 // overlapping, drop the last sectors of the first request.
4628 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004629 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004630
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004631 // We should not need to add any zeros between the two requests
4632 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004633
4634 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004635 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004636
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004637 // Add tail of first request, if necessary
4638 if (qiov->size < reqs[outidx].qiov->size) {
4639 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4640 reqs[outidx].qiov->size - qiov->size);
4641 }
4642
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004643 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004644 reqs[outidx].qiov = qiov;
4645
4646 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4647 } else {
4648 outidx++;
4649 reqs[outidx].sector = reqs[i].sector;
4650 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4651 reqs[outidx].qiov = reqs[i].qiov;
4652 }
4653 }
4654
Peter Lievenf4564d52015-02-02 14:52:18 +01004655 block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4656
Kevin Wolf40b4f532009-09-09 17:53:37 +02004657 return outidx + 1;
4658}
4659
4660/*
4661 * Submit multiple AIO write requests at once.
4662 *
4663 * On success, the function returns 0 and all requests in the reqs array have
4664 * been submitted. On error, this function returns -1, and any of the
4665 * requests may or may not have been submitted yet. In particular, this means
4666 * that the callback will be called for some requests and not for others. The
4667 * caller must check the error field of each BlockRequest to know which
4668 * callbacks to wait for (if error != 0, no callback will be called for it).
4669 *
4670 * The implementation may modify the contents of the reqs array, e.g. to merge
4671 * requests. However, the fields opaque and error are left unmodified as they
4672 * are used to signal failure for a single request to the caller.
4673 */
4674int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4675{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004676 MultiwriteCB *mcb;
4677 int i;
4678
Ryan Harper301db7c2011-03-07 10:01:04 -06004679 /* don't submit writes if we don't have a medium */
4680 if (bs->drv == NULL) {
4681 for (i = 0; i < num_reqs; i++) {
4682 reqs[i].error = -ENOMEDIUM;
4683 }
4684 return -1;
4685 }
4686
Kevin Wolf40b4f532009-09-09 17:53:37 +02004687 if (num_reqs == 0) {
4688 return 0;
4689 }
4690
4691 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004692 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004693 mcb->num_requests = 0;
4694 mcb->num_callbacks = num_reqs;
4695
4696 for (i = 0; i < num_reqs; i++) {
4697 mcb->callbacks[i].cb = reqs[i].cb;
4698 mcb->callbacks[i].opaque = reqs[i].opaque;
4699 }
4700
4701 // Check for mergable requests
4702 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4703
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004704 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4705
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004706 /* Run the aio requests. */
4707 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004708 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004709 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4710 reqs[i].nb_sectors, reqs[i].flags,
4711 multiwrite_cb, mcb,
4712 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004713 }
4714
4715 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004716}
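
/*
 * Example: batching two writes into one bdrv_aio_multiwrite() call.  The
 * sector numbers and the callback are placeholders, and the caller is
 * assumed to own the two qiovs; note that the reqs array may be reordered
 * and merged by the implementation.
 */
static void example_multiwrite_cb(void *opaque, int ret)
{
    /* called once per request that was accepted for submission */
}

static int example_two_writes(BlockDriverState *bs,
                              QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        {
            .sector     = 0,
            .nb_sectors = qiov0->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov0,
            .cb         = example_multiwrite_cb,
            .opaque     = NULL,
        },
        {
            .sector     = 128,
            .nb_sectors = qiov1->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov1,
            .cb         = example_multiwrite_cb,
            .opaque     = NULL,
        },
    };

    return bdrv_aio_multiwrite(bs, reqs, 2);
}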
4717
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004718void bdrv_aio_cancel(BlockAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004719{
Fam Zhengca5fd112014-09-11 13:41:27 +08004720 qemu_aio_ref(acb);
4721 bdrv_aio_cancel_async(acb);
4722 while (acb->refcnt > 1) {
4723 if (acb->aiocb_info->get_aio_context) {
4724 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4725 } else if (acb->bs) {
4726 aio_poll(bdrv_get_aio_context(acb->bs), true);
4727 } else {
4728 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004729 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004730 }
Fam Zheng80074292014-09-11 13:41:28 +08004731 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004732}
4733
4734/* Async version of aio cancel. The caller is not blocked if the acb implements
4735 * cancel_async; otherwise we do nothing and let the request complete normally.
4736 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004737void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004738{
4739 if (acb->aiocb_info->cancel_async) {
4740 acb->aiocb_info->cancel_async(acb);
4741 }
bellard83f64092006-08-01 16:21:11 +00004742}
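
/*
 * Example: the two ways of cancelling an in-flight request.  'acb' is
 * whatever bdrv_aio_readv()/bdrv_aio_writev() returned earlier; which variant
 * to use depends on whether the caller can afford to block.  This is only a
 * sketch of a hypothetical caller.
 */
static void example_cancel(BlockAIOCB *acb, bool can_block)
{
    if (can_block) {
        /* Waits until the request is cancelled or has completed; the
         * completion callback has run by the time this returns. */
        bdrv_aio_cancel(acb);
    } else {
        /* Merely requests cancellation; the completion callback will still
         * be invoked later, possibly with -ECANCELED. */
        bdrv_aio_cancel_async(acb);
    }
}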
4743
4744/**************************************************************/
4745/* async block device emulation */
4746
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004747typedef struct BlockAIOCBSync {
4748 BlockAIOCB common;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004749 QEMUBH *bh;
4750 int ret;
4751 /* vector translation state */
4752 QEMUIOVector *qiov;
4753 uint8_t *bounce;
4754 int is_write;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004755} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004756
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004757static const AIOCBInfo bdrv_em_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004758 .aiocb_size = sizeof(BlockAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004759};
4760
bellard83f64092006-08-01 16:21:11 +00004761static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004762{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004763 BlockAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004764
Kevin Wolf857d4f42014-05-20 13:16:51 +02004765 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004766 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004767 }
aliguoriceb42de2009-04-07 18:43:28 +00004768 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004769 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004770 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004771 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004772 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004773}
bellardbeac80c2006-06-26 20:08:57 +00004774
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004775static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4776 int64_t sector_num,
4777 QEMUIOVector *qiov,
4778 int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004779 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004780 void *opaque,
4781 int is_write)
aliguorif141eaf2009-04-07 18:43:24 +00004782
bellardea2384d2004-08-01 21:59:26 +00004783{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004784 BlockAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004785
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004786 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004787 acb->is_write = is_write;
4788 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004789 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004790 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004791
Kevin Wolf857d4f42014-05-20 13:16:51 +02004792 if (acb->bounce == NULL) {
4793 acb->ret = -ENOMEM;
4794 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004795 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004796 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004797 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004798 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004799 }
4800
pbrookce1a14d2006-08-07 02:38:06 +00004801 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004802
pbrookce1a14d2006-08-07 02:38:06 +00004803 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004804}
4805
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004806static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004807 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004808 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004809{
aliguorif141eaf2009-04-07 18:43:24 +00004810 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004811}
4812
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004813static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004814 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004815 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004816{
4817 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4818}
4819
Kevin Wolf68485422011-06-30 10:05:46 +02004820
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004821typedef struct BlockAIOCBCoroutine {
4822 BlockAIOCB common;
Kevin Wolf68485422011-06-30 10:05:46 +02004823 BlockRequest req;
4824 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004825 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004826 QEMUBH* bh;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004827} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004828
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004829static const AIOCBInfo bdrv_em_co_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004830 .aiocb_size = sizeof(BlockAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004831};
4832
Paolo Bonzini35246a62011-10-14 10:41:29 +02004833static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004834{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004835 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004836
4837 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004838
Kevin Wolf68485422011-06-30 10:05:46 +02004839 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004840 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004841}
4842
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004843/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4844static void coroutine_fn bdrv_co_do_rw(void *opaque)
4845{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004846 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004847 BlockDriverState *bs = acb->common.bs;
4848
4849 if (!acb->is_write) {
4850 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004851 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004852 } else {
4853 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004854 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004855 }
4856
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004857 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004858 qemu_bh_schedule(acb->bh);
4859}
4860
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004861static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4862 int64_t sector_num,
4863 QEMUIOVector *qiov,
4864 int nb_sectors,
4865 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004866 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004867 void *opaque,
4868 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004869{
4870 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004871 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004872
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004873 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004874 acb->req.sector = sector_num;
4875 acb->req.nb_sectors = nb_sectors;
4876 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004877 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004878 acb->is_write = is_write;
4879
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004880 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004881 qemu_coroutine_enter(co, acb);
4882
4883 return &acb->common;
4884}
4885
Paolo Bonzini07f07612011-10-17 12:32:12 +02004886static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004887{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004888 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004889 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004890
Paolo Bonzini07f07612011-10-17 12:32:12 +02004891 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004892 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004893 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004894}
4895
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004896BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004897 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004898{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004899 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004900
Paolo Bonzini07f07612011-10-17 12:32:12 +02004901 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004902 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004903
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004904 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004905
Paolo Bonzini07f07612011-10-17 12:32:12 +02004906 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4907 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004908
Alexander Graf016f5cf2010-05-26 17:51:49 +02004909 return &acb->common;
4910}
4911
Paolo Bonzini4265d622011-10-17 12:32:14 +02004912static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4913{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004914 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004915 BlockDriverState *bs = acb->common.bs;
4916
4917 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004918 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004919 qemu_bh_schedule(acb->bh);
4920}
4921
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004922BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004923 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004924 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004925{
4926 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004927 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004928
4929 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4930
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004931 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004932 acb->req.sector = sector_num;
4933 acb->req.nb_sectors = nb_sectors;
4934 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4935 qemu_coroutine_enter(co, acb);
4936
4937 return &acb->common;
4938}
4939
bellardea2384d2004-08-01 21:59:26 +00004940void bdrv_init(void)
4941{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004942 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004943}
pbrookce1a14d2006-08-07 02:38:06 +00004944
Markus Armbrustereb852012009-10-27 18:41:44 +01004945void bdrv_init_with_whitelist(void)
4946{
4947 use_bdrv_whitelist = 1;
4948 bdrv_init();
4949}
4950
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004951void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004952 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004953{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004954 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004955
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004956 acb = g_slice_alloc(aiocb_info->aiocb_size);
4957 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004958 acb->bs = bs;
4959 acb->cb = cb;
4960 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004961 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004962 return acb;
4963}
4964
Fam Zhengf197fe22014-09-11 13:41:08 +08004965void qemu_aio_ref(void *p)
4966{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004967 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004968 acb->refcnt++;
4969}
4970
Fam Zheng80074292014-09-11 13:41:28 +08004971void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004972{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004973 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004974 assert(acb->refcnt > 0);
4975 if (--acb->refcnt == 0) {
4976 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4977 }
pbrookce1a14d2006-08-07 02:38:06 +00004978}
bellard19cb3732006-08-19 11:45:59 +00004979
4980/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004981/* Coroutine block device emulation */
4982
4983typedef struct CoroutineIOCompletion {
4984 Coroutine *coroutine;
4985 int ret;
4986} CoroutineIOCompletion;
4987
4988static void bdrv_co_io_em_complete(void *opaque, int ret)
4989{
4990 CoroutineIOCompletion *co = opaque;
4991
4992 co->ret = ret;
4993 qemu_coroutine_enter(co->coroutine, NULL);
4994}
4995
4996static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4997 int nb_sectors, QEMUIOVector *iov,
4998 bool is_write)
4999{
5000 CoroutineIOCompletion co = {
5001 .coroutine = qemu_coroutine_self(),
5002 };
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005003 BlockAIOCB *acb;
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005004
5005 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005006 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5007 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005008 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005009 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5010 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005011 }
5012
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01005013 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005014 if (!acb) {
5015 return -EIO;
5016 }
5017 qemu_coroutine_yield();
5018
5019 return co.ret;
5020}
5021
5022static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5023 int64_t sector_num, int nb_sectors,
5024 QEMUIOVector *iov)
5025{
5026 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5027}
5028
5029static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5030 int64_t sector_num, int nb_sectors,
5031 QEMUIOVector *iov)
5032{
5033 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5034}
5035
Paolo Bonzini07f07612011-10-17 12:32:12 +02005036static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005037{
Paolo Bonzini07f07612011-10-17 12:32:12 +02005038 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005039
Paolo Bonzini07f07612011-10-17 12:32:12 +02005040 rwco->ret = bdrv_co_flush(rwco->bs);
5041}
5042
5043int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
5044{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005045 int ret;
5046
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005047 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02005048 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005049 }
5050
Kevin Wolfca716362011-11-10 18:13:59 +01005051 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005052 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005053 if (bs->drv->bdrv_co_flush_to_os) {
5054 ret = bs->drv->bdrv_co_flush_to_os(bs);
5055 if (ret < 0) {
5056 return ret;
5057 }
5058 }
5059
Kevin Wolfca716362011-11-10 18:13:59 +01005060 /* But don't actually force it to the disk with cache=unsafe */
5061 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02005062 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01005063 }
5064
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005065 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005066 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005067 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005068 } else if (bs->drv->bdrv_aio_flush) {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005069 BlockAIOCB *acb;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005070 CoroutineIOCompletion co = {
5071 .coroutine = qemu_coroutine_self(),
5072 };
5073
5074 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5075 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005076 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005077 } else {
5078 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005079 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005080 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02005081 } else {
5082 /*
5083 * Some block drivers always operate in either writethrough or unsafe
5084 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5085 * know how the server works (because the behaviour is hardcoded or
5086 * depends on server-side configuration), so we can't ensure that
5087 * everything is safe on disk. Returning an error doesn't work because
5088 * that would break guests even if the server operates in writethrough
5089 * mode.
5090 *
5091 * Let's hope the user knows what he's doing.
5092 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005093 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005094 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005095 if (ret < 0) {
5096 return ret;
5097 }
5098
5099 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5100 * in the case of cache=unsafe, so there are no useless flushes.
5101 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02005102flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005103 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005104}
5105
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005106void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005107{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005108 Error *local_err = NULL;
5109 int ret;
5110
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005111 if (!bs->drv) {
5112 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005113 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005114
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005115 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5116 return;
5117 }
5118 bs->open_flags &= ~BDRV_O_INCOMING;
5119
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005120 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005121 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005122 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005123 bdrv_invalidate_cache(bs->file, &local_err);
5124 }
5125 if (local_err) {
5126 error_propagate(errp, local_err);
5127 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005128 }
5129
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005130 ret = refresh_total_sectors(bs, bs->total_sectors);
5131 if (ret < 0) {
5132 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5133 return;
5134 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005135}
5136
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005137void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005138{
5139 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005140 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005141
Benoît Canetdc364f42014-01-23 21:31:32 +01005142 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005143 AioContext *aio_context = bdrv_get_aio_context(bs);
5144
5145 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005146 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005147 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005148 if (local_err) {
5149 error_propagate(errp, local_err);
5150 return;
5151 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005152 }
5153}
5154
Paolo Bonzini07f07612011-10-17 12:32:12 +02005155int bdrv_flush(BlockDriverState *bs)
5156{
5157 Coroutine *co;
5158 RwCo rwco = {
5159 .bs = bs,
5160 .ret = NOT_DONE,
5161 };
5162
5163 if (qemu_in_coroutine()) {
5164 /* Fast-path if already in coroutine context */
5165 bdrv_flush_co_entry(&rwco);
5166 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005167 AioContext *aio_context = bdrv_get_aio_context(bs);
5168
Paolo Bonzini07f07612011-10-17 12:32:12 +02005169 co = qemu_coroutine_create(bdrv_flush_co_entry);
5170 qemu_coroutine_enter(co, &rwco);
5171 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005172 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005173 }
5174 }
5175
5176 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005177}
5178
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005179typedef struct DiscardCo {
5180 BlockDriverState *bs;
5181 int64_t sector_num;
5182 int nb_sectors;
5183 int ret;
5184} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005185static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5186{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005187 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005188
5189 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5190}
5191
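/* Discard (unmap) nb_sectors starting at sector_num. The request is split
 * according to the driver's discard alignment and maximum discard size.
 * Returns 0 on success, and also when discarding is disabled (BDRV_O_UNMAP
 * not set) or not implemented by the driver. */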
5192int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5193 int nb_sectors)
5194{
Max Reitzb9c64942015-02-05 13:58:25 -05005195 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005196
Paolo Bonzini4265d622011-10-17 12:32:14 +02005197 if (!bs->drv) {
5198 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005199 }
5200
5201 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5202 if (ret < 0) {
5203 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005204 } else if (bs->read_only) {
5205 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005206 }
5207
Fam Zhenge4654d22013-11-13 18:29:43 +08005208 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005209
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005210 /* Do nothing if disabled. */
5211 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5212 return 0;
5213 }
5214
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005215 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005216 return 0;
5217 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005218
Peter Lieven75af1f32015-02-06 11:54:11 +01005219 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005220 while (nb_sectors > 0) {
5221 int ret;
5222 int num = nb_sectors;
5223
5224 /* align request */
5225 if (bs->bl.discard_alignment &&
5226 num >= bs->bl.discard_alignment &&
5227 sector_num % bs->bl.discard_alignment) {
5228 if (num > bs->bl.discard_alignment) {
5229 num = bs->bl.discard_alignment;
5230 }
5231 num -= sector_num % bs->bl.discard_alignment;
5232 }
5233
5234 /* limit request size */
5235 if (num > max_discard) {
5236 num = max_discard;
5237 }
5238
5239 if (bs->drv->bdrv_co_discard) {
5240 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5241 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005242 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005243 CoroutineIOCompletion co = {
5244 .coroutine = qemu_coroutine_self(),
5245 };
5246
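 /* Note: this AIO fallback passes the full remaining nb_sectors rather
 * than the clamped num computed above. */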
5247 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5248 bdrv_co_io_em_complete, &co);
5249 if (acb == NULL) {
5250 return -EIO;
5251 } else {
5252 qemu_coroutine_yield();
5253 ret = co.ret;
5254 }
5255 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005256 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005257 return ret;
5258 }
5259
5260 sector_num += num;
5261 nb_sectors -= num;
5262 }
5263 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005264}
5265
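/* Synchronous wrapper around bdrv_co_discard(), usable both inside and
 * outside coroutine context. */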
5266int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5267{
5268 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005269 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005270 .bs = bs,
5271 .sector_num = sector_num,
5272 .nb_sectors = nb_sectors,
5273 .ret = NOT_DONE,
5274 };
5275
5276 if (qemu_in_coroutine()) {
5277 /* Fast-path if already in coroutine context */
5278 bdrv_discard_co_entry(&rwco);
5279 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005280 AioContext *aio_context = bdrv_get_aio_context(bs);
5281
Paolo Bonzini4265d622011-10-17 12:32:14 +02005282 co = qemu_coroutine_create(bdrv_discard_co_entry);
5283 qemu_coroutine_enter(co, &rwco);
5284 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005285 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005286 }
5287 }
5288
5289 return rwco.ret;
5290}
5291
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005292/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005293/* removable device support */
5294
5295/**
5296 * Return TRUE if the media is present
5297 */
5298int bdrv_is_inserted(BlockDriverState *bs)
5299{
5300 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005301
bellard19cb3732006-08-19 11:45:59 +00005302 if (!drv)
5303 return 0;
5304 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005305 return 1;
5306 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005307}
5308
5309/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005310 * Return whether the media changed since the last call to this
5311 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005312 */
5313int bdrv_media_changed(BlockDriverState *bs)
5314{
5315 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005316
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005317 if (drv && drv->bdrv_media_changed) {
5318 return drv->bdrv_media_changed(bs);
5319 }
5320 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005321}
5322
5323/**
5324 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5325 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005326void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005327{
5328 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005329 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005330
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005331 if (drv && drv->bdrv_eject) {
5332 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005333 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005334
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005335 device_name = bdrv_get_device_name(bs);
5336 if (device_name[0] != '\0') {
5337 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005338 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005339 }
bellard19cb3732006-08-19 11:45:59 +00005340}
5341
bellard19cb3732006-08-19 11:45:59 +00005342/**
5343 * Lock or unlock the media (if it is locked, the user won't be able
5344 * to eject it manually).
5345 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005346void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005347{
5348 BlockDriver *drv = bs->drv;
5349
Markus Armbruster025e8492011-09-06 18:58:47 +02005350 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005351
Markus Armbruster025e8492011-09-06 18:58:47 +02005352 if (drv && drv->bdrv_lock_medium) {
5353 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005354 }
5355}
ths985a03b2007-12-24 16:10:43 +00005356
5357/* needed for generic scsi interface */
5358
5359int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5360{
5361 BlockDriver *drv = bs->drv;
5362
5363 if (drv && drv->bdrv_ioctl)
5364 return drv->bdrv_ioctl(bs, req, buf);
5365 return -ENOTSUP;
5366}
aliguori7d780662009-03-12 19:57:08 +00005367
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005368BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005369 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005370 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005371{
aliguori221f7152009-03-28 17:28:41 +00005372 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005373
aliguori221f7152009-03-28 17:28:41 +00005374 if (drv && drv->bdrv_aio_ioctl)
5375 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5376 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005377}
aliguorie268ca52009-04-22 20:20:00 +00005378
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005379void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005380{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005381 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005382}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005383
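/* Allocate a buffer aligned for I/O on bs; aborts on allocation failure. */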
aliguorie268ca52009-04-22 20:20:00 +00005384void *qemu_blockalign(BlockDriverState *bs, size_t size)
5385{
Kevin Wolf339064d2013-11-28 10:23:32 +01005386 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005387}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005388
Max Reitz9ebd8442014-10-22 14:09:27 +02005389void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5390{
5391 return memset(qemu_blockalign(bs, size), 0, size);
5392}
5393
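/* Like qemu_blockalign(), but returns NULL on allocation failure instead of
 * aborting. */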
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005394void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5395{
5396 size_t align = bdrv_opt_mem_align(bs);
5397
5398 /* Ensure that NULL is never returned on success */
5399 assert(align > 0);
5400 if (size == 0) {
5401 size = align;
5402 }
5403
5404 return qemu_try_memalign(align, size);
5405}
5406
Max Reitz9ebd8442014-10-22 14:09:27 +02005407void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5408{
5409 void *mem = qemu_try_blockalign(bs, size);
5410
5411 if (mem) {
5412 memset(mem, 0, size);
5413 }
5414
5415 return mem;
5416}
5417
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005418/*
5419 * Check if all memory in this vector is aligned as required for I/O on bs.
5420 */
5421bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5422{
5423 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005424 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005425
5426 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005427 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005428 return false;
5429 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005430 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005431 return false;
5432 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005433 }
5434
5435 return true;
5436}
5437
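/* Create a dirty bitmap for bs with the given granularity (in bytes, must be
 * a power of two) and add it to the BDS's list of dirty bitmaps. */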
Fam Zhengb8afb522014-04-16 09:34:30 +08005438BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5439 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005440{
5441 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005442 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005443
Paolo Bonzini50717e92013-01-21 17:09:45 +01005444 assert((granularity & (granularity - 1)) == 0);
5445
Fam Zhenge4654d22013-11-13 18:29:43 +08005446 granularity >>= BDRV_SECTOR_BITS;
5447 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005448 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005449 if (bitmap_size < 0) {
5450 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5451 errno = -bitmap_size;
5452 return NULL;
5453 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005454 bitmap = g_new0(BdrvDirtyBitmap, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +08005455 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5456 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5457 return bitmap;
5458}
5459
5460void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5461{
5462 BdrvDirtyBitmap *bm, *next;
5463 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5464 if (bm == bitmap) {
5465 QLIST_REMOVE(bitmap, list);
5466 hbitmap_free(bitmap->bitmap);
5467 g_free(bitmap);
5468 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005469 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005470 }
5471}
5472
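/* Build a BlockDirtyInfoList with one entry per dirty bitmap of bs,
 * reporting each bitmap's dirty count and granularity. */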
Fam Zheng21b56832013-11-13 18:29:44 +08005473BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5474{
5475 BdrvDirtyBitmap *bm;
5476 BlockDirtyInfoList *list = NULL;
5477 BlockDirtyInfoList **plist = &list;
5478
5479 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005480 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5481 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005482 info->count = bdrv_get_dirty_count(bs, bm);
5483 info->granularity =
5484 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5485 entry->value = info;
5486 *plist = entry;
5487 plist = &entry->next;
5488 }
5489
5490 return list;
5491}
5492
Fam Zhenge4654d22013-11-13 18:29:43 +08005493int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005494{
Fam Zhenge4654d22013-11-13 18:29:43 +08005495 if (bitmap) {
5496 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005497 } else {
5498 return 0;
5499 }
5500}
5501
Fam Zhenge4654d22013-11-13 18:29:43 +08005502void bdrv_dirty_iter_init(BlockDriverState *bs,
5503 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005504{
Fam Zhenge4654d22013-11-13 18:29:43 +08005505 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005506}
5507
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005508void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5509 int64_t cur_sector, int nr_sectors)
5510{
5511 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5512}
5513
5514void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5515 int64_t cur_sector, int nr_sectors)
5516{
5517 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5518}
5519
5520static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5521 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005522{
Fam Zhenge4654d22013-11-13 18:29:43 +08005523 BdrvDirtyBitmap *bitmap;
5524 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5525 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005526 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005527}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005528
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005529static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5530 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005531{
5532 BdrvDirtyBitmap *bitmap;
5533 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5534 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5535 }
5536}
5537
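/* Return the number of dirty sectors covered by the bitmap. */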
5538int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5539{
5540 return hbitmap_count(bitmap->bitmap);
5541}
5542
Fam Zheng9fcb0252013-08-23 09:14:46 +08005543/* Get a reference to bs */
5544void bdrv_ref(BlockDriverState *bs)
5545{
5546 bs->refcnt++;
5547}
5548
5549/* Release a previously grabbed reference to bs.
5550 * If the reference count drops to zero after the release, the
5551 * BlockDriverState is deleted. */
5552void bdrv_unref(BlockDriverState *bs)
5553{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005554 if (!bs) {
5555 return;
5556 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005557 assert(bs->refcnt > 0);
5558 if (--bs->refcnt == 0) {
5559 bdrv_delete(bs);
5560 }
5561}
5562
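/* Operation blockers: each entry records a reason why a given operation type
 * is currently forbidden on this BDS. */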
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005563struct BdrvOpBlocker {
5564 Error *reason;
5565 QLIST_ENTRY(BdrvOpBlocker) list;
5566};
5567
5568bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5569{
5570 BdrvOpBlocker *blocker;
5571 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5572 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5573 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5574 if (errp) {
5575 error_setg(errp, "Device '%s' is busy: %s",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005576 bdrv_get_device_name(bs),
5577 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005578 }
5579 return true;
5580 }
5581 return false;
5582}
5583
5584void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5585{
5586 BdrvOpBlocker *blocker;
5587 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5588
Markus Armbruster5839e532014-08-19 10:31:08 +02005589 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005590 blocker->reason = reason;
5591 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5592}
5593
5594void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5595{
5596 BdrvOpBlocker *blocker, *next;
5597 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5598 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5599 if (blocker->reason == reason) {
5600 QLIST_REMOVE(blocker, list);
5601 g_free(blocker);
5602 }
5603 }
5604}
5605
5606void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5607{
5608 int i;
5609 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5610 bdrv_op_block(bs, i, reason);
5611 }
5612}
5613
5614void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5615{
5616 int i;
5617 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5618 bdrv_op_unblock(bs, i, reason);
5619 }
5620}
5621
5622bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5623{
5624 int i;
5625
5626 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5627 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5628 return false;
5629 }
5630 }
5631 return true;
5632}
5633
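/* Device I/O status tracking (ok/failed/nospace), used when the drive is
 * configured to stop the VM on errors. */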
Luiz Capitulino28a72822011-09-26 17:43:50 -03005634void bdrv_iostatus_enable(BlockDriverState *bs)
5635{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005636 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005637 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005638}
5639
5640/* The I/O status is only enabled if the drive explicitly
5641 * enables it _and_ the VM is configured to stop on errors */
5642bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5643{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005644 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005645 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5646 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5647 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005648}
5649
5650void bdrv_iostatus_disable(BlockDriverState *bs)
5651{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005652 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005653}
5654
5655void bdrv_iostatus_reset(BlockDriverState *bs)
5656{
5657 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005658 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005659 if (bs->job) {
5660 block_job_iostatus_reset(bs->job);
5661 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005662 }
5663}
5664
Luiz Capitulino28a72822011-09-26 17:43:50 -03005665void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5666{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005667 assert(bdrv_iostatus_is_enabled(bs));
5668 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005669 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5670 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005671 }
5672}
5673
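/* Create an image file named @filename in format @fmt. @options holds
 * format-specific creation options; if no size is specified, it is taken
 * from the backing file. Errors are reported through @errp. */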
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005674void bdrv_img_create(const char *filename, const char *fmt,
5675 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005676 char *options, uint64_t img_size, int flags,
5677 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005678{
Chunyan Liu83d05212014-06-05 17:20:51 +08005679 QemuOptsList *create_opts = NULL;
5680 QemuOpts *opts = NULL;
5681 const char *backing_fmt, *backing_file;
5682 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005683 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005684 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005685 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005686 int ret = 0;
5687
5688 /* Find driver and parse its options */
5689 drv = bdrv_find_format(fmt);
5690 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005691 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005692 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005693 }
5694
Max Reitzb65a5e12015-02-05 13:58:12 -05005695 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005696 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005697 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005698 }
5699
Max Reitzc6149722014-12-02 18:32:45 +01005700 if (!drv->create_opts) {
5701 error_setg(errp, "Format driver '%s' does not support image creation",
5702 drv->format_name);
5703 return;
5704 }
5705
5706 if (!proto_drv->create_opts) {
5707 error_setg(errp, "Protocol driver '%s' does not support image creation",
5708 proto_drv->format_name);
5709 return;
5710 }
5711
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005712 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5713 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005714
5715 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005716 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005717 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005718
5719 /* Parse -o options */
5720 if (options) {
Markus Armbrusterdc523cd342015-02-12 18:37:11 +01005721 qemu_opts_do_parse(opts, options, NULL, &local_err);
5722 if (local_err) {
5723 error_report_err(local_err);
5724 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005725 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005726 goto out;
5727 }
5728 }
5729
5730 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005731 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005732 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005733 error_setg(errp, "Backing file not supported for file format '%s'",
5734 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005735 goto out;
5736 }
5737 }
5738
5739 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005740 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005741 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005742 error_setg(errp, "Backing file format not supported for file "
5743 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005744 goto out;
5745 }
5746 }
5747
Chunyan Liu83d05212014-06-05 17:20:51 +08005748 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5749 if (backing_file) {
5750 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005751 error_setg(errp, "Trying to create an image with the "
5752 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005753 goto out;
5754 }
5755 }
5756
Chunyan Liu83d05212014-06-05 17:20:51 +08005757 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5758 if (backing_fmt) {
5759 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005760 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005761 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005762 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005763 goto out;
5764 }
5765 }
5766
5767 /* The size for the image must always be specified, with one exception:
5768 * if we are using a backing file, we can obtain the size from there. */
Chunyan Liu83d05212014-06-05 17:20:51 +08005769 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5770 if (size == -1) {
5771 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005772 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01005773 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005774 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005775 int back_flags;
5776
Max Reitz29168012014-11-26 17:20:27 +01005777 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
5778 full_backing, PATH_MAX,
5779 &local_err);
5780 if (local_err) {
5781 g_free(full_backing);
5782 goto out;
5783 }
5784
Paolo Bonzini63090da2012-04-12 14:01:03 +02005785 /* backing files always opened read-only */
5786 back_flags =
5787 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005788
Max Reitzf67503e2014-02-18 18:33:05 +01005789 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01005790 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005791 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01005792 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005793 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005794 goto out;
5795 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005796 size = bdrv_getlength(bs);
5797 if (size < 0) {
5798 error_setg_errno(errp, -size, "Could not get size of '%s'",
5799 backing_file);
5800 bdrv_unref(bs);
5801 goto out;
5802 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005803
Markus Armbruster39101f22015-02-12 16:46:36 +01005804 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01005805
5806 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005807 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005808 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005809 goto out;
5810 }
5811 }
5812
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005813 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08005814 printf("Formatting '%s', fmt=%s", filename, fmt);
5815 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005816 puts("");
5817 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005818
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005819 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005820
Max Reitzcc84d902013-09-06 17:14:26 +02005821 if (ret == -EFBIG) {
5822 /* This is generally a better message than whatever the driver would
5823 * deliver (especially because of the cluster_size_hint), since that
5824 * is most probably not much different from "image too large". */
5825 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005826 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005827 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005828 }
Max Reitzcc84d902013-09-06 17:14:26 +02005829 error_setg(errp, "The image size is too large for file format '%s'"
5830 "%s", fmt, cluster_size_hint);
5831 error_free(local_err);
5832 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005833 }
5834
5835out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005836 qemu_opts_del(opts);
5837 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005838 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005839 error_propagate(errp, local_err);
5840 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005841}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005842
5843AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5844{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005845 return bs->aio_context;
5846}
5847
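/* Detach bs, its children (bs->file and bs->backing_hd), its throttling
 * state and its registered notifiers from the current AioContext. */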
5848void bdrv_detach_aio_context(BlockDriverState *bs)
5849{
Max Reitz33384422014-06-20 21:57:33 +02005850 BdrvAioNotifier *baf;
5851
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005852 if (!bs->drv) {
5853 return;
5854 }
5855
Max Reitz33384422014-06-20 21:57:33 +02005856 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5857 baf->detach_aio_context(baf->opaque);
5858 }
5859
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005860 if (bs->io_limits_enabled) {
5861 throttle_detach_aio_context(&bs->throttle_state);
5862 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005863 if (bs->drv->bdrv_detach_aio_context) {
5864 bs->drv->bdrv_detach_aio_context(bs);
5865 }
5866 if (bs->file) {
5867 bdrv_detach_aio_context(bs->file);
5868 }
5869 if (bs->backing_hd) {
5870 bdrv_detach_aio_context(bs->backing_hd);
5871 }
5872
5873 bs->aio_context = NULL;
5874}
5875
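/* Attach bs and its children to new_context and invoke the registered
 * attach notifiers; counterpart of bdrv_detach_aio_context(). */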
5876void bdrv_attach_aio_context(BlockDriverState *bs,
5877 AioContext *new_context)
5878{
Max Reitz33384422014-06-20 21:57:33 +02005879 BdrvAioNotifier *ban;
5880
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005881 if (!bs->drv) {
5882 return;
5883 }
5884
5885 bs->aio_context = new_context;
5886
5887 if (bs->backing_hd) {
5888 bdrv_attach_aio_context(bs->backing_hd, new_context);
5889 }
5890 if (bs->file) {
5891 bdrv_attach_aio_context(bs->file, new_context);
5892 }
5893 if (bs->drv->bdrv_attach_aio_context) {
5894 bs->drv->bdrv_attach_aio_context(bs, new_context);
5895 }
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005896 if (bs->io_limits_enabled) {
5897 throttle_attach_aio_context(&bs->throttle_state, new_context);
5898 }
Max Reitz33384422014-06-20 21:57:33 +02005899
5900 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5901 ban->attached_aio_context(new_context, ban->opaque);
5902 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005903}
5904
5905void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5906{
5907 bdrv_drain_all(); /* ensure there are no in-flight requests */
5908
5909 bdrv_detach_aio_context(bs);
5910
5911 /* This function executes in the old AioContext so acquire the new one in
5912 * case it runs in a different thread.
5913 */
5914 aio_context_acquire(new_context);
5915 bdrv_attach_aio_context(bs, new_context);
5916 aio_context_release(new_context);
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005917}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005918
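/* Register a pair of callbacks that are invoked whenever bs is attached to
 * or detached from an AioContext. */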
Max Reitz33384422014-06-20 21:57:33 +02005919void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5920 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5921 void (*detach_aio_context)(void *opaque), void *opaque)
5922{
5923 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5924 *ban = (BdrvAioNotifier){
5925 .attached_aio_context = attached_aio_context,
5926 .detach_aio_context = detach_aio_context,
5927 .opaque = opaque
5928 };
5929
5930 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5931}
5932
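/* Unregister a notifier pair previously added with
 * bdrv_add_aio_context_notifier(); aborts if no matching registration is
 * found. */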
5933void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5934 void (*attached_aio_context)(AioContext *,
5935 void *),
5936 void (*detach_aio_context)(void *),
5937 void *opaque)
5938{
5939 BdrvAioNotifier *ban, *ban_next;
5940
5941 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5942 if (ban->attached_aio_context == attached_aio_context &&
5943 ban->detach_aio_context == detach_aio_context &&
5944 ban->opaque == opaque)
5945 {
5946 QLIST_REMOVE(ban, list);
5947 g_free(ban);
5948
5949 return;
5950 }
5951 }
5952
5953 abort();
5954}
5955
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005956void bdrv_add_before_write_notifier(BlockDriverState *bs,
5957 NotifierWithReturn *notifier)
5958{
5959 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5960}
Max Reitz6f176b42013-09-03 10:09:50 +02005961
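/* Apply creation-style options to an existing image; returns -ENOTSUP if the
 * format driver does not implement option amendment. */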
Max Reitz77485432014-10-27 11:12:50 +01005962int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
5963 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02005964{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005965 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005966 return -ENOTSUP;
5967 }
Max Reitz77485432014-10-27 11:12:50 +01005968 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02005969}
Benoît Canetf6186f42013-10-02 14:33:48 +02005970
Benoît Canetb5042a32014-03-03 19:11:34 +01005971/* This function will be called by the bdrv_recurse_is_first_non_filter method
5972 * of block filters and by bdrv_is_first_non_filter.
5973 * It is used to test whether the given bs is the candidate, or to recurse
5974 * further into the node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005975 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005976bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5977 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005978{
Benoît Canetb5042a32014-03-03 19:11:34 +01005979 /* return false if basic checks fail */
5980 if (!bs || !bs->drv) {
5981 return false;
5982 }
5983
5984 /* the code reached a non-filter block driver -> check whether this bs is
5985 * the same as the candidate. This is the recursion's termination condition.
5986 */
5987 if (!bs->drv->is_filter) {
5988 return bs == candidate;
5989 }
5990 /* Down this path the driver is a block filter driver */
5991
5992 /* If the block filter recursion method is defined use it to recurse down
5993 * the node graph.
5994 */
5995 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01005996 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5997 }
5998
Benoît Canetb5042a32014-03-03 19:11:34 +01005999 /* the driver is a block filter but does not allow recursion -> return false
6000 */
6001 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006002}
6003
6004/* This function checks if the candidate is the first non-filter bs down its
6005 * bs chain. Since we don't have pointers to parents it explores all bs chains
6006 * from the top. Some filters can choose not to pass down the recursion.
6007 */
6008bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6009{
6010 BlockDriverState *bs;
6011
6012 /* walk down the bs forest recursively */
6013 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6014 bool perm;
6015
Benoît Canetb5042a32014-03-03 19:11:34 +01006016 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006017 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006018
6019 /* candidate is the first non filter */
6020 if (perm) {
6021 return true;
6022 }
6023 }
6024
6025 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006026}
Benoît Canet09158f02014-06-27 18:25:25 +02006027
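/* Look up @node_name and check that the node may be replaced: it must not be
 * blocked for BLOCK_OP_TYPE_REPLACE and it must be the first non-filter in
 * its chain. Returns NULL and sets @errp on failure. */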
6028BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
6029{
6030 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006031 AioContext *aio_context;
6032
Benoît Canet09158f02014-06-27 18:25:25 +02006033 if (!to_replace_bs) {
6034 error_setg(errp, "Node name '%s' not found", node_name);
6035 return NULL;
6036 }
6037
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006038 aio_context = bdrv_get_aio_context(to_replace_bs);
6039 aio_context_acquire(aio_context);
6040
Benoît Canet09158f02014-06-27 18:25:25 +02006041 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006042 to_replace_bs = NULL;
6043 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006044 }
6045
6046 /* We don't want an arbitrary node of the BDS chain to be replaced, only the
6047 * topmost non-filter, in order to prevent data corruption.
6048 * Another benefit is that this test excludes backing files, which are
6049 * already blocked by the backing blockers.
6050 */
6051 if (!bdrv_is_first_non_filter(to_replace_bs)) {
6052 error_setg(errp, "Only top most non filter can be replaced");
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006053 to_replace_bs = NULL;
6054 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006055 }
6056
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006057out:
6058 aio_context_release(aio_context);
Benoît Canet09158f02014-06-27 18:25:25 +02006059 return to_replace_bs;
6060}
Ming Lei448ad912014-07-04 18:04:33 +08006061
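/* Request batching: bdrv_io_plug()/bdrv_io_unplug() bracket a burst of
 * submissions so drivers that support batching can coalesce them;
 * bdrv_flush_io_queue() lets the driver push out anything it has queued.
 * Each helper simply forwards to the driver hook or to bs->file. */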
6062void bdrv_io_plug(BlockDriverState *bs)
6063{
6064 BlockDriver *drv = bs->drv;
6065 if (drv && drv->bdrv_io_plug) {
6066 drv->bdrv_io_plug(bs);
6067 } else if (bs->file) {
6068 bdrv_io_plug(bs->file);
6069 }
6070}
6071
6072void bdrv_io_unplug(BlockDriverState *bs)
6073{
6074 BlockDriver *drv = bs->drv;
6075 if (drv && drv->bdrv_io_unplug) {
6076 drv->bdrv_io_unplug(bs);
6077 } else if (bs->file) {
6078 bdrv_io_unplug(bs->file);
6079 }
6080}
6081
6082void bdrv_flush_io_queue(BlockDriverState *bs)
6083{
6084 BlockDriver *drv = bs->drv;
6085 if (drv && drv->bdrv_flush_io_queue) {
6086 drv->bdrv_flush_io_queue(bs);
6087 } else if (bs->file) {
6088 bdrv_flush_io_queue(bs->file);
6089 }
6090}
Max Reitz91af7012014-07-18 20:24:56 +02006091
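/* Copy the driver-specific options of bs (those whose key contains no '.'
 * and is not "node-name") into @d; returns true if any option was copied. */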
6092static bool append_open_options(QDict *d, BlockDriverState *bs)
6093{
6094 const QDictEntry *entry;
6095 bool found_any = false;
6096
6097 for (entry = qdict_first(bs->options); entry;
6098 entry = qdict_next(bs->options, entry))
6099 {
6100 /* Only take options for this level and exclude all non-driver-specific
6101 * options */
6102 if (!strchr(qdict_entry_key(entry), '.') &&
6103 strcmp(qdict_entry_key(entry), "node-name"))
6104 {
6105 qobject_incref(qdict_entry_value(entry));
6106 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6107 found_any = true;
6108 }
6109 }
6110
6111 return found_any;
6112}
6113
6114/* Updates the following BDS fields:
6115 * - exact_filename: A filename which may be used for opening a block device
6116 * which (mostly) equals the given BDS (even without any
6117 * other options; so reading and writing must return the same
6118 * results, but caching etc. may be different)
6119 * - full_open_options: Options which, when given when opening a block device
6120 * (without a filename), result in a BDS (mostly)
6121 * equalling the given one
6122 * - filename: If exact_filename is set, it is copied here. Otherwise,
6123 * full_open_options is converted to a JSON object, prefixed with
6124 * "json:" (for use through the JSON pseudo protocol) and put here.
6125 */
6126void bdrv_refresh_filename(BlockDriverState *bs)
6127{
6128 BlockDriver *drv = bs->drv;
6129 QDict *opts;
6130
6131 if (!drv) {
6132 return;
6133 }
6134
6135 /* This BDS's file name will most probably depend on its file's name, so
6136 * refresh that first */
6137 if (bs->file) {
6138 bdrv_refresh_filename(bs->file);
6139 }
6140
6141 if (drv->bdrv_refresh_filename) {
6142 /* Obsolete information is of no use here, so drop the old file name
6143 * information before refreshing it */
6144 bs->exact_filename[0] = '\0';
6145 if (bs->full_open_options) {
6146 QDECREF(bs->full_open_options);
6147 bs->full_open_options = NULL;
6148 }
6149
6150 drv->bdrv_refresh_filename(bs);
6151 } else if (bs->file) {
6152 /* Try to reconstruct valid information from the underlying file */
6153 bool has_open_options;
6154
6155 bs->exact_filename[0] = '\0';
6156 if (bs->full_open_options) {
6157 QDECREF(bs->full_open_options);
6158 bs->full_open_options = NULL;
6159 }
6160
6161 opts = qdict_new();
6162 has_open_options = append_open_options(opts, bs);
6163
6164 /* If no specific options have been given for this BDS, the filename of
6165 * the underlying file should suffice for this one as well */
6166 if (bs->file->exact_filename[0] && !has_open_options) {
6167 strcpy(bs->exact_filename, bs->file->exact_filename);
6168 }
6169 /* Reconstructing the full options QDict is simple for most format block
6170 * drivers, as long as the full options are known for the underlying
6171 * file BDS. The full options QDict of that file BDS should somehow
6172 * contain a representation of the filename, therefore the following
6173 * suffices without querying the (exact_)filename of this BDS. */
6174 if (bs->file->full_open_options) {
6175 qdict_put_obj(opts, "driver",
6176 QOBJECT(qstring_from_str(drv->format_name)));
6177 QINCREF(bs->file->full_open_options);
6178 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6179
6180 bs->full_open_options = opts;
6181 } else {
6182 QDECREF(opts);
6183 }
6184 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6185 /* There is no underlying file BDS (at least referenced by BDS.file),
6186 * so the full options QDict should be equal to the options given
6187 * specifically for this block device when it was opened (plus the
6188 * driver specification).
6189 * Because those options don't change, there is no need to update
6190 * full_open_options when it's already set. */
6191
6192 opts = qdict_new();
6193 append_open_options(opts, bs);
6194 qdict_put_obj(opts, "driver",
6195 QOBJECT(qstring_from_str(drv->format_name)));
6196
6197 if (bs->exact_filename[0]) {
6198 /* This may not work for all block protocol drivers (some may
6199 * require this filename to be parsed), but we have to find some
6200 * default solution here, so just include it. If some block driver
6201 * does not support pure options without any filename at all or
6202 * needs some special format of the options QDict, it needs to
6203 * implement the driver-specific bdrv_refresh_filename() function.
6204 */
6205 qdict_put_obj(opts, "filename",
6206 QOBJECT(qstring_from_str(bs->exact_filename)));
6207 }
6208
6209 bs->full_open_options = opts;
6210 }
6211
6212 if (bs->exact_filename[0]) {
6213 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6214 } else if (bs->full_open_options) {
6215 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6216 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6217 qstring_get_str(json));
6218 QDECREF(json);
6219 }
6220}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006221
6222/* This accessor function purpose is to allow the device models to access the
6223 * BlockAcctStats structure embedded inside a BlockDriverState without being
6224 * aware of the BlockDriverState structure layout.
6225 * It will go away when the BlockAcctStats structure will be moved inside
6226 * the device models.
6227 */
6228BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6229{
6230 return &bs->stats;
6231}