/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "sysemu/blockdev.h" /* FIXME layering violation */
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

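/* Illustrative results (hypothetical inputs, on a Windows host):
 *
 *   is_windows_drive("c:")                    -> 1  (bare drive letter)
 *   is_windows_drive("\\\\.\\PhysicalDrive0") -> 1  (device namespace path)
 *   is_windows_drive("c:\\images\\disk.img")  -> 0  (drive prefix, but a file path)
 */
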
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* This function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an I/O wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue the I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue the next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

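/* Illustrative results (hypothetical inputs):
 *
 *   path_has_protocol("nbd://localhost:10809")    -> 1  (':' before any '/')
 *   path_has_protocol("/var/lib/images/disk.img") -> 0  ('/' comes first)
 *   path_has_protocol("c:\\disk.img")             -> 0 on Windows (drive letter)
 */
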
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

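/* Illustrative use (hypothetical paths), e.g. when resolving a relative
 * backing file name against the image that references it:
 *
 *     char buf[PATH_MAX];
 *     path_combine(buf, sizeof(buf), "/images/vm/overlay.qcow2", "base.qcow2");
 *     // buf == "/images/vm/base.qcow2"
 *
 *     path_combine(buf, sizeof(buf), "/images/vm/overlay.qcow2", "/abs/base.raw");
 *     // an absolute filename is copied unchanged: "/abs/base.raw"
 */
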
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name, Error **errp)
{
    BlockDriverState *bs;
    int i;

    if (*device_name && !id_wellformed(device_name)) {
        error_setg(errp, "Invalid device name");
        return NULL;
    }

    if (bdrv_find(device_name)) {
        error_setg(errp, "Device with id '%s' already exists",
                   device_name);
        return NULL;
    }
    if (bdrv_find_node(device_name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   device_name);
        return NULL;
    }

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1; /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char *filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

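/* Illustrative resolution (hypothetical filenames, assuming
 * allow_protocol_prefix is true):
 *
 *   "/dev/cdrom"        -> host device driver, via find_hdev_driver()
 *   "nbd://host/export" -> the driver whose protocol_name is "nbd"
 *   "disk.img"          -> the "file" driver (no "<protocol>:" prefix)
 */
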
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

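/* Summary of the mapping implemented above (for reference):
 *
 *   mode           BDRV_O_NOCACHE  BDRV_O_CACHE_WB  BDRV_O_NO_FLUSH
 *   writethrough         -                -                -
 *   writeback            -                x                -
 *   none / off           x                x                -
 *   directsync           x                -                -
 *   unsafe               -                x                x
 */
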
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

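/* Summary of the helpers above: bdrv_inherited_flags() gives bs->file
 * BDRV_O_PROTOCOL, BDRV_O_CACHE_WB and BDRV_O_UNMAP and strips the
 * top-layer-only flags; bdrv_backing_flags() forces backing files read-only
 * and drops snapshot-related flags; bdrv_temp_snapshot_flags() trades
 * BDRV_O_SNAPSHOT for BDRV_O_TEMPORARY; and bdrv_open_flags() finally clears
 * the flags that are internal to the block layer before the image is opened.
 */
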
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called directly with a protocol driver as drv. This
     * layer is already opened, so assign it to bs (while file becomes a
     * closed BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

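/* For illustration (hypothetical filename), a pseudo-protocol filename of
 *
 *     json:{"driver": "qcow2", "file": {"driver": "file", "filename": "img.qcow2"}}
 *
 * is parsed and flattened into the option entries
 *
 *     driver=qcow2, file.driver=file, file.filename=img.qcow2
 *
 * which bdrv_fill_options() below merges into the caller-supplied options.
 */
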
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bs->device_name);
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new("", errp);

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}

Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001253/*
Max Reitzda557aa2013-12-20 19:28:11 +01001254 * Opens a disk image whose options are given as BlockdevRef in another block
1255 * device's options.
1256 *
Max Reitzda557aa2013-12-20 19:28:11 +01001257 * If allow_none is true, no image will be opened if filename is false and no
1258 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1259 *
1260 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1261 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1262 * itself, all options starting with "${bdref_key}." are considered part of the
1263 * BlockdevRef.
1264 *
1265 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001266 *
1267 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001268 */
1269int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1270 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001271 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001272{
1273 QDict *image_options;
1274 int ret;
1275 char *bdref_key_dot;
1276 const char *reference;
1277
Max Reitzf67503e2014-02-18 18:33:05 +01001278 assert(pbs);
1279 assert(*pbs == NULL);
1280
Max Reitzda557aa2013-12-20 19:28:11 +01001281 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1282 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1283 g_free(bdref_key_dot);
1284
1285 reference = qdict_get_try_str(options, bdref_key);
1286 if (!filename && !reference && !qdict_size(image_options)) {
1287 if (allow_none) {
1288 ret = 0;
1289 } else {
1290 error_setg(errp, "A block device must be specified for \"%s\"",
1291 bdref_key);
1292 ret = -EINVAL;
1293 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001294 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001295 goto done;
1296 }
1297
Max Reitzf7d9fd82014-02-18 18:33:12 +01001298 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001299
1300done:
1301 qdict_del(options, bdref_key);
1302 return ret;
1303}
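
/*
 * Illustrative sketch, not part of the original file: a caller handing a
 * flattened BlockdevRef to bdrv_open_image().  The helper name is
 * hypothetical; 'options' is assumed to contain entries such as
 * "file.driver" and "file.filename", which together form the BlockdevRef
 * for the "file" key.
 */
static int example_open_protocol_layer(QDict *options, int flags, Error **errp)
{
    BlockDriverState *file = NULL;
    int ret;

    ret = bdrv_open_image(&file, NULL, options, "file", flags, true, errp);
    if (ret < 0) {
        return ret;
    }
    if (file == NULL) {
        /* allow_none was true and no BlockdevRef was given */
        return 0;
    }

    /* ... use 'file' ..., then drop the reference when done */
    bdrv_unref(file);
    return 0;
}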
1304
Chen Gang6b8aeca2014-06-23 23:28:23 +08001305int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001306{
1307 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001308 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001309 int64_t total_size;
1310 BlockDriver *bdrv_qcow2;
Chunyan Liu83d05212014-06-05 17:20:51 +08001311 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001312 QDict *snapshot_options;
1313 BlockDriverState *bs_snapshot;
1314 Error *local_err;
1315 int ret;
1316
1317 /* if snapshot, we create a temporary backing file and open it
1318 instead of opening 'filename' directly */
1319
1320 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001321 total_size = bdrv_getlength(bs);
1322 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001323 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001324 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001325 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001326 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001327
1328 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001329 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001330 if (ret < 0) {
1331 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001332 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001333 }
1334
1335 bdrv_qcow2 = bdrv_find_format("qcow2");
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001336 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1337 &error_abort);
Chunyan Liu83d05212014-06-05 17:20:51 +08001338 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001339 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001340 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001341 if (ret < 0) {
1342 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1343 "'%s': %s", tmp_filename,
1344 error_get_pretty(local_err));
1345 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001346 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001347 }
1348
1349 /* Prepare a new options QDict for the temporary file */
1350 snapshot_options = qdict_new();
1351 qdict_put(snapshot_options, "file.driver",
1352 qstring_from_str("file"));
1353 qdict_put(snapshot_options, "file.filename",
1354 qstring_from_str(tmp_filename));
1355
Kevin Wolf98522f62014-04-17 13:16:01 +02001356 bs_snapshot = bdrv_new("", &error_abort);
Kevin Wolfb9988752014-04-03 12:09:34 +02001357
1358 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001359 flags, bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001360 if (ret < 0) {
1361 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001362 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001363 }
1364
1365 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001366
1367out:
1368 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001369 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001370}
1371
Max Reitzda557aa2013-12-20 19:28:11 +01001372/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001373 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001374 *
1375 * options is a QDict of options to pass to the block drivers, or NULL for an
1376 * empty set of options. The reference to the QDict belongs to the block layer
1377 * after the call (even on failure), so if the caller intends to reuse the
1378 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001379 *
1380 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1381 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001382 *
1383 * The reference parameter may be used to specify an existing block device which
1384 * should be opened. If specified, neither options nor a filename may be given,
1385 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001386 */
Max Reitzddf56362014-02-18 18:33:06 +01001387int bdrv_open(BlockDriverState **pbs, const char *filename,
1388 const char *reference, QDict *options, int flags,
1389 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001390{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001391 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001392 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001393 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001394 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001395 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001396
Max Reitzf67503e2014-02-18 18:33:05 +01001397 assert(pbs);
1398
Max Reitzddf56362014-02-18 18:33:06 +01001399 if (reference) {
1400 bool options_non_empty = options ? qdict_size(options) : false;
1401 QDECREF(options);
1402
1403 if (*pbs) {
1404 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1405 "another block device");
1406 return -EINVAL;
1407 }
1408
1409 if (filename || options_non_empty) {
1410 error_setg(errp, "Cannot reference an existing block device with "
1411 "additional options or a new filename");
1412 return -EINVAL;
1413 }
1414
1415 bs = bdrv_lookup_bs(reference, reference, errp);
1416 if (!bs) {
1417 return -ENODEV;
1418 }
1419 bdrv_ref(bs);
1420 *pbs = bs;
1421 return 0;
1422 }
1423
Max Reitzf67503e2014-02-18 18:33:05 +01001424 if (*pbs) {
1425 bs = *pbs;
1426 } else {
Kevin Wolf98522f62014-04-17 13:16:01 +02001427 bs = bdrv_new("", &error_abort);
Max Reitzf67503e2014-02-18 18:33:05 +01001428 }
1429
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001430 /* NULL means an empty set of options */
1431 if (options == NULL) {
1432 options = qdict_new();
1433 }
1434
Kevin Wolf17b005f2014-05-27 10:50:29 +02001435 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001436 if (local_err) {
1437 goto fail;
1438 }
1439
Kevin Wolf76c591b2014-06-04 14:19:44 +02001440 /* Find the right image format driver */
1441 drv = NULL;
1442 drvname = qdict_get_try_str(options, "driver");
1443 if (drvname) {
1444 drv = bdrv_find_format(drvname);
1445 qdict_del(options, "driver");
1446 if (!drv) {
1447 error_setg(errp, "Unknown driver: '%s'", drvname);
1448 ret = -EINVAL;
1449 goto fail;
1450 }
1451 }
1452
1453 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1454 if (drv && !drv->bdrv_file_open) {
1455 /* If the user explicitly wants a format driver here, we'll need to add
1456 * another layer for the protocol in bs->file */
1457 flags &= ~BDRV_O_PROTOCOL;
1458 }
1459
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001460 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001461 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001462
Kevin Wolff4788ad2014-06-03 16:44:19 +02001463 /* Open image file without format layer */
1464 if ((flags & BDRV_O_PROTOCOL) == 0) {
1465 if (flags & BDRV_O_RDWR) {
1466 flags |= BDRV_O_ALLOW_RDWR;
1467 }
1468 if (flags & BDRV_O_SNAPSHOT) {
1469 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1470 flags = bdrv_backing_flags(flags);
1471 }
1472
1473 assert(file == NULL);
1474 ret = bdrv_open_image(&file, filename, options, "file",
1475 bdrv_inherited_flags(flags),
1476 true, &local_err);
1477 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001478 goto fail;
1479 }
1480 }
1481
Kevin Wolf76c591b2014-06-04 14:19:44 +02001482 /* Image format probing */
1483 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001484 ret = find_image_format(file, filename, &drv, &local_err);
1485 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001486 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001487 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001488 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001489 error_setg(errp, "Must specify either driver or file");
1490 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001491 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001492 }
1493
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001494 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001495 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001496 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001497 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001498 }
1499
Max Reitz2a05cbe2013-12-20 19:28:10 +01001500 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001501 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001502 file = NULL;
1503 }
1504
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001505 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001506 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001507 QDict *backing_options;
1508
Benoît Canet5726d872013-09-25 13:30:01 +02001509 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001510 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001511 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001512 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001513 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001514 }
1515
Max Reitz91af7012014-07-18 20:24:56 +02001516 bdrv_refresh_filename(bs);
1517
Kevin Wolfb9988752014-04-03 12:09:34 +02001518 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1519 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001520 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001521 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001522 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001523 goto close_and_fail;
1524 }
1525 }
1526
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001527 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001528 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001529 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001530 if (flags & BDRV_O_PROTOCOL) {
1531 error_setg(errp, "Block protocol '%s' doesn't support the option "
1532 "'%s'", drv->format_name, entry->key);
1533 } else {
1534 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1535 "support the option '%s'", drv->format_name,
1536 bs->device_name, entry->key);
1537 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001538
1539 ret = -EINVAL;
1540 goto close_and_fail;
1541 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001542
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001543 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001544 bdrv_dev_change_media_cb(bs, true);
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001545 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1546 && !runstate_check(RUN_STATE_INMIGRATE)
1547 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1548 error_setg(errp,
1549 "Guest must be stopped for opening of encrypted image");
1550 ret = -EBUSY;
1551 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001552 }
1553
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001554 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001555 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001556 return 0;
1557
Kevin Wolf8bfea152014-04-11 19:16:36 +02001558fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001559 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001560 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001561 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001562 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001563 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001564 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001565 if (!*pbs) {
1566 /* If *pbs is NULL, a new BDS has been created in this function and
1567 needs to be freed now. Otherwise, it does not need to be closed,
1568 since it has not really been opened yet. */
1569 bdrv_unref(bs);
1570 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001571 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001572 error_propagate(errp, local_err);
1573 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001574 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001575
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001576close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001577 /* See fail path, but now the BDS has to be always closed */
1578 if (*pbs) {
1579 bdrv_close(bs);
1580 } else {
1581 bdrv_unref(bs);
1582 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001583 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001584 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001585 error_propagate(errp, local_err);
1586 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001587 return ret;
1588}
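
/*
 * Illustrative sketch, not part of the original file: a typical bdrv_open()
 * call with an options QDict.  The image name and driver are hypothetical.
 * Note that the QDict reference is consumed by bdrv_open() even on failure,
 * so a caller that wants to keep the dictionary must QINCREF() it first.
 */
static int example_open_with_options(Error **errp)
{
    BlockDriverState *bs = NULL;    /* must be NULL unless reusing a BDS */
    QDict *options = qdict_new();
    int ret;

    qdict_put(options, "driver", qstring_from_str("qcow2"));

    ret = bdrv_open(&bs, "test.qcow2", NULL, options, BDRV_O_RDWR,
                    NULL, errp);
    if (ret < 0) {
        return ret;
    }

    /* ... use bs ..., then drop the reference taken by bdrv_open() */
    bdrv_unref(bs);
    return 0;
}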
1589
Jeff Codye971aa12012-09-20 15:13:19 -04001590typedef struct BlockReopenQueueEntry {
1591 bool prepared;
1592 BDRVReopenState state;
1593 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1594} BlockReopenQueueEntry;
1595
1596/*
1597 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1598 * reopen of multiple devices.
1599 *
1600 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1601 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1602 * be created and initialized. This newly created BlockReopenQueue should be
1603 * passed back in for subsequent calls that are intended to be of the same
1604 * atomic 'set'.
1605 *
1606 * bs is the BlockDriverState to add to the reopen queue.
1607 *
1608 * flags contains the open flags for the associated bs
1609 *
1610 * returns a pointer to bs_queue, which is either the newly allocated
1611 * bs_queue, or the existing bs_queue being used.
1612 *
1613 */
1614BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1615 BlockDriverState *bs, int flags)
1616{
1617 assert(bs != NULL);
1618
1619 BlockReopenQueueEntry *bs_entry;
1620 if (bs_queue == NULL) {
1621 bs_queue = g_new0(BlockReopenQueue, 1);
1622 QSIMPLEQ_INIT(bs_queue);
1623 }
1624
Kevin Wolff1f25a22014-04-25 19:04:55 +02001625 /* bdrv_open() masks this flag out */
1626 flags &= ~BDRV_O_PROTOCOL;
1627
Jeff Codye971aa12012-09-20 15:13:19 -04001628 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001629 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001630 }
1631
1632 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1633 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1634
1635 bs_entry->state.bs = bs;
1636 bs_entry->state.flags = flags;
1637
1638 return bs_queue;
1639}
1640
1641/*
1642 * Reopen multiple BlockDriverStates atomically & transactionally.
1643 *
1644 * The queue passed in (bs_queue) must have been built up previously
1645 * via bdrv_reopen_queue().
1646 *
1647 * Reopens all BDS specified in the queue, with the appropriate
1648 * flags. All devices are prepared for reopen, and failure of any
1649 * device will cause all device changes to be abandoned, and intermediate
1650 * data cleaned up.
1651 *
1652 * If all devices prepare successfully, then the changes are committed
1653 * to all devices.
1654 *
1655 */
1656int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1657{
1658 int ret = -1;
1659 BlockReopenQueueEntry *bs_entry, *next;
1660 Error *local_err = NULL;
1661
1662 assert(bs_queue != NULL);
1663
1664 bdrv_drain_all();
1665
1666 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1667 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1668 error_propagate(errp, local_err);
1669 goto cleanup;
1670 }
1671 bs_entry->prepared = true;
1672 }
1673
1674 /* If we reach this point, we have success and just need to apply the
1675 * changes
1676 */
1677 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1678 bdrv_reopen_commit(&bs_entry->state);
1679 }
1680
1681 ret = 0;
1682
1683cleanup:
1684 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1685 if (ret && bs_entry->prepared) {
1686 bdrv_reopen_abort(&bs_entry->state);
1687 }
1688 g_free(bs_entry);
1689 }
1690 g_free(bs_queue);
1691 return ret;
1692}
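
/*
 * Illustrative sketch, not part of the original file: queueing two devices
 * for a single atomic reopen.  bs_a and bs_b are hypothetical, already-open
 * BlockDriverStates; either both are switched to read-write or neither is.
 */
static int example_atomic_reopen_pair(BlockDriverState *bs_a,
                                      BlockDriverState *bs_b,
                                      Error **errp)
{
    BlockReopenQueue *queue;

    /* The first call allocates the queue, later calls reuse it */
    queue = bdrv_reopen_queue(NULL, bs_a, bs_a->open_flags | BDRV_O_RDWR);
    queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags | BDRV_O_RDWR);

    /* bdrv_reopen_multiple() frees the queue in all cases */
    return bdrv_reopen_multiple(queue, errp);
}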
1693
1694
1695/* Reopen a single BlockDriverState with the specified flags. */
1696int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1697{
1698 int ret = -1;
1699 Error *local_err = NULL;
1700 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1701
1702 ret = bdrv_reopen_multiple(queue, &local_err);
1703 if (local_err != NULL) {
1704 error_propagate(errp, local_err);
1705 }
1706 return ret;
1707}
1708
1709
1710/*
1711 * Prepares a BlockDriverState for reopen. All changes are staged in the
1712 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1713 * the block driver's .bdrv_reopen_prepare() callback.
1714 *
1715 * bs is the BlockDriverState to reopen
1716 * flags are the new open flags
1717 * queue is the reopen queue
1718 *
1719 * Returns 0 on success, non-zero on error. On error errp will be set
1720 * as well.
1721 *
1722 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1723 * It is the responsibility of the caller to then call the abort() or
1724 * commit() for any other BDS that have been left in a prepare() state
1725 *
1726 */
1727int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1728 Error **errp)
1729{
1730 int ret = -1;
1731 Error *local_err = NULL;
1732 BlockDriver *drv;
1733
1734 assert(reopen_state != NULL);
1735 assert(reopen_state->bs->drv != NULL);
1736 drv = reopen_state->bs->drv;
1737
1738 /* if we are to stay read-only, do not allow permission change
1739 * to r/w */
1740 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1741 reopen_state->flags & BDRV_O_RDWR) {
1742 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1743 reopen_state->bs->device_name);
1744 goto error;
1745 }
1746
1747
1748 ret = bdrv_flush(reopen_state->bs);
1749 if (ret) {
1750 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1751 strerror(-ret));
1752 goto error;
1753 }
1754
1755 if (drv->bdrv_reopen_prepare) {
1756 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1757 if (ret) {
1758 if (local_err != NULL) {
1759 error_propagate(errp, local_err);
1760 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001761 error_setg(errp, "failed while preparing to reopen image '%s'",
1762 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001763 }
1764 goto error;
1765 }
1766 } else {
1767 /* It is currently mandatory to have a bdrv_reopen_prepare()
1768 * handler for each supported drv. */
1769 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1770 drv->format_name, reopen_state->bs->device_name,
1771 "reopening of file");
1772 ret = -1;
1773 goto error;
1774 }
1775
1776 ret = 0;
1777
1778error:
1779 return ret;
1780}
1781
1782/*
1783 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1784 * makes them final by swapping the staging BlockDriverState contents into
1785 * the active BlockDriverState contents.
1786 */
1787void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1788{
1789 BlockDriver *drv;
1790
1791 assert(reopen_state != NULL);
1792 drv = reopen_state->bs->drv;
1793 assert(drv != NULL);
1794
1795 /* If there are any driver level actions to take */
1796 if (drv->bdrv_reopen_commit) {
1797 drv->bdrv_reopen_commit(reopen_state);
1798 }
1799
1800 /* set BDS specific flags now */
1801 reopen_state->bs->open_flags = reopen_state->flags;
1802 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1803 BDRV_O_CACHE_WB);
1804 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001805
Kevin Wolf3baca892014-07-16 17:48:16 +02001806 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001807}
1808
1809/*
1810 * Abort the reopen, and delete and free the staged changes in
1811 * reopen_state
1812 */
1813void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1814{
1815 BlockDriver *drv;
1816
1817 assert(reopen_state != NULL);
1818 drv = reopen_state->bs->drv;
1819 assert(drv != NULL);
1820
1821 if (drv->bdrv_reopen_abort) {
1822 drv->bdrv_reopen_abort(reopen_state);
1823 }
1824}
1825
1826
bellardfc01f7e2003-06-30 10:03:06 +00001827void bdrv_close(BlockDriverState *bs)
1828{
Max Reitz33384422014-06-20 21:57:33 +02001829 BdrvAioNotifier *ban, *ban_next;
1830
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001831 if (bs->job) {
1832 block_job_cancel_sync(bs->job);
1833 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001834 bdrv_drain_all(); /* complete I/O */
1835 bdrv_flush(bs);
1836 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001837 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001838
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001839 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001840 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001841 BlockDriverState *backing_hd = bs->backing_hd;
1842 bdrv_set_backing_hd(bs, NULL);
1843 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001844 }
bellardea2384d2004-08-01 21:59:26 +00001845 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001846 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001847 bs->opaque = NULL;
1848 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001849 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001850 bs->backing_file[0] = '\0';
1851 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001852 bs->total_sectors = 0;
1853 bs->encrypted = 0;
1854 bs->valid_key = 0;
1855 bs->sg = 0;
1856 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001857 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001858 QDECREF(bs->options);
1859 bs->options = NULL;
Max Reitz91af7012014-07-18 20:24:56 +02001860 QDECREF(bs->full_open_options);
1861 bs->full_open_options = NULL;
bellardb3380822004-03-14 21:38:54 +00001862
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001863 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001864 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001865 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001866 }
bellardb3380822004-03-14 21:38:54 +00001867 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001868
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001869 bdrv_dev_change_media_cb(bs, false);
1870
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001871 /*throttling disk I/O limits*/
1872 if (bs->io_limits_enabled) {
1873 bdrv_io_limits_disable(bs);
1874 }
Max Reitz33384422014-06-20 21:57:33 +02001875
1876 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1877 g_free(ban);
1878 }
1879 QLIST_INIT(&bs->aio_notifiers);
bellardb3380822004-03-14 21:38:54 +00001880}
1881
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001882void bdrv_close_all(void)
1883{
1884 BlockDriverState *bs;
1885
Benoît Canetdc364f42014-01-23 21:31:32 +01001886 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001887 AioContext *aio_context = bdrv_get_aio_context(bs);
1888
1889 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001890 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001891 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001892 }
1893}
1894
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001895/* Check if any requests are in-flight (including throttled requests) */
1896static bool bdrv_requests_pending(BlockDriverState *bs)
1897{
1898 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1899 return true;
1900 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001901 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1902 return true;
1903 }
1904 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001905 return true;
1906 }
1907 if (bs->file && bdrv_requests_pending(bs->file)) {
1908 return true;
1909 }
1910 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1911 return true;
1912 }
1913 return false;
1914}
1915
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001916/*
1917 * Wait for pending requests to complete across all BlockDriverStates
1918 *
1919 * This function does not flush data to disk, use bdrv_flush_all() for that
1920 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001921 *
1922 * Note that completion of an asynchronous I/O operation can trigger any
1923 * number of other I/O operations on other devices---for example a coroutine
1924 * can be arbitrarily complex and a constant flow of I/O can come until the
1925 * coroutine is complete. Because of this, it is not possible to have a
1926 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001927 */
1928void bdrv_drain_all(void)
1929{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001930 /* Always run first iteration so any pending completion BHs run */
1931 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001932 BlockDriverState *bs;
1933
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001934 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001935 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001936
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001937 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1938 AioContext *aio_context = bdrv_get_aio_context(bs);
1939 bool bs_busy;
1940
1941 aio_context_acquire(aio_context);
Ming Lei448ad912014-07-04 18:04:33 +08001942 bdrv_flush_io_queue(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001943 bdrv_start_throttled_reqs(bs);
1944 bs_busy = bdrv_requests_pending(bs);
1945 bs_busy |= aio_poll(aio_context, bs_busy);
1946 aio_context_release(aio_context);
1947
1948 busy |= bs_busy;
1949 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001950 }
1951}
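
/*
 * Illustrative sketch, not part of the original file: the shutdown ordering
 * implied by the comment above, quiescing all in-flight requests first and
 * only then flushing guest data to disk:
 *
 *     bdrv_drain_all();
 *     bdrv_flush_all();
 */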
1952
Benoît Canetdc364f42014-01-23 21:31:32 +01001953/* make a BlockDriverState anonymous by removing it from the bdrv_states and
1954 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001955 * Also, NULL terminate the device_name to prevent double remove */
1956void bdrv_make_anon(BlockDriverState *bs)
1957{
1958 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001959 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001960 }
1961 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001962 if (bs->node_name[0] != '\0') {
1963 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1964 }
1965 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001966}
1967
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001968static void bdrv_rebind(BlockDriverState *bs)
1969{
1970 if (bs->drv && bs->drv->bdrv_rebind) {
1971 bs->drv->bdrv_rebind(bs);
1972 }
1973}
1974
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001975static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1976 BlockDriverState *bs_src)
1977{
1978 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001979
1980 /* dev info */
1981 bs_dest->dev_ops = bs_src->dev_ops;
1982 bs_dest->dev_opaque = bs_src->dev_opaque;
1983 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001984 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001985 bs_dest->copy_on_read = bs_src->copy_on_read;
1986
1987 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1988
Benoît Canetcc0681c2013-09-02 14:14:39 +02001989 /* i/o throttled req */
1990 memcpy(&bs_dest->throttle_state,
1991 &bs_src->throttle_state,
1992 sizeof(ThrottleState));
1993 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1994 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001995 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1996
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001997 /* r/w error */
1998 bs_dest->on_read_error = bs_src->on_read_error;
1999 bs_dest->on_write_error = bs_src->on_write_error;
2000
2001 /* i/o status */
2002 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2003 bs_dest->iostatus = bs_src->iostatus;
2004
2005 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002006 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002007
Fam Zheng9fcb0252013-08-23 09:14:46 +08002008 /* reference count */
2009 bs_dest->refcnt = bs_src->refcnt;
2010
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002011 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002012 bs_dest->job = bs_src->job;
2013
2014 /* keep the same entry in bdrv_states */
2015 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
2016 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01002017 bs_dest->device_list = bs_src->device_list;
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002018 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2019 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002020}
2021
2022/*
2023 * Swap bs contents for two image chains while they are live,
2024 * while keeping required fields on the BlockDriverState that is
2025 * actually attached to a device.
2026 *
2027 * This will modify the BlockDriverState fields, and swap contents
2028 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2029 *
2030 * bs_new is required to be anonymous.
2031 *
2032 * This function does not create any image files.
2033 */
2034void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2035{
2036 BlockDriverState tmp;
2037
Benoît Canet90ce8a02014-03-05 23:48:29 +01002038 /* The code needs to swap the node_name but simply swapping node_list won't
2039 * work, so first remove the nodes from the graph list, do the swap, then
2040 * insert them back if needed.
2041 */
2042 if (bs_new->node_name[0] != '\0') {
2043 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2044 }
2045 if (bs_old->node_name[0] != '\0') {
2046 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2047 }
2048
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002049 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2050 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08002051 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002052 assert(bs_new->job == NULL);
2053 assert(bs_new->dev == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002054 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002055 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002056
2057 tmp = *bs_new;
2058 *bs_new = *bs_old;
2059 *bs_old = tmp;
2060
2061 /* there are some fields that should not be swapped, move them back */
2062 bdrv_move_feature_fields(&tmp, bs_old);
2063 bdrv_move_feature_fields(bs_old, bs_new);
2064 bdrv_move_feature_fields(bs_new, &tmp);
2065
2066 /* bs_new shouldn't be in bdrv_states even after the swap! */
2067 assert(bs_new->device_name[0] == '\0');
2068
2069 /* Check a few fields that should remain attached to the device */
2070 assert(bs_new->dev == NULL);
2071 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002072 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002073 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002074
Benoît Canet90ce8a02014-03-05 23:48:29 +01002075 /* insert the nodes back into the graph node list if needed */
2076 if (bs_new->node_name[0] != '\0') {
2077 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2078 }
2079 if (bs_old->node_name[0] != '\0') {
2080 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2081 }
2082
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002083 bdrv_rebind(bs_new);
2084 bdrv_rebind(bs_old);
2085}
2086
Jeff Cody8802d1f2012-02-28 15:54:06 -05002087/*
2088 * Add new bs contents at the top of an image chain while the chain is
2089 * live, while keeping required fields on the top layer.
2090 *
2091 * This will modify the BlockDriverState fields, and swap contents
2092 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2093 *
Jeff Codyf6801b82012-03-27 16:30:19 -04002094 * bs_new is required to be anonymous.
2095 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002096 * This function does not create any image files.
2097 */
2098void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2099{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002100 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002101
2102 /* bs_new and bs_top have just swapped contents: bs_top now holds the
2103 * new image and bs_new the old top, so make bs_new its backing file. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002104 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002105}
2106
Fam Zheng4f6fd342013-08-23 09:14:47 +08002107static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002108{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002109 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02002110 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002111 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002112 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002113 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002114
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002115 bdrv_close(bs);
2116
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002117 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002118 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002119
Markus Armbruster3ae59582014-09-12 21:26:22 +02002120 drive_info_del(drive_get_by_blockdev(bs));
Anthony Liguori7267c092011-08-20 22:09:37 -05002121 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002122}
2123
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002124int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2125/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02002126{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002127 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02002128 return -EBUSY;
2129 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002130 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03002131 bdrv_iostatus_reset(bs);
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002132
2133 /* We're expecting I/O from the device so bump up coroutine pool size */
2134 qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002135 return 0;
2136}
2137
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002138/* TODO qdevified devices don't use this, remove when devices are qdevified */
2139void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02002140{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002141 if (bdrv_attach_dev(bs, dev) < 0) {
2142 abort();
2143 }
2144}
2145
2146void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2147/* TODO change to DeviceState *dev when all users are qdevified */
2148{
2149 assert(bs->dev == dev);
2150 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02002151 bs->dev_ops = NULL;
2152 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002153 bs->guest_block_size = 512;
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002154 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002155}
2156
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002157/* TODO change to return DeviceState * when all users are qdevified */
2158void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02002159{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002160 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02002161}
2162
Markus Armbruster0e49de52011-08-03 15:07:41 +02002163void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2164 void *opaque)
2165{
2166 bs->dev_ops = ops;
2167 bs->dev_opaque = opaque;
2168}
2169
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002170static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002171{
Markus Armbruster145feb12011-08-03 15:07:42 +02002172 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002173 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002174 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002175 if (tray_was_closed) {
2176 /* tray open */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002177 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2178 true, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002179 }
2180 if (load) {
2181 /* tray close */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002182 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2183 false, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002184 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002185 }
2186}
2187
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002188bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2189{
2190 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2191}
2192
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002193void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2194{
2195 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2196 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2197 }
2198}
2199
Markus Armbrustere4def802011-09-06 18:58:53 +02002200bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2201{
2202 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2203 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2204 }
2205 return false;
2206}
2207
Markus Armbruster145feb12011-08-03 15:07:42 +02002208static void bdrv_dev_resize_cb(BlockDriverState *bs)
2209{
2210 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2211 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002212 }
2213}
2214
Markus Armbrusterf1076392011-09-06 18:58:46 +02002215bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2216{
2217 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2218 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2219 }
2220 return false;
2221}
2222
aliguorie97fc192009-04-21 23:11:50 +00002223/*
2224 * Run consistency checks on an image
2225 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002226 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002227 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002228 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002229 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002230int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002231{
Max Reitz908bcd52014-08-07 22:47:55 +02002232 if (bs->drv == NULL) {
2233 return -ENOMEDIUM;
2234 }
aliguorie97fc192009-04-21 23:11:50 +00002235 if (bs->drv->bdrv_check == NULL) {
2236 return -ENOTSUP;
2237 }
2238
Kevin Wolfe076f332010-06-29 11:43:13 +02002239 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002240 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002241}
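
/*
 * Illustrative sketch, not part of the original file: a read-only consistency
 * check.  Passing 0 as the BdrvCheckMode reports problems without repairing
 * them; the 'corruptions' field below is assumed from the BdrvCheckResult
 * definition in the block layer headers.
 */
static int example_check_image(BlockDriverState *bs)
{
    BdrvCheckResult result;
    int ret = bdrv_check(bs, &result, 0);

    if (ret < 0) {
        return ret;                 /* the check itself could not run */
    }
    return result.corruptions ? -EIO : 0;
}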
2242
Kevin Wolf8a426612010-07-16 17:17:01 +02002243#define COMMIT_BUF_SECTORS 2048
2244
bellard33e39632003-07-06 17:15:21 +00002245/* commit COW file into the raw image */
2246int bdrv_commit(BlockDriverState *bs)
2247{
bellard19cb3732006-08-19 11:45:59 +00002248 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002249 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002250 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002251 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002252 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002253 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002254
bellard19cb3732006-08-19 11:45:59 +00002255 if (!drv)
2256 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002257
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002258 if (!bs->backing_hd) {
2259 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002260 }
2261
Fam Zheng3718d8a2014-05-23 21:29:43 +08002262 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2263 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002264 return -EBUSY;
2265 }
2266
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002267 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002268 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2269 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002270 open_flags = bs->backing_hd->open_flags;
2271
2272 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002273 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2274 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002275 }
bellard33e39632003-07-06 17:15:21 +00002276 }
bellardea2384d2004-08-01 21:59:26 +00002277
Jeff Cody72706ea2014-01-24 09:02:35 -05002278 length = bdrv_getlength(bs);
2279 if (length < 0) {
2280 ret = length;
2281 goto ro_cleanup;
2282 }
2283
2284 backing_length = bdrv_getlength(bs->backing_hd);
2285 if (backing_length < 0) {
2286 ret = backing_length;
2287 goto ro_cleanup;
2288 }
2289
2290 /* If our top snapshot is larger than the backing file image,
2291 * grow the backing file image if possible. If not possible,
2292 * we must return an error */
2293 if (length > backing_length) {
2294 ret = bdrv_truncate(bs->backing_hd, length);
2295 if (ret < 0) {
2296 goto ro_cleanup;
2297 }
2298 }
2299
2300 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002301
2302 /* qemu_try_blockalign() for bs will choose an alignment that works for
2303 * bs->backing_hd as well, so no need to compare the alignment manually. */
2304 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2305 if (buf == NULL) {
2306 ret = -ENOMEM;
2307 goto ro_cleanup;
2308 }
bellardea2384d2004-08-01 21:59:26 +00002309
Kevin Wolf8a426612010-07-16 17:17:01 +02002310 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002311 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2312 if (ret < 0) {
2313 goto ro_cleanup;
2314 }
2315 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002316 ret = bdrv_read(bs, sector, buf, n);
2317 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002318 goto ro_cleanup;
2319 }
2320
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002321 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2322 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002323 goto ro_cleanup;
2324 }
bellardea2384d2004-08-01 21:59:26 +00002325 }
2326 }
bellard95389c82005-12-18 18:28:15 +00002327
Christoph Hellwig1d449522010-01-17 12:32:30 +01002328 if (drv->bdrv_make_empty) {
2329 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002330 if (ret < 0) {
2331 goto ro_cleanup;
2332 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002333 bdrv_flush(bs);
2334 }
bellard95389c82005-12-18 18:28:15 +00002335
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002336 /*
2337 * Make sure all data we wrote to the backing device is actually
2338 * stable on disk.
2339 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002340 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002341 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002342 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002343
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002344 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002345ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002346 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002347
2348 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002349 /* ignoring error return here */
2350 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002351 }
2352
Christoph Hellwig1d449522010-01-17 12:32:30 +01002353 return ret;
bellard33e39632003-07-06 17:15:21 +00002354}
2355
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002356int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002357{
2358 BlockDriverState *bs;
2359
Benoît Canetdc364f42014-01-23 21:31:32 +01002360 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002361 AioContext *aio_context = bdrv_get_aio_context(bs);
2362
2363 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002364 if (bs->drv && bs->backing_hd) {
2365 int ret = bdrv_commit(bs);
2366 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002367 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002368 return ret;
2369 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002370 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002371 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002372 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002373 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002374}
2375
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002376/**
2377 * Remove an active request from the tracked requests list
2378 *
2379 * This function should be called when a tracked request is completing.
2380 */
2381static void tracked_request_end(BdrvTrackedRequest *req)
2382{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002383 if (req->serialising) {
2384 req->bs->serialising_in_flight--;
2385 }
2386
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002387 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002388 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002389}
2390
2391/**
2392 * Add an active request to the tracked requests list
2393 */
2394static void tracked_request_begin(BdrvTrackedRequest *req,
2395 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002396 int64_t offset,
2397 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002398{
2399 *req = (BdrvTrackedRequest){
2400 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002401 .offset = offset,
2402 .bytes = bytes,
2403 .is_write = is_write,
2404 .co = qemu_coroutine_self(),
2405 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002406 .overlap_offset = offset,
2407 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002408 };
2409
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002410 qemu_co_queue_init(&req->wait_queue);
2411
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002412 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2413}
2414
Kevin Wolfe96126f2014-02-08 10:42:18 +01002415static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002416{
Kevin Wolf73271452013-12-04 17:08:50 +01002417 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002418 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2419 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002420
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002421 if (!req->serialising) {
2422 req->bs->serialising_in_flight++;
2423 req->serialising = true;
2424 }
Kevin Wolf73271452013-12-04 17:08:50 +01002425
2426 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2427 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002428}
2429
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002430/**
2431 * Round a region to cluster boundaries
2432 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002433void bdrv_round_to_clusters(BlockDriverState *bs,
2434 int64_t sector_num, int nb_sectors,
2435 int64_t *cluster_sector_num,
2436 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002437{
2438 BlockDriverInfo bdi;
2439
2440 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2441 *cluster_sector_num = sector_num;
2442 *cluster_nb_sectors = nb_sectors;
2443 } else {
2444 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2445 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2446 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2447 nb_sectors, c);
2448 }
2449}
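
/*
 * Worked example (illustrative): with a 64 KiB cluster size, c is 128
 * sectors, so a request of sector_num = 130, nb_sectors = 10 is rounded to
 * cluster_sector_num = 128 and cluster_nb_sectors = 128, i.e. the whole
 * cluster containing the request.
 */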
2450
Kevin Wolf73271452013-12-04 17:08:50 +01002451static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002452{
2453 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002454 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002455
Kevin Wolf73271452013-12-04 17:08:50 +01002456 ret = bdrv_get_info(bs, &bdi);
2457 if (ret < 0 || bdi.cluster_size == 0) {
2458 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002459 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002460 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002461 }
2462}
2463
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002464static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002465 int64_t offset, unsigned int bytes)
2466{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002467 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002468 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002469 return false;
2470 }
2471 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002472 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002473 return false;
2474 }
2475 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002476}
2477
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002478static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002479{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002480 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002481 BdrvTrackedRequest *req;
2482 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002483 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002484
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002485 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002486 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002487 }
2488
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002489 do {
2490 retry = false;
2491 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002492 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002493 continue;
2494 }
Kevin Wolf73271452013-12-04 17:08:50 +01002495 if (tracked_request_overlaps(req, self->overlap_offset,
2496 self->overlap_bytes))
2497 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002498 /* Hitting this means there was a reentrant request, for
2499 * example, a block driver issuing nested requests. This must
2500 * never happen since it means deadlock.
2501 */
2502 assert(qemu_coroutine_self() != req->co);
2503
Kevin Wolf64604402013-12-13 13:04:35 +01002504 /* If the request is already (indirectly) waiting for us, or
2505 * will wait for us as soon as it wakes up, then just go on
2506 * (instead of producing a deadlock in the former case). */
2507 if (!req->waiting_for) {
2508 self->waiting_for = req;
2509 qemu_co_queue_wait(&req->wait_queue);
2510 self->waiting_for = NULL;
2511 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002512 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002513 break;
2514 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002515 }
2516 }
2517 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002518
2519 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002520}
2521
Kevin Wolf756e6732010-01-12 12:55:17 +01002522/*
2523 * Return values:
2524 * 0 - success
2525 * -EINVAL - backing format specified, but no file
2526 * -ENOSPC - can't update the backing file because no space is left in the
2527 * image file header
2528 * -ENOTSUP - format driver doesn't support changing the backing file
2529 */
2530int bdrv_change_backing_file(BlockDriverState *bs,
2531 const char *backing_file, const char *backing_fmt)
2532{
2533 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002534 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002535
Paolo Bonzini5f377792012-04-12 14:01:01 +02002536 /* Backing file format doesn't make sense without a backing file */
2537 if (backing_fmt && !backing_file) {
2538 return -EINVAL;
2539 }
2540
Kevin Wolf756e6732010-01-12 12:55:17 +01002541 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002542 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002543 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002544 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002545 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002546
2547 if (ret == 0) {
2548 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2549 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2550 }
2551 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002552}
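
/*
 * Illustrative sketch, not part of the original file: rewriting the recorded
 * backing file of an overlay after its backing image has been moved.  The
 * file name is hypothetical.
 *
 *     ret = bdrv_change_backing_file(bs, "new-base.qcow2", "qcow2");
 */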
2553
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002554/*
2555 * Finds the image layer in the chain that has 'bs' as its backing file.
2556 *
2557 * active is the current topmost image.
2558 *
2559 * Returns NULL if bs is not found in active's image chain,
2560 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002561 *
2562 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002563 */
2564BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2565 BlockDriverState *bs)
2566{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002567 while (active && bs != active->backing_hd) {
2568 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002569 }
2570
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002571 return active;
2572}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002573
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002574/* Given a BDS, searches for the base layer. */
2575BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2576{
2577 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002578}
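
/*
 * Worked example (illustrative): for the chain base <- sn1 <- active,
 * bdrv_find_overlay(active, base) returns sn1, bdrv_find_overlay(active,
 * active) returns NULL, and bdrv_find_base(active) returns base.
 */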
2579
2580typedef struct BlkIntermediateStates {
2581 BlockDriverState *bs;
2582 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2583} BlkIntermediateStates;
2584
2585
2586/*
2587 * Drops images above 'base' up to and including 'top', and sets the image
2588 * above 'top' to have base as its backing file.
2589 *
2590 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2591 * information in that overlay can be properly updated.
2592 *
2593 * E.g., this will convert the following chain:
2594 * bottom <- base <- intermediate <- top <- active
2595 *
2596 * to
2597 *
2598 * bottom <- base <- active
2599 *
2600 * It is allowed for bottom==base, in which case it converts:
2601 *
2602 * base <- intermediate <- top <- active
2603 *
2604 * to
2605 *
2606 * base <- active
2607 *
Jeff Cody54e26902014-06-25 15:40:10 -04002608 * If backing_file_str is non-NULL, it will be used when modifying top's
2609 * overlay image metadata.
2610 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002611 * Error conditions:
2612 * if active == top, that is considered an error
2613 *
2614 */
2615int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002616 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002617{
2618 BlockDriverState *intermediate;
2619 BlockDriverState *base_bs = NULL;
2620 BlockDriverState *new_top_bs = NULL;
2621 BlkIntermediateStates *intermediate_state, *next;
2622 int ret = -EIO;
2623
2624 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2625 QSIMPLEQ_INIT(&states_to_delete);
2626
2627 if (!top->drv || !base->drv) {
2628 goto exit;
2629 }
2630
2631 new_top_bs = bdrv_find_overlay(active, top);
2632
2633 if (new_top_bs == NULL) {
2634 /* we could not find the image above 'top', this is an error */
2635 goto exit;
2636 }
2637
2638 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2639 * to do, no intermediate images */
2640 if (new_top_bs->backing_hd == base) {
2641 ret = 0;
2642 goto exit;
2643 }
2644
2645 intermediate = top;
2646
2647 /* now we will go down through the list, and add each BDS we find
2648 * into our deletion queue, until we hit the 'base'
2649 */
2650 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002651 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002652 intermediate_state->bs = intermediate;
2653 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2654
2655 if (intermediate->backing_hd == base) {
2656 base_bs = intermediate->backing_hd;
2657 break;
2658 }
2659 intermediate = intermediate->backing_hd;
2660 }
2661 if (base_bs == NULL) {
2662 /* something went wrong, we did not end up at the base. Safely
2663 * unravel everything, and exit with an error */
2664 goto exit;
2665 }
2666
2667 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002668 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2669 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002670 base_bs->drv ? base_bs->drv->format_name : "");
2671 if (ret) {
2672 goto exit;
2673 }
Fam Zheng920beae2014-05-23 21:29:46 +08002674 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002675
2676 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2677 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002678 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002679 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002680 }
2681 ret = 0;
2682
2683exit:
2684 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2685 g_free(intermediate_state);
2686 }
2687 return ret;
2688}
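
/*
 * Illustrative sketch, not part of the original file: collapsing
 * "base <- intermediate <- top <- active" down to "base <- active" as
 * described in the comment above. Passing NULL for backing_file_str makes
 * bdrv_drop_intermediate() fall back to base's filename when it rewrites
 * the overlay's metadata. The helper name is an assumption.
 */
static int example_collapse_chain(BlockDriverState *active,
                                  BlockDriverState *top,
                                  BlockDriverState *base)
{
    /* active == top is rejected by bdrv_drop_intermediate() itself */
    return bdrv_drop_intermediate(active, top, base, NULL);
}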
2689
2690
aliguori71d07702009-03-03 17:37:16 +00002691static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2692 size_t size)
2693{
2694 int64_t len;
2695
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002696 if (size > INT_MAX) {
2697 return -EIO;
2698 }
2699
aliguori71d07702009-03-03 17:37:16 +00002700 if (!bdrv_is_inserted(bs))
2701 return -ENOMEDIUM;
2702
2703 if (bs->growable)
2704 return 0;
2705
2706 len = bdrv_getlength(bs);
2707
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002708 if (offset < 0)
2709 return -EIO;
2710
2711 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002712 return -EIO;
2713
2714 return 0;
2715}
2716
2717static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2718 int nb_sectors)
2719{
Kevin Wolf54db38a2014-04-14 14:47:14 +02002720 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002721 return -EIO;
2722 }
2723
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002724 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2725 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002726}
2727
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002728typedef struct RwCo {
2729 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002730 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002731 QEMUIOVector *qiov;
2732 bool is_write;
2733 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002734 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002735} RwCo;
2736
2737static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2738{
2739 RwCo *rwco = opaque;
2740
2741 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002742 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2743 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002744 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002745 } else {
2746 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2747 rwco->qiov->size, rwco->qiov,
2748 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002749 }
2750}
2751
2752/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002753 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002754 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002755static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2756 QEMUIOVector *qiov, bool is_write,
2757 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002758{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002759 Coroutine *co;
2760 RwCo rwco = {
2761 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002762 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002763 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002764 .is_write = is_write,
2765 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002766 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002767 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002768
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002769 /**
2770 * In a synchronous call context the vcpu is blocked, so this throttling
2771 * timer will not fire; therefore I/O throttling has to be disabled here
2772 * if it has been enabled.
2773 */
2774 if (bs->io_limits_enabled) {
2775 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2776 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2777 bdrv_io_limits_disable(bs);
2778 }
2779
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002780 if (qemu_in_coroutine()) {
2781 /* Fast-path if already in coroutine context */
2782 bdrv_rw_co_entry(&rwco);
2783 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002784 AioContext *aio_context = bdrv_get_aio_context(bs);
2785
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002786 co = qemu_coroutine_create(bdrv_rw_co_entry);
2787 qemu_coroutine_enter(co, &rwco);
2788 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002789 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002790 }
2791 }
2792 return rwco.ret;
2793}
2794
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002795/*
2796 * Process a synchronous request using coroutines
2797 */
2798static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002799 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002800{
2801 QEMUIOVector qiov;
2802 struct iovec iov = {
2803 .iov_base = (void *)buf,
2804 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2805 };
2806
Kevin Wolfda15ee52014-04-14 15:39:36 +02002807 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2808 return -EINVAL;
2809 }
2810
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002811 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002812 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2813 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002814}
2815
bellard19cb3732006-08-19 11:45:59 +00002816/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002817int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002818 uint8_t *buf, int nb_sectors)
2819{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002820 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002821}
2822
Markus Armbruster07d27a42012-06-29 17:34:29 +02002823/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2824int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2825 uint8_t *buf, int nb_sectors)
2826{
2827 bool enabled;
2828 int ret;
2829
2830 enabled = bs->io_limits_enabled;
2831 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002832 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002833 bs->io_limits_enabled = enabled;
2834 return ret;
2835}
2836
ths5fafdf22007-09-16 21:08:06 +00002837/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002838 -EIO generic I/O error (may happen for all errors)
2839 -ENOMEDIUM No media inserted.
2840 -EINVAL Invalid sector number or nb_sectors
2841 -EACCES Trying to write a read-only device
2842*/
ths5fafdf22007-09-16 21:08:06 +00002843int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002844 const uint8_t *buf, int nb_sectors)
2845{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002846 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002847}
2848
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002849int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2850 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002851{
2852 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002853 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002854}
2855
Peter Lievend75cbb52013-10-24 12:07:03 +02002856/*
2857 * Completely zero out a block device with the help of bdrv_write_zeroes.
2858 * The operation is sped up by checking the block status and only writing
2859 * zeroes to the device if they currently do not return zeroes. Optional
2860 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2861 *
2862 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2863 */
2864int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2865{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002866 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002867 int n;
2868
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002869 target_sectors = bdrv_nb_sectors(bs);
2870 if (target_sectors < 0) {
2871 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002872 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002873
Peter Lievend75cbb52013-10-24 12:07:03 +02002874 for (;;) {
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002875 nb_sectors = target_sectors - sector_num;
Peter Lievend75cbb52013-10-24 12:07:03 +02002876 if (nb_sectors <= 0) {
2877 return 0;
2878 }
2879 if (nb_sectors > INT_MAX) {
2880 nb_sectors = INT_MAX;
2881 }
2882 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002883 if (ret < 0) {
2884 error_report("error getting block status at sector %" PRId64 ": %s",
2885 sector_num, strerror(-ret));
2886 return ret;
2887 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002888 if (ret & BDRV_BLOCK_ZERO) {
2889 sector_num += n;
2890 continue;
2891 }
2892 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2893 if (ret < 0) {
2894 error_report("error writing zeroes at sector %" PRId64 ": %s",
2895 sector_num, strerror(-ret));
2896 return ret;
2897 }
2898 sector_num += n;
2899 }
2900}
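
/*
 * Illustrative sketch, not part of the original file: zeroing out a whole
 * device. bdrv_make_zero() skips ranges that already read as zeroes, so it
 * is the cheaper choice for a full wipe, while bdrv_write_zeroes() targets
 * a specific sector range. BDRV_REQ_MAY_UNMAP lets the driver discard
 * instead of writing zeroes where it can.
 */
static int example_wipe_device(BlockDriverState *bs)
{
    return bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
}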
2901
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002902int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002903{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002904 QEMUIOVector qiov;
2905 struct iovec iov = {
2906 .iov_base = (void *)buf,
2907 .iov_len = bytes,
2908 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002909 int ret;
bellard83f64092006-08-01 16:21:11 +00002910
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002911 if (bytes < 0) {
2912 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002913 }
2914
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002915 qemu_iovec_init_external(&qiov, &iov, 1);
2916 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2917 if (ret < 0) {
2918 return ret;
bellard83f64092006-08-01 16:21:11 +00002919 }
2920
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002921 return bytes;
bellard83f64092006-08-01 16:21:11 +00002922}
2923
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002924int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002925{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002926 int ret;
bellard83f64092006-08-01 16:21:11 +00002927
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002928 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2929 if (ret < 0) {
2930 return ret;
bellard83f64092006-08-01 16:21:11 +00002931 }
2932
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002933 return qiov->size;
2934}
2935
2936int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002937 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002938{
2939 QEMUIOVector qiov;
2940 struct iovec iov = {
2941 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002942 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002943 };
2944
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002945 if (bytes < 0) {
2946 return -EINVAL;
2947 }
2948
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002949 qemu_iovec_init_external(&qiov, &iov, 1);
2950 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002951}
bellard83f64092006-08-01 16:21:11 +00002952
Kevin Wolff08145f2010-06-16 16:38:15 +02002953/*
2954 * Writes to the file and ensures that no writes are reordered across this
2955 * request (acts as a barrier)
2956 *
2957 * Returns 0 on success, -errno in error cases.
2958 */
2959int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2960 const void *buf, int count)
2961{
2962 int ret;
2963
2964 ret = bdrv_pwrite(bs, offset, buf, count);
2965 if (ret < 0) {
2966 return ret;
2967 }
2968
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002969 /* No flush needed for cache modes that already do it */
2970 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002971 bdrv_flush(bs);
2972 }
2973
2974 return 0;
2975}
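
/*
 * Illustrative sketch, not part of the original file: a byte-granularity
 * read-modify-write of a small on-disk counter using bdrv_pread() and
 * bdrv_pwrite_sync(), so that the update cannot be reordered behind later
 * writes. The offset parameter and the field layout are invented for the
 * example, and endianness handling is omitted for brevity.
 */
static int example_bump_counter(BlockDriverState *bs, int64_t offset)
{
    uint32_t counter;
    int ret;

    ret = bdrv_pread(bs, offset, &counter, sizeof(counter));
    if (ret < 0) {
        return ret;
    }

    counter++;

    return bdrv_pwrite_sync(bs, offset, &counter, sizeof(counter));
}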
2976
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002977static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002978 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2979{
2980 /* Perform I/O through a temporary buffer so that users who scribble over
2981 * their read buffer while the operation is in progress do not end up
2982 * modifying the image file. This is critical for zero-copy guest I/O
2983 * where anything might happen inside guest memory.
2984 */
2985 void *bounce_buffer;
2986
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002987 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002988 struct iovec iov;
2989 QEMUIOVector bounce_qiov;
2990 int64_t cluster_sector_num;
2991 int cluster_nb_sectors;
2992 size_t skip_bytes;
2993 int ret;
2994
2995 /* Cover the entire cluster so that no additional backing file I/O is
2996 * required when allocating a cluster in the image file.
2997 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002998 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2999 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003000
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003001 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3002 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003003
3004 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02003005 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3006 if (bounce_buffer == NULL) {
3007 ret = -ENOMEM;
3008 goto err;
3009 }
3010
Stefan Hajnocziab185922011-11-17 13:40:31 +00003011 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3012
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003013 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3014 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003015 if (ret < 0) {
3016 goto err;
3017 }
3018
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003019 if (drv->bdrv_co_write_zeroes &&
3020 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003021 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003022 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003023 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003024 /* This does not change the data on the disk, it is not necessary
3025 * to flush even in cache=writethrough mode.
3026 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003027 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003028 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003029 }
3030
Stefan Hajnocziab185922011-11-17 13:40:31 +00003031 if (ret < 0) {
3032 /* It might be okay to ignore write errors for guest requests. If this
3033 * is a deliberate copy-on-read then we don't want to ignore the error.
3034 * Simply report it in all cases.
3035 */
3036 goto err;
3037 }
3038
3039 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003040 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3041 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003042
3043err:
3044 qemu_vfree(bounce_buffer);
3045 return ret;
3046}
3047
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003048/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003049 * Forwards an already correctly aligned request to the BlockDriver. This
3050 * handles copy on read and zeroing after EOF; any other features must be
3051 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003052 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003053static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003054 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003055 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003056{
3057 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003058 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003059
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003060 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3061 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003062
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003063 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3064 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003065 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003066
3067 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003068 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003069 /* If we touch the same cluster it counts as an overlap. This
3070 * guarantees that allocating writes will be serialized and not race
3071 * with each other for the same cluster. For example, in copy-on-read
3072 * it ensures that the CoR read and write operations are atomic and
3073 * guest writes cannot interleave between them. */
3074 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003075 }
3076
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003077 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003078
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003079 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003080 int pnum;
3081
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003082 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003083 if (ret < 0) {
3084 goto out;
3085 }
3086
3087 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003088 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003089 goto out;
3090 }
3091 }
3092
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003093 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003094 if (!(bs->zero_beyond_eof && bs->growable)) {
3095 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3096 } else {
3097 /* Read zeroes after EOF of growable BDSes */
Markus Armbruster40490822014-06-26 13:23:19 +02003098 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003099
Markus Armbruster40490822014-06-26 13:23:19 +02003100 total_sectors = bdrv_nb_sectors(bs);
3101 if (total_sectors < 0) {
3102 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003103 goto out;
3104 }
3105
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003106 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3107 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003108 if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003109 QEMUIOVector local_qiov;
3110 size_t local_sectors;
3111
3112 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3113 local_sectors = MIN(max_nb_sectors, nb_sectors);
3114
3115 qemu_iovec_init(&local_qiov, qiov->niov);
3116 qemu_iovec_concat(&local_qiov, qiov, 0,
3117 local_sectors * BDRV_SECTOR_SIZE);
3118
3119 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3120 &local_qiov);
3121
3122 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003123 } else {
3124 ret = 0;
3125 }
3126
3127 /* Reading beyond end of file is supposed to produce zeroes */
3128 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3129 uint64_t offset = MAX(0, total_sectors - sector_num);
3130 uint64_t bytes = (sector_num + nb_sectors - offset) *
3131 BDRV_SECTOR_SIZE;
3132 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3133 }
3134 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003135
3136out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003137 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003138}
3139
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003140/*
3141 * Handle a read request in coroutine context
3142 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003143static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3144 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003145 BdrvRequestFlags flags)
3146{
3147 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003148 BdrvTrackedRequest req;
3149
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003150 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3151 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3152 uint8_t *head_buf = NULL;
3153 uint8_t *tail_buf = NULL;
3154 QEMUIOVector local_qiov;
3155 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003156 int ret;
3157
3158 if (!drv) {
3159 return -ENOMEDIUM;
3160 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003161 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003162 return -EIO;
3163 }
3164
3165 if (bs->copy_on_read) {
3166 flags |= BDRV_REQ_COPY_ON_READ;
3167 }
3168
3169 /* throttling disk I/O */
3170 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003171 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003172 }
3173
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003174 /* Align read if necessary by padding qiov */
3175 if (offset & (align - 1)) {
3176 head_buf = qemu_blockalign(bs, align);
3177 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3178 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3179 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3180 use_local_qiov = true;
3181
3182 bytes += offset & (align - 1);
3183 offset = offset & ~(align - 1);
3184 }
3185
3186 if ((offset + bytes) & (align - 1)) {
3187 if (!use_local_qiov) {
3188 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3189 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3190 use_local_qiov = true;
3191 }
3192 tail_buf = qemu_blockalign(bs, align);
3193 qemu_iovec_add(&local_qiov, tail_buf,
3194 align - ((offset + bytes) & (align - 1)));
3195
3196 bytes = ROUND_UP(bytes, align);
3197 }
3198
Kevin Wolf65afd212013-12-03 14:55:55 +01003199 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003200 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003201 use_local_qiov ? &local_qiov : qiov,
3202 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003203 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003204
3205 if (use_local_qiov) {
3206 qemu_iovec_destroy(&local_qiov);
3207 qemu_vfree(head_buf);
3208 qemu_vfree(tail_buf);
3209 }
3210
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003211 return ret;
3212}
3213
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003214static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3215 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3216 BdrvRequestFlags flags)
3217{
3218 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3219 return -EINVAL;
3220 }
3221
3222 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3223 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3224}
3225
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003226int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003227 int nb_sectors, QEMUIOVector *qiov)
3228{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003229 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003230
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003231 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3232}
3233
3234int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3235 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3236{
3237 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3238
3239 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3240 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003241}
3242
Peter Lievenc31cb702013-10-24 12:06:58 +02003243/* If no limit is specified in the BlockLimits, use a default
3244 * of 32768 512-byte sectors (16 MiB) per request.
3245 */
3246#define MAX_WRITE_ZEROES_DEFAULT 32768
3247
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003248static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003249 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003250{
3251 BlockDriver *drv = bs->drv;
3252 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003253 struct iovec iov = {0};
3254 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003255
Peter Lievenc31cb702013-10-24 12:06:58 +02003256 int max_write_zeroes = bs->bl.max_write_zeroes ?
3257 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003258
Peter Lievenc31cb702013-10-24 12:06:58 +02003259 while (nb_sectors > 0 && !ret) {
3260 int num = nb_sectors;
3261
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003262 /* Align request. Block drivers can expect the "bulk" of the request
3263 * to be aligned.
3264 */
3265 if (bs->bl.write_zeroes_alignment
3266 && num > bs->bl.write_zeroes_alignment) {
3267 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3268 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003269 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003270 num -= sector_num % bs->bl.write_zeroes_alignment;
3271 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3272 /* Shorten the request to the last aligned sector. num cannot
3273 * underflow because num > bs->bl.write_zeroes_alignment.
3274 */
3275 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003276 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003277 }
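        /*
         * Worked example (illustrative, not in the original): with
         * write_zeroes_alignment == 8, a request for sectors [5, 21) is
         * split across loop iterations into [5, 8) by the head branch
         * above, then [8, 16) shortened to the aligned boundary by the
         * tail branch, and the remaining [16, 21) is written as-is on
         * the final iteration.
         */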
Peter Lievenc31cb702013-10-24 12:06:58 +02003278
3279 /* limit request size */
3280 if (num > max_write_zeroes) {
3281 num = max_write_zeroes;
3282 }
3283
3284 ret = -ENOTSUP;
3285 /* First try the efficient write zeroes operation */
3286 if (drv->bdrv_co_write_zeroes) {
3287 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3288 }
3289
3290 if (ret == -ENOTSUP) {
3291 /* Fall back to bounce buffer if write zeroes is unsupported */
3292 iov.iov_len = num * BDRV_SECTOR_SIZE;
3293 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003294 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3295 if (iov.iov_base == NULL) {
3296 ret = -ENOMEM;
3297 goto fail;
3298 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003299 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003300 }
3301 qemu_iovec_init_external(&qiov, &iov, 1);
3302
3303 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003304
3305 /* Keep the bounce buffer around if it is big enough for all
3306 * future requests.
3307 */
3308 if (num < max_write_zeroes) {
3309 qemu_vfree(iov.iov_base);
3310 iov.iov_base = NULL;
3311 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003312 }
3313
3314 sector_num += num;
3315 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003316 }
3317
Kevin Wolf857d4f42014-05-20 13:16:51 +02003318fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003319 qemu_vfree(iov.iov_base);
3320 return ret;
3321}
3322
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003323/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003324 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003325 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003326static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003327 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3328 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003329{
3330 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003331 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003332 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003333
Kevin Wolfb404f722013-12-03 14:02:23 +01003334 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3335 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003336
Kevin Wolfb404f722013-12-03 14:02:23 +01003337 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3338 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003339 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003340
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003341 waited = wait_serialising_requests(req);
3342 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003343 assert(req->overlap_offset <= offset);
3344 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003345
Kevin Wolf65afd212013-12-03 14:55:55 +01003346 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003347
Peter Lieven465bee12014-05-18 00:58:19 +02003348 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3349 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3350 qemu_iovec_is_zero(qiov)) {
3351 flags |= BDRV_REQ_ZERO_WRITE;
3352 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3353 flags |= BDRV_REQ_MAY_UNMAP;
3354 }
3355 }
3356
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003357 if (ret < 0) {
3358 /* Do nothing, write notifier decided to fail this request */
3359 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003360 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003361 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003362 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003363 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003364 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3365 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003366 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003367
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003368 if (ret == 0 && !bs->enable_write_cache) {
3369 ret = bdrv_co_flush(bs);
3370 }
3371
Fam Zhenge4654d22013-11-13 18:29:43 +08003372 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003373
Benoît Canet5366d0c2014-09-05 15:46:18 +02003374 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003375
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003376 if (bs->growable && ret >= 0) {
3377 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3378 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003379
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003380 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003381}
3382
Kevin Wolfb404f722013-12-03 14:02:23 +01003383/*
3384 * Handle a write request in coroutine context
3385 */
Kevin Wolf66015532013-12-03 14:40:18 +01003386static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3387 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003388 BdrvRequestFlags flags)
3389{
Kevin Wolf65afd212013-12-03 14:55:55 +01003390 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003391 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3392 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3393 uint8_t *head_buf = NULL;
3394 uint8_t *tail_buf = NULL;
3395 QEMUIOVector local_qiov;
3396 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003397 int ret;
3398
3399 if (!bs->drv) {
3400 return -ENOMEDIUM;
3401 }
3402 if (bs->read_only) {
3403 return -EACCES;
3404 }
Kevin Wolf66015532013-12-03 14:40:18 +01003405 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003406 return -EIO;
3407 }
3408
Kevin Wolfb404f722013-12-03 14:02:23 +01003409 /* throttling disk I/O */
3410 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003411 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003412 }
3413
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003414 /*
3415 * Align write if necessary by performing a read-modify-write cycle.
3416 * Pad qiov with the read parts and be sure to have a tracked request not
3417 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3418 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003419 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003420
3421 if (offset & (align - 1)) {
3422 QEMUIOVector head_qiov;
3423 struct iovec head_iov;
3424
3425 mark_request_serialising(&req, align);
3426 wait_serialising_requests(&req);
3427
3428 head_buf = qemu_blockalign(bs, align);
3429 head_iov = (struct iovec) {
3430 .iov_base = head_buf,
3431 .iov_len = align,
3432 };
3433 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3434
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003435 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003436 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3437 align, &head_qiov, 0);
3438 if (ret < 0) {
3439 goto fail;
3440 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003441 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003442
3443 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3444 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3445 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3446 use_local_qiov = true;
3447
3448 bytes += offset & (align - 1);
3449 offset = offset & ~(align - 1);
3450 }
3451
3452 if ((offset + bytes) & (align - 1)) {
3453 QEMUIOVector tail_qiov;
3454 struct iovec tail_iov;
3455 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003456 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003457
3458 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003459 waited = wait_serialising_requests(&req);
3460 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003461
3462 tail_buf = qemu_blockalign(bs, align);
3463 tail_iov = (struct iovec) {
3464 .iov_base = tail_buf,
3465 .iov_len = align,
3466 };
3467 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3468
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003469 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003470 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3471 align, &tail_qiov, 0);
3472 if (ret < 0) {
3473 goto fail;
3474 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003475 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003476
3477 if (!use_local_qiov) {
3478 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3479 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3480 use_local_qiov = true;
3481 }
3482
3483 tail_bytes = (offset + bytes) & (align - 1);
3484 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3485
3486 bytes = ROUND_UP(bytes, align);
3487 }
3488
3489 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3490 use_local_qiov ? &local_qiov : qiov,
3491 flags);
3492
3493fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003494 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003495
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003496 if (use_local_qiov) {
3497 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003498 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003499 qemu_vfree(head_buf);
3500 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003501
Kevin Wolfb404f722013-12-03 14:02:23 +01003502 return ret;
3503}
3504
Kevin Wolf66015532013-12-03 14:40:18 +01003505static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3506 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3507 BdrvRequestFlags flags)
3508{
3509 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3510 return -EINVAL;
3511 }
3512
3513 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3514 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3515}
3516
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003517int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3518 int nb_sectors, QEMUIOVector *qiov)
3519{
3520 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3521
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003522 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3523}
3524
3525int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003526 int64_t sector_num, int nb_sectors,
3527 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003528{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003529 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003530
Peter Lievend32f35c2013-10-24 12:06:52 +02003531 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3532 flags &= ~BDRV_REQ_MAY_UNMAP;
3533 }
3534
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003535 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003536 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003537}
3538
bellard83f64092006-08-01 16:21:11 +00003539/**
bellard83f64092006-08-01 16:21:11 +00003540 * Truncate file to 'offset' bytes (needed only for file protocols)
3541 */
3542int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3543{
3544 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003545 int ret;
bellard83f64092006-08-01 16:21:11 +00003546 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003547 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003548 if (!drv->bdrv_truncate)
3549 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003550 if (bs->read_only)
3551 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003552
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003553 ret = drv->bdrv_truncate(bs, offset);
3554 if (ret == 0) {
3555 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003556 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003557 }
3558 return ret;
bellard83f64092006-08-01 16:21:11 +00003559}
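
/*
 * Illustrative sketch, not part of the original file: growing an image by
 * a number of bytes on top of its current length by combining
 * bdrv_getlength() and bdrv_truncate(). Only drivers that implement
 * bdrv_truncate (typically file protocols) will accept this, and the image
 * must not be read-only. The helper name is an assumption.
 */
static int example_grow_image(BlockDriverState *bs, int64_t additional_bytes)
{
    int64_t len = bdrv_getlength(bs);

    if (len < 0) {
        return len;
    }
    return bdrv_truncate(bs, len + additional_bytes);
}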
3560
3561/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003562 * Length of an allocated file in bytes. Sparse files are counted by actual
3563 * allocated space. Return < 0 on error or if unknown.
3564 */
3565int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3566{
3567 BlockDriver *drv = bs->drv;
3568 if (!drv) {
3569 return -ENOMEDIUM;
3570 }
3571 if (drv->bdrv_get_allocated_file_size) {
3572 return drv->bdrv_get_allocated_file_size(bs);
3573 }
3574 if (bs->file) {
3575 return bdrv_get_allocated_file_size(bs->file);
3576 }
3577 return -ENOTSUP;
3578}
3579
3580/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003581 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003582 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003583int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003584{
3585 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003586
bellard83f64092006-08-01 16:21:11 +00003587 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003588 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003589
Kevin Wolfb94a2612013-10-29 12:18:58 +01003590 if (drv->has_variable_length) {
3591 int ret = refresh_total_sectors(bs, bs->total_sectors);
3592 if (ret < 0) {
3593 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003594 }
bellard83f64092006-08-01 16:21:11 +00003595 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003596 return bs->total_sectors;
3597}
3598
3599/**
3600 * Return length in bytes on success, -errno on error.
3601 * The length is always a multiple of BDRV_SECTOR_SIZE.
3602 */
3603int64_t bdrv_getlength(BlockDriverState *bs)
3604{
3605 int64_t ret = bdrv_nb_sectors(bs);
3606
3607 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003608}
3609
bellard19cb3732006-08-19 11:45:59 +00003610/* return 0 as the number of sectors if no device is present or on error */
ths96b8f132007-12-17 01:35:20 +00003611void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003612{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003613 int64_t nb_sectors = bdrv_nb_sectors(bs);
3614
3615 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003616}
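
/*
 * Illustrative sketch, not part of the original file: how the three size
 * queries above relate. bdrv_getlength() is bdrv_nb_sectors() scaled by
 * BDRV_SECTOR_SIZE, and bdrv_get_geometry() reports the same sector count
 * with errors clamped to 0. The helper below merely restates the first
 * relationship.
 */
static int64_t example_device_length(BlockDriverState *bs)
{
    int64_t nb_sectors = bdrv_nb_sectors(bs);

    if (nb_sectors < 0) {
        return nb_sectors;                  /* -errno, like bdrv_getlength() */
    }
    return nb_sectors * BDRV_SECTOR_SIZE;   /* what bdrv_getlength() returns */
}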
bellardcf989512004-02-16 21:56:36 +00003617
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003618void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3619 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003620{
3621 bs->on_read_error = on_read_error;
3622 bs->on_write_error = on_write_error;
3623}
3624
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003625BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003626{
3627 return is_read ? bs->on_read_error : bs->on_write_error;
3628}
3629
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003630BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3631{
3632 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3633
3634 switch (on_err) {
3635 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003636 return (error == ENOSPC) ?
3637 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003638 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003639 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003640 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003641 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003642 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003643 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003644 default:
3645 abort();
3646 }
3647}
3648
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003649static void send_qmp_error_event(BlockDriverState *bs,
3650 BlockErrorAction action,
3651 bool is_read, int error)
3652{
3653 BlockErrorAction ac;
3654
3655 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3656 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3657 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003658 error == ENOSPC, strerror(error),
3659 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003660}
3661
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003662/* This is done by device models because, while the block layer knows
3663 * about the error, it does not know whether an operation comes from
3664 * the device or the block layer (from a job, for example).
3665 */
3666void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3667 bool is_read, int error)
3668{
3669 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003670
Wenchao Xiaa5895692014-06-18 08:43:30 +02003671 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003672 /* First set the iostatus, so that "info block" returns an iostatus
3673 * that matches the events raised so far (an additional error iostatus
3674 * is fine, but not a lost one).
3675 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003676 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003677
3678 /* Then raise the request to stop the VM and the event.
3679 * qemu_system_vmstop_request_prepare has two effects. First,
3680 * it ensures that the STOP event always comes after the
3681 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3682 * can observe the STOP event and do a "cont" before the STOP
3683 * event is issued, the VM will not stop. In this case, vm_start()
3684 * also ensures that the STOP/RESUME pair of events is emitted.
3685 */
3686 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003687 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003688 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3689 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003690 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003691 }
3692}
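
/*
 * Illustrative sketch, not part of the original file: the pattern a device
 * model is expected to follow when a request fails, per the comment above.
 * It asks the block layer which action the configured error policy selects
 * and then reports that action back so the iostatus/QMP-event/VM-stop
 * machinery runs. What "complete" or "retry" means is device-specific and
 * only hinted at in the comments.
 */
static void example_handle_request_error(BlockDriverState *bs,
                                         bool is_read, int error)
{
    BlockErrorAction action = bdrv_get_error_action(bs, is_read, error);

    bdrv_error_action(bs, action, is_read, error);

    if (action == BLOCK_ERROR_ACTION_REPORT) {
        /* complete the request with an error status */
    } else if (action == BLOCK_ERROR_ACTION_STOP) {
        /* keep the request pending so it can be retried after 'cont' */
    } else {
        /* BLOCK_ERROR_ACTION_IGNORE: complete the request as if it succeeded */
    }
}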
3693
bellardb3380822004-03-14 21:38:54 +00003694int bdrv_is_read_only(BlockDriverState *bs)
3695{
3696 return bs->read_only;
3697}
3698
ths985a03b2007-12-24 16:10:43 +00003699int bdrv_is_sg(BlockDriverState *bs)
3700{
3701 return bs->sg;
3702}
3703
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003704int bdrv_enable_write_cache(BlockDriverState *bs)
3705{
3706 return bs->enable_write_cache;
3707}
3708
Paolo Bonzini425b0142012-06-06 00:04:52 +02003709void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3710{
3711 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003712
3713 /* so a reopen() will preserve wce */
3714 if (wce) {
3715 bs->open_flags |= BDRV_O_CACHE_WB;
3716 } else {
3717 bs->open_flags &= ~BDRV_O_CACHE_WB;
3718 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003719}
3720
bellardea2384d2004-08-01 21:59:26 +00003721int bdrv_is_encrypted(BlockDriverState *bs)
3722{
3723 if (bs->backing_hd && bs->backing_hd->encrypted)
3724 return 1;
3725 return bs->encrypted;
3726}
3727
aliguoric0f4ce72009-03-05 23:01:01 +00003728int bdrv_key_required(BlockDriverState *bs)
3729{
3730 BlockDriverState *backing_hd = bs->backing_hd;
3731
3732 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3733 return 1;
3734 return (bs->encrypted && !bs->valid_key);
3735}
3736
bellardea2384d2004-08-01 21:59:26 +00003737int bdrv_set_key(BlockDriverState *bs, const char *key)
3738{
3739 int ret;
3740 if (bs->backing_hd && bs->backing_hd->encrypted) {
3741 ret = bdrv_set_key(bs->backing_hd, key);
3742 if (ret < 0)
3743 return ret;
3744 if (!bs->encrypted)
3745 return 0;
3746 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003747 if (!bs->encrypted) {
3748 return -EINVAL;
3749 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3750 return -ENOMEDIUM;
3751 }
aliguoric0f4ce72009-03-05 23:01:01 +00003752 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003753 if (ret < 0) {
3754 bs->valid_key = 0;
3755 } else if (!bs->valid_key) {
3756 bs->valid_key = 1;
3757 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003758 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003759 }
aliguoric0f4ce72009-03-05 23:01:01 +00003760 return ret;
bellardea2384d2004-08-01 21:59:26 +00003761}
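
/*
 * Illustrative sketch, not part of the original file: the usual unlock
 * sequence for an encrypted image -- only call bdrv_set_key() when
 * bdrv_key_required() reports that a key is still missing. The key string
 * is assumed to come from the monitor or the command line.
 */
static int example_unlock_if_needed(BlockDriverState *bs, const char *key)
{
    if (!bdrv_key_required(bs)) {
        return 0;
    }
    return bdrv_set_key(bs, key);
}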
3762
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003763const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003764{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003765 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003766}
3767
Stefan Hajnocziada42402014-08-27 12:08:55 +01003768static int qsort_strcmp(const void *a, const void *b)
3769{
3770 return strcmp(a, b);
3771}
3772
ths5fafdf22007-09-16 21:08:06 +00003773void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003774 void *opaque)
3775{
3776 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003777 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003778 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003779 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003780
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003781 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003782 if (drv->format_name) {
3783 bool found = false;
3784 int i = count;
3785 while (formats && i && !found) {
3786 found = !strcmp(formats[--i], drv->format_name);
3787 }
3788
3789 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003790 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003791 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003792 }
3793 }
bellardea2384d2004-08-01 21:59:26 +00003794 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003795
3796 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3797
3798 for (i = 0; i < count; i++) {
3799 it(opaque, formats[i]);
3800 }
3801
Jeff Codye855e4f2014-04-28 18:29:54 -04003802 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003803}
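
/*
 * Illustrative sketch, not part of the original file: a minimal callback
 * for bdrv_iterate_format(), which deduplicates and sorts the format names
 * before invoking the callback once per name. Usage would look like
 * "int n = 0; bdrv_iterate_format(example_count_format, &n);".
 */
static void example_count_format(void *opaque, const char *name)
{
    int *count = opaque;

    (*count)++;
}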
3804
Benoît Canetdc364f42014-01-23 21:31:32 +01003805/* This function finds a block backend by its device name */
bellardb3380822004-03-14 21:38:54 +00003806BlockDriverState *bdrv_find(const char *name)
3807{
3808 BlockDriverState *bs;
3809
Benoît Canetdc364f42014-01-23 21:31:32 +01003810 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003811 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003812 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003813 }
bellardb3380822004-03-14 21:38:54 +00003814 }
3815 return NULL;
3816}
3817
Benoît Canetdc364f42014-01-23 21:31:32 +01003818/* This function finds a node in the BDS graph by its node name */
3819BlockDriverState *bdrv_find_node(const char *node_name)
3820{
3821 BlockDriverState *bs;
3822
3823 assert(node_name);
3824
3825 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3826 if (!strcmp(node_name, bs->node_name)) {
3827 return bs;
3828 }
3829 }
3830 return NULL;
3831}
3832
Benoît Canetc13163f2014-01-23 21:31:34 +01003833/* Put this QMP function here so it can access the static graph_bdrv_states. */
3834BlockDeviceInfoList *bdrv_named_nodes_list(void)
3835{
3836 BlockDeviceInfoList *list, *entry;
3837 BlockDriverState *bs;
3838
3839 list = NULL;
3840 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3841 entry = g_malloc0(sizeof(*entry));
3842 entry->value = bdrv_block_device_info(bs);
3843 entry->next = list;
3844 list = entry;
3845 }
3846
3847 return list;
3848}
3849
Benoît Canet12d3ba82014-01-23 21:31:35 +01003850BlockDriverState *bdrv_lookup_bs(const char *device,
3851 const char *node_name,
3852 Error **errp)
3853{
3854 BlockDriverState *bs = NULL;
3855
Benoît Canet12d3ba82014-01-23 21:31:35 +01003856 if (device) {
3857 bs = bdrv_find(device);
3858
Benoît Canetdd67fa52014-02-12 17:15:06 +01003859 if (bs) {
3860 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003861 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003862 }
3863
Benoît Canetdd67fa52014-02-12 17:15:06 +01003864 if (node_name) {
3865 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003866
Benoît Canetdd67fa52014-02-12 17:15:06 +01003867 if (bs) {
3868 return bs;
3869 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003870 }
3871
Benoît Canetdd67fa52014-02-12 17:15:06 +01003872 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3873 device ? device : "",
3874 node_name ? node_name : "");
3875 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003876}
3877
Jeff Cody5a6684d2014-06-25 15:40:09 -04003878/* If 'base' is in the same chain as 'top', return true. Otherwise,
3879 * return false. If either argument is NULL, return false. */
3880bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3881{
3882 while (top && top != base) {
3883 top = top->backing_hd;
3884 }
3885
3886 return top != NULL;
3887}
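
/*
 * Illustrative sketch, not part of the original file: resolving a
 * user-supplied device or node name with bdrv_lookup_bs() and then using
 * bdrv_chain_contains() to check that 'base' really is part of the
 * resolved node's backing chain before acting on it. The helper name and
 * the error message are assumptions.
 */
static BlockDriverState *example_resolve_in_chain(const char *device,
                                                  const char *node_name,
                                                  BlockDriverState *base,
                                                  Error **errp)
{
    BlockDriverState *bs = bdrv_lookup_bs(device, node_name, errp);

    if (!bs) {
        return NULL;
    }
    if (!bdrv_chain_contains(bs, base)) {
        error_setg(errp, "'%s' is not in the backing chain of '%s'",
                   bdrv_get_device_name(base), bdrv_get_device_name(bs));
        return NULL;
    }
    return bs;
}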
3888
Markus Armbruster2f399b02010-06-02 18:55:20 +02003889BlockDriverState *bdrv_next(BlockDriverState *bs)
3890{
3891 if (!bs) {
3892 return QTAILQ_FIRST(&bdrv_states);
3893 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003894 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003895}
3896
aliguori51de9762009-03-05 23:00:43 +00003897void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003898{
3899 BlockDriverState *bs;
3900
Benoît Canetdc364f42014-01-23 21:31:32 +01003901 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003902 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003903 }
3904}
3905
bellardea2384d2004-08-01 21:59:26 +00003906const char *bdrv_get_device_name(BlockDriverState *bs)
3907{
3908 return bs->device_name;
3909}
3910
Markus Armbrusterc8433282012-06-05 16:49:24 +02003911int bdrv_get_flags(BlockDriverState *bs)
3912{
3913 return bs->open_flags;
3914}
3915
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003916int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003917{
3918 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003919 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003920
Benoît Canetdc364f42014-01-23 21:31:32 +01003921 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003922 AioContext *aio_context = bdrv_get_aio_context(bs);
3923 int ret;
3924
3925 aio_context_acquire(aio_context);
3926 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003927 if (ret < 0 && !result) {
3928 result = ret;
3929 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003930 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003931 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003932
3933 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003934}
3935
Peter Lieven3ac21622013-06-28 12:47:42 +02003936int bdrv_has_zero_init_1(BlockDriverState *bs)
3937{
3938 return 1;
3939}
3940
Kevin Wolff2feebb2010-04-14 17:30:35 +02003941int bdrv_has_zero_init(BlockDriverState *bs)
3942{
3943 assert(bs->drv);
3944
Paolo Bonzini11212d82013-09-04 19:00:27 +02003945 /* If BS is a copy on write image, it is initialized to
3946 the contents of the base image, which may not be zeroes. */
3947 if (bs->backing_hd) {
3948 return 0;
3949 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003950 if (bs->drv->bdrv_has_zero_init) {
3951 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003952 }
3953
Peter Lieven3ac21622013-06-28 12:47:42 +02003954 /* safe default */
3955 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003956}
3957
Peter Lieven4ce78692013-10-24 12:06:54 +02003958bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3959{
3960 BlockDriverInfo bdi;
3961
3962 if (bs->backing_hd) {
3963 return false;
3964 }
3965
3966 if (bdrv_get_info(bs, &bdi) == 0) {
3967 return bdi.unallocated_blocks_are_zero;
3968 }
3969
3970 return false;
3971}
3972
3973bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3974{
3975 BlockDriverInfo bdi;
3976
3977 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3978 return false;
3979 }
3980
3981 if (bdrv_get_info(bs, &bdi) == 0) {
3982 return bdi.can_write_zeroes_with_unmap;
3983 }
3984
3985 return false;
3986}
3987
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003988typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003989 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01003990 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003991 int64_t sector_num;
3992 int nb_sectors;
3993 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003994 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003995 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003996} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003997
thsf58c7b32008-06-05 21:53:49 +00003998/*
 3999 * Returns the allocation status of the specified sectors. Drivers
4000 * not implementing the functionality are assumed to not support backing files,
4001 * hence all their sectors are reported as allocated.
4002 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004003 * If 'sector_num' is beyond the end of the disk image the return value is 0
4004 * and 'pnum' is set to 0.
4005 *
thsf58c7b32008-06-05 21:53:49 +00004006 * 'pnum' is set to the number of sectors (including and immediately following
4007 * the specified sector) that are known to be in the same
4008 * allocated/unallocated state.
4009 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004010 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4011 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00004012 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004013static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4014 int64_t sector_num,
4015 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00004016{
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004017 int64_t total_sectors;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004018 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004019 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004020
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004021 total_sectors = bdrv_nb_sectors(bs);
4022 if (total_sectors < 0) {
4023 return total_sectors;
Paolo Bonzini617ccb42013-09-04 19:00:23 +02004024 }
4025
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004026 if (sector_num >= total_sectors) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004027 *pnum = 0;
4028 return 0;
4029 }
4030
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004031 n = total_sectors - sector_num;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004032 if (n < nb_sectors) {
4033 nb_sectors = n;
4034 }
4035
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004036 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004037 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02004038 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02004039 if (bs->drv->protocol_name) {
4040 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4041 }
4042 return ret;
thsf58c7b32008-06-05 21:53:49 +00004043 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004044
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004045 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4046 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02004047 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004048 return ret;
4049 }
4050
Peter Lieven92bc50a2013-10-08 14:43:14 +02004051 if (ret & BDRV_BLOCK_RAW) {
4052 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4053 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4054 *pnum, pnum);
4055 }
4056
Kevin Wolfe88ae222014-05-06 15:25:36 +02004057 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4058 ret |= BDRV_BLOCK_ALLOCATED;
4059 }
4060
Peter Lievenc3d86882013-10-24 12:07:04 +02004061 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4062 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004063 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02004064 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004065 BlockDriverState *bs2 = bs->backing_hd;
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004066 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4067 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004068 ret |= BDRV_BLOCK_ZERO;
4069 }
4070 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004071 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004072
4073 if (bs->file &&
4074 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4075 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4076 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4077 *pnum, pnum);
4078 if (ret2 >= 0) {
 4079 /* Ignore errors. This is just providing extra information; it
4080 * is useful but not necessary.
4081 */
4082 ret |= (ret2 & BDRV_BLOCK_ZERO);
4083 }
4084 }
4085
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004086 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004087}
4088
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004089/* Coroutine wrapper for bdrv_get_block_status() */
4090static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004091{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004092 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004093 BlockDriverState *bs = data->bs;
4094
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004095 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4096 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004097 data->done = true;
4098}
4099
4100/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004101 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004102 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004103 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004104 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004105int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4106 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004107{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004108 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004109 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004110 .bs = bs,
4111 .sector_num = sector_num,
4112 .nb_sectors = nb_sectors,
4113 .pnum = pnum,
4114 .done = false,
4115 };
4116
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004117 if (qemu_in_coroutine()) {
4118 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004119 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004120 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004121 AioContext *aio_context = bdrv_get_aio_context(bs);
4122
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004123 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004124 qemu_coroutine_enter(co, &data);
4125 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004126 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004127 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004128 }
4129 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004130}
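
/*
 * A minimal sketch of how a caller might walk an image with
 * bdrv_get_block_status() and classify each extent via the BDRV_BLOCK_*
 * flags.  The report() helper is hypothetical and the 65536-sector chunk
 * size is an arbitrary choice for the example.
 *
 *     int64_t sector = 0, total = bdrv_nb_sectors(bs);
 *
 *     while (sector < total) {
 *         int num;
 *         int64_t ret = bdrv_get_block_status(bs, sector,
 *                                             MIN(total - sector, 65536),
 *                                             &num);
 *         if (ret < 0) {
 *             break;
 *         }
 *         report(sector, num, !!(ret & BDRV_BLOCK_ALLOCATED),
 *                !!(ret & BDRV_BLOCK_ZERO));
 *         sector += num;
 *     }
 */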
4131
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004132int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4133 int nb_sectors, int *pnum)
4134{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004135 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4136 if (ret < 0) {
4137 return ret;
4138 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004139 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004140}
4141
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004142/*
4143 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4144 *
4145 * Return true if the given sector is allocated in any image between
4146 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4147 * sector is allocated in any image of the chain. Return false otherwise.
4148 *
4149 * 'pnum' is set to the number of sectors (including and immediately following
4150 * the specified sector) that are known to be in the same
4151 * allocated/unallocated state.
4152 *
4153 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004154int bdrv_is_allocated_above(BlockDriverState *top,
4155 BlockDriverState *base,
4156 int64_t sector_num,
4157 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004158{
4159 BlockDriverState *intermediate;
4160 int ret, n = nb_sectors;
4161
4162 intermediate = top;
4163 while (intermediate && intermediate != base) {
4164 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004165 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4166 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004167 if (ret < 0) {
4168 return ret;
4169 } else if (ret) {
4170 *pnum = pnum_inter;
4171 return 1;
4172 }
4173
4174 /*
4175 * [sector_num, nb_sectors] is unallocated on top but intermediate
4176 * might have
4177 *
 4178 * [sector_num+x, nb_sectors] allocated.
4179 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004180 if (n > pnum_inter &&
4181 (intermediate == top ||
4182 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004183 n = pnum_inter;
4184 }
4185
4186 intermediate = intermediate->backing_hd;
4187 }
4188
4189 *pnum = n;
4190 return 0;
4191}
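
/*
 * A minimal sketch of a typical bdrv_is_allocated_above() caller (e.g. a
 * commit-style copy loop): copy only the ranges that some image between
 * 'top' and 'base' allocates.  copy_sectors() and the surrounding loop
 * state are assumptions of the example.
 *
 *     int n;
 *     int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors, &n);
 *     if (ret < 0) {
 *         return ret;
 *     } else if (ret) {
 *         copy_sectors(sector_num, n);
 *     }
 *     sector_num += n;
 *     nb_sectors -= n;
 */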
4192
aliguori045df332009-03-05 23:00:48 +00004193const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4194{
4195 if (bs->backing_hd && bs->backing_hd->encrypted)
4196 return bs->backing_file;
4197 else if (bs->encrypted)
4198 return bs->filename;
4199 else
4200 return NULL;
4201}
4202
ths5fafdf22007-09-16 21:08:06 +00004203void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004204 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004205{
Kevin Wolf3574c602011-10-26 11:02:11 +02004206 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004207}
4208
ths5fafdf22007-09-16 21:08:06 +00004209int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004210 const uint8_t *buf, int nb_sectors)
4211{
4212 BlockDriver *drv = bs->drv;
4213 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004214 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004215 if (!drv->bdrv_write_compressed)
4216 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004217 if (bdrv_check_request(bs, sector_num, nb_sectors))
4218 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004219
Fam Zhenge4654d22013-11-13 18:29:43 +08004220 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004221
bellardfaea38e2006-08-05 21:31:00 +00004222 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4223}
ths3b46e622007-09-17 08:09:54 +00004224
bellardfaea38e2006-08-05 21:31:00 +00004225int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4226{
4227 BlockDriver *drv = bs->drv;
4228 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004229 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004230 if (!drv->bdrv_get_info)
4231 return -ENOTSUP;
4232 memset(bdi, 0, sizeof(*bdi));
4233 return drv->bdrv_get_info(bs, bdi);
4234}
4235
Max Reitzeae041f2013-10-09 10:46:16 +02004236ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4237{
4238 BlockDriver *drv = bs->drv;
4239 if (drv && drv->bdrv_get_specific_info) {
4240 return drv->bdrv_get_specific_info(bs);
4241 }
4242 return NULL;
4243}
4244
Christoph Hellwig45566e92009-07-10 23:11:57 +02004245int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4246 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004247{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004248 QEMUIOVector qiov;
4249 struct iovec iov = {
4250 .iov_base = (void *) buf,
4251 .iov_len = size,
4252 };
4253
4254 qemu_iovec_init_external(&qiov, &iov, 1);
4255 return bdrv_writev_vmstate(bs, &qiov, pos);
4256}
4257
4258int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4259{
aliguori178e08a2009-04-05 19:10:55 +00004260 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004261
4262 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004263 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004264 } else if (drv->bdrv_save_vmstate) {
4265 return drv->bdrv_save_vmstate(bs, qiov, pos);
4266 } else if (bs->file) {
4267 return bdrv_writev_vmstate(bs->file, qiov, pos);
4268 }
4269
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004270 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004271}
4272
Christoph Hellwig45566e92009-07-10 23:11:57 +02004273int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4274 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004275{
4276 BlockDriver *drv = bs->drv;
4277 if (!drv)
4278 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004279 if (drv->bdrv_load_vmstate)
4280 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4281 if (bs->file)
4282 return bdrv_load_vmstate(bs->file, buf, pos, size);
4283 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004284}
4285
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004286void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4287{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004288 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004289 return;
4290 }
4291
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004292 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004293}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004294
Kevin Wolf41c695c2012-12-06 14:32:58 +01004295int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4296 const char *tag)
4297{
4298 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4299 bs = bs->file;
4300 }
4301
4302 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4303 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4304 }
4305
4306 return -ENOTSUP;
4307}
4308
Fam Zheng4cc70e92013-11-20 10:01:54 +08004309int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4310{
4311 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4312 bs = bs->file;
4313 }
4314
4315 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4316 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4317 }
4318
4319 return -ENOTSUP;
4320}
4321
Kevin Wolf41c695c2012-12-06 14:32:58 +01004322int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4323{
Max Reitz938789e2014-03-10 23:44:08 +01004324 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004325 bs = bs->file;
4326 }
4327
4328 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4329 return bs->drv->bdrv_debug_resume(bs, tag);
4330 }
4331
4332 return -ENOTSUP;
4333}
4334
4335bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4336{
4337 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4338 bs = bs->file;
4339 }
4340
4341 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4342 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4343 }
4344
4345 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004346}
4347
Blue Swirl199630b2010-07-25 20:49:34 +00004348int bdrv_is_snapshot(BlockDriverState *bs)
4349{
4350 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4351}
4352
Jeff Codyb1b1d782012-10-16 15:49:09 -04004353/* backing_file can be relative, absolute, or a protocol. If it is
4354 * relative, it must be relative to the chain. So, passing in bs->filename
4355 * from a BDS as backing_file should not be done, as that may be relative to
4356 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004357BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4358 const char *backing_file)
4359{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004360 char *filename_full = NULL;
4361 char *backing_file_full = NULL;
4362 char *filename_tmp = NULL;
4363 int is_protocol = 0;
4364 BlockDriverState *curr_bs = NULL;
4365 BlockDriverState *retval = NULL;
4366
4367 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004368 return NULL;
4369 }
4370
Jeff Codyb1b1d782012-10-16 15:49:09 -04004371 filename_full = g_malloc(PATH_MAX);
4372 backing_file_full = g_malloc(PATH_MAX);
4373 filename_tmp = g_malloc(PATH_MAX);
4374
4375 is_protocol = path_has_protocol(backing_file);
4376
4377 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4378
4379 /* If either of the filename paths is actually a protocol, then
4380 * compare unmodified paths; otherwise make paths relative */
4381 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4382 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4383 retval = curr_bs->backing_hd;
4384 break;
4385 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004386 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004387 /* If not an absolute filename path, make it relative to the current
4388 * image's filename path */
4389 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4390 backing_file);
4391
4392 /* We are going to compare absolute pathnames */
4393 if (!realpath(filename_tmp, filename_full)) {
4394 continue;
4395 }
4396
4397 /* We need to make sure the backing filename we are comparing against
4398 * is relative to the current image filename (or absolute) */
4399 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4400 curr_bs->backing_file);
4401
4402 if (!realpath(filename_tmp, backing_file_full)) {
4403 continue;
4404 }
4405
4406 if (strcmp(backing_file_full, filename_full) == 0) {
4407 retval = curr_bs->backing_hd;
4408 break;
4409 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004410 }
4411 }
4412
Jeff Codyb1b1d782012-10-16 15:49:09 -04004413 g_free(filename_full);
4414 g_free(backing_file_full);
4415 g_free(filename_tmp);
4416 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004417}
4418
Benoît Canetf198fd12012-08-02 10:22:47 +02004419int bdrv_get_backing_file_depth(BlockDriverState *bs)
4420{
4421 if (!bs->drv) {
4422 return 0;
4423 }
4424
4425 if (!bs->backing_hd) {
4426 return 0;
4427 }
4428
4429 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4430}
4431
bellard83f64092006-08-01 16:21:11 +00004432/**************************************************************/
4433/* async I/Os */
4434
aliguori3b69e4b2009-01-22 16:59:24 +00004435BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004436 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004437 BlockDriverCompletionFunc *cb, void *opaque)
4438{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004439 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4440
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004441 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004442 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004443}
4444
aliguorif141eaf2009-04-07 18:43:24 +00004445BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4446 QEMUIOVector *qiov, int nb_sectors,
4447 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004448{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004449 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4450
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004451 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004452 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004453}
4454
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004455BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4456 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4457 BlockDriverCompletionFunc *cb, void *opaque)
4458{
4459 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4460
4461 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4462 BDRV_REQ_ZERO_WRITE | flags,
4463 cb, opaque, true);
4464}
4465
Kevin Wolf40b4f532009-09-09 17:53:37 +02004466
4467typedef struct MultiwriteCB {
4468 int error;
4469 int num_requests;
4470 int num_callbacks;
4471 struct {
4472 BlockDriverCompletionFunc *cb;
4473 void *opaque;
4474 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004475 } callbacks[];
4476} MultiwriteCB;
4477
4478static void multiwrite_user_cb(MultiwriteCB *mcb)
4479{
4480 int i;
4481
4482 for (i = 0; i < mcb->num_callbacks; i++) {
4483 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004484 if (mcb->callbacks[i].free_qiov) {
4485 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4486 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004487 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004488 }
4489}
4490
4491static void multiwrite_cb(void *opaque, int ret)
4492{
4493 MultiwriteCB *mcb = opaque;
4494
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004495 trace_multiwrite_cb(mcb, ret);
4496
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004497 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004498 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004499 }
4500
4501 mcb->num_requests--;
4502 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004503 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004504 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004505 }
4506}
4507
4508static int multiwrite_req_compare(const void *a, const void *b)
4509{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004510 const BlockRequest *req1 = a, *req2 = b;
4511
4512 /*
4513 * Note that we can't simply subtract req2->sector from req1->sector
4514 * here as that could overflow the return value.
4515 */
4516 if (req1->sector > req2->sector) {
4517 return 1;
4518 } else if (req1->sector < req2->sector) {
4519 return -1;
4520 } else {
4521 return 0;
4522 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004523}
4524
4525/*
4526 * Takes a bunch of requests and tries to merge them. Returns the number of
4527 * requests that remain after merging.
4528 */
4529static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4530 int num_reqs, MultiwriteCB *mcb)
4531{
4532 int i, outidx;
4533
4534 // Sort requests by start sector
4535 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4536
4537 // Check if adjacent requests touch the same clusters. If so, combine them,
4538 // filling up gaps with zero sectors.
4539 outidx = 0;
4540 for (i = 1; i < num_reqs; i++) {
4541 int merge = 0;
4542 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4543
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004544 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004545 if (reqs[i].sector <= oldreq_last) {
4546 merge = 1;
4547 }
4548
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004549 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4550 merge = 0;
4551 }
4552
Kevin Wolf40b4f532009-09-09 17:53:37 +02004553 if (merge) {
4554 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004555 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004556 qemu_iovec_init(qiov,
4557 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4558
4559 // Add the first request to the merged one. If the requests are
4560 // overlapping, drop the last sectors of the first request.
4561 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004562 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004563
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004564 // We should not need to add any zeros between the two requests
4565 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004566
4567 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004568 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004569
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004570 // Add tail of first request, if necessary
4571 if (qiov->size < reqs[outidx].qiov->size) {
4572 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4573 reqs[outidx].qiov->size - qiov->size);
4574 }
4575
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004576 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004577 reqs[outidx].qiov = qiov;
4578
4579 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4580 } else {
4581 outidx++;
4582 reqs[outidx].sector = reqs[i].sector;
4583 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4584 reqs[outidx].qiov = reqs[i].qiov;
4585 }
4586 }
4587
4588 return outidx + 1;
4589}
4590
4591/*
4592 * Submit multiple AIO write requests at once.
4593 *
4594 * On success, the function returns 0 and all requests in the reqs array have
 4595 * been submitted. In the error case this function returns -1, and any of the
4596 * requests may or may not be submitted yet. In particular, this means that the
4597 * callback will be called for some of the requests, for others it won't. The
4598 * caller must check the error field of the BlockRequest to wait for the right
4599 * callbacks (if error != 0, no callback will be called).
4600 *
4601 * The implementation may modify the contents of the reqs array, e.g. to merge
4602 * requests. However, the fields opaque and error are left unmodified as they
4603 * are used to signal failure for a single request to the caller.
4604 */
4605int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4606{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004607 MultiwriteCB *mcb;
4608 int i;
4609
Ryan Harper301db7c2011-03-07 10:01:04 -06004610 /* don't submit writes if we don't have a medium */
4611 if (bs->drv == NULL) {
4612 for (i = 0; i < num_reqs; i++) {
4613 reqs[i].error = -ENOMEDIUM;
4614 }
4615 return -1;
4616 }
4617
Kevin Wolf40b4f532009-09-09 17:53:37 +02004618 if (num_reqs == 0) {
4619 return 0;
4620 }
4621
4622 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004623 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004624 mcb->num_requests = 0;
4625 mcb->num_callbacks = num_reqs;
4626
4627 for (i = 0; i < num_reqs; i++) {
4628 mcb->callbacks[i].cb = reqs[i].cb;
4629 mcb->callbacks[i].opaque = reqs[i].opaque;
4630 }
4631
4632 // Check for mergable requests
4633 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4634
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004635 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4636
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004637 /* Run the aio requests. */
4638 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004639 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004640 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4641 reqs[i].nb_sectors, reqs[i].flags,
4642 multiwrite_cb, mcb,
4643 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004644 }
4645
4646 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004647}
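
/*
 * A minimal sketch of batching writes through bdrv_aio_multiwrite().  Each
 * BlockRequest carries its own completion callback; my_write_cb, ctx and
 * the qiov array are assumed to be set up by the caller.
 *
 *     BlockRequest reqs[2] = {
 *         { .sector = 0,  .nb_sectors = 8, .qiov = &qiov[0],
 *           .cb = my_write_cb, .opaque = ctx },
 *         { .sector = 16, .nb_sectors = 8, .qiov = &qiov[1],
 *           .cb = my_write_cb, .opaque = ctx },
 *     };
 *
 *     if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *         // only requests with reqs[i].error == 0 will see a callback
 *     }
 */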
4648
bellard83f64092006-08-01 16:21:11 +00004649void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004650{
Fam Zhengca5fd112014-09-11 13:41:27 +08004651 qemu_aio_ref(acb);
4652 bdrv_aio_cancel_async(acb);
4653 while (acb->refcnt > 1) {
4654 if (acb->aiocb_info->get_aio_context) {
4655 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4656 } else if (acb->bs) {
4657 aio_poll(bdrv_get_aio_context(acb->bs), true);
4658 } else {
4659 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004660 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004661 }
Fam Zheng80074292014-09-11 13:41:28 +08004662 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004663}
4664
4665/* Async version of aio cancel. The caller is not blocked if the acb implements
 4666 * cancel_async; otherwise we do nothing and let the request complete normally.
4667 * In either case the completion callback must be called. */
4668void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
4669{
4670 if (acb->aiocb_info->cancel_async) {
4671 acb->aiocb_info->cancel_async(acb);
4672 }
bellard83f64092006-08-01 16:21:11 +00004673}
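
/*
 * A minimal sketch contrasting the two cancellation flavours above.  The
 * request, my_cb, ctx and the tear-down condition are assumptions of the
 * example; in both cases my_cb is still invoked eventually.
 *
 *     BlockDriverAIOCB *acb = bdrv_aio_readv(bs, 0, &qiov, 8, my_cb, ctx);
 *     ...
 *     if (tearing_down) {
 *         bdrv_aio_cancel(acb);        // blocks until my_cb has run
 *     } else {
 *         bdrv_aio_cancel_async(acb);  // returns at once, my_cb runs later
 *     }
 */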
4674
4675/**************************************************************/
4676/* async block device emulation */
4677
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004678typedef struct BlockDriverAIOCBSync {
4679 BlockDriverAIOCB common;
4680 QEMUBH *bh;
4681 int ret;
4682 /* vector translation state */
4683 QEMUIOVector *qiov;
4684 uint8_t *bounce;
4685 int is_write;
4686} BlockDriverAIOCBSync;
4687
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004688static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004689 .aiocb_size = sizeof(BlockDriverAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004690};
4691
bellard83f64092006-08-01 16:21:11 +00004692static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004693{
pbrookce1a14d2006-08-07 02:38:06 +00004694 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004695
Kevin Wolf857d4f42014-05-20 13:16:51 +02004696 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004697 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004698 }
aliguoriceb42de2009-04-07 18:43:28 +00004699 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004700 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004701 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004702 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004703 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004704}
bellardbeac80c2006-06-26 20:08:57 +00004705
aliguorif141eaf2009-04-07 18:43:24 +00004706static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4707 int64_t sector_num,
4708 QEMUIOVector *qiov,
4709 int nb_sectors,
4710 BlockDriverCompletionFunc *cb,
4711 void *opaque,
4712 int is_write)
4713
bellardea2384d2004-08-01 21:59:26 +00004714{
pbrookce1a14d2006-08-07 02:38:06 +00004715 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004716
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004717 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004718 acb->is_write = is_write;
4719 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004720 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004721 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004722
Kevin Wolf857d4f42014-05-20 13:16:51 +02004723 if (acb->bounce == NULL) {
4724 acb->ret = -ENOMEM;
4725 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004726 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004727 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004728 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004729 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004730 }
4731
pbrookce1a14d2006-08-07 02:38:06 +00004732 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004733
pbrookce1a14d2006-08-07 02:38:06 +00004734 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004735}
4736
aliguorif141eaf2009-04-07 18:43:24 +00004737static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4738 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004739 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004740{
aliguorif141eaf2009-04-07 18:43:24 +00004741 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004742}
4743
aliguorif141eaf2009-04-07 18:43:24 +00004744static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4745 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4746 BlockDriverCompletionFunc *cb, void *opaque)
4747{
4748 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4749}
4750
Kevin Wolf68485422011-06-30 10:05:46 +02004751
4752typedef struct BlockDriverAIOCBCoroutine {
4753 BlockDriverAIOCB common;
4754 BlockRequest req;
4755 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004756 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004757 QEMUBH* bh;
4758} BlockDriverAIOCBCoroutine;
4759
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004760static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004761 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004762};
4763
Paolo Bonzini35246a62011-10-14 10:41:29 +02004764static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004765{
4766 BlockDriverAIOCBCoroutine *acb = opaque;
4767
4768 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004769
Kevin Wolf68485422011-06-30 10:05:46 +02004770 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004771 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004772}
4773
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004774/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4775static void coroutine_fn bdrv_co_do_rw(void *opaque)
4776{
4777 BlockDriverAIOCBCoroutine *acb = opaque;
4778 BlockDriverState *bs = acb->common.bs;
4779
4780 if (!acb->is_write) {
4781 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004782 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004783 } else {
4784 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004785 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004786 }
4787
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004788 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004789 qemu_bh_schedule(acb->bh);
4790}
4791
Kevin Wolf68485422011-06-30 10:05:46 +02004792static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4793 int64_t sector_num,
4794 QEMUIOVector *qiov,
4795 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004796 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004797 BlockDriverCompletionFunc *cb,
4798 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004799 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004800{
4801 Coroutine *co;
4802 BlockDriverAIOCBCoroutine *acb;
4803
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004804 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004805 acb->req.sector = sector_num;
4806 acb->req.nb_sectors = nb_sectors;
4807 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004808 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004809 acb->is_write = is_write;
4810
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004811 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004812 qemu_coroutine_enter(co, acb);
4813
4814 return &acb->common;
4815}
4816
Paolo Bonzini07f07612011-10-17 12:32:12 +02004817static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004818{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004819 BlockDriverAIOCBCoroutine *acb = opaque;
4820 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004821
Paolo Bonzini07f07612011-10-17 12:32:12 +02004822 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004823 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004824 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004825}
4826
Paolo Bonzini07f07612011-10-17 12:32:12 +02004827BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004828 BlockDriverCompletionFunc *cb, void *opaque)
4829{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004830 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004831
Paolo Bonzini07f07612011-10-17 12:32:12 +02004832 Coroutine *co;
4833 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004834
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004835 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004836
Paolo Bonzini07f07612011-10-17 12:32:12 +02004837 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4838 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004839
Alexander Graf016f5cf2010-05-26 17:51:49 +02004840 return &acb->common;
4841}
4842
Paolo Bonzini4265d622011-10-17 12:32:14 +02004843static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4844{
4845 BlockDriverAIOCBCoroutine *acb = opaque;
4846 BlockDriverState *bs = acb->common.bs;
4847
4848 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004849 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004850 qemu_bh_schedule(acb->bh);
4851}
4852
4853BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4854 int64_t sector_num, int nb_sectors,
4855 BlockDriverCompletionFunc *cb, void *opaque)
4856{
4857 Coroutine *co;
4858 BlockDriverAIOCBCoroutine *acb;
4859
4860 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4861
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004862 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004863 acb->req.sector = sector_num;
4864 acb->req.nb_sectors = nb_sectors;
4865 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4866 qemu_coroutine_enter(co, acb);
4867
4868 return &acb->common;
4869}
4870
bellardea2384d2004-08-01 21:59:26 +00004871void bdrv_init(void)
4872{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004873 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004874}
pbrookce1a14d2006-08-07 02:38:06 +00004875
Markus Armbrustereb852012009-10-27 18:41:44 +01004876void bdrv_init_with_whitelist(void)
4877{
4878 use_bdrv_whitelist = 1;
4879 bdrv_init();
4880}
4881
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004882void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004883 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004884{
pbrookce1a14d2006-08-07 02:38:06 +00004885 BlockDriverAIOCB *acb;
4886
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004887 acb = g_slice_alloc(aiocb_info->aiocb_size);
4888 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004889 acb->bs = bs;
4890 acb->cb = cb;
4891 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004892 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004893 return acb;
4894}
4895
Fam Zhengf197fe22014-09-11 13:41:08 +08004896void qemu_aio_ref(void *p)
4897{
4898 BlockDriverAIOCB *acb = p;
4899 acb->refcnt++;
4900}
4901
Fam Zheng80074292014-09-11 13:41:28 +08004902void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004903{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004904 BlockDriverAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004905 assert(acb->refcnt > 0);
4906 if (--acb->refcnt == 0) {
4907 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4908 }
pbrookce1a14d2006-08-07 02:38:06 +00004909}
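
/*
 * A minimal sketch of the AIOCB life cycle implemented above, for a
 * hypothetical driver type MyAIOCB that embeds BlockDriverAIOCB as its
 * first member 'common' and describes itself with my_aiocb_info.
 * qemu_aio_get() hands out the object with refcnt == 1; the completion
 * path drops that reference, while bdrv_aio_cancel() holds an extra one
 * during its poll loop.
 *
 *     MyAIOCB *acb = qemu_aio_get(&my_aiocb_info, bs, cb, opaque);
 *     // ... submit the request ...
 *
 *     // completion path:
 *     acb->common.cb(acb->common.opaque, ret);
 *     qemu_aio_unref(acb);
 */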
bellard19cb3732006-08-19 11:45:59 +00004910
4911/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004912/* Coroutine block device emulation */
4913
4914typedef struct CoroutineIOCompletion {
4915 Coroutine *coroutine;
4916 int ret;
4917} CoroutineIOCompletion;
4918
4919static void bdrv_co_io_em_complete(void *opaque, int ret)
4920{
4921 CoroutineIOCompletion *co = opaque;
4922
4923 co->ret = ret;
4924 qemu_coroutine_enter(co->coroutine, NULL);
4925}
4926
4927static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4928 int nb_sectors, QEMUIOVector *iov,
4929 bool is_write)
4930{
4931 CoroutineIOCompletion co = {
4932 .coroutine = qemu_coroutine_self(),
4933 };
4934 BlockDriverAIOCB *acb;
4935
4936 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004937 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4938 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004939 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004940 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4941 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004942 }
4943
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004944 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004945 if (!acb) {
4946 return -EIO;
4947 }
4948 qemu_coroutine_yield();
4949
4950 return co.ret;
4951}
4952
4953static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4954 int64_t sector_num, int nb_sectors,
4955 QEMUIOVector *iov)
4956{
4957 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4958}
4959
4960static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4961 int64_t sector_num, int nb_sectors,
4962 QEMUIOVector *iov)
4963{
4964 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4965}
4966
Paolo Bonzini07f07612011-10-17 12:32:12 +02004967static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004968{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004969 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004970
Paolo Bonzini07f07612011-10-17 12:32:12 +02004971 rwco->ret = bdrv_co_flush(rwco->bs);
4972}
4973
4974int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4975{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004976 int ret;
4977
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004978 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004979 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004980 }
4981
Kevin Wolfca716362011-11-10 18:13:59 +01004982 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004983 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004984 if (bs->drv->bdrv_co_flush_to_os) {
4985 ret = bs->drv->bdrv_co_flush_to_os(bs);
4986 if (ret < 0) {
4987 return ret;
4988 }
4989 }
4990
Kevin Wolfca716362011-11-10 18:13:59 +01004991 /* But don't actually force it to the disk with cache=unsafe */
4992 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004993 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004994 }
4995
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004996 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004997 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004998 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004999 } else if (bs->drv->bdrv_aio_flush) {
5000 BlockDriverAIOCB *acb;
5001 CoroutineIOCompletion co = {
5002 .coroutine = qemu_coroutine_self(),
5003 };
5004
5005 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5006 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005007 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005008 } else {
5009 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005010 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005011 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02005012 } else {
5013 /*
5014 * Some block drivers always operate in either writethrough or unsafe
 5015 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5016 * know how the server works (because the behaviour is hardcoded or
5017 * depends on server-side configuration), so we can't ensure that
5018 * everything is safe on disk. Returning an error doesn't work because
5019 * that would break guests even if the server operates in writethrough
5020 * mode.
5021 *
5022 * Let's hope the user knows what he's doing.
5023 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005024 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005025 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005026 if (ret < 0) {
5027 return ret;
5028 }
5029
5030 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5031 * in the case of cache=unsafe, so there are no useless flushes.
5032 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02005033flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005034 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005035}
5036
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005037void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005038{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005039 Error *local_err = NULL;
5040 int ret;
5041
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005042 if (!bs->drv) {
5043 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005044 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005045
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005046 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5047 return;
5048 }
5049 bs->open_flags &= ~BDRV_O_INCOMING;
5050
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005051 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005052 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005053 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005054 bdrv_invalidate_cache(bs->file, &local_err);
5055 }
5056 if (local_err) {
5057 error_propagate(errp, local_err);
5058 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005059 }
5060
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005061 ret = refresh_total_sectors(bs, bs->total_sectors);
5062 if (ret < 0) {
5063 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5064 return;
5065 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005066}
5067
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005068void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005069{
5070 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005071 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005072
Benoît Canetdc364f42014-01-23 21:31:32 +01005073 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005074 AioContext *aio_context = bdrv_get_aio_context(bs);
5075
5076 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005077 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005078 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005079 if (local_err) {
5080 error_propagate(errp, local_err);
5081 return;
5082 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005083 }
5084}
5085
Paolo Bonzini07f07612011-10-17 12:32:12 +02005086int bdrv_flush(BlockDriverState *bs)
5087{
5088 Coroutine *co;
5089 RwCo rwco = {
5090 .bs = bs,
5091 .ret = NOT_DONE,
5092 };
5093
5094 if (qemu_in_coroutine()) {
5095 /* Fast-path if already in coroutine context */
5096 bdrv_flush_co_entry(&rwco);
5097 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005098 AioContext *aio_context = bdrv_get_aio_context(bs);
5099
Paolo Bonzini07f07612011-10-17 12:32:12 +02005100 co = qemu_coroutine_create(bdrv_flush_co_entry);
5101 qemu_coroutine_enter(co, &rwco);
5102 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005103 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005104 }
5105 }
5106
5107 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005108}
5109
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005110typedef struct DiscardCo {
5111 BlockDriverState *bs;
5112 int64_t sector_num;
5113 int nb_sectors;
5114 int ret;
5115} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005116static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5117{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005118 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005119
5120 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5121}
5122
Peter Lieven6f14da52013-10-24 12:06:59 +02005123/* If no limit is specified in the BlockLimits, use a default
5124 * of 32768 512-byte sectors (16 MiB) per request.
5125 */
5126#define MAX_DISCARD_DEFAULT 32768
5127
Paolo Bonzini4265d622011-10-17 12:32:14 +02005128int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5129 int nb_sectors)
5130{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005131 int max_discard;
5132
Paolo Bonzini4265d622011-10-17 12:32:14 +02005133 if (!bs->drv) {
5134 return -ENOMEDIUM;
5135 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5136 return -EIO;
5137 } else if (bs->read_only) {
5138 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005139 }
5140
Fam Zhenge4654d22013-11-13 18:29:43 +08005141 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005142
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005143 /* Do nothing if disabled. */
5144 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5145 return 0;
5146 }
5147
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005148 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005149 return 0;
5150 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005151
5152 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5153 while (nb_sectors > 0) {
5154 int ret;
5155 int num = nb_sectors;
5156
5157 /* align request */
5158 if (bs->bl.discard_alignment &&
5159 num >= bs->bl.discard_alignment &&
5160 sector_num % bs->bl.discard_alignment) {
5161 if (num > bs->bl.discard_alignment) {
5162 num = bs->bl.discard_alignment;
5163 }
5164 num -= sector_num % bs->bl.discard_alignment;
5165 }
5166
5167 /* limit request size */
5168 if (num > max_discard) {
5169 num = max_discard;
5170 }
5171
5172 if (bs->drv->bdrv_co_discard) {
5173 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5174 } else {
5175 BlockDriverAIOCB *acb;
5176 CoroutineIOCompletion co = {
5177 .coroutine = qemu_coroutine_self(),
5178 };
5179
5180 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5181 bdrv_co_io_em_complete, &co);
5182 if (acb == NULL) {
5183 return -EIO;
5184 } else {
5185 qemu_coroutine_yield();
5186 ret = co.ret;
5187 }
5188 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005189 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005190 return ret;
5191 }
5192
5193 sector_num += num;
5194 nb_sectors -= num;
5195 }
5196 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005197}
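
/*
 * A worked example of the splitting above, with assumed limits
 * bl.discard_alignment == 8 and bl.max_discard == 0 (so MAX_DISCARD_DEFAULT
 * applies): a request with sector_num == 4 and nb_sectors == 40000 reaches
 * the driver as three pieces:
 *
 *     (4, 4)           realign to the next 8-sector boundary
 *     (8, 32768)       capped at MAX_DISCARD_DEFAULT
 *     (32776, 7228)    the remainder
 */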
5198
5199int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5200{
5201 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005202 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005203 .bs = bs,
5204 .sector_num = sector_num,
5205 .nb_sectors = nb_sectors,
5206 .ret = NOT_DONE,
5207 };
5208
5209 if (qemu_in_coroutine()) {
5210 /* Fast-path if already in coroutine context */
5211 bdrv_discard_co_entry(&rwco);
5212 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005213 AioContext *aio_context = bdrv_get_aio_context(bs);
5214
Paolo Bonzini4265d622011-10-17 12:32:14 +02005215 co = qemu_coroutine_create(bdrv_discard_co_entry);
5216 qemu_coroutine_enter(co, &rwco);
5217 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005218 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005219 }
5220 }
5221
5222 return rwco.ret;
5223}
5224
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005225/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005226/* removable device support */
5227
5228/**
5229 * Return TRUE if the media is present
5230 */
5231int bdrv_is_inserted(BlockDriverState *bs)
5232{
5233 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005234
bellard19cb3732006-08-19 11:45:59 +00005235 if (!drv)
5236 return 0;
5237 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005238 return 1;
5239 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005240}
5241
5242/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005243 * Return whether the media changed since the last call to this
5244 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005245 */
5246int bdrv_media_changed(BlockDriverState *bs)
5247{
5248 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005249
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005250 if (drv && drv->bdrv_media_changed) {
5251 return drv->bdrv_media_changed(bs);
5252 }
5253 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005254}
5255
5256/**
5257 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5258 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005259void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005260{
5261 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005262
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005263 if (drv && drv->bdrv_eject) {
5264 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005265 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005266
5267 if (bs->device_name[0] != '\0') {
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005268 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5269 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005270 }
bellard19cb3732006-08-19 11:45:59 +00005271}
5272
bellard19cb3732006-08-19 11:45:59 +00005273/**
5274 * Lock or unlock the media (if it is locked, the user won't be able
5275 * to eject it manually).
5276 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005277void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005278{
5279 BlockDriver *drv = bs->drv;
5280
Markus Armbruster025e8492011-09-06 18:58:47 +02005281 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005282
Markus Armbruster025e8492011-09-06 18:58:47 +02005283 if (drv && drv->bdrv_lock_medium) {
5284 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005285 }
5286}
ths985a03b2007-12-24 16:10:43 +00005287
5288/* needed for generic scsi interface */
5289
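/* Pass an ioctl request through to the block driver (used by the generic
 * SCSI layer); returns -ENOTSUP if the driver does not implement it. */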
5290int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5291{
5292 BlockDriver *drv = bs->drv;
5293
 5294    if (drv && drv->bdrv_ioctl) {
 5295        return drv->bdrv_ioctl(bs, req, buf);
    }
 5296    return -ENOTSUP;
5297}
aliguori7d780662009-03-12 19:57:08 +00005298
aliguori221f7152009-03-28 17:28:41 +00005299BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5300 unsigned long int req, void *buf,
5301 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005302{
aliguori221f7152009-03-28 17:28:41 +00005303 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005304
aliguori221f7152009-03-28 17:28:41 +00005305    if (drv && drv->bdrv_aio_ioctl) {
 5306        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    }
 5307    return NULL;
aliguori7d780662009-03-12 19:57:08 +00005308}
aliguorie268ca52009-04-22 20:20:00 +00005309
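/* Remember the block size used by the guest device attached to bs. */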
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005310void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005311{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005312 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005313}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005314
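/* Allocate a buffer aligned for I/O on bs (see bdrv_opt_mem_align());
 * aborts on allocation failure, like qemu_memalign(). */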
aliguorie268ca52009-04-22 20:20:00 +00005315void *qemu_blockalign(BlockDriverState *bs, size_t size)
5316{
Kevin Wolf339064d2013-11-28 10:23:32 +01005317 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005318}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005319
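/* Like qemu_blockalign(), but returns NULL on allocation failure instead of
 * aborting. */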
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005320void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5321{
5322 size_t align = bdrv_opt_mem_align(bs);
5323
5324 /* Ensure that NULL is never returned on success */
5325 assert(align > 0);
5326 if (size == 0) {
5327 size = align;
5328 }
5329
5330 return qemu_try_memalign(align, size);
5331}
5332
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005333/*
 5334 * Check if all memory in this vector is aligned to bdrv_opt_mem_align(bs).
 5335 */
5336bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5337{
5338 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005339 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005340
5341 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005342 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005343 return false;
5344 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005345 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005346 return false;
5347 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005348 }
5349
5350 return true;
5351}
5352
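/* Create a dirty bitmap covering the whole device; granularity is in bytes,
 * must be a power of two and at least BDRV_SECTOR_SIZE.  Returns NULL and
 * sets errp if the device length cannot be determined. */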
Fam Zhengb8afb522014-04-16 09:34:30 +08005353BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5354 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005355{
5356 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005357 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005358
Paolo Bonzini50717e92013-01-21 17:09:45 +01005359 assert((granularity & (granularity - 1)) == 0);
5360
Fam Zhenge4654d22013-11-13 18:29:43 +08005361 granularity >>= BDRV_SECTOR_BITS;
5362 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005363 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005364 if (bitmap_size < 0) {
5365 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5366 errno = -bitmap_size;
5367 return NULL;
5368 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005369 bitmap = g_new0(BdrvDirtyBitmap, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +08005370 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5371 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5372 return bitmap;
5373}
5374
5375void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5376{
5377 BdrvDirtyBitmap *bm, *next;
5378 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5379 if (bm == bitmap) {
5380 QLIST_REMOVE(bitmap, list);
5381 hbitmap_free(bitmap->bitmap);
5382 g_free(bitmap);
5383 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005384 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005385 }
5386}
5387
Fam Zheng21b56832013-11-13 18:29:44 +08005388BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5389{
5390 BdrvDirtyBitmap *bm;
5391 BlockDirtyInfoList *list = NULL;
5392 BlockDirtyInfoList **plist = &list;
5393
5394 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005395 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5396 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005397 info->count = bdrv_get_dirty_count(bs, bm);
5398 info->granularity =
5399 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5400 entry->value = info;
5401 *plist = entry;
5402 plist = &entry->next;
5403 }
5404
5405 return list;
5406}
5407
Fam Zhenge4654d22013-11-13 18:29:43 +08005408int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005409{
Fam Zhenge4654d22013-11-13 18:29:43 +08005410 if (bitmap) {
5411 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005412 } else {
5413 return 0;
5414 }
5415}
5416
Fam Zhenge4654d22013-11-13 18:29:43 +08005417void bdrv_dirty_iter_init(BlockDriverState *bs,
5418 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005419{
Fam Zhenge4654d22013-11-13 18:29:43 +08005420 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005421}
5422
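/* Mark the given sector range dirty in every dirty bitmap attached to bs
 * (bdrv_reset_dirty() below clears it again). */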
5423void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5424 int nr_sectors)
5425{
Fam Zhenge4654d22013-11-13 18:29:43 +08005426 BdrvDirtyBitmap *bitmap;
5427 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5428 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005429 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005430}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005431
Fam Zhenge4654d22013-11-13 18:29:43 +08005432void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5433{
5434 BdrvDirtyBitmap *bitmap;
5435 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5436 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5437 }
5438}
5439
5440int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5441{
5442 return hbitmap_count(bitmap->bitmap);
5443}
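
/* A minimal sketch of how the dirty bitmap helpers above fit together,
 * assuming a valid BlockDriverState *bs (the 64 KB granularity is only an
 * example value):
 *
 *     Error *err = NULL;
 *     BdrvDirtyBitmap *bm = bdrv_create_dirty_bitmap(bs, 65536, &err);
 *     if (bm) {
 *         HBitmapIter hbi;
 *         bdrv_dirty_iter_init(bs, bm, &hbi);
 *         // walk dirty sectors with hbitmap_iter_next(&hbi) until it
 *         // returns a negative value
 *         bdrv_release_dirty_bitmap(bs, bm);
 *     }
 */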
5444
Fam Zheng9fcb0252013-08-23 09:14:46 +08005445/* Get a reference to bs */
5446void bdrv_ref(BlockDriverState *bs)
5447{
5448 bs->refcnt++;
5449}
5450
 5451/* Release a previously grabbed reference to bs.
 5452 * If, after releasing, the reference count drops to zero, the
 5453 * BlockDriverState is deleted. */
5454void bdrv_unref(BlockDriverState *bs)
5455{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005456 if (!bs) {
5457 return;
5458 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005459 assert(bs->refcnt > 0);
5460 if (--bs->refcnt == 0) {
5461 bdrv_delete(bs);
5462 }
5463}
5464
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005465struct BdrvOpBlocker {
5466 Error *reason;
5467 QLIST_ENTRY(BdrvOpBlocker) list;
5468};
5469
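/* Check whether operation op is currently blocked on bs; if it is, report
 * the first blocker's reason through errp (when non-NULL) and return true. */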
5470bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5471{
5472 BdrvOpBlocker *blocker;
5473 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5474 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5475 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5476 if (errp) {
5477 error_setg(errp, "Device '%s' is busy: %s",
5478 bs->device_name, error_get_pretty(blocker->reason));
5479 }
5480 return true;
5481 }
5482 return false;
5483}
5484
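/* Block operation op on bs for the given reason.  Several blockers may be
 * installed for the same op; bdrv_op_unblock() removes all blockers whose
 * reason matches. */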
5485void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5486{
5487 BdrvOpBlocker *blocker;
5488 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5489
Markus Armbruster5839e532014-08-19 10:31:08 +02005490 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005491 blocker->reason = reason;
5492 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5493}
5494
5495void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5496{
5497 BdrvOpBlocker *blocker, *next;
5498 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5499 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5500 if (blocker->reason == reason) {
5501 QLIST_REMOVE(blocker, list);
5502 g_free(blocker);
5503 }
5504 }
5505}
5506
5507void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5508{
5509 int i;
5510 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5511 bdrv_op_block(bs, i, reason);
5512 }
5513}
5514
5515void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5516{
5517 int i;
5518 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5519 bdrv_op_unblock(bs, i, reason);
5520 }
5521}
5522
5523bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5524{
5525 int i;
5526
5527 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5528 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5529 return false;
5530 }
5531 }
5532 return true;
5533}
5534
Luiz Capitulino28a72822011-09-26 17:43:50 -03005535void bdrv_iostatus_enable(BlockDriverState *bs)
5536{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005537 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005538 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005539}
5540
5541/* The I/O status is only enabled if the drive explicitly
5542 * enables it _and_ the VM is configured to stop on errors */
5543bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5544{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005545 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005546 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5547 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5548 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005549}
5550
5551void bdrv_iostatus_disable(BlockDriverState *bs)
5552{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005553 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005554}
5555
5556void bdrv_iostatus_reset(BlockDriverState *bs)
5557{
5558 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005559 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005560 if (bs->job) {
5561 block_job_iostatus_reset(bs->job);
5562 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005563 }
5564}
5565
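/* Record an I/O error in the device's I/O status; ENOSPC is reported as a
 * distinct state so that management tools can tell "out of space" apart from
 * other failures.  Only the first error is kept until the status is reset. */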
Luiz Capitulino28a72822011-09-26 17:43:50 -03005566void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5567{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005568 assert(bdrv_iostatus_is_enabled(bs));
5569 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005570 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5571 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005572 }
5573}
5574
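/* Create a new image file.  fmt names the format driver, options is a
 * "-o"-style option string, img_size is the virtual size in bytes and errors
 * are reported through errp.  A minimal call sketch (the values are only
 * examples):
 *
 *     Error *local_err = NULL;
 *     bdrv_img_create("test.qcow2", "qcow2", NULL, NULL, NULL,
 *                     1 * 1024 * 1024, 0, &local_err, true);
 *     if (local_err) {
 *         // report and free local_err
 *     }
 */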
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005575void bdrv_img_create(const char *filename, const char *fmt,
5576 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005577 char *options, uint64_t img_size, int flags,
5578 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005579{
Chunyan Liu83d05212014-06-05 17:20:51 +08005580 QemuOptsList *create_opts = NULL;
5581 QemuOpts *opts = NULL;
5582 const char *backing_fmt, *backing_file;
5583 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005584 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005585 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005586 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005587 int ret = 0;
5588
5589 /* Find driver and parse its options */
5590 drv = bdrv_find_format(fmt);
5591 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005592 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005593 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005594 }
5595
Kevin Wolf98289622013-07-10 15:47:39 +02005596 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005597 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005598 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005599 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005600 }
5601
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005602 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5603 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005604
5605 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005606 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5607 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005608
5609 /* Parse -o options */
5610 if (options) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005611 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5612 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005613 goto out;
5614 }
5615 }
5616
5617 if (base_filename) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005618 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005619 error_setg(errp, "Backing file not supported for file format '%s'",
5620 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005621 goto out;
5622 }
5623 }
5624
5625 if (base_fmt) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005626 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005627 error_setg(errp, "Backing file format not supported for file "
5628 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005629 goto out;
5630 }
5631 }
5632
Chunyan Liu83d05212014-06-05 17:20:51 +08005633 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5634 if (backing_file) {
5635 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005636 error_setg(errp, "Error: Trying to create an image with the "
5637 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005638 goto out;
5639 }
5640 }
5641
Chunyan Liu83d05212014-06-05 17:20:51 +08005642 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5643 if (backing_fmt) {
5644 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005645 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005646 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005647 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005648 goto out;
5649 }
5650 }
5651
 5652    /* The size for the image must always be specified, with one exception:
 5653     * if we are using a backing file, we can obtain the size from there */
Chunyan Liu83d05212014-06-05 17:20:51 +08005654 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5655 if (size == -1) {
5656 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005657 BlockDriverState *bs;
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005658 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005659 int back_flags;
5660
5661 /* backing files always opened read-only */
5662 back_flags =
5663 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005664
Max Reitzf67503e2014-02-18 18:33:05 +01005665 bs = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005666 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005667 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005668 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005669 error_setg_errno(errp, -ret, "Could not open '%s': %s",
Chunyan Liu83d05212014-06-05 17:20:51 +08005670 backing_file,
Max Reitzcc84d902013-09-06 17:14:26 +02005671 error_get_pretty(local_err));
5672 error_free(local_err);
5673 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005674 goto out;
5675 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005676 size = bdrv_getlength(bs);
5677 if (size < 0) {
5678 error_setg_errno(errp, -size, "Could not get size of '%s'",
5679 backing_file);
5680 bdrv_unref(bs);
5681 goto out;
5682 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005683
Chunyan Liu83d05212014-06-05 17:20:51 +08005684 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
Max Reitz66f6b812013-12-03 14:57:52 +01005685
5686 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005687 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005688 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005689 goto out;
5690 }
5691 }
5692
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005693 if (!quiet) {
5694 printf("Formatting '%s', fmt=%s ", filename, fmt);
Chunyan Liu83d05212014-06-05 17:20:51 +08005695 qemu_opts_print(opts);
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005696 puts("");
5697 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005698
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005699 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005700
Max Reitzcc84d902013-09-06 17:14:26 +02005701 if (ret == -EFBIG) {
5702 /* This is generally a better message than whatever the driver would
5703 * deliver (especially because of the cluster_size_hint), since that
5704 * is most probably not much different from "image too large". */
5705 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005706 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005707 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005708 }
Max Reitzcc84d902013-09-06 17:14:26 +02005709 error_setg(errp, "The image size is too large for file format '%s'"
5710 "%s", fmt, cluster_size_hint);
5711 error_free(local_err);
5712 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005713 }
5714
5715out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005716 qemu_opts_del(opts);
5717 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005718 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005719 error_propagate(errp, local_err);
5720 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005721}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005722
5723AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5724{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005725 return bs->aio_context;
5726}
5727
5728void bdrv_detach_aio_context(BlockDriverState *bs)
5729{
Max Reitz33384422014-06-20 21:57:33 +02005730 BdrvAioNotifier *baf;
5731
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005732 if (!bs->drv) {
5733 return;
5734 }
5735
Max Reitz33384422014-06-20 21:57:33 +02005736 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5737 baf->detach_aio_context(baf->opaque);
5738 }
5739
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005740 if (bs->io_limits_enabled) {
5741 throttle_detach_aio_context(&bs->throttle_state);
5742 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005743 if (bs->drv->bdrv_detach_aio_context) {
5744 bs->drv->bdrv_detach_aio_context(bs);
5745 }
5746 if (bs->file) {
5747 bdrv_detach_aio_context(bs->file);
5748 }
5749 if (bs->backing_hd) {
5750 bdrv_detach_aio_context(bs->backing_hd);
5751 }
5752
5753 bs->aio_context = NULL;
5754}
5755
5756void bdrv_attach_aio_context(BlockDriverState *bs,
5757 AioContext *new_context)
5758{
Max Reitz33384422014-06-20 21:57:33 +02005759 BdrvAioNotifier *ban;
5760
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005761 if (!bs->drv) {
5762 return;
5763 }
5764
5765 bs->aio_context = new_context;
5766
5767 if (bs->backing_hd) {
5768 bdrv_attach_aio_context(bs->backing_hd, new_context);
5769 }
5770 if (bs->file) {
5771 bdrv_attach_aio_context(bs->file, new_context);
5772 }
5773 if (bs->drv->bdrv_attach_aio_context) {
5774 bs->drv->bdrv_attach_aio_context(bs, new_context);
5775 }
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005776 if (bs->io_limits_enabled) {
5777 throttle_attach_aio_context(&bs->throttle_state, new_context);
5778 }
Max Reitz33384422014-06-20 21:57:33 +02005779
5780 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5781 ban->attached_aio_context(new_context, ban->opaque);
5782 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005783}
5784
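/* Move bs to new_context: all pending requests are drained first, then the
 * detach/attach callbacks run.  This executes in the old AioContext, so the
 * new one is acquired around the attach. */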
5785void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5786{
5787 bdrv_drain_all(); /* ensure there are no in-flight requests */
5788
5789 bdrv_detach_aio_context(bs);
5790
5791 /* This function executes in the old AioContext so acquire the new one in
5792 * case it runs in a different thread.
5793 */
5794 aio_context_acquire(new_context);
5795 bdrv_attach_aio_context(bs, new_context);
5796 aio_context_release(new_context);
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005797}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005798
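/* Register callbacks to be invoked whenever bs is detached from or attached
 * to an AioContext; bdrv_remove_aio_context_notifier() below aborts if no
 * matching registration is found. */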
Max Reitz33384422014-06-20 21:57:33 +02005799void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5800 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5801 void (*detach_aio_context)(void *opaque), void *opaque)
5802{
5803 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5804 *ban = (BdrvAioNotifier){
5805 .attached_aio_context = attached_aio_context,
5806 .detach_aio_context = detach_aio_context,
5807 .opaque = opaque
5808 };
5809
5810 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5811}
5812
5813void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5814 void (*attached_aio_context)(AioContext *,
5815 void *),
5816 void (*detach_aio_context)(void *),
5817 void *opaque)
5818{
5819 BdrvAioNotifier *ban, *ban_next;
5820
5821 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5822 if (ban->attached_aio_context == attached_aio_context &&
5823 ban->detach_aio_context == detach_aio_context &&
5824 ban->opaque == opaque)
5825 {
5826 QLIST_REMOVE(ban, list);
5827 g_free(ban);
5828
5829 return;
5830 }
5831 }
5832
5833 abort();
5834}
5835
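/* Register a notifier that runs before every guest write to bs; a notifier
 * returning an error fails the write (this is how backup jobs implement
 * their copy-on-write behaviour). */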
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005836void bdrv_add_before_write_notifier(BlockDriverState *bs,
5837 NotifierWithReturn *notifier)
5838{
5839 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5840}
Max Reitz6f176b42013-09-03 10:09:50 +02005841
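/* Ask the format driver to change options of an existing image in place
 * (this is what "qemu-img amend" uses); returns -ENOTSUP if the driver has
 * no bdrv_amend_options implementation. */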
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005842int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
Max Reitz6f176b42013-09-03 10:09:50 +02005843{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005844 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005845 return -ENOTSUP;
5846 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005847 return bs->drv->bdrv_amend_options(bs, opts);
Max Reitz6f176b42013-09-03 10:09:50 +02005848}
Benoît Canetf6186f42013-10-02 14:33:48 +02005849
Benoît Canetb5042a32014-03-03 19:11:34 +01005850/* This function is called by the bdrv_recurse_is_first_non_filter method of
 5851 * block filters and by bdrv_is_first_non_filter.
 5852 * It is used to test whether the given bs is the candidate or to recurse
 5853 * further down the node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005854 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005855bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5856 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005857{
Benoît Canetb5042a32014-03-03 19:11:34 +01005858    /* return false if basic checks fail */
5859 if (!bs || !bs->drv) {
5860 return false;
5861 }
5862
 5863    /* the code reached a non-filter block driver -> check whether bs is the
 5864     * same as the candidate; this is the recursion termination condition.
 5865     */
5866 if (!bs->drv->is_filter) {
5867 return bs == candidate;
5868 }
5869 /* Down this path the driver is a block filter driver */
5870
 5871    /* If the block filter's recursion method is defined, use it to recurse
 5872     * down the node graph.
 5873     */
5874 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01005875 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5876 }
5877
Benoît Canetb5042a32014-03-03 19:11:34 +01005878    /* the driver is a block filter but does not allow recursion -> return false
 5879     */
5880 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01005881}
5882
 5883/* This function checks whether the candidate is the first non-filter bs down
 5884 * its bs chain. Since we don't have pointers to parents, it explores all bs
 5885 * chains from the top. Some filters can choose not to pass down the recursion.
 5886 */
5887bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5888{
5889 BlockDriverState *bs;
5890
5891 /* walk down the bs forest recursively */
5892 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5893 bool perm;
5894
Benoît Canetb5042a32014-03-03 19:11:34 +01005895 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005896 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005897
5898 /* candidate is the first non filter */
5899 if (perm) {
5900 return true;
5901 }
5902 }
5903
5904 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005905}
Benoît Canet09158f02014-06-27 18:25:25 +02005906
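/* Resolve node_name and check that the node may be replaced: it must exist,
 * must not have BLOCK_OP_TYPE_REPLACE blocked, and must be the topmost
 * non-filter of its chain.  Returns NULL and sets errp otherwise. */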
5907BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5908{
5909 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5910 if (!to_replace_bs) {
5911 error_setg(errp, "Node name '%s' not found", node_name);
5912 return NULL;
5913 }
5914
5915 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5916 return NULL;
5917 }
5918
 5919    /* To prevent data corruption, we don't allow an arbitrary node of the
 5920     * BDS chain to be replaced, only the topmost non-filter.
 5921     * Another benefit is that this test excludes backing files, which are
 5922     * blocked by the backing blockers.
 5923     */
5924 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5925 error_setg(errp, "Only top most non filter can be replaced");
5926 return NULL;
5927 }
5928
5929 return to_replace_bs;
5930}
Ming Lei448ad912014-07-04 18:04:33 +08005931
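/* Request batching hints: bdrv_io_plug() tells the driver that several
 * requests are about to be submitted and bdrv_io_unplug() flushes them;
 * both recurse into bs->file when the driver itself has no hook. */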
5932void bdrv_io_plug(BlockDriverState *bs)
5933{
5934 BlockDriver *drv = bs->drv;
5935 if (drv && drv->bdrv_io_plug) {
5936 drv->bdrv_io_plug(bs);
5937 } else if (bs->file) {
5938 bdrv_io_plug(bs->file);
5939 }
5940}
5941
5942void bdrv_io_unplug(BlockDriverState *bs)
5943{
5944 BlockDriver *drv = bs->drv;
5945 if (drv && drv->bdrv_io_unplug) {
5946 drv->bdrv_io_unplug(bs);
5947 } else if (bs->file) {
5948 bdrv_io_unplug(bs->file);
5949 }
5950}
5951
5952void bdrv_flush_io_queue(BlockDriverState *bs)
5953{
5954 BlockDriver *drv = bs->drv;
5955 if (drv && drv->bdrv_flush_io_queue) {
5956 drv->bdrv_flush_io_queue(bs);
5957 } else if (bs->file) {
5958 bdrv_flush_io_queue(bs->file);
5959 }
5960}
Max Reitz91af7012014-07-18 20:24:56 +02005961
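/* Copy this BDS level's driver-specific options into d, skipping "node-name"
 * and any dotted child options; returns true if at least one option was
 * copied. */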
5962static bool append_open_options(QDict *d, BlockDriverState *bs)
5963{
5964 const QDictEntry *entry;
5965 bool found_any = false;
5966
5967 for (entry = qdict_first(bs->options); entry;
5968 entry = qdict_next(bs->options, entry))
5969 {
5970 /* Only take options for this level and exclude all non-driver-specific
5971 * options */
5972 if (!strchr(qdict_entry_key(entry), '.') &&
5973 strcmp(qdict_entry_key(entry), "node-name"))
5974 {
5975 qobject_incref(qdict_entry_value(entry));
5976 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5977 found_any = true;
5978 }
5979 }
5980
5981 return found_any;
5982}
5983
5984/* Updates the following BDS fields:
5985 * - exact_filename: A filename which may be used for opening a block device
5986 * which (mostly) equals the given BDS (even without any
5987 * other options; so reading and writing must return the same
5988 * results, but caching etc. may be different)
5989 * - full_open_options: Options which, when given when opening a block device
5990 * (without a filename), result in a BDS (mostly)
5991 * equalling the given one
5992 * - filename: If exact_filename is set, it is copied here. Otherwise,
5993 * full_open_options is converted to a JSON object, prefixed with
5994 * "json:" (for use through the JSON pseudo protocol) and put here.
5995 */
5996void bdrv_refresh_filename(BlockDriverState *bs)
5997{
5998 BlockDriver *drv = bs->drv;
5999 QDict *opts;
6000
6001 if (!drv) {
6002 return;
6003 }
6004
6005 /* This BDS's file name will most probably depend on its file's name, so
6006 * refresh that first */
6007 if (bs->file) {
6008 bdrv_refresh_filename(bs->file);
6009 }
6010
6011 if (drv->bdrv_refresh_filename) {
6012 /* Obsolete information is of no use here, so drop the old file name
6013 * information before refreshing it */
6014 bs->exact_filename[0] = '\0';
6015 if (bs->full_open_options) {
6016 QDECREF(bs->full_open_options);
6017 bs->full_open_options = NULL;
6018 }
6019
6020 drv->bdrv_refresh_filename(bs);
6021 } else if (bs->file) {
6022 /* Try to reconstruct valid information from the underlying file */
6023 bool has_open_options;
6024
6025 bs->exact_filename[0] = '\0';
6026 if (bs->full_open_options) {
6027 QDECREF(bs->full_open_options);
6028 bs->full_open_options = NULL;
6029 }
6030
6031 opts = qdict_new();
6032 has_open_options = append_open_options(opts, bs);
6033
6034 /* If no specific options have been given for this BDS, the filename of
6035 * the underlying file should suffice for this one as well */
6036 if (bs->file->exact_filename[0] && !has_open_options) {
6037 strcpy(bs->exact_filename, bs->file->exact_filename);
6038 }
6039 /* Reconstructing the full options QDict is simple for most format block
6040 * drivers, as long as the full options are known for the underlying
6041 * file BDS. The full options QDict of that file BDS should somehow
6042 * contain a representation of the filename, therefore the following
6043 * suffices without querying the (exact_)filename of this BDS. */
6044 if (bs->file->full_open_options) {
6045 qdict_put_obj(opts, "driver",
6046 QOBJECT(qstring_from_str(drv->format_name)));
6047 QINCREF(bs->file->full_open_options);
6048 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6049
6050 bs->full_open_options = opts;
6051 } else {
6052 QDECREF(opts);
6053 }
6054 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6055 /* There is no underlying file BDS (at least referenced by BDS.file),
6056 * so the full options QDict should be equal to the options given
6057 * specifically for this block device when it was opened (plus the
6058 * driver specification).
6059 * Because those options don't change, there is no need to update
6060 * full_open_options when it's already set. */
6061
6062 opts = qdict_new();
6063 append_open_options(opts, bs);
6064 qdict_put_obj(opts, "driver",
6065 QOBJECT(qstring_from_str(drv->format_name)));
6066
6067 if (bs->exact_filename[0]) {
6068 /* This may not work for all block protocol drivers (some may
6069 * require this filename to be parsed), but we have to find some
6070 * default solution here, so just include it. If some block driver
6071 * does not support pure options without any filename at all or
6072 * needs some special format of the options QDict, it needs to
6073 * implement the driver-specific bdrv_refresh_filename() function.
6074 */
6075 qdict_put_obj(opts, "filename",
6076 QOBJECT(qstring_from_str(bs->exact_filename)));
6077 }
6078
6079 bs->full_open_options = opts;
6080 }
6081
6082 if (bs->exact_filename[0]) {
6083 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6084 } else if (bs->full_open_options) {
6085 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6086 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6087 qstring_get_str(json));
6088 QDECREF(json);
6089 }
6090}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006091
 6092/* The purpose of this accessor function is to allow the device models to
 6093 * access the BlockAcctStats structure embedded inside a BlockDriverState
 6094 * without being aware of the BlockDriverState structure layout.
 6095 * It will go away once the BlockAcctStats structure is moved inside the
 6096 * device models.
 6097 */
6098BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6099{
6100 return &bs->stats;
6101}