blob: e7387f1556a2b9aa4d3fa7ef6b3b90a53d2037ef [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini83c90892012-12-17 18:19:49 +010027#include "monitor/monitor.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010028#include "block/block_int.h"
29#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010030#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010031#include "qapi/qmp/qjson.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010033#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010034#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010035#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030036#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000038
Juan Quintela71e72a12009-07-27 16:12:56 +020039#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000040#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000043#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000044#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000045#include <sys/disk.h>
46#endif
blueswir1c5e97232009-03-07 20:06:23 +000047#endif
bellard7674e7b2005-04-26 21:59:26 +000048
aliguori49dc7682009-03-08 16:26:59 +000049#ifdef _WIN32
50#include <windows.h>
51#endif
52
Fam Zhenge4654d22013-11-13 18:29:43 +080053struct BdrvDirtyBitmap {
54 HBitmap *bitmap;
55 QLIST_ENTRY(BdrvDirtyBitmap) list;
56};
57
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010058#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
59
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020060static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000061static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000063 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000064static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000066 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020067static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
69 QEMUIOVector *iov);
70static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010073static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000075 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000078 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010079static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
80 int64_t sector_num,
81 QEMUIOVector *qiov,
82 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +010083 BdrvRequestFlags flags,
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010084 BlockDriverCompletionFunc *cb,
85 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010086 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010087static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010088static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020089 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000090
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Benoît Canetdc364f42014-01-23 21:31:32 +010094static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
96
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010097static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000099
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/*
 * Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (a drive prefix such as "c:"), and zero otherwise.
 * The second character is only inspected when the first one is a letter.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
110
111int is_windows_drive(const char *filename)
112{
113 if (is_windows_drive_prefix(filename) &&
114 filename[2] == '\0')
115 return 1;
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
118 return 1;
119 return 0;
120}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200124void bdrv_set_io_limits(BlockDriverState *bs,
125 ThrottleConfig *cfg)
126{
127 int i;
128
129 throttle_config(&bs->throttle_state, cfg);
130
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
133 }
134}
135
136/* this function drain all the throttled IOs */
137static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
138{
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
141 int i;
142
143 bs->io_limits_enabled = false;
144
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
147 drained = true;
148 }
149 }
150
151 bs->io_limits_enabled = enabled;
152
153 return drained;
154}
155
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800156void bdrv_io_limits_disable(BlockDriverState *bs)
157{
158 bs->io_limits_enabled = false;
159
Benoît Canetcc0681c2013-09-02 14:14:39 +0200160 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800161
Benoît Canetcc0681c2013-09-02 14:14:39 +0200162 throttle_destroy(&bs->throttle_state);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800163}
164
Benoît Canetcc0681c2013-09-02 14:14:39 +0200165static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800166{
167 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200168 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800169}
170
Benoît Canetcc0681c2013-09-02 14:14:39 +0200171static void bdrv_throttle_write_timer_cb(void *opaque)
172{
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
175}
176
177/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800178void bdrv_io_limits_enable(BlockDriverState *bs)
179{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
182 QEMU_CLOCK_VIRTUAL,
183 bdrv_throttle_read_timer_cb,
184 bdrv_throttle_write_timer_cb,
185 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800186 bs->io_limits_enabled = true;
187}
188
Benoît Canetcc0681c2013-09-02 14:14:39 +0200189/* This function makes an IO wait if needed
190 *
191 * @nb_sectors: the number of sectors of the IO
192 * @is_write: is the IO a write
193 */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800194static void bdrv_io_limits_intercept(BlockDriverState *bs,
Kevin Wolfd5103582014-01-16 13:29:10 +0100195 unsigned int bytes,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200196 bool is_write)
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800197{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200198 /* does this io must wait */
199 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800200
Benoît Canetcc0681c2013-09-02 14:14:39 +0200201 /* if must wait or any request of this type throttled queue the IO */
202 if (must_wait ||
203 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
204 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800205 }
206
Benoît Canetcc0681c2013-09-02 14:14:39 +0200207 /* the IO will be executed, do the accounting */
Kevin Wolfd5103582014-01-16 13:29:10 +0100208 throttle_account(&bs->throttle_state, is_write, bytes);
209
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800210
Benoît Canetcc0681c2013-09-02 14:14:39 +0200211 /* if the next request must wait -> do nothing */
212 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
213 return;
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800214 }
215
Benoît Canetcc0681c2013-09-02 14:14:39 +0200216 /* else queue next request for execution */
217 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800218}
219
Kevin Wolf339064d2013-11-28 10:23:32 +0100220size_t bdrv_opt_mem_align(BlockDriverState *bs)
221{
222 if (!bs || !bs->drv) {
223 /* 4k should be on the safe side */
224 return 4096;
225 }
226
227 return bs->bl.opt_mem_alignment;
228}
229
/*
 * Check if the path starts with "<protocol>:", i.e. whether the first
 * character out of ':' and the path separators is a ':'.
 *
 * On Windows, drive names and drive prefixes are never treated as a protocol,
 * and '\' also counts as a path separator.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_set[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_set[] = ":/";
#endif

    return path[strcspn(path, stop_set)] == ':';
}
247
/*
 * Return non-zero if @path is absolute: it starts with '/', or, on Windows,
 * it starts with '\' or is a drive name / begins with a drive prefix.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
260
/*
 * Build a path for @filename in @dest (at most @dest_size bytes).
 *
 * If @filename is absolute it is simply copied.  Otherwise it is considered
 * relative to @base_path: the prefix of @base_path up to and including its
 * last path separator, or its first ':' if that comes later, is kept and
 * @filename is appended to it.  URLs are supported through the ':' handling.
 *
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, truncated to fit into dest */
    prefix_len = (after_sep > after_colon ? after_sep : after_colon)
                 - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
304
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200305void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
306{
307 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
308 pstrcpy(dest, sz, bs->backing_file);
309 } else {
310 path_combine(dest, sz, bs->filename, bs->backing_file);
311 }
312}
313
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500314void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000315{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100316 /* Block drivers without coroutine functions need emulation */
317 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200318 bdrv->bdrv_co_readv = bdrv_co_readv_em;
319 bdrv->bdrv_co_writev = bdrv_co_writev_em;
320
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100321 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
322 * the block driver lacks aio we need to emulate that too.
323 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200324 if (!bdrv->bdrv_aio_readv) {
325 /* add AIO emulation layer */
326 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
327 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200328 }
bellard83f64092006-08-01 16:21:11 +0000329 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200330
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100331 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000332}
bellardb3380822004-03-14 21:38:54 +0000333
334/* create a new block device (by default it is empty) */
335BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000336{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100337 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000338
Anthony Liguori7267c092011-08-20 22:09:37 -0500339 bs = g_malloc0(sizeof(BlockDriverState));
Fam Zhenge4654d22013-11-13 18:29:43 +0800340 QLIST_INIT(&bs->dirty_bitmaps);
bellardb3380822004-03-14 21:38:54 +0000341 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000342 if (device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +0100343 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
bellardea2384d2004-08-01 21:59:26 +0000344 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300345 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200346 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200347 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200348 qemu_co_queue_init(&bs->throttled_reqs[0]);
349 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800350 bs->refcnt = 1;
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200351
bellardb3380822004-03-14 21:38:54 +0000352 return bs;
353}
354
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200355void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
356{
357 notifier_list_add(&bs->close_notifiers, notify);
358}
359
bellardea2384d2004-08-01 21:59:26 +0000360BlockDriver *bdrv_find_format(const char *format_name)
361{
362 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100363 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
364 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000365 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100366 }
bellardea2384d2004-08-01 21:59:26 +0000367 }
368 return NULL;
369}
370
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800371static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100372{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800373 static const char *whitelist_rw[] = {
374 CONFIG_BDRV_RW_WHITELIST
375 };
376 static const char *whitelist_ro[] = {
377 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100378 };
379 const char **p;
380
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800381 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100382 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800383 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100384
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800385 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100386 if (!strcmp(drv->format_name, *p)) {
387 return 1;
388 }
389 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800390 if (read_only) {
391 for (p = whitelist_ro; *p; p++) {
392 if (!strcmp(drv->format_name, *p)) {
393 return 1;
394 }
395 }
396 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100397 return 0;
398}
399
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800400BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
401 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100402{
403 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800404 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100405}
406
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800407typedef struct CreateCo {
408 BlockDriver *drv;
409 char *filename;
410 QEMUOptionParameter *options;
411 int ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200412 Error *err;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800413} CreateCo;
414
415static void coroutine_fn bdrv_create_co_entry(void *opaque)
416{
Max Reitzcc84d902013-09-06 17:14:26 +0200417 Error *local_err = NULL;
418 int ret;
419
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800420 CreateCo *cco = opaque;
421 assert(cco->drv);
422
Max Reitzcc84d902013-09-06 17:14:26 +0200423 ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100424 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200425 error_propagate(&cco->err, local_err);
426 }
427 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800428}
429
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200430int bdrv_create(BlockDriver *drv, const char* filename,
Max Reitzcc84d902013-09-06 17:14:26 +0200431 QEMUOptionParameter *options, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000432{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800433 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200434
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800435 Coroutine *co;
436 CreateCo cco = {
437 .drv = drv,
438 .filename = g_strdup(filename),
439 .options = options,
440 .ret = NOT_DONE,
Max Reitzcc84d902013-09-06 17:14:26 +0200441 .err = NULL,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800442 };
443
444 if (!drv->bdrv_create) {
Max Reitzcc84d902013-09-06 17:14:26 +0200445 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300446 ret = -ENOTSUP;
447 goto out;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800448 }
449
450 if (qemu_in_coroutine()) {
451 /* Fast-path if already in coroutine context */
452 bdrv_create_co_entry(&cco);
453 } else {
454 co = qemu_coroutine_create(bdrv_create_co_entry);
455 qemu_coroutine_enter(co, &cco);
456 while (cco.ret == NOT_DONE) {
457 qemu_aio_wait();
458 }
459 }
460
461 ret = cco.ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200462 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100463 if (cco.err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200464 error_propagate(errp, cco.err);
465 } else {
466 error_setg_errno(errp, -ret, "Could not create image");
467 }
468 }
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800469
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300470out:
471 g_free(cco.filename);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800472 return ret;
bellardea2384d2004-08-01 21:59:26 +0000473}
474
Max Reitzcc84d902013-09-06 17:14:26 +0200475int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
476 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200477{
478 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200479 Error *local_err = NULL;
480 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200481
Kevin Wolf98289622013-07-10 15:47:39 +0200482 drv = bdrv_find_protocol(filename, true);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200483 if (drv == NULL) {
Max Reitzcc84d902013-09-06 17:14:26 +0200484 error_setg(errp, "Could not find protocol for file '%s'", filename);
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000485 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200486 }
487
Max Reitzcc84d902013-09-06 17:14:26 +0200488 ret = bdrv_create(drv, filename, options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100489 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200490 error_propagate(errp, local_err);
491 }
492 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200493}
494
Kevin Wolf355ef4a2013-12-11 20:14:09 +0100495int bdrv_refresh_limits(BlockDriverState *bs)
Kevin Wolfd34682c2013-12-11 19:26:16 +0100496{
497 BlockDriver *drv = bs->drv;
498
499 memset(&bs->bl, 0, sizeof(bs->bl));
500
Kevin Wolf466ad822013-12-11 19:50:32 +0100501 if (!drv) {
502 return 0;
503 }
504
505 /* Take some limits from the children as a default */
506 if (bs->file) {
507 bdrv_refresh_limits(bs->file);
508 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
Kevin Wolf339064d2013-11-28 10:23:32 +0100509 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
510 } else {
511 bs->bl.opt_mem_alignment = 512;
Kevin Wolf466ad822013-12-11 19:50:32 +0100512 }
513
514 if (bs->backing_hd) {
515 bdrv_refresh_limits(bs->backing_hd);
516 bs->bl.opt_transfer_length =
517 MAX(bs->bl.opt_transfer_length,
518 bs->backing_hd->bl.opt_transfer_length);
Kevin Wolf339064d2013-11-28 10:23:32 +0100519 bs->bl.opt_mem_alignment =
520 MAX(bs->bl.opt_mem_alignment,
521 bs->backing_hd->bl.opt_mem_alignment);
Kevin Wolf466ad822013-12-11 19:50:32 +0100522 }
523
524 /* Then let the driver override it */
525 if (drv->bdrv_refresh_limits) {
Kevin Wolfd34682c2013-12-11 19:26:16 +0100526 return drv->bdrv_refresh_limits(bs);
527 }
528
529 return 0;
530}
531
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     size of @filename in bytes; on Windows it must be at least
 *            MAX_PATH (asserted)
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).  On POSIX hosts the file is created in
 * $TMPDIR, or in /var/tmp when TMPDIR is unset; -EOVERFLOW is returned when
 * the resulting name does not fit into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself modify errno and would otherwise hide the real cause.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000567
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200568/*
569 * Detect host devices. By convention, /dev/cdrom[N] is always
570 * recognized as a host CDROM.
571 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200572static BlockDriver *find_hdev_driver(const char *filename)
573{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200574 int score_max = 0, score;
575 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200576
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100577 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200578 if (d->bdrv_probe_device) {
579 score = d->bdrv_probe_device(filename);
580 if (score > score_max) {
581 score_max = score;
582 drv = d;
583 }
584 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200585 }
586
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200587 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200588}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200589
Kevin Wolf98289622013-07-10 15:47:39 +0200590BlockDriver *bdrv_find_protocol(const char *filename,
591 bool allow_protocol_prefix)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200592{
593 BlockDriver *drv1;
594 char protocol[128];
595 int len;
596 const char *p;
597
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200598 /* TODO Drivers without bdrv_file_open must be specified explicitly */
599
Christoph Hellwig39508e72010-06-23 12:25:17 +0200600 /*
601 * XXX(hch): we really should not let host device detection
602 * override an explicit protocol specification, but moving this
603 * later breaks access to device names with colons in them.
604 * Thanks to the brain-dead persistent naming schemes on udev-
605 * based Linux systems those actually are quite common.
606 */
607 drv1 = find_hdev_driver(filename);
608 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200609 return drv1;
610 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200611
Kevin Wolf98289622013-07-10 15:47:39 +0200612 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200613 return bdrv_find_format("file");
614 }
Kevin Wolf98289622013-07-10 15:47:39 +0200615
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000616 p = strchr(filename, ':');
617 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200618 len = p - filename;
619 if (len > sizeof(protocol) - 1)
620 len = sizeof(protocol) - 1;
621 memcpy(protocol, filename, len);
622 protocol[len] = '\0';
623 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
624 if (drv1->protocol_name &&
625 !strcmp(drv1->protocol_name, protocol)) {
626 return drv1;
627 }
628 }
629 return NULL;
630}
631
Kevin Wolff500a6d2012-11-12 17:35:27 +0100632static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200633 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000634{
Kevin Wolff500a6d2012-11-12 17:35:27 +0100635 int score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000636 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000637 uint8_t buf[2048];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100638 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700639
Kevin Wolf08a00552010-06-01 18:37:31 +0200640 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100641 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200642 drv = bdrv_find_format("raw");
643 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200644 error_setg(errp, "Could not find raw image format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200645 ret = -ENOENT;
646 }
647 *pdrv = drv;
648 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700649 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700650
bellard83f64092006-08-01 16:21:11 +0000651 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000652 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200653 error_setg_errno(errp, -ret, "Could not read image for determining its "
654 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200655 *pdrv = NULL;
656 return ret;
bellard83f64092006-08-01 16:21:11 +0000657 }
658
bellardea2384d2004-08-01 21:59:26 +0000659 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200660 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100661 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000662 if (drv1->bdrv_probe) {
663 score = drv1->bdrv_probe(buf, ret, filename);
664 if (score > score_max) {
665 score_max = score;
666 drv = drv1;
667 }
bellardea2384d2004-08-01 21:59:26 +0000668 }
669 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200670 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200671 error_setg(errp, "Could not determine image format: No compatible "
672 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200673 ret = -ENOENT;
674 }
675 *pdrv = drv;
676 return ret;
bellardea2384d2004-08-01 21:59:26 +0000677}
678
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100679/**
680 * Set the current 'total_sectors' value
681 */
682static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
683{
684 BlockDriver *drv = bs->drv;
685
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700686 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
687 if (bs->sg)
688 return 0;
689
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100690 /* query actual device if possible, otherwise just trust the hint */
691 if (drv->bdrv_getlength) {
692 int64_t length = drv->bdrv_getlength(bs);
693 if (length < 0) {
694 return length;
695 }
Fam Zheng7e382002013-11-06 19:48:06 +0800696 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100697 }
698
699 bs->total_sectors = hint;
700 return 0;
701}
702
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100703/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100704 * Set open flags for a given discard mode
705 *
706 * Return 0 on success, -1 if the discard mode was invalid.
707 */
708int bdrv_parse_discard_flags(const char *mode, int *flags)
709{
710 *flags &= ~BDRV_O_UNMAP;
711
712 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
713 /* do nothing */
714 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
715 *flags |= BDRV_O_UNMAP;
716 } else {
717 return -1;
718 }
719
720 return 0;
721}
722
723/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100724 * Set open flags for a given cache mode
725 *
726 * Return 0 on success, -1 if the cache mode was invalid.
727 */
728int bdrv_parse_cache_flags(const char *mode, int *flags)
729{
730 *flags &= ~BDRV_O_CACHE_MASK;
731
732 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
733 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100734 } else if (!strcmp(mode, "directsync")) {
735 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100736 } else if (!strcmp(mode, "writeback")) {
737 *flags |= BDRV_O_CACHE_WB;
738 } else if (!strcmp(mode, "unsafe")) {
739 *flags |= BDRV_O_CACHE_WB;
740 *flags |= BDRV_O_NO_FLUSH;
741 } else if (!strcmp(mode, "writethrough")) {
742 /* this is the default */
743 } else {
744 return -1;
745 }
746
747 return 0;
748}
749
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000750/**
751 * The copy-on-read flag is actually a reference count so multiple users may
752 * use the feature without worrying about clobbering its previous state.
753 * Copy-on-read stays enabled until all users have called to disable it.
754 */
755void bdrv_enable_copy_on_read(BlockDriverState *bs)
756{
757 bs->copy_on_read++;
758}
759
760void bdrv_disable_copy_on_read(BlockDriverState *bs)
761{
762 assert(bs->copy_on_read > 0);
763 bs->copy_on_read--;
764}
765
Kevin Wolf7b272452012-11-12 17:05:39 +0100766static int bdrv_open_flags(BlockDriverState *bs, int flags)
767{
768 int open_flags = flags | BDRV_O_CACHE_WB;
769
770 /*
771 * Clear flags that are internal to the block layer before opening the
772 * image.
773 */
774 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
775
776 /*
777 * Snapshots should be writable.
778 */
779 if (bs->is_temporary) {
780 open_flags |= BDRV_O_RDWR;
781 }
782
783 return open_flags;
784}
785
Benoît Canet6913c0c2014-01-23 21:31:33 +0100786static int bdrv_assign_node_name(BlockDriverState *bs,
787 const char *node_name,
788 Error **errp)
789{
790 if (!node_name) {
791 return 0;
792 }
793
794 /* empty string node name is invalid */
795 if (node_name[0] == '\0') {
796 error_setg(errp, "Empty node name");
797 return -EINVAL;
798 }
799
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100800 /* takes care of avoiding namespaces collisions */
801 if (bdrv_find(node_name)) {
802 error_setg(errp, "node-name=%s is conflicting with a device id",
803 node_name);
804 return -EINVAL;
805 }
806
Benoît Canet6913c0c2014-01-23 21:31:33 +0100807 /* takes care of avoiding duplicates node names */
808 if (bdrv_find_node(node_name)) {
809 error_setg(errp, "Duplicate node name");
810 return -EINVAL;
811 }
812
813 /* copy node name into the bs and insert it into the graph list */
814 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
815 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
816
817 return 0;
818}
819
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200820/*
Kevin Wolf57915332010-04-14 15:24:50 +0200821 * Common part for opening disk images and files
Kevin Wolfb6ad4912013-03-15 10:35:04 +0100822 *
823 * Removes all processed options from *options.
Kevin Wolf57915332010-04-14 15:24:50 +0200824 */
Kevin Wolff500a6d2012-11-12 17:35:27 +0100825static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200826 QDict *options, int flags, BlockDriver *drv, Error **errp)
Kevin Wolf57915332010-04-14 15:24:50 +0200827{
828 int ret, open_flags;
Kevin Wolf035fccd2013-04-09 14:34:19 +0200829 const char *filename;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100830 const char *node_name = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200831 Error *local_err = NULL;
Kevin Wolf57915332010-04-14 15:24:50 +0200832
833 assert(drv != NULL);
Paolo Bonzini64058752012-05-08 16:51:49 +0200834 assert(bs->file == NULL);
Kevin Wolf707ff822013-03-06 12:20:31 +0100835 assert(options != NULL && bs->options != options);
Kevin Wolf57915332010-04-14 15:24:50 +0200836
Kevin Wolf45673672013-04-22 17:48:40 +0200837 if (file != NULL) {
838 filename = file->filename;
839 } else {
840 filename = qdict_get_try_str(options, "filename");
841 }
842
Kevin Wolf765003d2014-02-03 14:49:42 +0100843 if (drv->bdrv_needs_filename && !filename) {
844 error_setg(errp, "The '%s' block driver requires a file name",
845 drv->format_name);
846 return -EINVAL;
847 }
848
Kevin Wolf45673672013-04-22 17:48:40 +0200849 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100850
Benoît Canet6913c0c2014-01-23 21:31:33 +0100851 node_name = qdict_get_try_str(options, "node-name");
852 ret = bdrv_assign_node_name(bs, node_name, errp);
853 if (ret < 0) {
854 return ret;
855 }
856 qdict_del(options, "node-name");
857
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100858 /* bdrv_open() with directly using a protocol as drv. This layer is already
859 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
860 * and return immediately. */
861 if (file != NULL && drv->bdrv_file_open) {
862 bdrv_swap(file, bs);
863 return 0;
864 }
865
Kevin Wolf57915332010-04-14 15:24:50 +0200866 bs->open_flags = flags;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +0100867 bs->guest_block_size = 512;
Paolo Bonzinic25f53b2011-11-29 12:42:20 +0100868 bs->request_alignment = 512;
Asias He0d51b4d2013-08-22 15:24:14 +0800869 bs->zero_beyond_eof = true;
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800870 open_flags = bdrv_open_flags(bs, flags);
871 bs->read_only = !(open_flags & BDRV_O_RDWR);
872
873 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +0200874 error_setg(errp,
875 !bs->read_only && bdrv_is_whitelisted(drv, true)
876 ? "Driver '%s' can only be used for read-only devices"
877 : "Driver '%s' is not whitelisted",
878 drv->format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800879 return -ENOTSUP;
880 }
Kevin Wolf57915332010-04-14 15:24:50 +0200881
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000882 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
Kevin Wolf0ebd24e2013-09-19 15:12:18 +0200883 if (flags & BDRV_O_COPY_ON_READ) {
884 if (!bs->read_only) {
885 bdrv_enable_copy_on_read(bs);
886 } else {
887 error_setg(errp, "Can't use copy-on-read on read-only device");
888 return -EINVAL;
889 }
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000890 }
891
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100892 if (filename != NULL) {
893 pstrcpy(bs->filename, sizeof(bs->filename), filename);
894 } else {
895 bs->filename[0] = '\0';
896 }
Kevin Wolf57915332010-04-14 15:24:50 +0200897
Kevin Wolf57915332010-04-14 15:24:50 +0200898 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500899 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200900
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100901 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100902
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200903 /* Open the image, either directly or using a protocol */
904 if (drv->bdrv_file_open) {
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100905 assert(file == NULL);
Benoît Canet030be322013-09-24 17:07:04 +0200906 assert(!drv->bdrv_needs_filename || filename != NULL);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200907 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
Kevin Wolff500a6d2012-11-12 17:35:27 +0100908 } else {
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200909 if (file == NULL) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200910 error_setg(errp, "Can't use '%s' as a block driver for the "
911 "protocol level", drv->format_name);
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200912 ret = -EINVAL;
913 goto free_and_fail;
914 }
Kevin Wolff500a6d2012-11-12 17:35:27 +0100915 bs->file = file;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200916 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200917 }
918
Kevin Wolf57915332010-04-14 15:24:50 +0200919 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100920 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200921 error_propagate(errp, local_err);
Dunrong Huang2fa9aa52013-09-24 18:14:01 +0800922 } else if (bs->filename[0]) {
923 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200924 } else {
925 error_setg_errno(errp, -ret, "Could not open image");
926 }
Kevin Wolf57915332010-04-14 15:24:50 +0200927 goto free_and_fail;
928 }
929
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100930 ret = refresh_total_sectors(bs, bs->total_sectors);
931 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200932 error_setg_errno(errp, -ret, "Could not refresh total sector count");
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100933 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200934 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100935
Kevin Wolfd34682c2013-12-11 19:26:16 +0100936 bdrv_refresh_limits(bs);
Paolo Bonzinic25f53b2011-11-29 12:42:20 +0100937 assert(bdrv_opt_mem_align(bs) != 0);
Kevin Wolf47ea2de2014-03-05 15:49:55 +0100938 assert((bs->request_alignment != 0) || bs->sg);
Kevin Wolfd34682c2013-12-11 19:26:16 +0100939
Kevin Wolf57915332010-04-14 15:24:50 +0200940#ifndef _WIN32
941 if (bs->is_temporary) {
Dunrong Huangd4cea8d2013-10-03 01:31:27 +0800942 assert(bs->filename[0] != '\0');
943 unlink(bs->filename);
Kevin Wolf57915332010-04-14 15:24:50 +0200944 }
945#endif
946 return 0;
947
948free_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +0100949 bs->file = NULL;
Anthony Liguori7267c092011-08-20 22:09:37 -0500950 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200951 bs->opaque = NULL;
952 bs->drv = NULL;
953 return ret;
954}
955
956/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200957 * Opens a file using a protocol (file, host_device, nbd, ...)
Kevin Wolf787e4a82013-03-06 11:52:48 +0100958 *
Max Reitz5acd9d82014-02-18 18:33:11 +0100959 * options is an indirect pointer to a QDict of options to pass to the block
960 * drivers, or pointer to NULL for an empty set of options. If this function
961 * takes ownership of the QDict reference, it will set *options to NULL;
962 * otherwise, it will contain unused/unrecognized options after this function
963 * returns. Then, the caller is responsible for freeing it. If it intends to
964 * reuse the QDict, QINCREF() should be called beforehand.
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200965 */
Max Reitzd4446ea2014-02-18 18:33:09 +0100966static int bdrv_file_open(BlockDriverState *bs, const char *filename,
Max Reitz5acd9d82014-02-18 18:33:11 +0100967 QDict **options, int flags, Error **errp)
bellardb3380822004-03-14 21:38:54 +0000968{
Christoph Hellwig6db95602010-04-05 16:53:57 +0200969 BlockDriver *drv;
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100970 const char *drvname;
Kevin Wolf98289622013-07-10 15:47:39 +0200971 bool allow_protocol_prefix = false;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200972 Error *local_err = NULL;
bellard83f64092006-08-01 16:21:11 +0000973 int ret;
974
Kevin Wolf035fccd2013-04-09 14:34:19 +0200975 /* Fetch the file name from the options QDict if necessary */
976 if (!filename) {
Max Reitz5acd9d82014-02-18 18:33:11 +0100977 filename = qdict_get_try_str(*options, "filename");
978 } else if (filename && !qdict_haskey(*options, "filename")) {
979 qdict_put(*options, "filename", qstring_from_str(filename));
Kevin Wolf98289622013-07-10 15:47:39 +0200980 allow_protocol_prefix = true;
Kevin Wolf035fccd2013-04-09 14:34:19 +0200981 } else {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200982 error_setg(errp, "Can't specify 'file' and 'filename' options at the "
983 "same time");
Kevin Wolf035fccd2013-04-09 14:34:19 +0200984 ret = -EINVAL;
985 goto fail;
986 }
987
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100988 /* Find the right block driver */
Max Reitz5acd9d82014-02-18 18:33:11 +0100989 drvname = qdict_get_try_str(*options, "driver");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100990 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +0200991 drv = bdrv_find_format(drvname);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200992 if (!drv) {
993 error_setg(errp, "Unknown driver '%s'", drvname);
994 }
Max Reitz5acd9d82014-02-18 18:33:11 +0100995 qdict_del(*options, "driver");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100996 } else if (filename) {
Kevin Wolf98289622013-07-10 15:47:39 +0200997 drv = bdrv_find_protocol(filename, allow_protocol_prefix);
998 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200999 error_setg(errp, "Unknown protocol");
Kevin Wolf98289622013-07-10 15:47:39 +02001000 }
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001001 } else {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001002 error_setg(errp, "Must specify either driver or file");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001003 drv = NULL;
1004 }
1005
1006 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001007 /* errp has been set already */
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001008 ret = -ENOENT;
1009 goto fail;
1010 }
1011
1012 /* Parse the filename and open it */
1013 if (drv->bdrv_parse_filename && filename) {
Max Reitz5acd9d82014-02-18 18:33:11 +01001014 drv->bdrv_parse_filename(filename, *options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001015 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001016 error_propagate(errp, local_err);
Kevin Wolf6963a302013-03-15 18:47:22 +01001017 ret = -EINVAL;
1018 goto fail;
1019 }
Max Reitzcd5d0312014-03-05 22:41:36 +01001020
1021 if (!drv->bdrv_needs_filename) {
1022 qdict_del(*options, "filename");
1023 } else {
1024 filename = qdict_get_str(*options, "filename");
1025 }
Kevin Wolf6963a302013-03-15 18:47:22 +01001026 }
1027
Max Reitz505d7582013-12-20 19:28:13 +01001028 if (!drv->bdrv_file_open) {
Max Reitz5acd9d82014-02-18 18:33:11 +01001029 ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
1030 *options = NULL;
Max Reitz505d7582013-12-20 19:28:13 +01001031 } else {
Max Reitz5acd9d82014-02-18 18:33:11 +01001032 ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
Max Reitz505d7582013-12-20 19:28:13 +01001033 }
Kevin Wolf707ff822013-03-06 12:20:31 +01001034 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001035 error_propagate(errp, local_err);
Kevin Wolf707ff822013-03-06 12:20:31 +01001036 goto fail;
1037 }
1038
aliguori71d07702009-03-03 17:37:16 +00001039 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +00001040 return 0;
Kevin Wolf707ff822013-03-06 12:20:31 +01001041
1042fail:
Kevin Wolf707ff822013-03-06 12:20:31 +01001043 return ret;
bellardea2384d2004-08-01 21:59:26 +00001044}
bellardfc01f7e2003-06-30 10:03:06 +00001045
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001046/*
1047 * Opens the backing file for a BlockDriverState if not yet open
1048 *
1049 * options is a QDict of options to pass to the block drivers, or NULL for an
1050 * empty set of options. The reference to the QDict is transferred to this
1051 * function (even on failure), so if the caller intends to reuse the dictionary,
1052 * it needs to use QINCREF() before calling bdrv_file_open.
1053 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001054int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001055{
1056 char backing_filename[PATH_MAX];
1057 int back_flags, ret;
1058 BlockDriver *back_drv = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001059 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001060
1061 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001062 QDECREF(options);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001063 return 0;
1064 }
1065
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001066 /* NULL means an empty set of options */
1067 if (options == NULL) {
1068 options = qdict_new();
1069 }
1070
Paolo Bonzini9156df12012-10-18 16:49:17 +02001071 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001072 if (qdict_haskey(options, "file.filename")) {
1073 backing_filename[0] = '\0';
1074 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001075 QDECREF(options);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001076 return 0;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001077 } else {
1078 bdrv_get_full_backing_filename(bs, backing_filename,
1079 sizeof(backing_filename));
Paolo Bonzini9156df12012-10-18 16:49:17 +02001080 }
1081
Paolo Bonzini9156df12012-10-18 16:49:17 +02001082 if (bs->backing_format[0] != '\0') {
1083 back_drv = bdrv_find_format(bs->backing_format);
1084 }
1085
1086 /* backing files always opened read-only */
Thibaut LAURENT87a5deb2013-10-25 02:15:07 +02001087 back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
1088 BDRV_O_COPY_ON_READ);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001089
Max Reitzf67503e2014-02-18 18:33:05 +01001090 assert(bs->backing_hd == NULL);
1091 ret = bdrv_open(&bs->backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001092 *backing_filename ? backing_filename : NULL, NULL, options,
Max Reitz34b5d2c2013-09-05 14:45:29 +02001093 back_flags, back_drv, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001094 if (ret < 0) {
Paolo Bonzini9156df12012-10-18 16:49:17 +02001095 bs->backing_hd = NULL;
1096 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001097 error_setg(errp, "Could not open backing file: %s",
1098 error_get_pretty(local_err));
1099 error_free(local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001100 return ret;
1101 }
Peter Feinerd80ac652014-01-08 19:43:25 +00001102
1103 if (bs->backing_hd->file) {
1104 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1105 bs->backing_hd->file->filename);
1106 }
1107
Kevin Wolfd34682c2013-12-11 19:26:16 +01001108 /* Recalculate the BlockLimits with the backing file */
1109 bdrv_refresh_limits(bs);
1110
Paolo Bonzini9156df12012-10-18 16:49:17 +02001111 return 0;
1112}
1113
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001114/*
Max Reitzda557aa2013-12-20 19:28:11 +01001115 * Opens a disk image whose options are given as BlockdevRef in another block
1116 * device's options.
1117 *
Max Reitzda557aa2013-12-20 19:28:11 +01001118 * If allow_none is true, no image will be opened if filename is false and no
1119 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1120 *
1121 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1122 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1123 * itself, all options starting with "${bdref_key}." are considered part of the
1124 * BlockdevRef.
1125 *
1126 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001127 *
1128 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001129 */
1130int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1131 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001132 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001133{
1134 QDict *image_options;
1135 int ret;
1136 char *bdref_key_dot;
1137 const char *reference;
1138
Max Reitzf67503e2014-02-18 18:33:05 +01001139 assert(pbs);
1140 assert(*pbs == NULL);
1141
Max Reitzda557aa2013-12-20 19:28:11 +01001142 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1143 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1144 g_free(bdref_key_dot);
1145
1146 reference = qdict_get_try_str(options, bdref_key);
1147 if (!filename && !reference && !qdict_size(image_options)) {
1148 if (allow_none) {
1149 ret = 0;
1150 } else {
1151 error_setg(errp, "A block device must be specified for \"%s\"",
1152 bdref_key);
1153 ret = -EINVAL;
1154 }
1155 goto done;
1156 }
1157
Max Reitzf7d9fd82014-02-18 18:33:12 +01001158 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001159
1160done:
1161 qdict_del(options, bdref_key);
1162 return ret;
1163}
1164
1165/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001166 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001167 *
1168 * options is a QDict of options to pass to the block drivers, or NULL for an
1169 * empty set of options. The reference to the QDict belongs to the block layer
1170 * after the call (even on failure), so if the caller intends to reuse the
1171 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001172 *
1173 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1174 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001175 *
1176 * The reference parameter may be used to specify an existing block device which
1177 * should be opened. If specified, neither options nor a filename may be given,
1178 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001179 */
Max Reitzddf56362014-02-18 18:33:06 +01001180int bdrv_open(BlockDriverState **pbs, const char *filename,
1181 const char *reference, QDict *options, int flags,
1182 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001183{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001184 int ret;
Stefan Weil89c9bc32012-11-22 07:25:48 +01001185 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1186 char tmp_filename[PATH_MAX + 1];
Max Reitzf67503e2014-02-18 18:33:05 +01001187 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001188 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001189 Error *local_err = NULL;
bellard712e7872005-04-28 21:09:32 +00001190
Max Reitzf67503e2014-02-18 18:33:05 +01001191 assert(pbs);
1192
Max Reitzddf56362014-02-18 18:33:06 +01001193 if (reference) {
1194 bool options_non_empty = options ? qdict_size(options) : false;
1195 QDECREF(options);
1196
1197 if (*pbs) {
1198 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1199 "another block device");
1200 return -EINVAL;
1201 }
1202
1203 if (filename || options_non_empty) {
1204 error_setg(errp, "Cannot reference an existing block device with "
1205 "additional options or a new filename");
1206 return -EINVAL;
1207 }
1208
1209 bs = bdrv_lookup_bs(reference, reference, errp);
1210 if (!bs) {
1211 return -ENODEV;
1212 }
1213 bdrv_ref(bs);
1214 *pbs = bs;
1215 return 0;
1216 }
1217
Max Reitzf67503e2014-02-18 18:33:05 +01001218 if (*pbs) {
1219 bs = *pbs;
1220 } else {
1221 bs = bdrv_new("");
1222 }
1223
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001224 /* NULL means an empty set of options */
1225 if (options == NULL) {
1226 options = qdict_new();
1227 }
1228
1229 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001230 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001231
Max Reitz5469a2a2014-02-18 18:33:10 +01001232 if (flags & BDRV_O_PROTOCOL) {
1233 assert(!drv);
Max Reitz5acd9d82014-02-18 18:33:11 +01001234 ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
Max Reitz5469a2a2014-02-18 18:33:10 +01001235 &local_err);
Max Reitz5469a2a2014-02-18 18:33:10 +01001236 if (!ret) {
Max Reitz5acd9d82014-02-18 18:33:11 +01001237 goto done;
Max Reitz5469a2a2014-02-18 18:33:10 +01001238 } else if (bs->drv) {
1239 goto close_and_fail;
1240 } else {
1241 goto fail;
1242 }
1243 }
1244
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001245 /* For snapshot=on, create a temporary qcow2 overlay */
bellard83f64092006-08-01 16:21:11 +00001246 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +00001247 BlockDriverState *bs1;
1248 int64_t total_size;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001249 BlockDriver *bdrv_qcow2;
Kevin Wolf08b392e2013-03-18 16:17:44 +01001250 QEMUOptionParameter *create_options;
Kevin Wolf9fd31712013-11-14 15:37:12 +01001251 QDict *snapshot_options;
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001252
bellardea2384d2004-08-01 21:59:26 +00001253 /* if snapshot, we create a temporary backing file and open it
1254 instead of opening 'filename' directly */
1255
Kevin Wolf9fd31712013-11-14 15:37:12 +01001256 /* Get the required size from the image */
Kevin Wolf9fd31712013-11-14 15:37:12 +01001257 QINCREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001258 bs1 = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01001259 ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
Kevin Wolfc9fbb992013-11-28 11:58:02 +01001260 drv, &local_err);
aliguori51d7c002009-03-05 23:00:29 +00001261 if (ret < 0) {
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001262 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001263 }
Jes Sorensen3e829902010-05-27 16:20:30 +02001264 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +00001265
Fam Zheng4f6fd342013-08-23 09:14:47 +08001266 bdrv_unref(bs1);
ths3b46e622007-09-17 08:09:54 +00001267
Kevin Wolf9fd31712013-11-14 15:37:12 +01001268 /* Create the temporary image */
Jim Meyeringeba25052012-05-28 09:27:54 +02001269 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1270 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001271 error_setg_errno(errp, -ret, "Could not get temporary filename");
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001272 goto fail;
Jim Meyeringeba25052012-05-28 09:27:54 +02001273 }
aliguori7c96d462008-09-12 17:54:13 +00001274
Kevin Wolf91a073a2009-05-27 14:48:06 +02001275 bdrv_qcow2 = bdrv_find_format("qcow2");
Kevin Wolf08b392e2013-03-18 16:17:44 +01001276 create_options = parse_option_parameters("", bdrv_qcow2->create_options,
1277 NULL);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001278
Kevin Wolf08b392e2013-03-18 16:17:44 +01001279 set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001280
Max Reitzcc84d902013-09-06 17:14:26 +02001281 ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
Kevin Wolf08b392e2013-03-18 16:17:44 +01001282 free_option_parameters(create_options);
aliguori51d7c002009-03-05 23:00:29 +00001283 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001284 error_setg_errno(errp, -ret, "Could not create temporary overlay "
Max Reitzcc84d902013-09-06 17:14:26 +02001285 "'%s': %s", tmp_filename,
1286 error_get_pretty(local_err));
1287 error_free(local_err);
1288 local_err = NULL;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001289 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001290 }
Kevin Wolf91a073a2009-05-27 14:48:06 +02001291
Kevin Wolf9fd31712013-11-14 15:37:12 +01001292 /* Prepare a new options QDict for the temporary file, where user
1293 * options refer to the backing file */
1294 if (filename) {
1295 qdict_put(options, "file.filename", qstring_from_str(filename));
1296 }
1297 if (drv) {
1298 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1299 }
1300
1301 snapshot_options = qdict_new();
1302 qdict_put(snapshot_options, "backing", options);
1303 qdict_flatten(snapshot_options);
1304
1305 bs->options = snapshot_options;
1306 options = qdict_clone_shallow(bs->options);
1307
bellardea2384d2004-08-01 21:59:26 +00001308 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001309 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +00001310 bs->is_temporary = 1;
1311 }
bellard712e7872005-04-28 21:09:32 +00001312
Kevin Wolff500a6d2012-11-12 17:35:27 +01001313 /* Open image file without format layer */
Jeff Codybe028ad2012-09-20 15:13:17 -04001314 if (flags & BDRV_O_RDWR) {
1315 flags |= BDRV_O_ALLOW_RDWR;
1316 }
1317
Max Reitzf67503e2014-02-18 18:33:05 +01001318 assert(file == NULL);
Max Reitz054963f2013-12-20 19:28:12 +01001319 ret = bdrv_open_image(&file, filename, options, "file",
Max Reitzf7d9fd82014-02-18 18:33:12 +01001320 bdrv_open_flags(bs, flags | BDRV_O_UNMAP) |
1321 BDRV_O_PROTOCOL, true, &local_err);
Max Reitz054963f2013-12-20 19:28:12 +01001322 if (ret < 0) {
1323 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001324 }
1325
1326 /* Find the right image format driver */
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001327 drvname = qdict_get_try_str(options, "driver");
1328 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +02001329 drv = bdrv_find_format(drvname);
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001330 qdict_del(options, "driver");
Kevin Wolf06d22aa2013-08-08 17:44:52 +02001331 if (!drv) {
1332 error_setg(errp, "Invalid driver: '%s'", drvname);
1333 ret = -EINVAL;
1334 goto unlink_and_fail;
1335 }
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001336 }
1337
Kevin Wolff500a6d2012-11-12 17:35:27 +01001338 if (!drv) {
Max Reitz2a05cbe2013-12-20 19:28:10 +01001339 if (file) {
1340 ret = find_image_format(file, filename, &drv, &local_err);
1341 } else {
1342 error_setg(errp, "Must specify either driver or file");
1343 ret = -EINVAL;
1344 goto unlink_and_fail;
1345 }
Kevin Wolff500a6d2012-11-12 17:35:27 +01001346 }
1347
1348 if (!drv) {
1349 goto unlink_and_fail;
1350 }
1351
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001352 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001353 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001354 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +01001355 goto unlink_and_fail;
1356 }
1357
Max Reitz2a05cbe2013-12-20 19:28:10 +01001358 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001359 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001360 file = NULL;
1361 }
1362
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001363 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001364 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001365 QDict *backing_options;
1366
Benoît Canet5726d872013-09-25 13:30:01 +02001367 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001368 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001369 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001370 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001371 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001372 }
1373
Max Reitz5acd9d82014-02-18 18:33:11 +01001374done:
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001375 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001376 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001377 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001378 if (flags & BDRV_O_PROTOCOL) {
1379 error_setg(errp, "Block protocol '%s' doesn't support the option "
1380 "'%s'", drv->format_name, entry->key);
1381 } else {
1382 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1383 "support the option '%s'", drv->format_name,
1384 bs->device_name, entry->key);
1385 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001386
1387 ret = -EINVAL;
1388 goto close_and_fail;
1389 }
1390 QDECREF(options);
1391
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001392 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001393 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001394 }
1395
Max Reitzf67503e2014-02-18 18:33:05 +01001396 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001397 return 0;
1398
1399unlink_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001400 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001401 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001402 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001403 if (bs->is_temporary) {
1404 unlink(filename);
1405 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001406fail:
1407 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001408 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001409 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001410 if (!*pbs) {
1411 /* If *pbs is NULL, a new BDS has been created in this function and
1412 needs to be freed now. Otherwise, it does not need to be closed,
1413 since it has not really been opened yet. */
1414 bdrv_unref(bs);
1415 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001416 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001417 error_propagate(errp, local_err);
1418 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001419 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001420
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001421close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001422 /* See fail path, but now the BDS has to be always closed */
1423 if (*pbs) {
1424 bdrv_close(bs);
1425 } else {
1426 bdrv_unref(bs);
1427 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001428 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001429 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001430 error_propagate(errp, local_err);
1431 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001432 return ret;
1433}
1434
/*
 * One element of a BlockReopenQueue: the reopen state for a single
 * BlockDriverState plus bookkeeping used while the queue is processed.
 */
typedef struct BlockReopenQueueEntry {
     /* set once bdrv_reopen_prepare() has succeeded for this entry */
     bool prepared;
     /* per-BDS reopen state (target bs and requested flags) */
     BDRVReopenState state;
     /* linkage within the owning BlockReopenQueue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1440
1441/*
1442 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1443 * reopen of multiple devices.
1444 *
1445 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1446 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1447 * be created and initialized. This newly created BlockReopenQueue should be
1448 * passed back in for subsequent calls that are intended to be of the same
1449 * atomic 'set'.
1450 *
1451 * bs is the BlockDriverState to add to the reopen queue.
1452 *
1453 * flags contains the open flags for the associated bs
1454 *
1455 * returns a pointer to bs_queue, which is either the newly allocated
1456 * bs_queue, or the existing bs_queue being used.
1457 *
1458 */
1459BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1460 BlockDriverState *bs, int flags)
1461{
1462 assert(bs != NULL);
1463
1464 BlockReopenQueueEntry *bs_entry;
1465 if (bs_queue == NULL) {
1466 bs_queue = g_new0(BlockReopenQueue, 1);
1467 QSIMPLEQ_INIT(bs_queue);
1468 }
1469
1470 if (bs->file) {
1471 bdrv_reopen_queue(bs_queue, bs->file, flags);
1472 }
1473
1474 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1475 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1476
1477 bs_entry->state.bs = bs;
1478 bs_entry->state.flags = flags;
1479
1480 return bs_queue;
1481}
1482
1483/*
1484 * Reopen multiple BlockDriverStates atomically & transactionally.
1485 *
1486 * The queue passed in (bs_queue) must have been built up previous
1487 * via bdrv_reopen_queue().
1488 *
1489 * Reopens all BDS specified in the queue, with the appropriate
1490 * flags. All devices are prepared for reopen, and failure of any
1491 * device will cause all device changes to be abandonded, and intermediate
1492 * data cleaned up.
1493 *
1494 * If all devices prepare successfully, then the changes are committed
1495 * to all devices.
1496 *
1497 */
1498int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1499{
1500 int ret = -1;
1501 BlockReopenQueueEntry *bs_entry, *next;
1502 Error *local_err = NULL;
1503
1504 assert(bs_queue != NULL);
1505
1506 bdrv_drain_all();
1507
1508 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1509 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1510 error_propagate(errp, local_err);
1511 goto cleanup;
1512 }
1513 bs_entry->prepared = true;
1514 }
1515
1516 /* If we reach this point, we have success and just need to apply the
1517 * changes
1518 */
1519 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1520 bdrv_reopen_commit(&bs_entry->state);
1521 }
1522
1523 ret = 0;
1524
1525cleanup:
1526 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1527 if (ret && bs_entry->prepared) {
1528 bdrv_reopen_abort(&bs_entry->state);
1529 }
1530 g_free(bs_entry);
1531 }
1532 g_free(bs_queue);
1533 return ret;
1534}
1535
1536
1537/* Reopen a single BlockDriverState with the specified flags. */
1538int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1539{
1540 int ret = -1;
1541 Error *local_err = NULL;
1542 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1543
1544 ret = bdrv_reopen_multiple(queue, &local_err);
1545 if (local_err != NULL) {
1546 error_propagate(errp, local_err);
1547 }
1548 return ret;
1549}
1550
1551
1552/*
1553 * Prepares a BlockDriverState for reopen. All changes are staged in the
1554 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1555 * the block driver layer .bdrv_reopen_prepare()
1556 *
1557 * bs is the BlockDriverState to reopen
1558 * flags are the new open flags
1559 * queue is the reopen queue
1560 *
1561 * Returns 0 on success, non-zero on error. On error errp will be set
1562 * as well.
1563 *
1564 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1565 * It is the responsibility of the caller to then call the abort() or
1566 * commit() for any other BDS that have been left in a prepare() state
1567 *
1568 */
1569int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1570 Error **errp)
1571{
1572 int ret = -1;
1573 Error *local_err = NULL;
1574 BlockDriver *drv;
1575
1576 assert(reopen_state != NULL);
1577 assert(reopen_state->bs->drv != NULL);
1578 drv = reopen_state->bs->drv;
1579
1580 /* if we are to stay read-only, do not allow permission change
1581 * to r/w */
1582 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1583 reopen_state->flags & BDRV_O_RDWR) {
1584 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1585 reopen_state->bs->device_name);
1586 goto error;
1587 }
1588
1589
1590 ret = bdrv_flush(reopen_state->bs);
1591 if (ret) {
1592 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1593 strerror(-ret));
1594 goto error;
1595 }
1596
1597 if (drv->bdrv_reopen_prepare) {
1598 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1599 if (ret) {
1600 if (local_err != NULL) {
1601 error_propagate(errp, local_err);
1602 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001603 error_setg(errp, "failed while preparing to reopen image '%s'",
1604 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001605 }
1606 goto error;
1607 }
1608 } else {
1609 /* It is currently mandatory to have a bdrv_reopen_prepare()
1610 * handler for each supported drv. */
1611 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1612 drv->format_name, reopen_state->bs->device_name,
1613 "reopening of file");
1614 ret = -1;
1615 goto error;
1616 }
1617
1618 ret = 0;
1619
1620error:
1621 return ret;
1622}
1623
1624/*
1625 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1626 * makes them final by swapping the staging BlockDriverState contents into
1627 * the active BlockDriverState contents.
1628 */
1629void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1630{
1631 BlockDriver *drv;
1632
1633 assert(reopen_state != NULL);
1634 drv = reopen_state->bs->drv;
1635 assert(drv != NULL);
1636
1637 /* If there are any driver level actions to take */
1638 if (drv->bdrv_reopen_commit) {
1639 drv->bdrv_reopen_commit(reopen_state);
1640 }
1641
1642 /* set BDS specific flags now */
1643 reopen_state->bs->open_flags = reopen_state->flags;
1644 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1645 BDRV_O_CACHE_WB);
1646 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001647
1648 bdrv_refresh_limits(reopen_state->bs);
Jeff Codye971aa12012-09-20 15:13:19 -04001649}
1650
1651/*
1652 * Abort the reopen, and delete and free the staged changes in
1653 * reopen_state
1654 */
1655void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1656{
1657 BlockDriver *drv;
1658
1659 assert(reopen_state != NULL);
1660 drv = reopen_state->bs->drv;
1661 assert(drv != NULL);
1662
1663 if (drv->bdrv_reopen_abort) {
1664 drv->bdrv_reopen_abort(reopen_state);
1665 }
1666}
1667
1668
/*
 * Close bs: cancel its block job (if any), drain and flush outstanding I/O,
 * notify the close notifiers, and, when a driver is attached, release the
 * backing file, the driver state and bs->file and reset the per-image
 * fields. Finally the attached device is told the medium is gone and I/O
 * throttling is disabled if it was enabled.
 *
 * The BlockDriverState structure itself is not freed here.
 */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* drop our reference to the backing file before closing the driver */
        if (bs->backing_hd) {
            bdrv_unref(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* on Windows the temporary file is only removed at close time */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        /* reset everything that described the image that was just closed */
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* report "medium removed" (load == false) to the attached device */
    bdrv_dev_change_media_cb(bs, false);

    /* turn off disk I/O throttling if it was enabled */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
1718
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001719void bdrv_close_all(void)
1720{
1721 BlockDriverState *bs;
1722
Benoît Canetdc364f42014-01-23 21:31:32 +01001723 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001724 bdrv_close(bs);
1725 }
1726}
1727
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001728/* Check if any requests are in-flight (including throttled requests) */
1729static bool bdrv_requests_pending(BlockDriverState *bs)
1730{
1731 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1732 return true;
1733 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001734 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1735 return true;
1736 }
1737 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001738 return true;
1739 }
1740 if (bs->file && bdrv_requests_pending(bs->file)) {
1741 return true;
1742 }
1743 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1744 return true;
1745 }
1746 return false;
1747}
1748
1749static bool bdrv_requests_pending_all(void)
1750{
1751 BlockDriverState *bs;
Benoît Canetdc364f42014-01-23 21:31:32 +01001752 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001753 if (bdrv_requests_pending(bs)) {
1754 return true;
1755 }
1756 }
1757 return false;
1758}
1759
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001760/*
1761 * Wait for pending requests to complete across all BlockDriverStates
1762 *
1763 * This function does not flush data to disk, use bdrv_flush_all() for that
1764 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001765 *
1766 * Note that completion of an asynchronous I/O operation can trigger any
1767 * number of other I/O operations on other devices---for example a coroutine
1768 * can be arbitrarily complex and a constant flow of I/O can come until the
1769 * coroutine is complete. Because of this, it is not possible to have a
1770 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001771 */
1772void bdrv_drain_all(void)
1773{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001774 /* Always run first iteration so any pending completion BHs run */
1775 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001776 BlockDriverState *bs;
1777
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001778 while (busy) {
Benoît Canetdc364f42014-01-23 21:31:32 +01001779 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi0b06ef32013-11-26 16:18:00 +01001780 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001781 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001782
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001783 busy = bdrv_requests_pending_all();
1784 busy |= aio_poll(qemu_get_aio_context(), busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001785 }
1786}
1787
Benoît Canetdc364f42014-01-23 21:31:32 +01001788/* make a BlockDriverState anonymous by removing from bdrv_state and
1789 * graph_bdrv_state list.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001790 Also, NULL terminate the device_name to prevent double remove */
1791void bdrv_make_anon(BlockDriverState *bs)
1792{
1793 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001794 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001795 }
1796 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001797 if (bs->node_name[0] != '\0') {
1798 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1799 }
1800 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001801}
1802
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001803static void bdrv_rebind(BlockDriverState *bs)
1804{
1805 if (bs->drv && bs->drv->bdrv_rebind) {
1806 bs->drv->bdrv_rebind(bs);
1807 }
1808}
1809
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001810static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1811 BlockDriverState *bs_src)
1812{
1813 /* move some fields that need to stay attached to the device */
1814 bs_dest->open_flags = bs_src->open_flags;
1815
1816 /* dev info */
1817 bs_dest->dev_ops = bs_src->dev_ops;
1818 bs_dest->dev_opaque = bs_src->dev_opaque;
1819 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001820 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001821 bs_dest->copy_on_read = bs_src->copy_on_read;
1822
1823 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1824
Benoît Canetcc0681c2013-09-02 14:14:39 +02001825 /* i/o throttled req */
1826 memcpy(&bs_dest->throttle_state,
1827 &bs_src->throttle_state,
1828 sizeof(ThrottleState));
1829 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1830 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001831 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1832
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001833 /* r/w error */
1834 bs_dest->on_read_error = bs_src->on_read_error;
1835 bs_dest->on_write_error = bs_src->on_write_error;
1836
1837 /* i/o status */
1838 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1839 bs_dest->iostatus = bs_src->iostatus;
1840
1841 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08001842 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001843
Fam Zheng9fcb0252013-08-23 09:14:46 +08001844 /* reference count */
1845 bs_dest->refcnt = bs_src->refcnt;
1846
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001847 /* job */
1848 bs_dest->in_use = bs_src->in_use;
1849 bs_dest->job = bs_src->job;
1850
1851 /* keep the same entry in bdrv_states */
1852 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1853 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01001854 bs_dest->device_list = bs_src->device_list;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001855}
1856
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous (empty device_name) and must not have
 * dirty bitmaps, a job, an attached device, in_use set, I/O limits enabled
 * or a throttle timer; all of this is asserted.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* whole-structure swap through a temporary copy */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back;
     * this is a three-way rotation of just the device-bound fields, using
     * tmp as scratch space */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap! */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* let each driver react to its state now living in the other BDS */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
1923
Jeff Cody8802d1f2012-02-28 15:54:06 -05001924/*
1925 * Add new bs contents at the top of an image chain while the chain is
1926 * live, while keeping required fields on the top layer.
1927 *
1928 * This will modify the BlockDriverState fields, and swap contents
1929 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1930 *
Jeff Codyf6801b82012-03-27 16:30:19 -04001931 * bs_new is required to be anonymous.
1932 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05001933 * This function does not create any image files.
1934 */
1935void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1936{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001937 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001938
1939 /* The contents of 'tmp' will become bs_top, as we are
1940 * swapping bs_new and bs_top contents. */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001941 bs_top->backing_hd = bs_new;
1942 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1943 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1944 bs_new->filename);
1945 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1946 bs_new->drv ? bs_new->drv->format_name : "");
Jeff Cody8802d1f2012-02-28 15:54:06 -05001947}
1948
Fam Zheng4f6fd342013-08-23 09:14:47 +08001949static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00001950{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001951 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001952 assert(!bs->job);
1953 assert(!bs->in_use);
Fam Zheng4f6fd342013-08-23 09:14:47 +08001954 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08001955 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02001956
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02001957 bdrv_close(bs);
1958
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001959 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001960 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001961
Anthony Liguori7267c092011-08-20 22:09:37 -05001962 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001963}
1964
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001965int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1966/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001967{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001968 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001969 return -EBUSY;
1970 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001971 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001972 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001973 return 0;
1974}
1975
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001976/* TODO qdevified devices don't use this, remove when devices are qdevified */
1977void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001978{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001979 if (bdrv_attach_dev(bs, dev) < 0) {
1980 abort();
1981 }
1982}
1983
1984void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1985/* TODO change to DeviceState *dev when all users are qdevified */
1986{
1987 assert(bs->dev == dev);
1988 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001989 bs->dev_ops = NULL;
1990 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001991 bs->guest_block_size = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001992}
1993
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001994/* TODO change to return DeviceState * when all users are qdevified */
1995void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001996{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001997 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001998}
1999
Markus Armbruster0e49de52011-08-03 15:07:41 +02002000void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2001 void *opaque)
2002{
2003 bs->dev_ops = ops;
2004 bs->dev_opaque = opaque;
2005}
2006
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002007void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
2008 enum MonitorEvent ev,
2009 BlockErrorAction action, bool is_read)
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002010{
2011 QObject *data;
2012 const char *action_str;
2013
2014 switch (action) {
2015 case BDRV_ACTION_REPORT:
2016 action_str = "report";
2017 break;
2018 case BDRV_ACTION_IGNORE:
2019 action_str = "ignore";
2020 break;
2021 case BDRV_ACTION_STOP:
2022 action_str = "stop";
2023 break;
2024 default:
2025 abort();
2026 }
2027
2028 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2029 bdrv->device_name,
2030 action_str,
2031 is_read ? "read" : "write");
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002032 monitor_protocol_event(ev, data);
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002033
2034 qobject_decref(data);
2035}
2036
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002037static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2038{
2039 QObject *data;
2040
2041 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2042 bdrv_get_device_name(bs), ejected);
2043 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2044
2045 qobject_decref(data);
2046}
2047
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002048static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002049{
Markus Armbruster145feb12011-08-03 15:07:42 +02002050 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002051 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002052 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002053 if (tray_was_closed) {
2054 /* tray open */
2055 bdrv_emit_qmp_eject_event(bs, true);
2056 }
2057 if (load) {
2058 /* tray close */
2059 bdrv_emit_qmp_eject_event(bs, false);
2060 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002061 }
2062}
2063
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002064bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2065{
2066 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2067}
2068
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002069void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2070{
2071 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2072 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2073 }
2074}
2075
Markus Armbrustere4def802011-09-06 18:58:53 +02002076bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2077{
2078 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2079 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2080 }
2081 return false;
2082}
2083
Markus Armbruster145feb12011-08-03 15:07:42 +02002084static void bdrv_dev_resize_cb(BlockDriverState *bs)
2085{
2086 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2087 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002088 }
2089}
2090
Markus Armbrusterf1076392011-09-06 18:58:46 +02002091bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2092{
2093 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2094 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2095 }
2096 return false;
2097}
2098
aliguorie97fc192009-04-21 23:11:50 +00002099/*
2100 * Run consistency checks on an image
2101 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002102 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002103 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002104 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002105 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002106int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002107{
2108 if (bs->drv->bdrv_check == NULL) {
2109 return -ENOTSUP;
2110 }
2111
Kevin Wolfe076f332010-06-29 11:43:13 +02002112 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002113 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002114}
2115
Kevin Wolf8a426612010-07-16 17:17:01 +02002116#define COMMIT_BUF_SECTORS 2048
2117
bellard33e39632003-07-06 17:15:21 +00002118/* commit COW file into the raw image */
2119int bdrv_commit(BlockDriverState *bs)
2120{
bellard19cb3732006-08-19 11:45:59 +00002121 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002122 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002123 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002124 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002125 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002126 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002127
bellard19cb3732006-08-19 11:45:59 +00002128 if (!drv)
2129 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002130
2131 if (!bs->backing_hd) {
2132 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002133 }
2134
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002135 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2136 return -EBUSY;
2137 }
2138
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002139 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002140 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2141 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002142 open_flags = bs->backing_hd->open_flags;
2143
2144 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002145 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2146 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002147 }
bellard33e39632003-07-06 17:15:21 +00002148 }
bellardea2384d2004-08-01 21:59:26 +00002149
Jeff Cody72706ea2014-01-24 09:02:35 -05002150 length = bdrv_getlength(bs);
2151 if (length < 0) {
2152 ret = length;
2153 goto ro_cleanup;
2154 }
2155
2156 backing_length = bdrv_getlength(bs->backing_hd);
2157 if (backing_length < 0) {
2158 ret = backing_length;
2159 goto ro_cleanup;
2160 }
2161
2162 /* If our top snapshot is larger than the backing file image,
2163 * grow the backing file image if possible. If not possible,
2164 * we must return an error */
2165 if (length > backing_length) {
2166 ret = bdrv_truncate(bs->backing_hd, length);
2167 if (ret < 0) {
2168 goto ro_cleanup;
2169 }
2170 }
2171
2172 total_sectors = length >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05002173 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00002174
Kevin Wolf8a426612010-07-16 17:17:01 +02002175 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002176 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2177 if (ret < 0) {
2178 goto ro_cleanup;
2179 }
2180 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002181 ret = bdrv_read(bs, sector, buf, n);
2182 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002183 goto ro_cleanup;
2184 }
2185
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002186 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2187 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002188 goto ro_cleanup;
2189 }
bellardea2384d2004-08-01 21:59:26 +00002190 }
2191 }
bellard95389c82005-12-18 18:28:15 +00002192
Christoph Hellwig1d449522010-01-17 12:32:30 +01002193 if (drv->bdrv_make_empty) {
2194 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002195 if (ret < 0) {
2196 goto ro_cleanup;
2197 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002198 bdrv_flush(bs);
2199 }
bellard95389c82005-12-18 18:28:15 +00002200
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002201 /*
2202 * Make sure all data we wrote to the backing device is actually
2203 * stable on disk.
2204 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002205 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002206 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002207 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002208
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002209 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002210ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05002211 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002212
2213 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002214 /* ignoring error return here */
2215 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002216 }
2217
Christoph Hellwig1d449522010-01-17 12:32:30 +01002218 return ret;
bellard33e39632003-07-06 17:15:21 +00002219}
2220
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002221int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002222{
2223 BlockDriverState *bs;
2224
Benoît Canetdc364f42014-01-23 21:31:32 +01002225 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Jeff Cody272d2d82013-02-26 09:55:48 -05002226 if (bs->drv && bs->backing_hd) {
2227 int ret = bdrv_commit(bs);
2228 if (ret < 0) {
2229 return ret;
2230 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002231 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002232 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002233 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002234}
2235
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002236/**
2237 * Remove an active request from the tracked requests list
2238 *
2239 * This function should be called when a tracked request is completing.
2240 */
2241static void tracked_request_end(BdrvTrackedRequest *req)
2242{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002243 if (req->serialising) {
2244 req->bs->serialising_in_flight--;
2245 }
2246
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002247 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002248 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002249}
2250
2251/**
2252 * Add an active request to the tracked requests list
2253 */
2254static void tracked_request_begin(BdrvTrackedRequest *req,
2255 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002256 int64_t offset,
2257 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002258{
2259 *req = (BdrvTrackedRequest){
2260 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002261 .offset = offset,
2262 .bytes = bytes,
2263 .is_write = is_write,
2264 .co = qemu_coroutine_self(),
2265 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002266 .overlap_offset = offset,
2267 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002268 };
2269
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002270 qemu_co_queue_init(&req->wait_queue);
2271
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002272 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2273}
2274
Kevin Wolfe96126f2014-02-08 10:42:18 +01002275static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002276{
Kevin Wolf73271452013-12-04 17:08:50 +01002277 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002278 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2279 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002280
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002281 if (!req->serialising) {
2282 req->bs->serialising_in_flight++;
2283 req->serialising = true;
2284 }
Kevin Wolf73271452013-12-04 17:08:50 +01002285
2286 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2287 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002288}
2289
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002290/**
2291 * Round a region to cluster boundaries
2292 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002293void bdrv_round_to_clusters(BlockDriverState *bs,
2294 int64_t sector_num, int nb_sectors,
2295 int64_t *cluster_sector_num,
2296 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002297{
2298 BlockDriverInfo bdi;
2299
2300 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2301 *cluster_sector_num = sector_num;
2302 *cluster_nb_sectors = nb_sectors;
2303 } else {
2304 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2305 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2306 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2307 nb_sectors, c);
2308 }
2309}
2310
Kevin Wolf73271452013-12-04 17:08:50 +01002311static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002312{
2313 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002314 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002315
Kevin Wolf73271452013-12-04 17:08:50 +01002316 ret = bdrv_get_info(bs, &bdi);
2317 if (ret < 0 || bdi.cluster_size == 0) {
2318 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002319 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002320 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002321 }
2322}
2323
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002324static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002325 int64_t offset, unsigned int bytes)
2326{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002327 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002328 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002329 return false;
2330 }
2331 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002332 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002333 return false;
2334 }
2335 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002336}
2337
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002338static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002339{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002340 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002341 BdrvTrackedRequest *req;
2342 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002343 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002344
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002345 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002346 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002347 }
2348
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002349 do {
2350 retry = false;
2351 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002352 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002353 continue;
2354 }
Kevin Wolf73271452013-12-04 17:08:50 +01002355 if (tracked_request_overlaps(req, self->overlap_offset,
2356 self->overlap_bytes))
2357 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002358 /* Hitting this means there was a reentrant request, for
2359 * example, a block driver issuing nested requests. This must
2360 * never happen since it means deadlock.
2361 */
2362 assert(qemu_coroutine_self() != req->co);
2363
Kevin Wolf64604402013-12-13 13:04:35 +01002364 /* If the request is already (indirectly) waiting for us, or
2365 * will wait for us as soon as it wakes up, then just go on
2366 * (instead of producing a deadlock in the former case). */
2367 if (!req->waiting_for) {
2368 self->waiting_for = req;
2369 qemu_co_queue_wait(&req->wait_queue);
2370 self->waiting_for = NULL;
2371 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002372 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002373 break;
2374 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002375 }
2376 }
2377 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002378
2379 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002380}
2381
Kevin Wolf756e6732010-01-12 12:55:17 +01002382/*
2383 * Return values:
2384 * 0 - success
2385 * -EINVAL - backing format specified, but no file
2386 * -ENOSPC - can't update the backing file because no space is left in the
2387 * image file header
2388 * -ENOTSUP - format driver doesn't support changing the backing file
2389 */
2390int bdrv_change_backing_file(BlockDriverState *bs,
2391 const char *backing_file, const char *backing_fmt)
2392{
2393 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002394 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002395
Paolo Bonzini5f377792012-04-12 14:01:01 +02002396 /* Backing file format doesn't make sense without a backing file */
2397 if (backing_fmt && !backing_file) {
2398 return -EINVAL;
2399 }
2400
Kevin Wolf756e6732010-01-12 12:55:17 +01002401 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002402 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002403 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002404 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002405 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002406
2407 if (ret == 0) {
2408 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2409 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2410 }
2411 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002412}
2413
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002414/*
2415 * Finds the image layer in the chain that has 'bs' as its backing file.
2416 *
2417 * active is the current topmost image.
2418 *
2419 * Returns NULL if bs is not found in active's image chain,
2420 * or if active == bs.
2421 */
2422BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2423 BlockDriverState *bs)
2424{
2425 BlockDriverState *overlay = NULL;
2426 BlockDriverState *intermediate;
2427
2428 assert(active != NULL);
2429 assert(bs != NULL);
2430
2431 /* if bs is the same as active, then by definition it has no overlay
2432 */
2433 if (active == bs) {
2434 return NULL;
2435 }
2436
2437 intermediate = active;
2438 while (intermediate->backing_hd) {
2439 if (intermediate->backing_hd == bs) {
2440 overlay = intermediate;
2441 break;
2442 }
2443 intermediate = intermediate->backing_hd;
2444 }
2445
2446 return overlay;
2447}
2448
2449typedef struct BlkIntermediateStates {
2450 BlockDriverState *bs;
2451 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2452} BlkIntermediateStates;
2453
2454
2455/*
2456 * Drops images above 'base' up to and including 'top', and sets the image
2457 * above 'top' to have base as its backing file.
2458 *
2459 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2460 * information in 'bs' can be properly updated.
2461 *
2462 * E.g., this will convert the following chain:
2463 * bottom <- base <- intermediate <- top <- active
2464 *
2465 * to
2466 *
2467 * bottom <- base <- active
2468 *
2469 * It is allowed for bottom==base, in which case it converts:
2470 *
2471 * base <- intermediate <- top <- active
2472 *
2473 * to
2474 *
2475 * base <- active
2476 *
2477 * Error conditions:
2478 * if active == top, that is considered an error
2479 *
2480 */
2481int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2482 BlockDriverState *base)
2483{
2484 BlockDriverState *intermediate;
2485 BlockDriverState *base_bs = NULL;
2486 BlockDriverState *new_top_bs = NULL;
2487 BlkIntermediateStates *intermediate_state, *next;
2488 int ret = -EIO;
2489
2490 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2491 QSIMPLEQ_INIT(&states_to_delete);
2492
2493 if (!top->drv || !base->drv) {
2494 goto exit;
2495 }
2496
2497 new_top_bs = bdrv_find_overlay(active, top);
2498
2499 if (new_top_bs == NULL) {
2500 /* we could not find the image above 'top', this is an error */
2501 goto exit;
2502 }
2503
2504 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2505 * to do, no intermediate images */
2506 if (new_top_bs->backing_hd == base) {
2507 ret = 0;
2508 goto exit;
2509 }
2510
2511 intermediate = top;
2512
2513 /* now we will go down through the list, and add each BDS we find
2514 * into our deletion queue, until we hit the 'base'
2515 */
2516 while (intermediate) {
2517 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2518 intermediate_state->bs = intermediate;
2519 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2520
2521 if (intermediate->backing_hd == base) {
2522 base_bs = intermediate->backing_hd;
2523 break;
2524 }
2525 intermediate = intermediate->backing_hd;
2526 }
2527 if (base_bs == NULL) {
2528 /* something went wrong, we did not end at the base. safely
2529 * unravel everything, and exit with error */
2530 goto exit;
2531 }
2532
2533 /* success - we can delete the intermediate states, and link top->base */
2534 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2535 base_bs->drv ? base_bs->drv->format_name : "");
2536 if (ret) {
2537 goto exit;
2538 }
2539 new_top_bs->backing_hd = base_bs;
2540
Kevin Wolf355ef4a2013-12-11 20:14:09 +01002541 bdrv_refresh_limits(new_top_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002542
2543 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2544 /* so that bdrv_close() does not recursively close the chain */
2545 intermediate_state->bs->backing_hd = NULL;
Fam Zheng4f6fd342013-08-23 09:14:47 +08002546 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002547 }
2548 ret = 0;
2549
2550exit:
2551 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2552 g_free(intermediate_state);
2553 }
2554 return ret;
2555}
2556
2557
aliguori71d07702009-03-03 17:37:16 +00002558static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2559 size_t size)
2560{
2561 int64_t len;
2562
2563 if (!bdrv_is_inserted(bs))
2564 return -ENOMEDIUM;
2565
2566 if (bs->growable)
2567 return 0;
2568
2569 len = bdrv_getlength(bs);
2570
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002571 if (offset < 0)
2572 return -EIO;
2573
2574 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002575 return -EIO;
2576
2577 return 0;
2578}
2579
2580static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2581 int nb_sectors)
2582{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002583 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2584 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002585}
2586
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002587typedef struct RwCo {
2588 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002589 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002590 QEMUIOVector *qiov;
2591 bool is_write;
2592 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002593 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002594} RwCo;
2595
2596static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2597{
2598 RwCo *rwco = opaque;
2599
2600 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002601 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2602 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002603 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002604 } else {
2605 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2606 rwco->qiov->size, rwco->qiov,
2607 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002608 }
2609}
2610
2611/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002612 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002613 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002614static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2615 QEMUIOVector *qiov, bool is_write,
2616 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002617{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002618 Coroutine *co;
2619 RwCo rwco = {
2620 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002621 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002622 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002623 .is_write = is_write,
2624 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002625 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002626 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002627
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002628 /**
2629 * In sync call context, when the vcpu is blocked, this throttling timer
2630 * will not fire; so the I/O throttling function has to be disabled here
2631 * if it has been enabled.
2632 */
2633 if (bs->io_limits_enabled) {
2634 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2635 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2636 bdrv_io_limits_disable(bs);
2637 }
2638
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002639 if (qemu_in_coroutine()) {
2640 /* Fast-path if already in coroutine context */
2641 bdrv_rw_co_entry(&rwco);
2642 } else {
2643 co = qemu_coroutine_create(bdrv_rw_co_entry);
2644 qemu_coroutine_enter(co, &rwco);
2645 while (rwco.ret == NOT_DONE) {
2646 qemu_aio_wait();
2647 }
2648 }
2649 return rwco.ret;
2650}
2651
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002652/*
2653 * Process a synchronous request using coroutines
2654 */
2655static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002656 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002657{
2658 QEMUIOVector qiov;
2659 struct iovec iov = {
2660 .iov_base = (void *)buf,
2661 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2662 };
2663
2664 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002665 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2666 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002667}
2668
bellard19cb3732006-08-19 11:45:59 +00002669/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002670int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002671 uint8_t *buf, int nb_sectors)
2672{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002673 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002674}
2675
Markus Armbruster07d27a42012-06-29 17:34:29 +02002676/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2677int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2678 uint8_t *buf, int nb_sectors)
2679{
2680 bool enabled;
2681 int ret;
2682
2683 enabled = bs->io_limits_enabled;
2684 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002685 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002686 bs->io_limits_enabled = enabled;
2687 return ret;
2688}
2689
ths5fafdf22007-09-16 21:08:06 +00002690/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002691 -EIO generic I/O error (may happen for all errors)
2692 -ENOMEDIUM No media inserted.
2693 -EINVAL Invalid sector number or nb_sectors
2694 -EACCES Trying to write a read-only device
2695*/
ths5fafdf22007-09-16 21:08:06 +00002696int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002697 const uint8_t *buf, int nb_sectors)
2698{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002699 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002700}
2701
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002702int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2703 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002704{
2705 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002706 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002707}
2708
Peter Lievend75cbb52013-10-24 12:07:03 +02002709/*
2710 * Completely zero out a block device with the help of bdrv_write_zeroes.
2711 * The operation is sped up by checking the block status and only writing
2712 * zeroes to the device if they currently do not return zeroes. Optional
2713 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2714 *
2715 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2716 */
2717int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2718{
2719 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2720 int64_t ret, nb_sectors, sector_num = 0;
2721 int n;
2722
2723 for (;;) {
2724 nb_sectors = target_size - sector_num;
2725 if (nb_sectors <= 0) {
2726 return 0;
2727 }
2728 if (nb_sectors > INT_MAX) {
2729 nb_sectors = INT_MAX;
2730 }
2731 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002732 if (ret < 0) {
2733 error_report("error getting block status at sector %" PRId64 ": %s",
2734 sector_num, strerror(-ret));
2735 return ret;
2736 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002737 if (ret & BDRV_BLOCK_ZERO) {
2738 sector_num += n;
2739 continue;
2740 }
2741 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2742 if (ret < 0) {
2743 error_report("error writing zeroes at sector %" PRId64 ": %s",
2744 sector_num, strerror(-ret));
2745 return ret;
2746 }
2747 sector_num += n;
2748 }
2749}
2750
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002751int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002752{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002753 QEMUIOVector qiov;
2754 struct iovec iov = {
2755 .iov_base = (void *)buf,
2756 .iov_len = bytes,
2757 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002758 int ret;
bellard83f64092006-08-01 16:21:11 +00002759
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002760 if (bytes < 0) {
2761 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002762 }
2763
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002764 qemu_iovec_init_external(&qiov, &iov, 1);
2765 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2766 if (ret < 0) {
2767 return ret;
bellard83f64092006-08-01 16:21:11 +00002768 }
2769
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002770 return bytes;
bellard83f64092006-08-01 16:21:11 +00002771}
2772
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002773int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002774{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002775 int ret;
bellard83f64092006-08-01 16:21:11 +00002776
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002777 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2778 if (ret < 0) {
2779 return ret;
bellard83f64092006-08-01 16:21:11 +00002780 }
2781
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002782 return qiov->size;
2783}
2784
2785int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002786 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002787{
2788 QEMUIOVector qiov;
2789 struct iovec iov = {
2790 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002791 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002792 };
2793
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002794 if (bytes < 0) {
2795 return -EINVAL;
2796 }
2797
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002798 qemu_iovec_init_external(&qiov, &iov, 1);
2799 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002800}
bellard83f64092006-08-01 16:21:11 +00002801
Kevin Wolff08145f2010-06-16 16:38:15 +02002802/*
2803 * Writes to the file and ensures that no writes are reordered across this
2804 * request (acts as a barrier)
2805 *
2806 * Returns 0 on success, -errno in error cases.
2807 */
2808int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2809 const void *buf, int count)
2810{
2811 int ret;
2812
2813 ret = bdrv_pwrite(bs, offset, buf, count);
2814 if (ret < 0) {
2815 return ret;
2816 }
2817
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002818 /* No flush needed for cache modes that already do it */
2819 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002820 bdrv_flush(bs);
2821 }
2822
2823 return 0;
2824}
2825
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002826static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002827 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2828{
2829 /* Perform I/O through a temporary buffer so that users who scribble over
2830 * their read buffer while the operation is in progress do not end up
2831 * modifying the image file. This is critical for zero-copy guest I/O
2832 * where anything might happen inside guest memory.
2833 */
2834 void *bounce_buffer;
2835
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002836 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002837 struct iovec iov;
2838 QEMUIOVector bounce_qiov;
2839 int64_t cluster_sector_num;
2840 int cluster_nb_sectors;
2841 size_t skip_bytes;
2842 int ret;
2843
2844 /* Cover entire cluster so no additional backing file I/O is required when
2845 * allocating cluster in the image file.
2846 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002847 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2848 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002849
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002850 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2851 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002852
2853 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2854 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2855 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2856
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002857 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2858 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002859 if (ret < 0) {
2860 goto err;
2861 }
2862
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002863 if (drv->bdrv_co_write_zeroes &&
2864 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002865 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002866 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002867 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002868 /* This does not change the data on the disk, it is not necessary
2869 * to flush even in cache=writethrough mode.
2870 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002871 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002872 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002873 }
2874
Stefan Hajnocziab185922011-11-17 13:40:31 +00002875 if (ret < 0) {
2876 /* It might be okay to ignore write errors for guest requests. If this
2877 * is a deliberate copy-on-read then we don't want to ignore the error.
2878 * Simply report it in all cases.
2879 */
2880 goto err;
2881 }
2882
2883 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002884 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2885 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002886
2887err:
2888 qemu_vfree(bounce_buffer);
2889 return ret;
2890}
2891
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002892/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002893 * Forwards an already correctly aligned request to the BlockDriver. This
2894 * handles copy on read and zeroing after EOF; any other features must be
2895 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002896 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002897static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01002898 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01002899 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002900{
2901 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002902 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002903
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002904 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2905 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002906
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002907 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2908 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2909
2910 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002911 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01002912 /* If we touch the same cluster it counts as an overlap. This
2913 * guarantees that allocating writes will be serialized and not race
2914 * with each other for the same cluster. For example, in copy-on-read
2915 * it ensures that the CoR read and write operations are atomic and
2916 * guest writes cannot interleave between them. */
2917 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002918 }
2919
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002920 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002921
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002922 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002923 int pnum;
2924
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02002925 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002926 if (ret < 0) {
2927 goto out;
2928 }
2929
2930 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002931 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002932 goto out;
2933 }
2934 }
2935
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002936 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002937 if (!(bs->zero_beyond_eof && bs->growable)) {
2938 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2939 } else {
2940 /* Read zeros after EOF of growable BDSes */
2941 int64_t len, total_sectors, max_nb_sectors;
2942
2943 len = bdrv_getlength(bs);
2944 if (len < 0) {
2945 ret = len;
2946 goto out;
2947 }
2948
Fam Zhengd055a1f2013-09-26 19:55:33 +08002949 total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01002950 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2951 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002952 if (max_nb_sectors > 0) {
2953 ret = drv->bdrv_co_readv(bs, sector_num,
2954 MIN(nb_sectors, max_nb_sectors), qiov);
2955 } else {
2956 ret = 0;
2957 }
2958
2959 /* Reading beyond end of file is supposed to produce zeroes */
2960 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
2961 uint64_t offset = MAX(0, total_sectors - sector_num);
2962 uint64_t bytes = (sector_num + nb_sectors - offset) *
2963 BDRV_SECTOR_SIZE;
2964 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2965 }
2966 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00002967
2968out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002969 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002970}
2971
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002972/*
2973 * Handle a read request in coroutine context
2974 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002975static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
2976 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002977 BdrvRequestFlags flags)
2978{
2979 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01002980 BdrvTrackedRequest req;
2981
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002982 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
2983 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
2984 uint8_t *head_buf = NULL;
2985 uint8_t *tail_buf = NULL;
2986 QEMUIOVector local_qiov;
2987 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002988 int ret;
2989
2990 if (!drv) {
2991 return -ENOMEDIUM;
2992 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002993 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002994 return -EIO;
2995 }
2996
2997 if (bs->copy_on_read) {
2998 flags |= BDRV_REQ_COPY_ON_READ;
2999 }
3000
3001 /* throttling disk I/O */
3002 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003003 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003004 }
3005
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003006 /* Align read if necessary by padding qiov */
3007 if (offset & (align - 1)) {
3008 head_buf = qemu_blockalign(bs, align);
3009 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3010 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3011 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3012 use_local_qiov = true;
3013
3014 bytes += offset & (align - 1);
3015 offset = offset & ~(align - 1);
3016 }
3017
3018 if ((offset + bytes) & (align - 1)) {
3019 if (!use_local_qiov) {
3020 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3021 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3022 use_local_qiov = true;
3023 }
3024 tail_buf = qemu_blockalign(bs, align);
3025 qemu_iovec_add(&local_qiov, tail_buf,
3026 align - ((offset + bytes) & (align - 1)));
3027
3028 bytes = ROUND_UP(bytes, align);
3029 }
3030
Kevin Wolf65afd212013-12-03 14:55:55 +01003031 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003032 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003033 use_local_qiov ? &local_qiov : qiov,
3034 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003035 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003036
3037 if (use_local_qiov) {
3038 qemu_iovec_destroy(&local_qiov);
3039 qemu_vfree(head_buf);
3040 qemu_vfree(tail_buf);
3041 }
3042
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003043 return ret;
3044}
3045
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003046static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3047 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3048 BdrvRequestFlags flags)
3049{
3050 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3051 return -EINVAL;
3052 }
3053
3054 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3055 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3056}
3057
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003058int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003059 int nb_sectors, QEMUIOVector *qiov)
3060{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003061 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003062
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003063 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3064}
3065
3066int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3067 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3068{
3069 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3070
3071 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3072 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003073}
3074
Peter Lievenc31cb702013-10-24 12:06:58 +02003075/* if no limit is specified in the BlockLimits use a default
3076 * of 32768 512-byte sectors (16 MiB) per request.
3077 */
3078#define MAX_WRITE_ZEROES_DEFAULT 32768
3079
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003080static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003081 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003082{
3083 BlockDriver *drv = bs->drv;
3084 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003085 struct iovec iov = {0};
3086 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003087
Peter Lievenc31cb702013-10-24 12:06:58 +02003088 int max_write_zeroes = bs->bl.max_write_zeroes ?
3089 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003090
Peter Lievenc31cb702013-10-24 12:06:58 +02003091 while (nb_sectors > 0 && !ret) {
3092 int num = nb_sectors;
3093
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003094 /* Align request. Block drivers can expect the "bulk" of the request
3095 * to be aligned.
3096 */
3097 if (bs->bl.write_zeroes_alignment
3098 && num > bs->bl.write_zeroes_alignment) {
3099 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3100 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003101 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003102 num -= sector_num % bs->bl.write_zeroes_alignment;
3103 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3104 /* Shorten the request to the last aligned sector. num cannot
3105 * underflow because num > bs->bl.write_zeroes_alignment.
3106 */
3107 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003108 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003109 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003110
3111 /* limit request size */
3112 if (num > max_write_zeroes) {
3113 num = max_write_zeroes;
3114 }
3115
3116 ret = -ENOTSUP;
3117 /* First try the efficient write zeroes operation */
3118 if (drv->bdrv_co_write_zeroes) {
3119 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3120 }
3121
3122 if (ret == -ENOTSUP) {
3123 /* Fall back to bounce buffer if write zeroes is unsupported */
3124 iov.iov_len = num * BDRV_SECTOR_SIZE;
3125 if (iov.iov_base == NULL) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003126 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3127 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003128 }
3129 qemu_iovec_init_external(&qiov, &iov, 1);
3130
3131 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003132
3133 /* Keep bounce buffer around if it is big enough for all
3134 * all future requests.
3135 */
3136 if (num < max_write_zeroes) {
3137 qemu_vfree(iov.iov_base);
3138 iov.iov_base = NULL;
3139 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003140 }
3141
3142 sector_num += num;
3143 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003144 }
3145
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003146 qemu_vfree(iov.iov_base);
3147 return ret;
3148}
3149
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003150/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003151 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003152 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003153static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003154 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3155 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003156{
3157 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003158 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003159 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003160
Kevin Wolfb404f722013-12-03 14:02:23 +01003161 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3162 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003163
Kevin Wolfb404f722013-12-03 14:02:23 +01003164 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3165 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003166
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003167 waited = wait_serialising_requests(req);
3168 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003169 assert(req->overlap_offset <= offset);
3170 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003171
Kevin Wolf65afd212013-12-03 14:55:55 +01003172 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003173
3174 if (ret < 0) {
3175 /* Do nothing, write notifier decided to fail this request */
3176 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003177 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003178 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003179 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003180 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003181 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3182 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003183 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003184
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003185 if (ret == 0 && !bs->enable_write_cache) {
3186 ret = bdrv_co_flush(bs);
3187 }
3188
Fam Zhenge4654d22013-11-13 18:29:43 +08003189 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003190
3191 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3192 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3193 }
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003194 if (bs->growable && ret >= 0) {
3195 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3196 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003197
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003198 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003199}
3200
Kevin Wolfb404f722013-12-03 14:02:23 +01003201/*
3202 * Handle a write request in coroutine context
3203 */
Kevin Wolf66015532013-12-03 14:40:18 +01003204static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3205 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003206 BdrvRequestFlags flags)
3207{
Kevin Wolf65afd212013-12-03 14:55:55 +01003208 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003209 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3210 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3211 uint8_t *head_buf = NULL;
3212 uint8_t *tail_buf = NULL;
3213 QEMUIOVector local_qiov;
3214 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003215 int ret;
3216
3217 if (!bs->drv) {
3218 return -ENOMEDIUM;
3219 }
3220 if (bs->read_only) {
3221 return -EACCES;
3222 }
Kevin Wolf66015532013-12-03 14:40:18 +01003223 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003224 return -EIO;
3225 }
3226
Kevin Wolfb404f722013-12-03 14:02:23 +01003227 /* throttling disk I/O */
3228 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003229 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003230 }
3231
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003232 /*
3233 * Align write if necessary by performing a read-modify-write cycle.
3234 * Pad qiov with the read parts and be sure to have a tracked request not
3235 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3236 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003237 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003238
3239 if (offset & (align - 1)) {
3240 QEMUIOVector head_qiov;
3241 struct iovec head_iov;
3242
3243 mark_request_serialising(&req, align);
3244 wait_serialising_requests(&req);
3245
3246 head_buf = qemu_blockalign(bs, align);
3247 head_iov = (struct iovec) {
3248 .iov_base = head_buf,
3249 .iov_len = align,
3250 };
3251 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3252
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003253 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003254 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3255 align, &head_qiov, 0);
3256 if (ret < 0) {
3257 goto fail;
3258 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003259 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003260
3261 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3262 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3263 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3264 use_local_qiov = true;
3265
3266 bytes += offset & (align - 1);
3267 offset = offset & ~(align - 1);
3268 }
3269
3270 if ((offset + bytes) & (align - 1)) {
3271 QEMUIOVector tail_qiov;
3272 struct iovec tail_iov;
3273 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003274 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003275
3276 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003277 waited = wait_serialising_requests(&req);
3278 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003279
3280 tail_buf = qemu_blockalign(bs, align);
3281 tail_iov = (struct iovec) {
3282 .iov_base = tail_buf,
3283 .iov_len = align,
3284 };
3285 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3286
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003287 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003288 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3289 align, &tail_qiov, 0);
3290 if (ret < 0) {
3291 goto fail;
3292 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003293 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003294
3295 if (!use_local_qiov) {
3296 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3297 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3298 use_local_qiov = true;
3299 }
3300
3301 tail_bytes = (offset + bytes) & (align - 1);
3302 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3303
3304 bytes = ROUND_UP(bytes, align);
3305 }
3306
3307 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3308 use_local_qiov ? &local_qiov : qiov,
3309 flags);
3310
3311fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003312 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003313
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003314 if (use_local_qiov) {
3315 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003316 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003317 qemu_vfree(head_buf);
3318 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003319
Kevin Wolfb404f722013-12-03 14:02:23 +01003320 return ret;
3321}
3322
Kevin Wolf66015532013-12-03 14:40:18 +01003323static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3324 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3325 BdrvRequestFlags flags)
3326{
3327 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3328 return -EINVAL;
3329 }
3330
3331 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3332 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3333}
3334
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003335int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3336 int nb_sectors, QEMUIOVector *qiov)
3337{
3338 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3339
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003340 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3341}
3342
3343int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003344 int64_t sector_num, int nb_sectors,
3345 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003346{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003347 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003348
Peter Lievend32f35c2013-10-24 12:06:52 +02003349 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3350 flags &= ~BDRV_REQ_MAY_UNMAP;
3351 }
3352
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003353 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003354 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003355}
3356
bellard83f64092006-08-01 16:21:11 +00003357/**
bellard83f64092006-08-01 16:21:11 +00003358 * Truncate file to 'offset' bytes (needed only for file protocols)
3359 */
3360int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3361{
3362 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003363 int ret;
bellard83f64092006-08-01 16:21:11 +00003364 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003365 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003366 if (!drv->bdrv_truncate)
3367 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003368 if (bs->read_only)
3369 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02003370 if (bdrv_in_use(bs))
3371 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003372 ret = drv->bdrv_truncate(bs, offset);
3373 if (ret == 0) {
3374 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003375 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003376 }
3377 return ret;
bellard83f64092006-08-01 16:21:11 +00003378}
3379
3380/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003381 * Length of a allocated file in bytes. Sparse files are counted by actual
3382 * allocated space. Return < 0 if error or unknown.
3383 */
3384int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3385{
3386 BlockDriver *drv = bs->drv;
3387 if (!drv) {
3388 return -ENOMEDIUM;
3389 }
3390 if (drv->bdrv_get_allocated_file_size) {
3391 return drv->bdrv_get_allocated_file_size(bs);
3392 }
3393 if (bs->file) {
3394 return bdrv_get_allocated_file_size(bs->file);
3395 }
3396 return -ENOTSUP;
3397}
3398
3399/**
bellard83f64092006-08-01 16:21:11 +00003400 * Length of a file in bytes. Return < 0 if error or unknown.
3401 */
3402int64_t bdrv_getlength(BlockDriverState *bs)
3403{
3404 BlockDriver *drv = bs->drv;
3405 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003406 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003407
Kevin Wolfb94a2612013-10-29 12:18:58 +01003408 if (drv->has_variable_length) {
3409 int ret = refresh_total_sectors(bs, bs->total_sectors);
3410 if (ret < 0) {
3411 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003412 }
bellard83f64092006-08-01 16:21:11 +00003413 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003414 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003415}
3416
bellard19cb3732006-08-19 11:45:59 +00003417/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003418void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003419{
bellard19cb3732006-08-19 11:45:59 +00003420 int64_t length;
3421 length = bdrv_getlength(bs);
3422 if (length < 0)
3423 length = 0;
3424 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01003425 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00003426 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00003427}
bellardcf989512004-02-16 21:56:36 +00003428
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003429void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3430 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003431{
3432 bs->on_read_error = on_read_error;
3433 bs->on_write_error = on_write_error;
3434}
3435
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003436BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003437{
3438 return is_read ? bs->on_read_error : bs->on_write_error;
3439}
3440
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003441BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3442{
3443 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3444
3445 switch (on_err) {
3446 case BLOCKDEV_ON_ERROR_ENOSPC:
3447 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3448 case BLOCKDEV_ON_ERROR_STOP:
3449 return BDRV_ACTION_STOP;
3450 case BLOCKDEV_ON_ERROR_REPORT:
3451 return BDRV_ACTION_REPORT;
3452 case BLOCKDEV_ON_ERROR_IGNORE:
3453 return BDRV_ACTION_IGNORE;
3454 default:
3455 abort();
3456 }
3457}
3458
3459/* This is done by device models because, while the block layer knows
3460 * about the error, it does not know whether an operation comes from
3461 * the device or the block layer (from a job, for example).
3462 */
3463void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3464 bool is_read, int error)
3465{
3466 assert(error >= 0);
Paolo Bonzini32c81a42012-09-28 17:22:58 +02003467 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003468 if (action == BDRV_ACTION_STOP) {
3469 vm_stop(RUN_STATE_IO_ERROR);
3470 bdrv_iostatus_set_err(bs, error);
3471 }
3472}
3473
bellardb3380822004-03-14 21:38:54 +00003474int bdrv_is_read_only(BlockDriverState *bs)
3475{
3476 return bs->read_only;
3477}
3478
ths985a03b2007-12-24 16:10:43 +00003479int bdrv_is_sg(BlockDriverState *bs)
3480{
3481 return bs->sg;
3482}
3483
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003484int bdrv_enable_write_cache(BlockDriverState *bs)
3485{
3486 return bs->enable_write_cache;
3487}
3488
Paolo Bonzini425b0142012-06-06 00:04:52 +02003489void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3490{
3491 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003492
3493 /* so a reopen() will preserve wce */
3494 if (wce) {
3495 bs->open_flags |= BDRV_O_CACHE_WB;
3496 } else {
3497 bs->open_flags &= ~BDRV_O_CACHE_WB;
3498 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003499}
3500
bellardea2384d2004-08-01 21:59:26 +00003501int bdrv_is_encrypted(BlockDriverState *bs)
3502{
3503 if (bs->backing_hd && bs->backing_hd->encrypted)
3504 return 1;
3505 return bs->encrypted;
3506}
3507
aliguoric0f4ce72009-03-05 23:01:01 +00003508int bdrv_key_required(BlockDriverState *bs)
3509{
3510 BlockDriverState *backing_hd = bs->backing_hd;
3511
3512 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3513 return 1;
3514 return (bs->encrypted && !bs->valid_key);
3515}
3516
bellardea2384d2004-08-01 21:59:26 +00003517int bdrv_set_key(BlockDriverState *bs, const char *key)
3518{
3519 int ret;
3520 if (bs->backing_hd && bs->backing_hd->encrypted) {
3521 ret = bdrv_set_key(bs->backing_hd, key);
3522 if (ret < 0)
3523 return ret;
3524 if (!bs->encrypted)
3525 return 0;
3526 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003527 if (!bs->encrypted) {
3528 return -EINVAL;
3529 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3530 return -ENOMEDIUM;
3531 }
aliguoric0f4ce72009-03-05 23:01:01 +00003532 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003533 if (ret < 0) {
3534 bs->valid_key = 0;
3535 } else if (!bs->valid_key) {
3536 bs->valid_key = 1;
3537 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003538 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003539 }
aliguoric0f4ce72009-03-05 23:01:01 +00003540 return ret;
bellardea2384d2004-08-01 21:59:26 +00003541}
3542
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003543const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003544{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003545 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003546}
3547
ths5fafdf22007-09-16 21:08:06 +00003548void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003549 void *opaque)
3550{
3551 BlockDriver *drv;
3552
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003553 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00003554 it(opaque, drv->format_name);
3555 }
3556}
3557
Benoît Canetdc364f42014-01-23 21:31:32 +01003558/* This function is to find block backend bs */
bellardb3380822004-03-14 21:38:54 +00003559BlockDriverState *bdrv_find(const char *name)
3560{
3561 BlockDriverState *bs;
3562
Benoît Canetdc364f42014-01-23 21:31:32 +01003563 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003564 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003565 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003566 }
bellardb3380822004-03-14 21:38:54 +00003567 }
3568 return NULL;
3569}
3570
Benoît Canetdc364f42014-01-23 21:31:32 +01003571/* This function is to find a node in the bs graph */
3572BlockDriverState *bdrv_find_node(const char *node_name)
3573{
3574 BlockDriverState *bs;
3575
3576 assert(node_name);
3577
3578 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3579 if (!strcmp(node_name, bs->node_name)) {
3580 return bs;
3581 }
3582 }
3583 return NULL;
3584}
3585
Benoît Canetc13163f2014-01-23 21:31:34 +01003586/* Put this QMP function here so it can access the static graph_bdrv_states. */
3587BlockDeviceInfoList *bdrv_named_nodes_list(void)
3588{
3589 BlockDeviceInfoList *list, *entry;
3590 BlockDriverState *bs;
3591
3592 list = NULL;
3593 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3594 entry = g_malloc0(sizeof(*entry));
3595 entry->value = bdrv_block_device_info(bs);
3596 entry->next = list;
3597 list = entry;
3598 }
3599
3600 return list;
3601}
3602
Benoît Canet12d3ba82014-01-23 21:31:35 +01003603BlockDriverState *bdrv_lookup_bs(const char *device,
3604 const char *node_name,
3605 Error **errp)
3606{
3607 BlockDriverState *bs = NULL;
3608
Benoît Canet12d3ba82014-01-23 21:31:35 +01003609 if (device) {
3610 bs = bdrv_find(device);
3611
Benoît Canetdd67fa52014-02-12 17:15:06 +01003612 if (bs) {
3613 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003614 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003615 }
3616
Benoît Canetdd67fa52014-02-12 17:15:06 +01003617 if (node_name) {
3618 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003619
Benoît Canetdd67fa52014-02-12 17:15:06 +01003620 if (bs) {
3621 return bs;
3622 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003623 }
3624
Benoît Canetdd67fa52014-02-12 17:15:06 +01003625 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3626 device ? device : "",
3627 node_name ? node_name : "");
3628 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003629}
3630
Markus Armbruster2f399b02010-06-02 18:55:20 +02003631BlockDriverState *bdrv_next(BlockDriverState *bs)
3632{
3633 if (!bs) {
3634 return QTAILQ_FIRST(&bdrv_states);
3635 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003636 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003637}
3638
aliguori51de9762009-03-05 23:00:43 +00003639void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003640{
3641 BlockDriverState *bs;
3642
Benoît Canetdc364f42014-01-23 21:31:32 +01003643 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003644 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003645 }
3646}
3647
bellardea2384d2004-08-01 21:59:26 +00003648const char *bdrv_get_device_name(BlockDriverState *bs)
3649{
3650 return bs->device_name;
3651}
3652
Markus Armbrusterc8433282012-06-05 16:49:24 +02003653int bdrv_get_flags(BlockDriverState *bs)
3654{
3655 return bs->open_flags;
3656}
3657
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003658int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003659{
3660 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003661 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003662
Benoît Canetdc364f42014-01-23 21:31:32 +01003663 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003664 int ret = bdrv_flush(bs);
3665 if (ret < 0 && !result) {
3666 result = ret;
3667 }
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003668 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003669
3670 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003671}
3672
Peter Lieven3ac21622013-06-28 12:47:42 +02003673int bdrv_has_zero_init_1(BlockDriverState *bs)
3674{
3675 return 1;
3676}
3677
Kevin Wolff2feebb2010-04-14 17:30:35 +02003678int bdrv_has_zero_init(BlockDriverState *bs)
3679{
3680 assert(bs->drv);
3681
Paolo Bonzini11212d82013-09-04 19:00:27 +02003682 /* If BS is a copy on write image, it is initialized to
3683 the contents of the base image, which may not be zeroes. */
3684 if (bs->backing_hd) {
3685 return 0;
3686 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003687 if (bs->drv->bdrv_has_zero_init) {
3688 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003689 }
3690
Peter Lieven3ac21622013-06-28 12:47:42 +02003691 /* safe default */
3692 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003693}
3694
Peter Lieven4ce78692013-10-24 12:06:54 +02003695bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3696{
3697 BlockDriverInfo bdi;
3698
3699 if (bs->backing_hd) {
3700 return false;
3701 }
3702
3703 if (bdrv_get_info(bs, &bdi) == 0) {
3704 return bdi.unallocated_blocks_are_zero;
3705 }
3706
3707 return false;
3708}
3709
3710bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3711{
3712 BlockDriverInfo bdi;
3713
3714 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3715 return false;
3716 }
3717
3718 if (bdrv_get_info(bs, &bdi) == 0) {
3719 return bdi.can_write_zeroes_with_unmap;
3720 }
3721
3722 return false;
3723}
3724
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003725typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003726 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01003727 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003728 int64_t sector_num;
3729 int nb_sectors;
3730 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003731 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003732 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003733} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003734
thsf58c7b32008-06-05 21:53:49 +00003735/*
3736 * Returns true iff the specified sector is present in the disk image. Drivers
3737 * not implementing the functionality are assumed to not support backing files,
3738 * hence all their sectors are reported as allocated.
3739 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003740 * If 'sector_num' is beyond the end of the disk image the return value is 0
3741 * and 'pnum' is set to 0.
3742 *
thsf58c7b32008-06-05 21:53:49 +00003743 * 'pnum' is set to the number of sectors (including and immediately following
3744 * the specified sector) that are known to be in the same
3745 * allocated/unallocated state.
3746 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003747 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3748 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003749 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003750static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3751 int64_t sector_num,
3752 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00003753{
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003754 int64_t length;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003755 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003756 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003757
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003758 length = bdrv_getlength(bs);
3759 if (length < 0) {
3760 return length;
3761 }
3762
3763 if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003764 *pnum = 0;
3765 return 0;
3766 }
3767
3768 n = bs->total_sectors - sector_num;
3769 if (n < nb_sectors) {
3770 nb_sectors = n;
3771 }
3772
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003773 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003774 *pnum = nb_sectors;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02003775 ret = BDRV_BLOCK_DATA;
3776 if (bs->drv->protocol_name) {
3777 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3778 }
3779 return ret;
thsf58c7b32008-06-05 21:53:49 +00003780 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003781
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003782 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3783 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02003784 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003785 return ret;
3786 }
3787
Peter Lieven92bc50a2013-10-08 14:43:14 +02003788 if (ret & BDRV_BLOCK_RAW) {
3789 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3790 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3791 *pnum, pnum);
3792 }
3793
Peter Lievenc3d86882013-10-24 12:07:04 +02003794 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3795 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003796 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02003797 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003798 BlockDriverState *bs2 = bs->backing_hd;
3799 int64_t length2 = bdrv_getlength(bs2);
3800 if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3801 ret |= BDRV_BLOCK_ZERO;
3802 }
3803 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003804 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003805
3806 if (bs->file &&
3807 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
3808 (ret & BDRV_BLOCK_OFFSET_VALID)) {
3809 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3810 *pnum, pnum);
3811 if (ret2 >= 0) {
3812 /* Ignore errors. This is just providing extra information, it
3813 * is useful but not necessary.
3814 */
3815 ret |= (ret2 & BDRV_BLOCK_ZERO);
3816 }
3817 }
3818
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003819 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003820}
3821
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003822/* Coroutine wrapper for bdrv_get_block_status() */
3823static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003824{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003825 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003826 BlockDriverState *bs = data->bs;
3827
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003828 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3829 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003830 data->done = true;
3831}
3832
3833/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003834 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003835 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003836 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003837 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003838int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3839 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003840{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003841 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003842 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003843 .bs = bs,
3844 .sector_num = sector_num,
3845 .nb_sectors = nb_sectors,
3846 .pnum = pnum,
3847 .done = false,
3848 };
3849
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003850 if (qemu_in_coroutine()) {
3851 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003852 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003853 } else {
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003854 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003855 qemu_coroutine_enter(co, &data);
3856 while (!data.done) {
3857 qemu_aio_wait();
3858 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003859 }
3860 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00003861}
3862
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003863int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3864 int nb_sectors, int *pnum)
3865{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02003866 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3867 if (ret < 0) {
3868 return ret;
3869 }
3870 return
3871 (ret & BDRV_BLOCK_DATA) ||
3872 ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003873}
3874
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003875/*
3876 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3877 *
3878 * Return true if the given sector is allocated in any image between
3879 * BASE and TOP (inclusive). BASE can be NULL to check if the given
3880 * sector is allocated in any image of the chain. Return false otherwise.
3881 *
3882 * 'pnum' is set to the number of sectors (including and immediately following
3883 * the specified sector) that are known to be in the same
3884 * allocated/unallocated state.
3885 *
3886 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02003887int bdrv_is_allocated_above(BlockDriverState *top,
3888 BlockDriverState *base,
3889 int64_t sector_num,
3890 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003891{
3892 BlockDriverState *intermediate;
3893 int ret, n = nb_sectors;
3894
3895 intermediate = top;
3896 while (intermediate && intermediate != base) {
3897 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003898 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3899 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003900 if (ret < 0) {
3901 return ret;
3902 } else if (ret) {
3903 *pnum = pnum_inter;
3904 return 1;
3905 }
3906
3907 /*
3908 * [sector_num, nb_sectors] is unallocated on top but intermediate
3909 * might have
3910 *
3911 * [sector_num+x, nr_sectors] allocated.
3912 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08003913 if (n > pnum_inter &&
3914 (intermediate == top ||
3915 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003916 n = pnum_inter;
3917 }
3918
3919 intermediate = intermediate->backing_hd;
3920 }
3921
3922 *pnum = n;
3923 return 0;
3924}
3925
aliguori045df332009-03-05 23:00:48 +00003926const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3927{
3928 if (bs->backing_hd && bs->backing_hd->encrypted)
3929 return bs->backing_file;
3930 else if (bs->encrypted)
3931 return bs->filename;
3932 else
3933 return NULL;
3934}
3935
ths5fafdf22007-09-16 21:08:06 +00003936void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00003937 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00003938{
Kevin Wolf3574c602011-10-26 11:02:11 +02003939 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00003940}
3941
ths5fafdf22007-09-16 21:08:06 +00003942int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00003943 const uint8_t *buf, int nb_sectors)
3944{
3945 BlockDriver *drv = bs->drv;
3946 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003947 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003948 if (!drv->bdrv_write_compressed)
3949 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02003950 if (bdrv_check_request(bs, sector_num, nb_sectors))
3951 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003952
Fam Zhenge4654d22013-11-13 18:29:43 +08003953 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003954
bellardfaea38e2006-08-05 21:31:00 +00003955 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3956}
ths3b46e622007-09-17 08:09:54 +00003957
bellardfaea38e2006-08-05 21:31:00 +00003958int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3959{
3960 BlockDriver *drv = bs->drv;
3961 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003962 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003963 if (!drv->bdrv_get_info)
3964 return -ENOTSUP;
3965 memset(bdi, 0, sizeof(*bdi));
3966 return drv->bdrv_get_info(bs, bdi);
3967}
3968
Max Reitzeae041f2013-10-09 10:46:16 +02003969ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3970{
3971 BlockDriver *drv = bs->drv;
3972 if (drv && drv->bdrv_get_specific_info) {
3973 return drv->bdrv_get_specific_info(bs);
3974 }
3975 return NULL;
3976}
3977
Christoph Hellwig45566e92009-07-10 23:11:57 +02003978int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
3979 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00003980{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003981 QEMUIOVector qiov;
3982 struct iovec iov = {
3983 .iov_base = (void *) buf,
3984 .iov_len = size,
3985 };
3986
3987 qemu_iovec_init_external(&qiov, &iov, 1);
3988 return bdrv_writev_vmstate(bs, &qiov, pos);
3989}
3990
3991int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
3992{
aliguori178e08a2009-04-05 19:10:55 +00003993 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003994
3995 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00003996 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003997 } else if (drv->bdrv_save_vmstate) {
3998 return drv->bdrv_save_vmstate(bs, qiov, pos);
3999 } else if (bs->file) {
4000 return bdrv_writev_vmstate(bs->file, qiov, pos);
4001 }
4002
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004003 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004004}
4005
Christoph Hellwig45566e92009-07-10 23:11:57 +02004006int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4007 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004008{
4009 BlockDriver *drv = bs->drv;
4010 if (!drv)
4011 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004012 if (drv->bdrv_load_vmstate)
4013 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4014 if (bs->file)
4015 return bdrv_load_vmstate(bs->file, buf, pos, size);
4016 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004017}
4018
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004019void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4020{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004021 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004022 return;
4023 }
4024
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004025 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004026}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004027
Kevin Wolf41c695c2012-12-06 14:32:58 +01004028int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4029 const char *tag)
4030{
4031 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4032 bs = bs->file;
4033 }
4034
4035 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4036 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4037 }
4038
4039 return -ENOTSUP;
4040}
4041
Fam Zheng4cc70e92013-11-20 10:01:54 +08004042int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4043{
4044 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4045 bs = bs->file;
4046 }
4047
4048 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4049 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4050 }
4051
4052 return -ENOTSUP;
4053}
4054
Kevin Wolf41c695c2012-12-06 14:32:58 +01004055int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4056{
4057 while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
4058 bs = bs->file;
4059 }
4060
4061 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4062 return bs->drv->bdrv_debug_resume(bs, tag);
4063 }
4064
4065 return -ENOTSUP;
4066}
4067
4068bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4069{
4070 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4071 bs = bs->file;
4072 }
4073
4074 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4075 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4076 }
4077
4078 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004079}
4080
Blue Swirl199630b2010-07-25 20:49:34 +00004081int bdrv_is_snapshot(BlockDriverState *bs)
4082{
4083 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4084}
4085
Jeff Codyb1b1d782012-10-16 15:49:09 -04004086/* backing_file can either be relative, or absolute, or a protocol. If it is
4087 * relative, it must be relative to the chain. So, passing in bs->filename
4088 * from a BDS as backing_file should not be done, as that may be relative to
4089 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004090BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4091 const char *backing_file)
4092{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004093 char *filename_full = NULL;
4094 char *backing_file_full = NULL;
4095 char *filename_tmp = NULL;
4096 int is_protocol = 0;
4097 BlockDriverState *curr_bs = NULL;
4098 BlockDriverState *retval = NULL;
4099
4100 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004101 return NULL;
4102 }
4103
Jeff Codyb1b1d782012-10-16 15:49:09 -04004104 filename_full = g_malloc(PATH_MAX);
4105 backing_file_full = g_malloc(PATH_MAX);
4106 filename_tmp = g_malloc(PATH_MAX);
4107
4108 is_protocol = path_has_protocol(backing_file);
4109
4110 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4111
4112 /* If either of the filename paths is actually a protocol, then
4113 * compare unmodified paths; otherwise make paths relative */
4114 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4115 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4116 retval = curr_bs->backing_hd;
4117 break;
4118 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004119 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004120 /* If not an absolute filename path, make it relative to the current
4121 * image's filename path */
4122 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4123 backing_file);
4124
4125 /* We are going to compare absolute pathnames */
4126 if (!realpath(filename_tmp, filename_full)) {
4127 continue;
4128 }
4129
4130 /* We need to make sure the backing filename we are comparing against
4131 * is relative to the current image filename (or absolute) */
4132 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4133 curr_bs->backing_file);
4134
4135 if (!realpath(filename_tmp, backing_file_full)) {
4136 continue;
4137 }
4138
4139 if (strcmp(backing_file_full, filename_full) == 0) {
4140 retval = curr_bs->backing_hd;
4141 break;
4142 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004143 }
4144 }
4145
Jeff Codyb1b1d782012-10-16 15:49:09 -04004146 g_free(filename_full);
4147 g_free(backing_file_full);
4148 g_free(filename_tmp);
4149 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004150}
4151
Benoît Canetf198fd12012-08-02 10:22:47 +02004152int bdrv_get_backing_file_depth(BlockDriverState *bs)
4153{
4154 if (!bs->drv) {
4155 return 0;
4156 }
4157
4158 if (!bs->backing_hd) {
4159 return 0;
4160 }
4161
4162 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4163}
4164
Jeff Cody79fac562012-09-27 13:29:15 -04004165BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4166{
4167 BlockDriverState *curr_bs = NULL;
4168
4169 if (!bs) {
4170 return NULL;
4171 }
4172
4173 curr_bs = bs;
4174
4175 while (curr_bs->backing_hd) {
4176 curr_bs = curr_bs->backing_hd;
4177 }
4178 return curr_bs;
4179}
4180
bellard83f64092006-08-01 16:21:11 +00004181/**************************************************************/
4182/* async I/Os */
4183
aliguori3b69e4b2009-01-22 16:59:24 +00004184BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004185 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004186 BlockDriverCompletionFunc *cb, void *opaque)
4187{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004188 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4189
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004190 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004191 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004192}
4193
aliguorif141eaf2009-04-07 18:43:24 +00004194BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4195 QEMUIOVector *qiov, int nb_sectors,
4196 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004197{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004198 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4199
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004200 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004201 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004202}
4203
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004204BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4205 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4206 BlockDriverCompletionFunc *cb, void *opaque)
4207{
4208 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4209
4210 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4211 BDRV_REQ_ZERO_WRITE | flags,
4212 cb, opaque, true);
4213}
4214
Kevin Wolf40b4f532009-09-09 17:53:37 +02004215
4216typedef struct MultiwriteCB {
4217 int error;
4218 int num_requests;
4219 int num_callbacks;
4220 struct {
4221 BlockDriverCompletionFunc *cb;
4222 void *opaque;
4223 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004224 } callbacks[];
4225} MultiwriteCB;
4226
4227static void multiwrite_user_cb(MultiwriteCB *mcb)
4228{
4229 int i;
4230
4231 for (i = 0; i < mcb->num_callbacks; i++) {
4232 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004233 if (mcb->callbacks[i].free_qiov) {
4234 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4235 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004236 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004237 }
4238}
4239
4240static void multiwrite_cb(void *opaque, int ret)
4241{
4242 MultiwriteCB *mcb = opaque;
4243
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004244 trace_multiwrite_cb(mcb, ret);
4245
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004246 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004247 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004248 }
4249
4250 mcb->num_requests--;
4251 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004252 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004253 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004254 }
4255}
4256
4257static int multiwrite_req_compare(const void *a, const void *b)
4258{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004259 const BlockRequest *req1 = a, *req2 = b;
4260
4261 /*
4262 * Note that we can't simply subtract req2->sector from req1->sector
4263 * here as that could overflow the return value.
4264 */
4265 if (req1->sector > req2->sector) {
4266 return 1;
4267 } else if (req1->sector < req2->sector) {
4268 return -1;
4269 } else {
4270 return 0;
4271 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004272}
4273
4274/*
4275 * Takes a bunch of requests and tries to merge them. Returns the number of
4276 * requests that remain after merging.
4277 */
4278static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4279 int num_reqs, MultiwriteCB *mcb)
4280{
4281 int i, outidx;
4282
4283 // Sort requests by start sector
4284 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4285
4286 // Check if adjacent requests touch the same clusters. If so, combine them,
4287 // filling up gaps with zero sectors.
4288 outidx = 0;
4289 for (i = 1; i < num_reqs; i++) {
4290 int merge = 0;
4291 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4292
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004293 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004294 if (reqs[i].sector <= oldreq_last) {
4295 merge = 1;
4296 }
4297
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004298 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4299 merge = 0;
4300 }
4301
Kevin Wolf40b4f532009-09-09 17:53:37 +02004302 if (merge) {
4303 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004304 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004305 qemu_iovec_init(qiov,
4306 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4307
4308 // Add the first request to the merged one. If the requests are
4309 // overlapping, drop the last sectors of the first request.
4310 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004311 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004312
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004313 // We should need to add any zeros between the two requests
4314 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004315
4316 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004317 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004318
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004319 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004320 reqs[outidx].qiov = qiov;
4321
4322 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4323 } else {
4324 outidx++;
4325 reqs[outidx].sector = reqs[i].sector;
4326 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4327 reqs[outidx].qiov = reqs[i].qiov;
4328 }
4329 }
4330
4331 return outidx + 1;
4332}
4333
4334/*
4335 * Submit multiple AIO write requests at once.
4336 *
4337 * On success, the function returns 0 and all requests in the reqs array have
4338 * been submitted. In error case this function returns -1, and any of the
4339 * requests may or may not be submitted yet. In particular, this means that the
4340 * callback will be called for some of the requests, for others it won't. The
4341 * caller must check the error field of the BlockRequest to wait for the right
4342 * callbacks (if error != 0, no callback will be called).
4343 *
4344 * The implementation may modify the contents of the reqs array, e.g. to merge
4345 * requests. However, the fields opaque and error are left unmodified as they
4346 * are used to signal failure for a single request to the caller.
4347 */
4348int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4349{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004350 MultiwriteCB *mcb;
4351 int i;
4352
Ryan Harper301db7c2011-03-07 10:01:04 -06004353 /* don't submit writes if we don't have a medium */
4354 if (bs->drv == NULL) {
4355 for (i = 0; i < num_reqs; i++) {
4356 reqs[i].error = -ENOMEDIUM;
4357 }
4358 return -1;
4359 }
4360
Kevin Wolf40b4f532009-09-09 17:53:37 +02004361 if (num_reqs == 0) {
4362 return 0;
4363 }
4364
4365 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004366 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004367 mcb->num_requests = 0;
4368 mcb->num_callbacks = num_reqs;
4369
4370 for (i = 0; i < num_reqs; i++) {
4371 mcb->callbacks[i].cb = reqs[i].cb;
4372 mcb->callbacks[i].opaque = reqs[i].opaque;
4373 }
4374
4375 // Check for mergable requests
4376 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4377
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004378 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4379
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004380 /* Run the aio requests. */
4381 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004382 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004383 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4384 reqs[i].nb_sectors, reqs[i].flags,
4385 multiwrite_cb, mcb,
4386 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004387 }
4388
4389 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004390}
4391
bellard83f64092006-08-01 16:21:11 +00004392void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004393{
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004394 acb->aiocb_info->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00004395}
4396
4397/**************************************************************/
4398/* async block device emulation */
4399
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004400typedef struct BlockDriverAIOCBSync {
4401 BlockDriverAIOCB common;
4402 QEMUBH *bh;
4403 int ret;
4404 /* vector translation state */
4405 QEMUIOVector *qiov;
4406 uint8_t *bounce;
4407 int is_write;
4408} BlockDriverAIOCBSync;
4409
4410static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4411{
Kevin Wolfb666d232010-05-05 11:44:39 +02004412 BlockDriverAIOCBSync *acb =
4413 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03004414 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004415 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004416 qemu_aio_release(acb);
4417}
4418
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004419static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004420 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4421 .cancel = bdrv_aio_cancel_em,
4422};
4423
bellard83f64092006-08-01 16:21:11 +00004424static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004425{
pbrookce1a14d2006-08-07 02:38:06 +00004426 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004427
aliguorif141eaf2009-04-07 18:43:24 +00004428 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04004429 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00004430 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004431 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004432 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004433 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00004434 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00004435}
bellardbeac80c2006-06-26 20:08:57 +00004436
aliguorif141eaf2009-04-07 18:43:24 +00004437static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4438 int64_t sector_num,
4439 QEMUIOVector *qiov,
4440 int nb_sectors,
4441 BlockDriverCompletionFunc *cb,
4442 void *opaque,
4443 int is_write)
4444
bellardea2384d2004-08-01 21:59:26 +00004445{
pbrookce1a14d2006-08-07 02:38:06 +00004446 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004447
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004448 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004449 acb->is_write = is_write;
4450 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00004451 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01004452 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004453
4454 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004455 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004456 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004457 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004458 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004459 }
4460
pbrookce1a14d2006-08-07 02:38:06 +00004461 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004462
pbrookce1a14d2006-08-07 02:38:06 +00004463 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004464}
4465
aliguorif141eaf2009-04-07 18:43:24 +00004466static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4467 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004468 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004469{
aliguorif141eaf2009-04-07 18:43:24 +00004470 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004471}
4472
aliguorif141eaf2009-04-07 18:43:24 +00004473static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4474 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4475 BlockDriverCompletionFunc *cb, void *opaque)
4476{
4477 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4478}
4479
Kevin Wolf68485422011-06-30 10:05:46 +02004480
4481typedef struct BlockDriverAIOCBCoroutine {
4482 BlockDriverAIOCB common;
4483 BlockRequest req;
4484 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004485 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004486 QEMUBH* bh;
4487} BlockDriverAIOCBCoroutine;
4488
4489static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4490{
Kevin Wolfd318aea2012-11-13 16:35:08 +01004491 BlockDriverAIOCBCoroutine *acb =
4492 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4493 bool done = false;
4494
4495 acb->done = &done;
4496 while (!done) {
4497 qemu_aio_wait();
4498 }
Kevin Wolf68485422011-06-30 10:05:46 +02004499}
4500
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004501static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004502 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4503 .cancel = bdrv_aio_co_cancel_em,
4504};
4505
Paolo Bonzini35246a62011-10-14 10:41:29 +02004506static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004507{
4508 BlockDriverAIOCBCoroutine *acb = opaque;
4509
4510 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004511
4512 if (acb->done) {
4513 *acb->done = true;
4514 }
4515
Kevin Wolf68485422011-06-30 10:05:46 +02004516 qemu_bh_delete(acb->bh);
4517 qemu_aio_release(acb);
4518}
4519
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004520/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4521static void coroutine_fn bdrv_co_do_rw(void *opaque)
4522{
4523 BlockDriverAIOCBCoroutine *acb = opaque;
4524 BlockDriverState *bs = acb->common.bs;
4525
4526 if (!acb->is_write) {
4527 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004528 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004529 } else {
4530 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004531 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004532 }
4533
Paolo Bonzini35246a62011-10-14 10:41:29 +02004534 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004535 qemu_bh_schedule(acb->bh);
4536}
4537
Kevin Wolf68485422011-06-30 10:05:46 +02004538static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4539 int64_t sector_num,
4540 QEMUIOVector *qiov,
4541 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004542 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004543 BlockDriverCompletionFunc *cb,
4544 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004545 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004546{
4547 Coroutine *co;
4548 BlockDriverAIOCBCoroutine *acb;
4549
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004550 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004551 acb->req.sector = sector_num;
4552 acb->req.nb_sectors = nb_sectors;
4553 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004554 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004555 acb->is_write = is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004556 acb->done = NULL;
Kevin Wolf68485422011-06-30 10:05:46 +02004557
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004558 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004559 qemu_coroutine_enter(co, acb);
4560
4561 return &acb->common;
4562}
4563
Paolo Bonzini07f07612011-10-17 12:32:12 +02004564static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004565{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004566 BlockDriverAIOCBCoroutine *acb = opaque;
4567 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004568
Paolo Bonzini07f07612011-10-17 12:32:12 +02004569 acb->req.error = bdrv_co_flush(bs);
4570 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004571 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004572}
4573
Paolo Bonzini07f07612011-10-17 12:32:12 +02004574BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004575 BlockDriverCompletionFunc *cb, void *opaque)
4576{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004577 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004578
Paolo Bonzini07f07612011-10-17 12:32:12 +02004579 Coroutine *co;
4580 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004581
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004582 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004583 acb->done = NULL;
4584
Paolo Bonzini07f07612011-10-17 12:32:12 +02004585 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4586 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004587
Alexander Graf016f5cf2010-05-26 17:51:49 +02004588 return &acb->common;
4589}
4590
Paolo Bonzini4265d622011-10-17 12:32:14 +02004591static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4592{
4593 BlockDriverAIOCBCoroutine *acb = opaque;
4594 BlockDriverState *bs = acb->common.bs;
4595
4596 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4597 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4598 qemu_bh_schedule(acb->bh);
4599}
4600
4601BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4602 int64_t sector_num, int nb_sectors,
4603 BlockDriverCompletionFunc *cb, void *opaque)
4604{
4605 Coroutine *co;
4606 BlockDriverAIOCBCoroutine *acb;
4607
4608 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4609
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004610 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004611 acb->req.sector = sector_num;
4612 acb->req.nb_sectors = nb_sectors;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004613 acb->done = NULL;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004614 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4615 qemu_coroutine_enter(co, acb);
4616
4617 return &acb->common;
4618}
4619
bellardea2384d2004-08-01 21:59:26 +00004620void bdrv_init(void)
4621{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004622 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004623}
pbrookce1a14d2006-08-07 02:38:06 +00004624
Markus Armbrustereb852012009-10-27 18:41:44 +01004625void bdrv_init_with_whitelist(void)
4626{
4627 use_bdrv_whitelist = 1;
4628 bdrv_init();
4629}
4630
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004631void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004632 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004633{
pbrookce1a14d2006-08-07 02:38:06 +00004634 BlockDriverAIOCB *acb;
4635
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004636 acb = g_slice_alloc(aiocb_info->aiocb_size);
4637 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004638 acb->bs = bs;
4639 acb->cb = cb;
4640 acb->opaque = opaque;
4641 return acb;
4642}
4643
4644void qemu_aio_release(void *p)
4645{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004646 BlockDriverAIOCB *acb = p;
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004647 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
pbrookce1a14d2006-08-07 02:38:06 +00004648}
bellard19cb3732006-08-19 11:45:59 +00004649
4650/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004651/* Coroutine block device emulation */
4652
4653typedef struct CoroutineIOCompletion {
4654 Coroutine *coroutine;
4655 int ret;
4656} CoroutineIOCompletion;
4657
4658static void bdrv_co_io_em_complete(void *opaque, int ret)
4659{
4660 CoroutineIOCompletion *co = opaque;
4661
4662 co->ret = ret;
4663 qemu_coroutine_enter(co->coroutine, NULL);
4664}
4665
4666static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4667 int nb_sectors, QEMUIOVector *iov,
4668 bool is_write)
4669{
4670 CoroutineIOCompletion co = {
4671 .coroutine = qemu_coroutine_self(),
4672 };
4673 BlockDriverAIOCB *acb;
4674
4675 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004676 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4677 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004678 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004679 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4680 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004681 }
4682
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004683 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004684 if (!acb) {
4685 return -EIO;
4686 }
4687 qemu_coroutine_yield();
4688
4689 return co.ret;
4690}
4691
4692static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4693 int64_t sector_num, int nb_sectors,
4694 QEMUIOVector *iov)
4695{
4696 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4697}
4698
4699static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4700 int64_t sector_num, int nb_sectors,
4701 QEMUIOVector *iov)
4702{
4703 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4704}
4705
Paolo Bonzini07f07612011-10-17 12:32:12 +02004706static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004707{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004708 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004709
Paolo Bonzini07f07612011-10-17 12:32:12 +02004710 rwco->ret = bdrv_co_flush(rwco->bs);
4711}
4712
4713int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4714{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004715 int ret;
4716
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004717 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004718 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004719 }
4720
Kevin Wolfca716362011-11-10 18:13:59 +01004721 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004722 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004723 if (bs->drv->bdrv_co_flush_to_os) {
4724 ret = bs->drv->bdrv_co_flush_to_os(bs);
4725 if (ret < 0) {
4726 return ret;
4727 }
4728 }
4729
Kevin Wolfca716362011-11-10 18:13:59 +01004730 /* But don't actually force it to the disk with cache=unsafe */
4731 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004732 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004733 }
4734
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004735 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004736 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004737 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004738 } else if (bs->drv->bdrv_aio_flush) {
4739 BlockDriverAIOCB *acb;
4740 CoroutineIOCompletion co = {
4741 .coroutine = qemu_coroutine_self(),
4742 };
4743
4744 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4745 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004746 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004747 } else {
4748 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004749 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004750 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02004751 } else {
4752 /*
4753 * Some block drivers always operate in either writethrough or unsafe
4754 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4755 * know how the server works (because the behaviour is hardcoded or
4756 * depends on server-side configuration), so we can't ensure that
4757 * everything is safe on disk. Returning an error doesn't work because
4758 * that would break guests even if the server operates in writethrough
4759 * mode.
4760 *
4761 * Let's hope the user knows what he's doing.
4762 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004763 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004764 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004765 if (ret < 0) {
4766 return ret;
4767 }
4768
4769 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4770 * in the case of cache=unsafe, so there are no useless flushes.
4771 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02004772flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004773 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004774}
4775
Anthony Liguori0f154232011-11-14 15:09:45 -06004776void bdrv_invalidate_cache(BlockDriverState *bs)
4777{
4778 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
4779 bs->drv->bdrv_invalidate_cache(bs);
4780 }
4781}
4782
4783void bdrv_invalidate_cache_all(void)
4784{
4785 BlockDriverState *bs;
4786
Benoît Canetdc364f42014-01-23 21:31:32 +01004787 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Anthony Liguori0f154232011-11-14 15:09:45 -06004788 bdrv_invalidate_cache(bs);
4789 }
4790}
4791
Benoît Canet07789262012-03-23 08:36:49 +01004792void bdrv_clear_incoming_migration_all(void)
4793{
4794 BlockDriverState *bs;
4795
Benoît Canetdc364f42014-01-23 21:31:32 +01004796 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Benoît Canet07789262012-03-23 08:36:49 +01004797 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4798 }
4799}
4800
Paolo Bonzini07f07612011-10-17 12:32:12 +02004801int bdrv_flush(BlockDriverState *bs)
4802{
4803 Coroutine *co;
4804 RwCo rwco = {
4805 .bs = bs,
4806 .ret = NOT_DONE,
4807 };
4808
4809 if (qemu_in_coroutine()) {
4810 /* Fast-path if already in coroutine context */
4811 bdrv_flush_co_entry(&rwco);
4812 } else {
4813 co = qemu_coroutine_create(bdrv_flush_co_entry);
4814 qemu_coroutine_enter(co, &rwco);
4815 while (rwco.ret == NOT_DONE) {
4816 qemu_aio_wait();
4817 }
4818 }
4819
4820 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004821}
4822
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004823typedef struct DiscardCo {
4824 BlockDriverState *bs;
4825 int64_t sector_num;
4826 int nb_sectors;
4827 int ret;
4828} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004829static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4830{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004831 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004832
4833 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4834}
4835
Peter Lieven6f14da52013-10-24 12:06:59 +02004836/* if no limit is specified in the BlockLimits use a default
4837 * of 32768 512-byte sectors (16 MiB) per request.
4838 */
4839#define MAX_DISCARD_DEFAULT 32768
4840
Paolo Bonzini4265d622011-10-17 12:32:14 +02004841int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4842 int nb_sectors)
4843{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004844 int max_discard;
4845
Paolo Bonzini4265d622011-10-17 12:32:14 +02004846 if (!bs->drv) {
4847 return -ENOMEDIUM;
4848 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4849 return -EIO;
4850 } else if (bs->read_only) {
4851 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004852 }
4853
Fam Zhenge4654d22013-11-13 18:29:43 +08004854 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004855
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01004856 /* Do nothing if disabled. */
4857 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4858 return 0;
4859 }
4860
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004861 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004862 return 0;
4863 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004864
4865 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4866 while (nb_sectors > 0) {
4867 int ret;
4868 int num = nb_sectors;
4869
4870 /* align request */
4871 if (bs->bl.discard_alignment &&
4872 num >= bs->bl.discard_alignment &&
4873 sector_num % bs->bl.discard_alignment) {
4874 if (num > bs->bl.discard_alignment) {
4875 num = bs->bl.discard_alignment;
4876 }
4877 num -= sector_num % bs->bl.discard_alignment;
4878 }
4879
4880 /* limit request size */
4881 if (num > max_discard) {
4882 num = max_discard;
4883 }
4884
4885 if (bs->drv->bdrv_co_discard) {
4886 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4887 } else {
4888 BlockDriverAIOCB *acb;
4889 CoroutineIOCompletion co = {
4890 .coroutine = qemu_coroutine_self(),
4891 };
4892
4893 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4894 bdrv_co_io_em_complete, &co);
4895 if (acb == NULL) {
4896 return -EIO;
4897 } else {
4898 qemu_coroutine_yield();
4899 ret = co.ret;
4900 }
4901 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01004902 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004903 return ret;
4904 }
4905
4906 sector_num += num;
4907 nb_sectors -= num;
4908 }
4909 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004910}
4911
4912int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4913{
4914 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004915 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004916 .bs = bs,
4917 .sector_num = sector_num,
4918 .nb_sectors = nb_sectors,
4919 .ret = NOT_DONE,
4920 };
4921
4922 if (qemu_in_coroutine()) {
4923 /* Fast-path if already in coroutine context */
4924 bdrv_discard_co_entry(&rwco);
4925 } else {
4926 co = qemu_coroutine_create(bdrv_discard_co_entry);
4927 qemu_coroutine_enter(co, &rwco);
4928 while (rwco.ret == NOT_DONE) {
4929 qemu_aio_wait();
4930 }
4931 }
4932
4933 return rwco.ret;
4934}
4935
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004936/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00004937/* removable device support */
4938
4939/**
4940 * Return TRUE if the media is present
4941 */
4942int bdrv_is_inserted(BlockDriverState *bs)
4943{
4944 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004945
bellard19cb3732006-08-19 11:45:59 +00004946 if (!drv)
4947 return 0;
4948 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004949 return 1;
4950 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004951}
4952
4953/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004954 * Return whether the media changed since the last call to this
4955 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004956 */
4957int bdrv_media_changed(BlockDriverState *bs)
4958{
4959 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004960
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004961 if (drv && drv->bdrv_media_changed) {
4962 return drv->bdrv_media_changed(bs);
4963 }
4964 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00004965}
4966
4967/**
4968 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4969 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02004970void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00004971{
4972 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004973
Markus Armbruster822e1cd2011-07-20 18:23:42 +02004974 if (drv && drv->bdrv_eject) {
4975 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00004976 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02004977
4978 if (bs->device_name[0] != '\0') {
4979 bdrv_emit_qmp_eject_event(bs, eject_flag);
4980 }
bellard19cb3732006-08-19 11:45:59 +00004981}
4982
bellard19cb3732006-08-19 11:45:59 +00004983/**
4984 * Lock or unlock the media (if it is locked, the user won't be able
4985 * to eject it manually).
4986 */
Markus Armbruster025e8492011-09-06 18:58:47 +02004987void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00004988{
4989 BlockDriver *drv = bs->drv;
4990
Markus Armbruster025e8492011-09-06 18:58:47 +02004991 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01004992
Markus Armbruster025e8492011-09-06 18:58:47 +02004993 if (drv && drv->bdrv_lock_medium) {
4994 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00004995 }
4996}
ths985a03b2007-12-24 16:10:43 +00004997
4998/* needed for generic scsi interface */
4999
5000int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5001{
5002 BlockDriver *drv = bs->drv;
5003
5004 if (drv && drv->bdrv_ioctl)
5005 return drv->bdrv_ioctl(bs, req, buf);
5006 return -ENOTSUP;
5007}
aliguori7d780662009-03-12 19:57:08 +00005008
aliguori221f7152009-03-28 17:28:41 +00005009BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5010 unsigned long int req, void *buf,
5011 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005012{
aliguori221f7152009-03-28 17:28:41 +00005013 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005014
aliguori221f7152009-03-28 17:28:41 +00005015 if (drv && drv->bdrv_aio_ioctl)
5016 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5017 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005018}
aliguorie268ca52009-04-22 20:20:00 +00005019
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005020void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005021{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005022 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005023}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005024
aliguorie268ca52009-04-22 20:20:00 +00005025void *qemu_blockalign(BlockDriverState *bs, size_t size)
5026{
Kevin Wolf339064d2013-11-28 10:23:32 +01005027 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005028}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005029
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005030/*
5031 * Check if all memory in this vector is sector aligned.
5032 */
5033bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5034{
5035 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005036 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005037
5038 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005039 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005040 return false;
5041 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005042 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005043 return false;
5044 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005045 }
5046
5047 return true;
5048}
5049
Fam Zhenge4654d22013-11-13 18:29:43 +08005050BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005051{
5052 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005053 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005054
Paolo Bonzini50717e92013-01-21 17:09:45 +01005055 assert((granularity & (granularity - 1)) == 0);
5056
Fam Zhenge4654d22013-11-13 18:29:43 +08005057 granularity >>= BDRV_SECTOR_BITS;
5058 assert(granularity);
5059 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
5060 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5061 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5062 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5063 return bitmap;
5064}
5065
5066void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5067{
5068 BdrvDirtyBitmap *bm, *next;
5069 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5070 if (bm == bitmap) {
5071 QLIST_REMOVE(bitmap, list);
5072 hbitmap_free(bitmap->bitmap);
5073 g_free(bitmap);
5074 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005075 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005076 }
5077}
5078
Fam Zheng21b56832013-11-13 18:29:44 +08005079BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5080{
5081 BdrvDirtyBitmap *bm;
5082 BlockDirtyInfoList *list = NULL;
5083 BlockDirtyInfoList **plist = &list;
5084
5085 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5086 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5087 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5088 info->count = bdrv_get_dirty_count(bs, bm);
5089 info->granularity =
5090 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5091 entry->value = info;
5092 *plist = entry;
5093 plist = &entry->next;
5094 }
5095
5096 return list;
5097}
5098
Fam Zhenge4654d22013-11-13 18:29:43 +08005099int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005100{
Fam Zhenge4654d22013-11-13 18:29:43 +08005101 if (bitmap) {
5102 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005103 } else {
5104 return 0;
5105 }
5106}
5107
Fam Zhenge4654d22013-11-13 18:29:43 +08005108void bdrv_dirty_iter_init(BlockDriverState *bs,
5109 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005110{
Fam Zhenge4654d22013-11-13 18:29:43 +08005111 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005112}
5113
5114void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5115 int nr_sectors)
5116{
Fam Zhenge4654d22013-11-13 18:29:43 +08005117 BdrvDirtyBitmap *bitmap;
5118 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5119 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005120 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005121}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005122
Fam Zhenge4654d22013-11-13 18:29:43 +08005123void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5124{
5125 BdrvDirtyBitmap *bitmap;
5126 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5127 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5128 }
5129}
5130
5131int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5132{
5133 return hbitmap_count(bitmap->bitmap);
5134}
5135
Fam Zheng9fcb0252013-08-23 09:14:46 +08005136/* Get a reference to bs */
5137void bdrv_ref(BlockDriverState *bs)
5138{
5139 bs->refcnt++;
5140}
5141
5142/* Release a previously grabbed reference to bs.
5143 * If after releasing, reference count is zero, the BlockDriverState is
5144 * deleted. */
5145void bdrv_unref(BlockDriverState *bs)
5146{
5147 assert(bs->refcnt > 0);
5148 if (--bs->refcnt == 0) {
5149 bdrv_delete(bs);
5150 }
5151}
5152
Marcelo Tosattidb593f22011-01-26 12:12:34 -02005153void bdrv_set_in_use(BlockDriverState *bs, int in_use)
5154{
5155 assert(bs->in_use != in_use);
5156 bs->in_use = in_use;
5157}
5158
5159int bdrv_in_use(BlockDriverState *bs)
5160{
5161 return bs->in_use;
5162}
5163
Luiz Capitulino28a72822011-09-26 17:43:50 -03005164void bdrv_iostatus_enable(BlockDriverState *bs)
5165{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005166 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005167 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005168}
5169
5170/* The I/O status is only enabled if the drive explicitly
5171 * enables it _and_ the VM is configured to stop on errors */
5172bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5173{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005174 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005175 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5176 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5177 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005178}
5179
5180void bdrv_iostatus_disable(BlockDriverState *bs)
5181{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005182 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005183}
5184
5185void bdrv_iostatus_reset(BlockDriverState *bs)
5186{
5187 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005188 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005189 if (bs->job) {
5190 block_job_iostatus_reset(bs->job);
5191 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005192 }
5193}
5194
Luiz Capitulino28a72822011-09-26 17:43:50 -03005195void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5196{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005197 assert(bdrv_iostatus_is_enabled(bs));
5198 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005199 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5200 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005201 }
5202}
5203
Christoph Hellwiga597e792011-08-25 08:26:01 +02005204void
5205bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5206 enum BlockAcctType type)
5207{
5208 assert(type < BDRV_MAX_IOTYPE);
5209
5210 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005211 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02005212 cookie->type = type;
5213}
5214
5215void
5216bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5217{
5218 assert(cookie->type < BDRV_MAX_IOTYPE);
5219
5220 bs->nr_bytes[cookie->type] += cookie->bytes;
5221 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005222 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02005223}
5224
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005225void bdrv_img_create(const char *filename, const char *fmt,
5226 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005227 char *options, uint64_t img_size, int flags,
5228 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005229{
5230 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02005231 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005232 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005233 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005234 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005235 int ret = 0;
5236
5237 /* Find driver and parse its options */
5238 drv = bdrv_find_format(fmt);
5239 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005240 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005241 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005242 }
5243
Kevin Wolf98289622013-07-10 15:47:39 +02005244 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005245 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005246 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005247 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005248 }
5249
5250 create_options = append_option_parameters(create_options,
5251 drv->create_options);
5252 create_options = append_option_parameters(create_options,
5253 proto_drv->create_options);
5254
5255 /* Create parameter list with default values */
5256 param = parse_option_parameters("", create_options, param);
5257
5258 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
5259
5260 /* Parse -o options */
5261 if (options) {
5262 param = parse_option_parameters(options, create_options, param);
5263 if (param == NULL) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005264 error_setg(errp, "Invalid options for file format '%s'.", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005265 goto out;
5266 }
5267 }
5268
5269 if (base_filename) {
5270 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
5271 base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005272 error_setg(errp, "Backing file not supported for file format '%s'",
5273 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005274 goto out;
5275 }
5276 }
5277
5278 if (base_fmt) {
5279 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005280 error_setg(errp, "Backing file format not supported for file "
5281 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005282 goto out;
5283 }
5284 }
5285
Jes Sorensen792da932010-12-16 13:52:17 +01005286 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
5287 if (backing_file && backing_file->value.s) {
5288 if (!strcmp(filename, backing_file->value.s)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005289 error_setg(errp, "Error: Trying to create an image with the "
5290 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005291 goto out;
5292 }
5293 }
5294
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005295 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
5296 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005297 backing_drv = bdrv_find_format(backing_fmt->value.s);
5298 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005299 error_setg(errp, "Unknown backing file format '%s'",
5300 backing_fmt->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005301 goto out;
5302 }
5303 }
5304
5305 // The size for the image must always be specified, with one exception:
5306 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02005307 size = get_option_parameter(param, BLOCK_OPT_SIZE);
5308 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005309 if (backing_file && backing_file->value.s) {
Max Reitz66f6b812013-12-03 14:57:52 +01005310 BlockDriverState *bs;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005311 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005312 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02005313 int back_flags;
5314
5315 /* backing files always opened read-only */
5316 back_flags =
5317 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005318
Max Reitzf67503e2014-02-18 18:33:05 +01005319 bs = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01005320 ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005321 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005322 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005323 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5324 backing_file->value.s,
5325 error_get_pretty(local_err));
5326 error_free(local_err);
5327 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005328 goto out;
5329 }
5330 bdrv_get_geometry(bs, &size);
5331 size *= 512;
5332
5333 snprintf(buf, sizeof(buf), "%" PRId64, size);
5334 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
Max Reitz66f6b812013-12-03 14:57:52 +01005335
5336 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005337 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005338 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005339 goto out;
5340 }
5341 }
5342
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005343 if (!quiet) {
5344 printf("Formatting '%s', fmt=%s ", filename, fmt);
5345 print_option_parameters(param);
5346 puts("");
5347 }
Max Reitzcc84d902013-09-06 17:14:26 +02005348 ret = bdrv_create(drv, filename, param, &local_err);
5349 if (ret == -EFBIG) {
5350 /* This is generally a better message than whatever the driver would
5351 * deliver (especially because of the cluster_size_hint), since that
5352 * is most probably not much different from "image too large". */
5353 const char *cluster_size_hint = "";
5354 if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5355 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005356 }
Max Reitzcc84d902013-09-06 17:14:26 +02005357 error_setg(errp, "The image size is too large for file format '%s'"
5358 "%s", fmt, cluster_size_hint);
5359 error_free(local_err);
5360 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005361 }
5362
5363out:
5364 free_option_parameters(create_options);
5365 free_option_parameters(param);
5366
Markus Armbruster84d18f02014-01-30 15:07:28 +01005367 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005368 error_propagate(errp, local_err);
5369 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005370}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005371
5372AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5373{
5374 /* Currently BlockDriverState always uses the main loop AioContext */
5375 return qemu_get_aio_context();
5376}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005377
5378void bdrv_add_before_write_notifier(BlockDriverState *bs,
5379 NotifierWithReturn *notifier)
5380{
5381 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5382}
Max Reitz6f176b42013-09-03 10:09:50 +02005383
5384int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
5385{
5386 if (bs->drv->bdrv_amend_options == NULL) {
5387 return -ENOTSUP;
5388 }
5389 return bs->drv->bdrv_amend_options(bs, options);
5390}
Benoît Canetf6186f42013-10-02 14:33:48 +02005391
Benoît Canet212a5a82014-01-23 21:31:36 +01005392/* Used to recurse on single child block filters.
5393 * Single child block filter will store their child in bs->file.
5394 */
5395bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
5396 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005397{
Benoît Canet212a5a82014-01-23 21:31:36 +01005398 if (!bs->drv) {
5399 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005400 }
5401
Benoît Canet212a5a82014-01-23 21:31:36 +01005402 if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
5403 if (bs == candidate) {
5404 return true;
5405 } else {
5406 return false;
5407 }
Benoît Canetf6186f42013-10-02 14:33:48 +02005408 }
5409
Benoît Canet212a5a82014-01-23 21:31:36 +01005410 if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {
5411 return false;
5412 }
5413
5414 if (!bs->file) {
5415 return false;
5416 }
5417
5418 return bdrv_recurse_is_first_non_filter(bs->file, candidate);
Benoît Canetf6186f42013-10-02 14:33:48 +02005419}
5420
Benoît Canet212a5a82014-01-23 21:31:36 +01005421bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5422 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005423{
Benoît Canet212a5a82014-01-23 21:31:36 +01005424 if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
5425 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5426 }
5427
5428 return bdrv_generic_is_first_non_filter(bs, candidate);
5429}
5430
5431/* This function checks if the candidate is the first non filter bs down it's
5432 * bs chain. Since we don't have pointers to parents it explore all bs chains
5433 * from the top. Some filters can choose not to pass down the recursion.
5434 */
5435bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5436{
5437 BlockDriverState *bs;
5438
5439 /* walk down the bs forest recursively */
5440 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5441 bool perm;
5442
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005443 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005444
5445 /* candidate is the first non filter */
5446 if (perm) {
5447 return true;
5448 }
5449 }
5450
5451 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005452}