/*
 * Image streaming
 *
 * Copyright IBM, Corp. 2011
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"

enum {
    /*
     * Maximum chunk size to feed to copy-on-read. This should be
     * large enough to process multiple clusters in a single call, so
     * that populating contiguous regions of the image is efficient.
     */
    STREAM_CHUNK = 512 * 1024, /* in bytes */
};

typedef struct StreamBlockJob {
    BlockJob common;
    BlockDriverState *bottom;
    BlockdevOnError on_error;
    char *backing_file_str;
    bool bs_read_only;
    bool chain_frozen;
} StreamBlockJob;

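/*
 * Trigger a copy-on-read for [offset, offset + bytes) so the data ends up in
 * the top image.  With BDRV_REQ_PREFETCH and a NULL QEMUIOVector, nothing is
 * copied back to the caller; the request only populates the top layer.
 */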
static int coroutine_fn stream_populate(BlockBackend *blk,
                                        int64_t offset, uint64_t bytes)
{
    assert(bytes < SIZE_MAX);

    return blk_co_preadv(blk, offset, bytes, NULL,
                         BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
}

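/*
 * Abort callback: runs when the job fails or is cancelled.  Only the backing
 * chain freeze needs to be undone here; stream_clean() takes care of the
 * remaining cleanup.
 */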
static void stream_abort(Job *job)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);

    if (s->chain_frozen) {
        BlockJob *bjob = &s->common;
        bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->bottom);
    }
}

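/*
 * Prepare callback: runs on successful completion, before stream_clean().
 * Drop the now-redundant intermediate images by pointing bs at the new base
 * and recording the new backing file in the image header.
 */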
static int stream_prepare(Job *job)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockJob *bjob = &s->common;
    BlockDriverState *bs = blk_bs(bjob->blk);
    BlockDriverState *base = backing_bs(s->bottom);
    Error *local_err = NULL;
    int ret = 0;

    bdrv_unfreeze_backing_chain(bs, s->bottom);
    s->chain_frozen = false;

    if (bs->backing) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (base) {
            base_id = s->backing_file_str;
            if (base->drv) {
                base_fmt = base->drv->format_name;
            }
        }
        bdrv_set_backing_hd(bs, base, &local_err);
        ret = bdrv_change_backing_file(bs, base_id, base_fmt);
        if (local_err) {
            error_report_err(local_err);
            return -EPERM;
        }
    }

    return ret;
}

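/*
 * Clean callback: always runs last, whether the job succeeded or not.
 * Restore the read-only flag if stream_start() had to reopen the image
 * read-write, and free the copied backing file string.
 */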
static void stream_clean(Job *job)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockJob *bjob = &s->common;
    BlockDriverState *bs = blk_bs(bjob->blk);

    /* Reopen the image back in read-only mode if necessary */
    if (s->bs_read_only) {
        /* Give up write permissions before making it read-only */
        blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
        bdrv_reopen_set_read_only(bs, true, NULL);
    }

    g_free(s->backing_file_str);
}

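/*
 * Main job loop: walk the image in STREAM_CHUNK steps and copy up every
 * range that is allocated somewhere between the top and the bottom node but
 * not yet allocated in the top image.
 */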
static int coroutine_fn stream_run(Job *job, Error **errp)
{
    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
    BlockBackend *blk = s->common.blk;
    BlockDriverState *bs = blk_bs(blk);
    bool enable_cor = !backing_bs(s->bottom);
    int64_t len;
    int64_t offset = 0;
    uint64_t delay_ns = 0;
    int error = 0;
    int64_t n = 0; /* bytes */

    if (bs == s->bottom) {
        /* Nothing to stream */
        return 0;
    }

    len = bdrv_getlength(bs);
    if (len < 0) {
        return len;
    }
    job_progress_set_remaining(&s->common.job, len);

    /* Turn on copy-on-read for the whole block device so that guest read
     * requests help us make progress. Only do this when copying the entire
     * backing chain since the copy-on-read operation does not take base into
     * account.
     */
    if (enable_cor) {
        bdrv_enable_copy_on_read(bs);
    }

    for ( ; offset < len; offset += n) {
        bool copy;
        int ret;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
        job_sleep_ns(&s->common.job, delay_ns);
        if (job_is_cancelled(&s->common.job)) {
            break;
        }

        copy = false;

        ret = bdrv_is_allocated(bs, offset, STREAM_CHUNK, &n);
        if (ret == 1) {
            /* Allocated in the top, no need to copy. */
        } else if (ret >= 0) {
            /* Copy if allocated in the intermediate images. Limit to the
             * known-unallocated area [offset, offset + n). */
            ret = bdrv_is_allocated_above(backing_bs(bs), s->bottom, true,
                                          offset, n, &n);
            /* Finish early if end of backing file has been reached */
            if (ret == 0 && n == 0) {
                n = len - offset;
            }

            copy = (ret == 1);
        }
        trace_stream_one_iteration(s, offset, n, ret);
        if (copy) {
            ret = stream_populate(blk, offset, n);
        }
        if (ret < 0) {
            BlockErrorAction action =
                block_job_error_action(&s->common, s->on_error, true, -ret);
            if (action == BLOCK_ERROR_ACTION_STOP) {
                n = 0;
                continue;
            }
            if (error == 0) {
                error = ret;
            }
            if (action == BLOCK_ERROR_ACTION_REPORT) {
                break;
            }
        }

        /* Publish progress */
        job_progress_update(&s->common.job, n);
        if (copy) {
            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
    }

    if (enable_cor) {
        bdrv_disable_copy_on_read(bs);
    }

    /* Do not remove the backing file if an error was there but ignored. */
    return error;
}

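/*
 * Job callbacks: .run does the copying; on success .prepare rewrites the
 * backing chain, on failure .abort unfreezes it, and .clean always runs
 * last.
 */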
static const BlockJobDriver stream_job_driver = {
    .job_driver = {
        .instance_size = sizeof(StreamBlockJob),
        .job_type      = JOB_TYPE_STREAM,
        .free          = block_job_free,
        .run           = stream_run,
        .prepare       = stream_prepare,
        .abort         = stream_abort,
        .clean         = stream_clean,
        .user_resume   = block_job_user_resume,
    },
};

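/*
 * Create and start an image streaming job on @bs.  Data is copied up from
 * the backing chain above @base into @bs; @backing_file_str, if non-NULL, is
 * written into @bs's image header as the new backing file name, and read
 * errors are handled according to @on_error.
 */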
void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, Error **errp)
{
    StreamBlockJob *s;
    BlockDriverState *iter;
    bool bs_read_only;
    int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
    BlockDriverState *bottom = bdrv_find_overlay(bs, base);

    if (bdrv_freeze_backing_chain(bs, bottom, errp) < 0) {
        return;
    }

    /* Make sure that the image is opened in read-write mode */
    bs_read_only = bdrv_is_read_only(bs);
    if (bs_read_only) {
        if (bdrv_reopen_set_read_only(bs, false, errp) != 0) {
            bs_read_only = false;
            goto fail;
        }
    }

    /* Prevent concurrent jobs trying to modify the graph structure here, we
     * already have our own plans. Also don't allow resize as the image size is
     * queried only at the job start and then cached. */
    s = block_job_create(job_id, &stream_job_driver, NULL, bs,
                         basic_flags | BLK_PERM_GRAPH_MOD,
                         basic_flags | BLK_PERM_WRITE,
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        goto fail;
    }

    /* Block all intermediate nodes between bs and base, because they will
     * disappear from the chain after this operation. The streaming job reads
     * every block only once, assuming that it doesn't change, so forbid writes
     * and resizes. Reassign the base node pointer because the backing BS of the
     * bottom node might change after the call to bdrv_reopen_set_read_only()
     * due to parallel block jobs running.
     */
    base = backing_bs(bottom);
    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                           basic_flags, &error_abort);
    }

    s->bottom = bottom;
    s->backing_file_str = g_strdup(backing_file_str);
    s->bs_read_only = bs_read_only;
    s->chain_frozen = true;

    s->on_error = on_error;
    trace_stream_start(bs, base, s);
    job_start(&s->common.job);
    return;

fail:
    if (bs_read_only) {
        bdrv_reopen_set_read_only(bs, true, NULL);
    }
    bdrv_unfreeze_backing_chain(bs, bottom);
}