/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

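/*
 * Illustrative note (not from the original source): save_page_header()
 * below OR's these flags into the low bits of the 64-bit page offset it
 * puts on the wire. For example, a zero page at offset 0x2000 of the
 * current block is announced as
 *
 *     qemu_put_be64(f, 0x2000 | RAM_SAVE_FLAG_COMPRESS);
 *
 * followed by a single zero byte (see save_zero_page()), plus the block
 * idstr when RAM_SAVE_FLAG_CONTINUE is not set.
 */
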
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in the main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock.
 *
 * Returns the new cache size on success or a negative value on error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

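/*
 * Hedged usage sketch (not part of the original file): the cache size is
 * rounded down to a power of two, so asking for 5 MiB yields 4 MiB:
 *
 *     int64_t got = xbzrle_cache_resize(5 * 1024 * 1024);
 *     assert(got == 4 * 1024 * 1024);    // pow2floor(5 MiB)
 */
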
/* State of RAM for migration */
struct RAMState {
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last offset we have sent data from */
    ram_addr_t last_offset;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
};
typedef struct RAMState RAMState;

static RAMState ram_state;

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page; the lower bits
 *          contain flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}

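/*
 * Sketch of the header layout produced above (derived from the code,
 * sizes in bytes):
 *
 *   8    be64: page offset with RAM_SAVE_FLAG_* OR'd into the low bits
 *   1    length of the block idstr  \ only when RAM_SAVE_FLAG_CONTINUE
 *   len  idstr bytes                / is absent (first page of a block)
 */
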
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce the amount of guest CPU execution to hopefully slow down memory
 * writes. If the guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

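/*
 * Worked example (the parameter values are an assumption, not taken from
 * this file): with cpu_throttle_initial = 20 and cpu_throttle_increment
 * = 10, successive calls to mig_throttle_guest_down() hold the guest at
 * 20%, then 30%, 40%, ... of lost CPU time until migration converges.
 */
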
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 rs->bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             rs->bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

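/*
 * Wire format of the XBZRLE record sent above, as the code writes it:
 * the page header (offset | RAM_SAVE_FLAG_XBZRLE), one byte
 * ENCODING_FLAG_XBZRLE, a big-endian 16-bit encoded_len, then
 * encoded_len bytes of XBZRLE data - hence the "encoded_len + 1 + 2"
 * accounting.
 */
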
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the byte offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: starting address (typically so we can continue from previous page)
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (rs->ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}

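/*
 * Note on the bulk-stage shortcut above: during the first pass every
 * page is assumed dirty, so for any page past the start of a block the
 * bitmap scan is skipped entirely and "nr + 1" (the next page) is
 * returned directly.
 */
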
static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

static int64_t num_dirty_pages_period;
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
                             start, length, &num_dirty_pages_period);
}

/* Fix me: there are too many global variables used in migration process. */
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(RAMState *rs)
{
    rs->time_last_bitmap_sync = 0;
    rs->bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        summary |= block->page_size;
    }

    return summary;
}

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    uint64_t bytes_xfer_now;

    rs->bitmap_sync_count++;

    if (!rs->bytes_xfer_prev) {
        rs->bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens twice, start or
               increase throttling */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (rs->dirty_rate_high_cnt++ >= 2)) {
                trace_migration_throttle();
                rs->dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
            rs->bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                    (double)(acct_info.xbzrle_cache_miss -
                             xbzrle_cache_miss_prev) /
                    (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        rs->time_last_bitmap_sync = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = rs->bitmap_sync_count;
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
    }
}

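/*
 * Example of the rate computation above, with assumed numbers: if 25600
 * pages were dirtied in a 2000 ms window, dirty_pages_rate becomes
 * 25600 * 1000 / 2000 = 12800 pages/s; with 4 KiB target pages,
 * dirty_bytes_rate is then 12800 * 4096 = 50 MiB/s.
 */
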
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

static void ram_release_pages(MigrationState *ms, const char *rbname,
                              uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
        return;
    }

    ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @ms: current migration state
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
                         PageSearchStatus *pss, bool last_stage,
                         uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* In doubt send page as normal */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(rs, current_addr);
            ram_release_pages(ms, block->idstr, pss->offset, pages);
        } else if (!rs->ram_bulk_stage &&
                   !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(rs, f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy(ms));
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

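/*
 * Decision flow of ram_save_page(), summarised from the code above:
 *   1. ram_control_save_page() (e.g. RDMA) gets the first chance;
 *   2. otherwise try save_zero_page();
 *   3. failing that, and when not in the bulk stage or postcopy, try
 *      save_xbzrle_page();
 *   4. finally fall back to sending the full TARGET_PAGE_SIZE buffer.
 */
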
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(migrate_get_current(), block->idstr,
                          offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

static uint64_t bytes_transferred;

static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @ms: current migration state
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(RAMState *rs, MigrationState *ms,
                                    QEMUFile *f,
                                    PageSearchStatus *pss, bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != rs->last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    *bytes_transferred += bytes_xmit + blen;
                    acct_info.norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(f, blen);
                    error_report("compressed data failed!");
                }
            }
            if (pages > 0) {
                ram_release_pages(ms, block->idstr, pss->offset, pages);
            }
        } else {
            offset |= RAM_SAVE_FLAG_CONTINUE;
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            } else {
                ram_release_pages(ms, block->idstr, pss->offset, pages);
            }
        }
    }

    return pages;
}

/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns whether a page was found
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static bool find_dirty_block(RAMState *rs, QEMUFile *f, PageSearchStatus *pss,
                             bool *again, ram_addr_t *ram_addr_abs)
{
    pss->offset = migration_bitmap_find_dirty(rs, pss->block, pss->offset,
                                              ram_addr_abs);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->offset >= rs->last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @ms: current migration state
 * @offset: used to return the offset within the RAMBlock
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
                              ram_addr_t *ram_addr_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ms->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
        struct MigrationSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&ms->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *ram_addr_abs = (entry->offset + entry->rb->offset) &
                        TARGET_PAGE_MASK;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&ms->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns whether a queued page was found
 *
 * @rs: current RAM state
 * @ms: current migration state
 * @pss: data about the state of the current dirty page scan
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static bool get_queued_page(RAMState *rs, MigrationState *ms,
                            PageSearchStatus *pss,
                            ram_addr_t *ram_addr_abs)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(ms, &offset, ram_addr_abs);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(
                    block->idstr, (uint64_t)offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr,
                                      (uint64_t)offset,
                                      (uint64_t)*ram_addr_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->offset = offset;
    }

    return !!block;
}

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left. If any page is left, we drop it.
 *
 * @ms: current migration state
 */
void migration_page_queue_free(MigrationState *ms)
{
    struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}

/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @ms: current migration state
 * @rbname: Name of the RAMBlock of the request. NULL means the
 *          same as the last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                         ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;

    ms->postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = ms->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        ms->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct MigrationSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct MigrationSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&ms->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&ms->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}

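/*
 * Hypothetical usage sketch (the block name and offset are made up): a
 * postcopy fault on one target page of the main memory block could be
 * queued as
 *
 *     ram_save_queue_pages(ms, "pc.ram", 0x42000, TARGET_PAGE_SIZE);
 */
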
/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @ms: current migration state
 * @f: QEMUFile where to send the data
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001316static int ram_save_target_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
zhanghailianga08f6892016-01-15 11:37:44 +08001317 PageSearchStatus *pss,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001318 bool last_stage,
1319 uint64_t *bytes_transferred,
1320 ram_addr_t dirty_ram_abs)
1321{
1322 int res = 0;
1323
 1324 /* Check if the page is dirty and if it is, send it */
1325 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1326 unsigned long *unsentmap;
1327 if (compression_switch && migrate_use_compression()) {
Juan Quintela6f37bb82017-03-13 19:26:29 +01001328 res = ram_save_compressed_page(rs, ms, f, pss,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001329 last_stage,
1330 bytes_transferred);
1331 } else {
Juan Quintela6f37bb82017-03-13 19:26:29 +01001332 res = ram_save_page(rs, ms, f, pss, last_stage,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001333 bytes_transferred);
1334 }
1335
1336 if (res < 0) {
1337 return res;
1338 }
1339 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1340 if (unsentmap) {
1341 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1342 }
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001343 /* Only update last_sent_block if a block was actually sent; xbzrle
1344 * might have decided the page was identical so didn't bother writing
1345 * to the stream.
1346 */
1347 if (res > 0) {
Juan Quintela6f37bb82017-03-13 19:26:29 +01001348 rs->last_sent_block = pss->block;
Dr. David Alan Gilbert3fd3c4b2015-12-10 16:31:46 +00001349 }
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001350 }
1351
1352 return res;
1353}
1354
1355/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001356 * ram_save_host_page: save a whole host page
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001357 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001358 * Starting at *offset send pages up to the end of the current host
1359 * page. It's valid for the initial offset to point into the middle of
 1360 * a host page, in which case the remainder of the host page is sent.
1361 * Only dirty target pages are sent. Note that the host page size may
1362 * be a huge page for this block.
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001363 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001364 * Returns the number of pages written or negative on error
1365 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001366 * @rs: current RAM state
Juan Quintela3d0684b2017-03-23 15:06:39 +01001367 * @ms: current migration state
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001368 * @f: QEMUFile where to send the data
Juan Quintela3d0684b2017-03-23 15:06:39 +01001369 * @pss: data about the page we want to send
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001370 * @last_stage: if we are at the completion stage
1371 * @bytes_transferred: increase it with the number of transferred bytes
1372 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1373 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001374static int ram_save_host_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
zhanghailianga08f6892016-01-15 11:37:44 +08001375 PageSearchStatus *pss,
1376 bool last_stage,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001377 uint64_t *bytes_transferred,
1378 ram_addr_t dirty_ram_abs)
1379{
1380 int tmppages, pages = 0;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001381 size_t pagesize = qemu_ram_pagesize(pss->block);
1382
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001383 do {
Juan Quintela6f37bb82017-03-13 19:26:29 +01001384 tmppages = ram_save_target_page(rs, ms, f, pss, last_stage,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001385 bytes_transferred, dirty_ram_abs);
1386 if (tmppages < 0) {
1387 return tmppages;
1388 }
1389
1390 pages += tmppages;
zhanghailianga08f6892016-01-15 11:37:44 +08001391 pss->offset += TARGET_PAGE_SIZE;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001392 dirty_ram_abs += TARGET_PAGE_SIZE;
Dr. David Alan Gilbert4c011c32017-02-24 18:28:39 +00001393 } while (pss->offset & (pagesize - 1));
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001394
1395 /* The offset we leave with is the last one we looked at */
zhanghailianga08f6892016-01-15 11:37:44 +08001396 pss->offset -= TARGET_PAGE_SIZE;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001397 return pages;
1398}
Dr. David Alan Gilbert6c595cd2015-11-05 18:11:08 +00001399
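/*
 * Illustrative sketch, not part of the original file: the mask
 * arithmetic behind the loop condition above. For a power-of-two host
 * page size, this hypothetical helper yields how many target pages are
 * left before the next host-page boundary.
 */
#if 0
static size_t example_target_pages_left(ram_addr_t offset, size_t pagesize)
{
    size_t in_page = offset & (pagesize - 1);   /* offset within host page */

    return (pagesize - in_page) / TARGET_PAGE_SIZE;
}
#endif
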
1400/**
Juan Quintela3d0684b2017-03-23 15:06:39 +01001401 * ram_find_and_save_block: finds a dirty page and sends it to f
Juan Quintela56e93d22015-05-07 19:33:31 +02001402 *
1403 * Called within an RCU critical section.
1404 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001405 * Returns the number of pages written where zero means no dirty pages
Juan Quintela56e93d22015-05-07 19:33:31 +02001406 *
Juan Quintela6f37bb82017-03-13 19:26:29 +01001407 * @rs: current RAM state
Juan Quintela56e93d22015-05-07 19:33:31 +02001408 * @f: QEMUFile where to send the data
1409 * @last_stage: if we are at the completion stage
1410 * @bytes_transferred: increase it with the number of transferred bytes
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001411 *
1412 * On systems where host-page-size > target-page-size it will send all the
1413 * pages in a host page that are dirty.
Juan Quintela56e93d22015-05-07 19:33:31 +02001414 */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001416static int ram_find_and_save_block(RAMState *rs, QEMUFile *f, bool last_stage,
Juan Quintela56e93d22015-05-07 19:33:31 +02001417 uint64_t *bytes_transferred)
1418{
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001419 PageSearchStatus pss;
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001420 MigrationState *ms = migrate_get_current();
Juan Quintela56e93d22015-05-07 19:33:31 +02001421 int pages = 0;
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001422 bool again, found;
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001423 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1424 ram_addr_t space */
Juan Quintela56e93d22015-05-07 19:33:31 +02001425
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05301426 /* No dirty page as there is zero RAM */
1427 if (!ram_bytes_total()) {
1428 return pages;
1429 }
1430
Juan Quintela6f37bb82017-03-13 19:26:29 +01001431 pss.block = rs->last_seen_block;
1432 pss.offset = rs->last_offset;
Dr. David Alan Gilbertb8fb8cb2015-09-23 15:27:10 +01001433 pss.complete_round = false;
1434
1435 if (!pss.block) {
1436 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1437 }
Juan Quintela56e93d22015-05-07 19:33:31 +02001438
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001439 do {
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001440 again = true;
Juan Quintela6f37bb82017-03-13 19:26:29 +01001441 found = get_queued_page(rs, ms, &pss, &dirty_ram_abs);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001442
1443 if (!found) {
1444 /* priority queue empty, so just search for something dirty */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001445 found = find_dirty_block(rs, f, &pss, &again, &dirty_ram_abs);
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001446 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001447
1448 if (found) {
Juan Quintela6f37bb82017-03-13 19:26:29 +01001449 pages = ram_save_host_page(rs, ms, f, &pss,
Dr. David Alan Gilberta82d5932015-11-05 18:11:09 +00001450 last_stage, bytes_transferred,
1451 dirty_ram_abs);
Juan Quintela56e93d22015-05-07 19:33:31 +02001452 }
Dr. David Alan Gilbertb9e60922015-09-23 15:27:11 +01001453 } while (!pages && again);
Juan Quintela56e93d22015-05-07 19:33:31 +02001454
Juan Quintela6f37bb82017-03-13 19:26:29 +01001455 rs->last_seen_block = pss.block;
1456 rs->last_offset = pss.offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02001457
1458 return pages;
1459}
1460
1461void acct_update_position(QEMUFile *f, size_t size, bool zero)
1462{
1463 uint64_t pages = size / TARGET_PAGE_SIZE;
1464 if (zero) {
1465 acct_info.dup_pages += pages;
1466 } else {
1467 acct_info.norm_pages += pages;
1468 bytes_transferred += size;
1469 qemu_update_position(f, size);
1470 }
1471}
1472
1473static ram_addr_t ram_save_remaining(void)
1474{
1475 return migration_dirty_pages;
1476}
1477
1478uint64_t ram_bytes_remaining(void)
1479{
1480 return ram_save_remaining() * TARGET_PAGE_SIZE;
1481}
1482
1483uint64_t ram_bytes_transferred(void)
1484{
1485 return bytes_transferred;
1486}
1487
1488uint64_t ram_bytes_total(void)
1489{
1490 RAMBlock *block;
1491 uint64_t total = 0;
1492
1493 rcu_read_lock();
1494 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1495 total += block->used_length;
1496 rcu_read_unlock();
1497 return total;
1498}
1499
1500void free_xbzrle_decoded_buf(void)
1501{
1502 g_free(xbzrle_decoded_buf);
1503 xbzrle_decoded_buf = NULL;
1504}
1505
Denis V. Lunev60be6342015-09-28 14:41:58 +03001506static void migration_bitmap_free(struct BitmapRcu *bmap)
1507{
1508 g_free(bmap->bmap);
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001509 g_free(bmap->unsentmap);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001510 g_free(bmap);
1511}
1512
Liang Li6ad2a212015-11-02 15:37:03 +08001513static void ram_migration_cleanup(void *opaque)
Juan Quintela56e93d22015-05-07 19:33:31 +02001514{
Li Zhijian2ff64032015-07-02 20:18:05 +08001515 /* the caller must hold the iothread lock or be in a bottom half, so
 1516 * there is no write race against this migration_bitmap
1517 */
Denis V. Lunev60be6342015-09-28 14:41:58 +03001518 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1519 atomic_rcu_set(&migration_bitmap_rcu, NULL);
Li Zhijian2ff64032015-07-02 20:18:05 +08001520 if (bitmap) {
Juan Quintela56e93d22015-05-07 19:33:31 +02001521 memory_global_dirty_log_stop();
Denis V. Lunev60be6342015-09-28 14:41:58 +03001522 call_rcu(bitmap, migration_bitmap_free, rcu);
Juan Quintela56e93d22015-05-07 19:33:31 +02001523 }
1524
1525 XBZRLE_cache_lock();
1526 if (XBZRLE.cache) {
1527 cache_fini(XBZRLE.cache);
1528 g_free(XBZRLE.encoded_buf);
1529 g_free(XBZRLE.current_buf);
Vijaya Kumar Kadb65de2016-10-24 16:26:49 +01001530 g_free(ZERO_TARGET_PAGE);
Juan Quintela56e93d22015-05-07 19:33:31 +02001531 XBZRLE.cache = NULL;
1532 XBZRLE.encoded_buf = NULL;
1533 XBZRLE.current_buf = NULL;
1534 }
1535 XBZRLE_cache_unlock();
1536}
1537
Juan Quintela6f37bb82017-03-13 19:26:29 +01001538static void ram_state_reset(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02001539{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001540 rs->last_seen_block = NULL;
1541 rs->last_sent_block = NULL;
1542 rs->last_offset = 0;
1543 rs->last_version = ram_list.version;
1544 rs->ram_bulk_stage = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02001545}
1546
1547#define MAX_WAIT 50 /* ms, half buffered_file limit */
1548
Li Zhijiandd631692015-07-02 20:18:06 +08001549void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1550{
 1551 /* called from the QEMU main thread, so there is
 1552 * no write race against this migration_bitmap
1553 */
Denis V. Lunev60be6342015-09-28 14:41:58 +03001554 if (migration_bitmap_rcu) {
1555 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1556 bitmap = g_new(struct BitmapRcu, 1);
1557 bitmap->bmap = bitmap_new(new);
Li Zhijiandd631692015-07-02 20:18:06 +08001558
 1559 /* prevent bits in migration_bitmap from being set
 1560 * by migration_bitmap_sync_range() at the same time.
 1561 * It is safe for migration if bits in migration_bitmap are
 1562 * cleared at the same time.
1563 */
1564 qemu_mutex_lock(&migration_bitmap_mutex);
Denis V. Lunev60be6342015-09-28 14:41:58 +03001565 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1566 bitmap_set(bitmap->bmap, old, new - old);
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00001567
 1568 /* We don't have a way to safely extend the unsentmap
 1569 * with RCU; so mark it as missing, and entry to postcopy
 1570 * will then fail.
1571 */
1572 bitmap->unsentmap = NULL;
1573
Denis V. Lunev60be6342015-09-28 14:41:58 +03001574 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
Li Zhijiandd631692015-07-02 20:18:06 +08001575 qemu_mutex_unlock(&migration_bitmap_mutex);
1576 migration_dirty_pages += new - old;
Denis V. Lunev60be6342015-09-28 14:41:58 +03001577 call_rcu(old_bitmap, migration_bitmap_free, rcu);
Li Zhijiandd631692015-07-02 20:18:06 +08001578 }
1579}
Juan Quintela56e93d22015-05-07 19:33:31 +02001580
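/*
 * Illustrative sketch, not part of the original file: the RCU read-side
 * pattern that pairs with the atomic_rcu_set()/call_rcu() dance above.
 * Readers fetch the bitmap pointer once inside an RCU critical section;
 * the old bitmap is only freed after all such readers have finished.
 */
#if 0
static bool example_test_dirty(unsigned long page_nr)
{
    bool dirty;

    rcu_read_lock();
    dirty = test_bit(page_nr, atomic_rcu_read(&migration_bitmap_rcu)->bmap);
    rcu_read_unlock();
    return dirty;
}
#endif
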
Dr. David Alan Gilbert4f2e4252015-11-05 18:10:38 +00001581/*
1582 * 'expected' is the value you expect the bitmap mostly to be full
1583 * of; it won't bother printing lines that are all this value.
1584 * If 'todump' is null the migration bitmap is dumped.
1585 */
1586void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1587{
1588 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1589
1590 int64_t cur;
1591 int64_t linelen = 128;
1592 char linebuf[129];
1593
1594 if (!todump) {
1595 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1596 }
1597
1598 for (cur = 0; cur < ram_pages; cur += linelen) {
1599 int64_t curb;
1600 bool found = false;
1601 /*
1602 * Last line; catch the case where the line length
1603 * is longer than remaining ram
1604 */
1605 if (cur + linelen > ram_pages) {
1606 linelen = ram_pages - cur;
1607 }
1608 for (curb = 0; curb < linelen; curb++) {
1609 bool thisbit = test_bit(cur + curb, todump);
1610 linebuf[curb] = thisbit ? '1' : '.';
1611 found = found || (thisbit != expected);
1612 }
1613 if (found) {
1614 linebuf[curb] = '\0';
1615 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1616 }
1617 }
1618}
1619
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001620/* **** functions for postcopy ***** */
1621
Pavel Butsykinced1c612017-02-03 18:23:21 +03001622void ram_postcopy_migrated_memory_release(MigrationState *ms)
1623{
1624 struct RAMBlock *block;
1625 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1626
1627 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1628 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1629 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1630 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1631
1632 while (run_start < range) {
1633 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1634 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1635 (run_end - run_start) << TARGET_PAGE_BITS);
1636 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1637 }
1638 }
1639}
1640
Juan Quintela3d0684b2017-03-23 15:06:39 +01001641/**
1642 * postcopy_send_discard_bm_ram: discard a RAMBlock
1643 *
1644 * Returns zero on success
1645 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001646 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1647 * Note: At this point the 'unsentmap' is the processed bitmap combined
1648 * with the dirtymap; so a '1' means it's either dirty or unsent.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001649 *
1650 * @ms: current migration state
1651 * @pds: state for postcopy
1652 * @start: RAMBlock starting page
1653 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001654 */
1655static int postcopy_send_discard_bm_ram(MigrationState *ms,
1656 PostcopyDiscardState *pds,
1657 unsigned long start,
1658 unsigned long length)
1659{
1660 unsigned long end = start + length; /* one after the end */
1661 unsigned long current;
1662 unsigned long *unsentmap;
1663
1664 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1665 for (current = start; current < end; ) {
1666 unsigned long one = find_next_bit(unsentmap, end, current);
1667
1668 if (one <= end) {
1669 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1670 unsigned long discard_length;
1671
1672 if (zero >= end) {
1673 discard_length = end - one;
1674 } else {
1675 discard_length = zero - one;
1676 }
Dr. David Alan Gilbertd688c622016-06-13 12:16:40 +01001677 if (discard_length) {
1678 postcopy_discard_send_range(ms, pds, one, discard_length);
1679 }
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001680 current = one + discard_length;
1681 } else {
1682 current = one;
1683 }
1684 }
1685
1686 return 0;
1687}
1688
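/*
 * Illustrative sketch, not part of the original file: the generic
 * "walk runs of set bits" pattern the loop above is an instance of,
 * with a hypothetical visit() callback standing in for
 * postcopy_discard_send_range().
 */
#if 0
static void example_for_each_run(unsigned long *map,
                                 unsigned long start, unsigned long end,
                                 void (*visit)(unsigned long first,
                                               unsigned long npages))
{
    unsigned long one = find_next_bit(map, end, start);

    while (one < end) {
        unsigned long zero = find_next_zero_bit(map, end, one + 1);

        visit(one, zero - one);
        one = find_next_bit(map, end, zero);
    }
}
#endif
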
Juan Quintela3d0684b2017-03-23 15:06:39 +01001689/**
1690 * postcopy_each_ram_send_discard: discard all RAMBlocks
1691 *
1692 * Returns 0 for success or negative for error
1693 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001694 * Utility for the outgoing postcopy code.
1695 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1696 * passing it bitmap indexes and name.
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001697 * (qemu_ram_foreach_block ends up passing unscaled lengths
1698 * which would mean postcopy code would have to deal with target page)
Juan Quintela3d0684b2017-03-23 15:06:39 +01001699 *
1700 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001701 */
1702static int postcopy_each_ram_send_discard(MigrationState *ms)
1703{
1704 struct RAMBlock *block;
1705 int ret;
1706
1707 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1708 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1709 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1710 first,
1711 block->idstr);
1712
1713 /*
1714 * Postcopy sends chunks of bitmap over the wire, but it
1715 * just needs indexes at this point, avoids it having
1716 * target page specific code.
1717 */
1718 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1719 block->used_length >> TARGET_PAGE_BITS);
1720 postcopy_discard_send_finish(ms, pds);
1721 if (ret) {
1722 return ret;
1723 }
1724 }
1725
1726 return 0;
1727}
1728
Juan Quintela3d0684b2017-03-23 15:06:39 +01001729/**
 1730 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001731 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001732 * Helper for postcopy_chunk_hostpages; it's called twice to
1733 * canonicalize the two bitmaps, that are similar, but one is
1734 * inverted.
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001735 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001736 * Postcopy requires that all target pages in a host page are dirty or
1737 * clean, not a mix. This function canonicalizes the bitmaps.
1738 *
1739 * @ms: current migration state
1740 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1741 * otherwise we need to canonicalize partially dirty host pages
1742 * @block: block that contains the page we want to canonicalize
1743 * @pds: state for postcopy
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001744 */
1745static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1746 RAMBlock *block,
1747 PostcopyDiscardState *pds)
1748{
1749 unsigned long *bitmap;
1750 unsigned long *unsentmap;
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001751 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001752 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1753 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1754 unsigned long last = first + (len - 1);
1755 unsigned long run_start;
1756
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001757 if (block->page_size == TARGET_PAGE_SIZE) {
1758 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1759 return;
1760 }
1761
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001762 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1763 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1764
1765 if (unsent_pass) {
1766 /* Find a sent page */
1767 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1768 } else {
1769 /* Find a dirty page */
1770 run_start = find_next_bit(bitmap, last + 1, first);
1771 }
1772
1773 while (run_start <= last) {
1774 bool do_fixup = false;
1775 unsigned long fixup_start_addr;
1776 unsigned long host_offset;
1777
1778 /*
1779 * If the start of this run of pages is in the middle of a host
1780 * page, then we need to fixup this host page.
1781 */
1782 host_offset = run_start % host_ratio;
1783 if (host_offset) {
1784 do_fixup = true;
1785 run_start -= host_offset;
1786 fixup_start_addr = run_start;
1787 /* For the next pass */
1788 run_start = run_start + host_ratio;
1789 } else {
1790 /* Find the end of this run */
1791 unsigned long run_end;
1792 if (unsent_pass) {
1793 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1794 } else {
1795 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1796 }
1797 /*
1798 * If the end isn't at the start of a host page, then the
1799 * run doesn't finish at the end of a host page
1800 * and we need to discard.
1801 */
1802 host_offset = run_end % host_ratio;
1803 if (host_offset) {
1804 do_fixup = true;
1805 fixup_start_addr = run_end - host_offset;
1806 /*
1807 * This host page has gone, the next loop iteration starts
1808 * from after the fixup
1809 */
1810 run_start = fixup_start_addr + host_ratio;
1811 } else {
1812 /*
1813 * No discards on this iteration, next loop starts from
1814 * next sent/dirty page
1815 */
1816 run_start = run_end + 1;
1817 }
1818 }
1819
1820 if (do_fixup) {
1821 unsigned long page;
1822
1823 /* Tell the destination to discard this page */
1824 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1825 /* For the unsent_pass we:
1826 * discard partially sent pages
1827 * For the !unsent_pass (dirty) we:
1828 * discard partially dirty pages that were sent
1829 * (any partially sent pages were already discarded
1830 * by the previous unsent_pass)
1831 */
1832 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1833 host_ratio);
1834 }
1835
1836 /* Clean up the bitmap */
1837 for (page = fixup_start_addr;
1838 page < fixup_start_addr + host_ratio; page++) {
1839 /* All pages in this host page are now not sent */
1840 set_bit(page, unsentmap);
1841
1842 /*
1843 * Remark them as dirty, updating the count for any pages
1844 * that weren't previously dirty.
1845 */
1846 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1847 }
1848 }
1849
1850 if (unsent_pass) {
1851 /* Find the next sent page for the next iteration */
1852 run_start = find_next_zero_bit(unsentmap, last + 1,
1853 run_start);
1854 } else {
1855 /* Find the next dirty page for the next iteration */
1856 run_start = find_next_bit(bitmap, last + 1, run_start);
1857 }
1858 }
1859}
1860
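/*
 * Illustrative sketch, not part of the original file: the rounding used
 * when a run boundary falls inside a host page. E.g. with 2MB huge
 * pages and 4KB target pages, host_ratio is 512, so a run starting at
 * target page 1000 is pulled back to page 512 before being discarded.
 */
#if 0
static unsigned long example_align_down_to_host_page(unsigned long page_nr,
                                                     unsigned int host_ratio)
{
    return page_nr - (page_nr % host_ratio);
}
#endif
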
Juan Quintela3d0684b2017-03-23 15:06:39 +01001861/**
 1862 * postcopy_chunk_hostpages: discard any partially sent host page
1863 *
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001864 * Utility for the outgoing postcopy code.
1865 *
1866 * Discard any partially sent host-page size chunks, mark any partially
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001867 * dirty host-page size chunks as all dirty. In this case the host-page
1868 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001869 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001870 * Returns zero on success
1871 *
1872 * @ms: current migration state
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001873 */
1874static int postcopy_chunk_hostpages(MigrationState *ms)
1875{
Juan Quintela6f37bb82017-03-13 19:26:29 +01001876 RAMState *rs = &ram_state;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001877 struct RAMBlock *block;
1878
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001879 /* Easiest way to make sure we don't resume in the middle of a host-page */
Juan Quintela6f37bb82017-03-13 19:26:29 +01001880 rs->last_seen_block = NULL;
1881 rs->last_sent_block = NULL;
1882 rs->last_offset = 0;
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001883
1884 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1885 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1886
1887 PostcopyDiscardState *pds =
1888 postcopy_discard_send_init(ms, first, block->idstr);
1889
1890 /* First pass: Discard all partially sent host pages */
1891 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1892 /*
1893 * Second pass: Ensure that all partially dirty host pages are made
1894 * fully dirty.
1895 */
1896 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1897
1898 postcopy_discard_send_finish(ms, pds);
1899 } /* ram_list loop */
1900
1901 return 0;
1902}
1903
Juan Quintela3d0684b2017-03-23 15:06:39 +01001904/**
1905 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1906 *
1907 * Returns zero on success
1908 *
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001909 * Transmit the set of pages to be discarded after precopy to the target;
 1910 * these are pages that:
1911 * a) Have been previously transmitted but are now dirty again
1912 * b) Pages that have never been transmitted, this ensures that
1913 * any pages on the destination that have been mapped by background
1914 * tasks get discarded (transparent huge pages is the specific concern)
 1915 * Hopefully this set is pretty sparse.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001916 *
1917 * @ms: current migration state
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001918 */
1919int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1920{
1921 int ret;
1922 unsigned long *bitmap, *unsentmap;
1923
1924 rcu_read_lock();
1925
1926 /* This should be our last sync, the src is now paused */
Juan Quintela8d820d62017-03-13 19:35:50 +01001927 migration_bitmap_sync(&ram_state);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001928
1929 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1930 if (!unsentmap) {
1931 /* We don't have a safe way to resize the sentmap, so
1932 * if the bitmap was resized it will be NULL at this
1933 * point.
1934 */
1935 error_report("migration ram resized during precopy phase");
1936 rcu_read_unlock();
1937 return -EINVAL;
1938 }
1939
Dr. David Alan Gilbert29c59172017-02-24 18:28:31 +00001940 /* Deal with TPS != HPS and huge pages */
Dr. David Alan Gilbert99e314e2015-11-05 18:11:15 +00001941 ret = postcopy_chunk_hostpages(ms);
1942 if (ret) {
1943 rcu_read_unlock();
1944 return ret;
1945 }
1946
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001947 /*
1948 * Update the unsentmap to be unsentmap = unsentmap | dirty
1949 */
1950 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1951 bitmap_or(unsentmap, unsentmap, bitmap,
1952 last_ram_offset() >> TARGET_PAGE_BITS);
1953
1954
1955 trace_ram_postcopy_send_discard_bitmap();
1956#ifdef DEBUG_POSTCOPY
1957 ram_debug_dump_bitmap(unsentmap, true);
1958#endif
1959
1960 ret = postcopy_each_ram_send_discard(ms);
1961 rcu_read_unlock();
1962
1963 return ret;
1964}
1965
Juan Quintela3d0684b2017-03-23 15:06:39 +01001966/**
1967 * ram_discard_range: discard dirtied pages at the beginning of postcopy
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001968 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001969 * Returns zero on success
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001970 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01001971 * @mis: current migration incoming state
Juan Quintela36449152017-03-23 15:11:59 +01001972 * @rbname: name of the RAMBlock of the request. NULL means the
 1973 * same as the last one.
Juan Quintela3d0684b2017-03-23 15:06:39 +01001974 * @start: RAMBlock starting page
1975 * @length: RAMBlock size
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001976 */
1977int ram_discard_range(MigrationIncomingState *mis,
Juan Quintela36449152017-03-23 15:11:59 +01001978 const char *rbname,
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001979 uint64_t start, size_t length)
1980{
1981 int ret = -1;
1982
Juan Quintela36449152017-03-23 15:11:59 +01001983 trace_ram_discard_range(rbname, start, length);
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001984
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001985 rcu_read_lock();
Juan Quintela36449152017-03-23 15:11:59 +01001986 RAMBlock *rb = qemu_ram_block_by_name(rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001987
1988 if (!rb) {
Juan Quintela36449152017-03-23 15:11:59 +01001989 error_report("ram_discard_range: Failed to find block '%s'", rbname);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001990 goto err;
1991 }
1992
Dr. David Alan Gilbertd3a50382017-02-24 18:28:32 +00001993 ret = ram_block_discard_range(rb, start, length);
Dr. David Alan Gilberte0b266f2015-11-05 18:11:02 +00001994
1995err:
1996 rcu_read_unlock();
1997
1998 return ret;
1999}
2000
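/*
 * Illustrative usage sketch, not part of the original file: discarding
 * the first 16 target pages of a block. The block name "pc.ram" is
 * just an example of a RAMBlock idstr.
 */
#if 0
static void example_discard_head(MigrationIncomingState *mis)
{
    if (ram_discard_range(mis, "pc.ram", 0, 16 * TARGET_PAGE_SIZE) < 0) {
        error_report("example: discard of 'pc.ram' head failed");
    }
}
#endif
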
Juan Quintela6f37bb82017-03-13 19:26:29 +01002001static int ram_save_init_globals(RAMState *rs)
Juan Quintela56e93d22015-05-07 19:33:31 +02002002{
Juan Quintela56e93d22015-05-07 19:33:31 +02002003 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
2004
Juan Quintela8d820d62017-03-13 19:35:50 +01002005 rs->dirty_rate_high_cnt = 0;
Juan Quintela5a987732017-03-13 19:39:02 +01002006 rs->bitmap_sync_count = 0;
Juan Quintelaf664da82017-03-13 19:44:57 +01002007 migration_bitmap_sync_init(rs);
Li Zhijiandd631692015-07-02 20:18:06 +08002008 qemu_mutex_init(&migration_bitmap_mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002009
2010 if (migrate_use_xbzrle()) {
2011 XBZRLE_cache_lock();
Vijaya Kumar Kadb65de2016-10-24 16:26:49 +01002012 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
Juan Quintela56e93d22015-05-07 19:33:31 +02002013 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
2014 TARGET_PAGE_SIZE,
2015 TARGET_PAGE_SIZE);
2016 if (!XBZRLE.cache) {
2017 XBZRLE_cache_unlock();
2018 error_report("Error creating cache");
2019 return -1;
2020 }
2021 XBZRLE_cache_unlock();
2022
2023 /* We prefer not to abort if there is no memory */
2024 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2025 if (!XBZRLE.encoded_buf) {
2026 error_report("Error allocating encoded_buf");
2027 return -1;
2028 }
2029
2030 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2031 if (!XBZRLE.current_buf) {
2032 error_report("Error allocating current_buf");
2033 g_free(XBZRLE.encoded_buf);
2034 XBZRLE.encoded_buf = NULL;
2035 return -1;
2036 }
2037
2038 acct_clear();
2039 }
2040
Paolo Bonzini49877832016-02-15 19:57:57 +01002041 /* For memory_global_dirty_log_start below. */
2042 qemu_mutex_lock_iothread();
2043
Juan Quintela56e93d22015-05-07 19:33:31 +02002044 qemu_mutex_lock_ramlist();
2045 rcu_read_lock();
2046 bytes_transferred = 0;
Juan Quintela6f37bb82017-03-13 19:26:29 +01002047 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002048
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00002049 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302050 /* Skip setting bitmap if there is no RAM */
2051 if (ram_bytes_total()) {
2052 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2053 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
2054 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
Juan Quintela56e93d22015-05-07 19:33:31 +02002055
Ashijeet Acharya0827b9e2017-02-08 19:58:45 +05302056 if (migrate_postcopy_ram()) {
2057 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
2058 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
2059 }
Dr. David Alan Gilbertf3f491f2015-11-05 18:11:01 +00002060 }
2061
Juan Quintela56e93d22015-05-07 19:33:31 +02002062 /*
2063 * Count the total number of pages used by ram blocks not including any
2064 * gaps due to alignment or unplugs.
2065 */
2066 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2067
2068 memory_global_dirty_log_start();
Juan Quintela8d820d62017-03-13 19:35:50 +01002069 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002070 qemu_mutex_unlock_ramlist();
Paolo Bonzini49877832016-02-15 19:57:57 +01002071 qemu_mutex_unlock_iothread();
zhanghailianga91246c2016-10-27 14:42:59 +08002072 rcu_read_unlock();
2073
2074 return 0;
2075}
2076
Juan Quintela3d0684b2017-03-23 15:06:39 +01002077/*
 2078 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
zhanghailianga91246c2016-10-27 14:42:59 +08002079 * long-running RCU critical section. When RCU reclaims in the code
 2080 * start to become numerous, it will be necessary to reduce the
2081 * granularity of these critical sections.
2082 */
2083
Juan Quintela3d0684b2017-03-23 15:06:39 +01002084/**
2085 * ram_save_setup: Setup RAM for migration
2086 *
2087 * Returns zero to indicate success and negative for error
2088 *
2089 * @f: QEMUFile where to send the data
2090 * @opaque: RAMState pointer
2091 */
zhanghailianga91246c2016-10-27 14:42:59 +08002092static int ram_save_setup(QEMUFile *f, void *opaque)
2093{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002094 RAMState *rs = opaque;
zhanghailianga91246c2016-10-27 14:42:59 +08002095 RAMBlock *block;
2096
2097 /* migration has already setup the bitmap, reuse it. */
2098 if (!migration_in_colo_state()) {
Juan Quintela6f37bb82017-03-13 19:26:29 +01002099 if (ram_save_init_globals(rs) < 0) {
zhanghailianga91246c2016-10-27 14:42:59 +08002100 return -1;
2101 }
2102 }
2103
2104 rcu_read_lock();
Juan Quintela56e93d22015-05-07 19:33:31 +02002105
2106 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2107
2108 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2109 qemu_put_byte(f, strlen(block->idstr));
2110 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2111 qemu_put_be64(f, block->used_length);
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002112 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2113 qemu_put_be64(f, block->page_size);
2114 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002115 }
2116
2117 rcu_read_unlock();
2118
2119 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2120 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2121
2122 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2123
2124 return 0;
2125}
2126
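/*
 * Illustrative sketch, not part of the original file: the setup record
 * written above as a reader would consume it. This hypothetical reader
 * skips the optional per-block page_size field that ram_save_setup()
 * emits for postcopy with non-host-sized pages; see ram_load() for the
 * full handling.
 */
#if 0
static void example_read_setup_record(QEMUFile *f)
{
    /* flags travel in the low bits, as in ram_load() */
    uint64_t total = qemu_get_be64(f) & TARGET_PAGE_MASK;

    while (total) {
        char id[256];
        uint8_t len = qemu_get_byte(f);

        qemu_get_buffer(f, (uint8_t *)id, len);
        id[len] = 0;
        total -= qemu_get_be64(f);   /* used_length of this block */
    }
}
#endif
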
Juan Quintela3d0684b2017-03-23 15:06:39 +01002127/**
2128 * ram_save_iterate: iterative stage for migration
2129 *
2130 * Returns zero to indicate success and negative for error
2131 *
2132 * @f: QEMUFile where to send the data
2133 * @opaque: RAMState pointer
2134 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002135static int ram_save_iterate(QEMUFile *f, void *opaque)
2136{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002137 RAMState *rs = opaque;
Juan Quintela56e93d22015-05-07 19:33:31 +02002138 int ret;
2139 int i;
2140 int64_t t0;
Thomas Huth5c903082016-11-04 14:10:17 +01002141 int done = 0;
Juan Quintela56e93d22015-05-07 19:33:31 +02002142
2143 rcu_read_lock();
Juan Quintela6f37bb82017-03-13 19:26:29 +01002144 if (ram_list.version != rs->last_version) {
2145 ram_state_reset(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002146 }
2147
2148 /* Read version before ram_list.blocks */
2149 smp_rmb();
2150
2151 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2152
2153 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2154 i = 0;
2155 while ((ret = qemu_file_rate_limit(f)) == 0) {
2156 int pages;
2157
Juan Quintela6f37bb82017-03-13 19:26:29 +01002158 pages = ram_find_and_save_block(rs, f, false, &bytes_transferred);
Juan Quintela56e93d22015-05-07 19:33:31 +02002159 /* no more pages to send */
2160 if (pages == 0) {
Thomas Huth5c903082016-11-04 14:10:17 +01002161 done = 1;
Juan Quintela56e93d22015-05-07 19:33:31 +02002162 break;
2163 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002164 acct_info.iterations++;
Jason J. Herne070afca2015-09-08 13:12:35 -04002165
Juan Quintela56e93d22015-05-07 19:33:31 +02002166 /* we want to check in the 1st loop, just in case it was the 1st time
 2167 and we had to sync the dirty bitmap.
 2168 qemu_clock_get_ns() is a bit expensive, so we only check once every
 2169 few iterations
2170 */
2171 if ((i & 63) == 0) {
2172 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2173 if (t1 > MAX_WAIT) {
Juan Quintela55c44462017-01-23 22:32:05 +01002174 trace_ram_save_iterate_big_wait(t1, i);
Juan Quintela56e93d22015-05-07 19:33:31 +02002175 break;
2176 }
2177 }
2178 i++;
2179 }
2180 flush_compressed_data(f);
2181 rcu_read_unlock();
2182
2183 /*
2184 * Must occur before EOS (or any QEMUFile operation)
2185 * because of RDMA protocol.
2186 */
2187 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2188
2189 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2190 bytes_transferred += 8;
2191
2192 ret = qemu_file_get_error(f);
2193 if (ret < 0) {
2194 return ret;
2195 }
2196
Thomas Huth5c903082016-11-04 14:10:17 +01002197 return done;
Juan Quintela56e93d22015-05-07 19:33:31 +02002198}
2199
Juan Quintela3d0684b2017-03-23 15:06:39 +01002200/**
2201 * ram_save_complete: function called to send the remaining amount of ram
2202 *
2203 * Returns zero to indicate success
2204 *
2205 * Called with iothread lock
2206 *
2207 * @f: QEMUFile where to send the data
2208 * @opaque: RAMState pointer
2209 */
Juan Quintela56e93d22015-05-07 19:33:31 +02002210static int ram_save_complete(QEMUFile *f, void *opaque)
2211{
Juan Quintela6f37bb82017-03-13 19:26:29 +01002212 RAMState *rs = opaque;
2213
Juan Quintela56e93d22015-05-07 19:33:31 +02002214 rcu_read_lock();
2215
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002216 if (!migration_in_postcopy(migrate_get_current())) {
Juan Quintela8d820d62017-03-13 19:35:50 +01002217 migration_bitmap_sync(rs);
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002218 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002219
2220 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2221
2222 /* try transferring iterative blocks of memory */
2223
2224 /* flush all remaining blocks regardless of rate limiting */
2225 while (true) {
2226 int pages;
2227
Juan Quintela6f37bb82017-03-13 19:26:29 +01002228 pages = ram_find_and_save_block(rs, f, !migration_in_colo_state(),
zhanghailianga91246c2016-10-27 14:42:59 +08002229 &bytes_transferred);
Juan Quintela56e93d22015-05-07 19:33:31 +02002230 /* no more blocks to send */
2231 if (pages == 0) {
2232 break;
2233 }
2234 }
2235
2236 flush_compressed_data(f);
2237 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
Juan Quintela56e93d22015-05-07 19:33:31 +02002238
2239 rcu_read_unlock();
Paolo Bonzinid09a6fd2015-07-09 08:47:58 +02002240
Juan Quintela56e93d22015-05-07 19:33:31 +02002241 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2242
2243 return 0;
2244}
2245
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002246static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2247 uint64_t *non_postcopiable_pending,
2248 uint64_t *postcopiable_pending)
Juan Quintela56e93d22015-05-07 19:33:31 +02002249{
Juan Quintela8d820d62017-03-13 19:35:50 +01002250 RAMState *rs = opaque;
Juan Quintela56e93d22015-05-07 19:33:31 +02002251 uint64_t remaining_size;
2252
2253 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2254
Dr. David Alan Gilbert663e6c12015-11-05 18:11:13 +00002255 if (!migration_in_postcopy(migrate_get_current()) &&
2256 remaining_size < max_size) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002257 qemu_mutex_lock_iothread();
2258 rcu_read_lock();
Juan Quintela8d820d62017-03-13 19:35:50 +01002259 migration_bitmap_sync(rs);
Juan Quintela56e93d22015-05-07 19:33:31 +02002260 rcu_read_unlock();
2261 qemu_mutex_unlock_iothread();
2262 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2263 }
Dr. David Alan Gilbertc31b0982015-11-05 18:10:54 +00002264
2265 /* We can do postcopy, and all the data is postcopiable */
2266 *postcopiable_pending += remaining_size;
Juan Quintela56e93d22015-05-07 19:33:31 +02002267}
2268
2269static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2270{
2271 unsigned int xh_len;
2272 int xh_flags;
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002273 uint8_t *loaded_data;
Juan Quintela56e93d22015-05-07 19:33:31 +02002274
2275 if (!xbzrle_decoded_buf) {
2276 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2277 }
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002278 loaded_data = xbzrle_decoded_buf;
Juan Quintela56e93d22015-05-07 19:33:31 +02002279
2280 /* extract RLE header */
2281 xh_flags = qemu_get_byte(f);
2282 xh_len = qemu_get_be16(f);
2283
2284 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2285 error_report("Failed to load XBZRLE page - wrong compression!");
2286 return -1;
2287 }
2288
2289 if (xh_len > TARGET_PAGE_SIZE) {
2290 error_report("Failed to load XBZRLE page - len overflow!");
2291 return -1;
2292 }
2293 /* load data and decode */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002294 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002295
2296 /* decode RLE */
Dr. David Alan Gilbert063e7602015-12-16 11:47:37 +00002297 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
Juan Quintela56e93d22015-05-07 19:33:31 +02002298 TARGET_PAGE_SIZE) == -1) {
2299 error_report("Failed to load XBZRLE page - decode error!");
2300 return -1;
2301 }
2302
2303 return 0;
2304}
2305
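/*
 * Illustrative sketch, not part of the original file: the XBZRLE record
 * framing consumed above -- one flags byte, a be16 encoded length, then
 * the encoded delta. A hypothetical writer of the same framing:
 */
#if 0
static void example_put_xbzrle_record(QEMUFile *f,
                                      const uint8_t *encoded, uint16_t elen)
{
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, elen);
    qemu_put_buffer(f, encoded, elen);
}
#endif
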
Juan Quintela3d0684b2017-03-23 15:06:39 +01002306/**
2307 * ram_block_from_stream: read a RAMBlock id from the migration stream
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002308 *
Juan Quintela3d0684b2017-03-23 15:06:39 +01002309 * Must be called from within a rcu critical section.
2310 *
2311 * Returns a pointer from within the RCU-protected ram_list.
2312 *
2313 * @f: QEMUFile where to read the data from
2314 * @flags: Page flags (mostly to see if it's a continuation of previous block)
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002315 */
Juan Quintela3d0684b2017-03-23 15:06:39 +01002316static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
Juan Quintela56e93d22015-05-07 19:33:31 +02002317{
2318 static RAMBlock *block = NULL;
2319 char id[256];
2320 uint8_t len;
2321
2322 if (flags & RAM_SAVE_FLAG_CONTINUE) {
zhanghailiang4c4bad42016-01-15 11:37:41 +08002323 if (!block) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002324 error_report("Ack, bad migration stream!");
2325 return NULL;
2326 }
zhanghailiang4c4bad42016-01-15 11:37:41 +08002327 return block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002328 }
2329
2330 len = qemu_get_byte(f);
2331 qemu_get_buffer(f, (uint8_t *)id, len);
2332 id[len] = 0;
2333
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002334 block = qemu_ram_block_by_name(id);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002335 if (!block) {
2336 error_report("Can't find block %s", id);
2337 return NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002338 }
2339
zhanghailiang4c4bad42016-01-15 11:37:41 +08002340 return block;
2341}
2342
2343static inline void *host_from_ram_block_offset(RAMBlock *block,
2344 ram_addr_t offset)
2345{
2346 if (!offset_in_ramblock(block, offset)) {
2347 return NULL;
2348 }
2349
2350 return block->host + offset;
Juan Quintela56e93d22015-05-07 19:33:31 +02002351}
2352
Juan Quintela3d0684b2017-03-23 15:06:39 +01002353/**
2354 * ram_handle_compressed: handle the zero page case
2355 *
Juan Quintela56e93d22015-05-07 19:33:31 +02002356 * If a page (or a whole RDMA chunk) has been
2357 * determined to be zero, then zap it.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002358 *
2359 * @host: host address for the zero page
2360 * @ch: what the page is filled from. We only support zero
2361 * @size: size of the zero page
Juan Quintela56e93d22015-05-07 19:33:31 +02002362 */
2363void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2364{
2365 if (ch != 0 || !is_zero_range(host, size)) {
2366 memset(host, ch, size);
2367 }
2368}
2369
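/*
 * Illustrative usage sketch, not part of the original file: the
 * zero-page fast path above only touches memory when needed.
 */
#if 0
static void example_fill(void *host)
{
    ram_handle_compressed(host, 0, TARGET_PAGE_SIZE);    /* no-op if already zero */
    ram_handle_compressed(host, 0xa5, TARGET_PAGE_SIZE); /* always memsets */
}
#endif
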
2370static void *do_data_decompress(void *opaque)
2371{
2372 DecompressParam *param = opaque;
2373 unsigned long pagesize;
Liang Li33d151f2016-05-05 15:32:58 +08002374 uint8_t *des;
2375 int len;
Juan Quintela56e93d22015-05-07 19:33:31 +02002376
Liang Li33d151f2016-05-05 15:32:58 +08002377 qemu_mutex_lock(&param->mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002378 while (!param->quit) {
Liang Li33d151f2016-05-05 15:32:58 +08002379 if (param->des) {
2380 des = param->des;
2381 len = param->len;
2382 param->des = 0;
2383 qemu_mutex_unlock(&param->mutex);
2384
Liang Li73a89122016-05-05 15:32:51 +08002385 pagesize = TARGET_PAGE_SIZE;
 2386 /* uncompress() can fail in some cases, especially
 2387 * when the page was dirtied during compression; that's
 2388 * not a problem because the dirty page will be retransferred
 2389 * and uncompress() won't corrupt the data in other pages.
2390 */
Liang Li33d151f2016-05-05 15:32:58 +08002391 uncompress((Bytef *)des, &pagesize,
2392 (const Bytef *)param->compbuf, len);
Liang Li73a89122016-05-05 15:32:51 +08002393
Liang Li33d151f2016-05-05 15:32:58 +08002394 qemu_mutex_lock(&decomp_done_lock);
2395 param->done = true;
2396 qemu_cond_signal(&decomp_done_cond);
2397 qemu_mutex_unlock(&decomp_done_lock);
2398
2399 qemu_mutex_lock(&param->mutex);
2400 } else {
2401 qemu_cond_wait(&param->cond, &param->mutex);
2402 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002403 }
Liang Li33d151f2016-05-05 15:32:58 +08002404 qemu_mutex_unlock(&param->mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002405
2406 return NULL;
2407}
2408
Liang Li5533b2e2016-05-05 15:32:52 +08002409static void wait_for_decompress_done(void)
2410{
2411 int idx, thread_count;
2412
2413 if (!migrate_use_compression()) {
2414 return;
2415 }
2416
2417 thread_count = migrate_decompress_threads();
2418 qemu_mutex_lock(&decomp_done_lock);
2419 for (idx = 0; idx < thread_count; idx++) {
2420 while (!decomp_param[idx].done) {
2421 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2422 }
2423 }
2424 qemu_mutex_unlock(&decomp_done_lock);
2425}
2426
Juan Quintela56e93d22015-05-07 19:33:31 +02002427void migrate_decompress_threads_create(void)
2428{
2429 int i, thread_count;
2430
2431 thread_count = migrate_decompress_threads();
2432 decompress_threads = g_new0(QemuThread, thread_count);
2433 decomp_param = g_new0(DecompressParam, thread_count);
Liang Li73a89122016-05-05 15:32:51 +08002434 qemu_mutex_init(&decomp_done_lock);
2435 qemu_cond_init(&decomp_done_cond);
Juan Quintela56e93d22015-05-07 19:33:31 +02002436 for (i = 0; i < thread_count; i++) {
2437 qemu_mutex_init(&decomp_param[i].mutex);
2438 qemu_cond_init(&decomp_param[i].cond);
2439 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
Liang Li73a89122016-05-05 15:32:51 +08002440 decomp_param[i].done = true;
Liang Li90e56fb2016-05-05 15:32:56 +08002441 decomp_param[i].quit = false;
Juan Quintela56e93d22015-05-07 19:33:31 +02002442 qemu_thread_create(decompress_threads + i, "decompress",
2443 do_data_decompress, decomp_param + i,
2444 QEMU_THREAD_JOINABLE);
2445 }
2446}
2447
2448void migrate_decompress_threads_join(void)
2449{
2450 int i, thread_count;
2451
Juan Quintela56e93d22015-05-07 19:33:31 +02002452 thread_count = migrate_decompress_threads();
2453 for (i = 0; i < thread_count; i++) {
2454 qemu_mutex_lock(&decomp_param[i].mutex);
Liang Li90e56fb2016-05-05 15:32:56 +08002455 decomp_param[i].quit = true;
Juan Quintela56e93d22015-05-07 19:33:31 +02002456 qemu_cond_signal(&decomp_param[i].cond);
2457 qemu_mutex_unlock(&decomp_param[i].mutex);
2458 }
2459 for (i = 0; i < thread_count; i++) {
2460 qemu_thread_join(decompress_threads + i);
2461 qemu_mutex_destroy(&decomp_param[i].mutex);
2462 qemu_cond_destroy(&decomp_param[i].cond);
2463 g_free(decomp_param[i].compbuf);
2464 }
2465 g_free(decompress_threads);
2466 g_free(decomp_param);
Juan Quintela56e93d22015-05-07 19:33:31 +02002467 decompress_threads = NULL;
2468 decomp_param = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002469}
2470
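/*
 * Illustrative lifecycle sketch, not part of the original file: how the
 * incoming side drives the decompression thread pool, under the
 * assumption of a single compressed page arriving at host/len.
 */
#if 0
static void example_decompress_lifecycle(QEMUFile *f, void *host, int len)
{
    migrate_decompress_threads_create();
    decompress_data_with_multi_threads(f, host, len); /* once per page */
    wait_for_decompress_done();   /* before the load completes */
    migrate_decompress_threads_join();
}
#endif
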
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002471static void decompress_data_with_multi_threads(QEMUFile *f,
Juan Quintela56e93d22015-05-07 19:33:31 +02002472 void *host, int len)
2473{
2474 int idx, thread_count;
2475
2476 thread_count = migrate_decompress_threads();
Liang Li73a89122016-05-05 15:32:51 +08002477 qemu_mutex_lock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002478 while (true) {
2479 for (idx = 0; idx < thread_count; idx++) {
Liang Li73a89122016-05-05 15:32:51 +08002480 if (decomp_param[idx].done) {
Liang Li33d151f2016-05-05 15:32:58 +08002481 decomp_param[idx].done = false;
2482 qemu_mutex_lock(&decomp_param[idx].mutex);
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002483 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002484 decomp_param[idx].des = host;
2485 decomp_param[idx].len = len;
Liang Li33d151f2016-05-05 15:32:58 +08002486 qemu_cond_signal(&decomp_param[idx].cond);
2487 qemu_mutex_unlock(&decomp_param[idx].mutex);
Juan Quintela56e93d22015-05-07 19:33:31 +02002488 break;
2489 }
2490 }
2491 if (idx < thread_count) {
2492 break;
Liang Li73a89122016-05-05 15:32:51 +08002493 } else {
2494 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002495 }
2496 }
Liang Li73a89122016-05-05 15:32:51 +08002497 qemu_mutex_unlock(&decomp_done_lock);
Juan Quintela56e93d22015-05-07 19:33:31 +02002498}
2499
Juan Quintela3d0684b2017-03-23 15:06:39 +01002500/**
2501 * ram_postcopy_incoming_init: allocate postcopy data structures
2502 *
 2503 * Returns 0 for success and negative if there was an error
2504 *
2505 * @mis: current migration incoming state
2506 *
2507 * Allocate data structures etc needed by incoming migration with
2508 * postcopy-ram. postcopy-ram's similarly names
2509 * postcopy_ram_incoming_init does the work.
Dr. David Alan Gilbert1caddf82015-11-05 18:11:03 +00002510 */
2511int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2512{
2513 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2514
2515 return postcopy_ram_incoming_init(mis, ram_pages);
2516}
2517
Juan Quintela3d0684b2017-03-23 15:06:39 +01002518/**
2519 * ram_load_postcopy: load a page in postcopy case
2520 *
2521 * Returns 0 for success or -errno in case of error
2522 *
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002523 * Called in postcopy mode by ram_load().
2524 * rcu_read_lock is taken prior to this being called.
Juan Quintela3d0684b2017-03-23 15:06:39 +01002525 *
 2526 * @f: QEMUFile to read the data from
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002527 */
2528static int ram_load_postcopy(QEMUFile *f)
2529{
2530 int flags = 0, ret = 0;
2531 bool place_needed = false;
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002532 bool matching_page_sizes = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002533 MigrationIncomingState *mis = migration_incoming_get_current();
2534 /* Temporary page that is later 'placed' */
2535 void *postcopy_host_page = postcopy_get_tmp_page(mis);
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002536 void *last_host = NULL;
Dr. David Alan Gilberta3b6ff62015-11-11 14:02:28 +00002537 bool all_zero = false;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002538
2539 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2540 ram_addr_t addr;
2541 void *host = NULL;
2542 void *page_buffer = NULL;
2543 void *place_source = NULL;
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002544 RAMBlock *block = NULL;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002545 uint8_t ch;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002546
2547 addr = qemu_get_be64(f);
2548 flags = addr & ~TARGET_PAGE_MASK;
2549 addr &= TARGET_PAGE_MASK;
2550
2551 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2552 place_needed = false;
2553 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002554 block = ram_block_from_stream(f, flags);
zhanghailiang4c4bad42016-01-15 11:37:41 +08002555
2556 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002557 if (!host) {
2558 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2559 ret = -EINVAL;
2560 break;
2561 }
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002562 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002563 /*
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002564 * Postcopy requires that we place whole host pages atomically;
2565 * these may be huge pages for RAMBlocks that are backed by
2566 * hugetlbfs.
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002567 * To make it atomic, the data is read into a temporary page
2568 * that's moved into place later.
2569 * The migration protocol uses, possibly smaller, target-pages
2570 * however the source ensures it always sends all the components
2571 * of a host page in order.
2572 */
2573 page_buffer = postcopy_host_page +
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002574 ((uintptr_t)host & (block->page_size - 1));
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002575 /* If all TP are zero then we can optimise the place */
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002576 if (!((uintptr_t)host & (block->page_size - 1))) {
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002577 all_zero = true;
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002578 } else {
2579 /* not the 1st TP within the HP */
2580 if (host != (last_host + TARGET_PAGE_SIZE)) {
Markus Armbruster9af9e0f2015-12-18 16:35:19 +01002581 error_report("Non-sequential target page %p/%p",
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002582 host, last_host);
2583 ret = -EINVAL;
2584 break;
2585 }
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002586 }
2587
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002588
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002589 /*
2590 * If it's the last part of a host page then we place the host
2591 * page
2592 */
2593 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
Dr. David Alan Gilbert28abd202017-02-24 18:28:37 +00002594 (block->page_size - 1)) == 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002595 place_source = postcopy_host_page;
2596 }
Dr. David Alan Gilbertc53b7dd2015-11-05 18:11:12 +00002597 last_host = host;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002598
2599 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2600 case RAM_SAVE_FLAG_COMPRESS:
2601 ch = qemu_get_byte(f);
2602 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2603 if (ch) {
2604 all_zero = false;
2605 }
2606 break;
2607
2608 case RAM_SAVE_FLAG_PAGE:
2609 all_zero = false;
2610 if (!place_needed || !matching_page_sizes) {
2611 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2612 } else {
 2613 /* Avoids the extra qemu_file copy during postcopy, since
 2614 * postcopy is going to do its own copy later; we can only
 2615 * do this when the read happens in one go (matching page sizes)
2616 */
2617 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2618 TARGET_PAGE_SIZE);
2619 }
2620 break;
2621 case RAM_SAVE_FLAG_EOS:
2622 /* normal exit */
2623 break;
2624 default:
2625 error_report("Unknown combination of migration flags: %#x"
2626 " (postcopy mode)", flags);
2627 ret = -EINVAL;
2628 }
2629
2630 if (place_needed) {
2631 /* This gets called at the last target page in the host page */
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002632 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2633
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002634 if (all_zero) {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002635 ret = postcopy_place_page_zero(mis, place_dest,
2636 block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002637 } else {
Dr. David Alan Gilbertdf9ff5e2017-02-24 18:28:35 +00002638 ret = postcopy_place_page(mis, place_dest,
2639 place_source, block->page_size);
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002640 }
2641 }
2642 if (!ret) {
2643 ret = qemu_file_get_error(f);
2644 }
2645 }
2646
2647 return ret;
2648}
2649
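/*
 * Illustrative sketch, not part of the original file: the placement
 * test used above. The temporary page is flushed ("placed") once the
 * target page just written is the last one of its host page.
 */
#if 0
static bool example_is_last_target_page(void *host, size_t host_pagesize)
{
    return (((uintptr_t)host + TARGET_PAGE_SIZE) & (host_pagesize - 1)) == 0;
}
#endif
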
Juan Quintela56e93d22015-05-07 19:33:31 +02002650static int ram_load(QEMUFile *f, void *opaque, int version_id)
2651{
2652 int flags = 0, ret = 0;
2653 static uint64_t seq_iter;
2654 int len = 0;
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002655 /*
2656 * If system is running in postcopy mode, page inserts to host memory must
2657 * be atomic
2658 */
2659 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002660 /* ADVISE is earlier, it shows the source has the postcopy capability on */
2661 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
Juan Quintela56e93d22015-05-07 19:33:31 +02002662
2663 seq_iter++;
2664
2665 if (version_id != 4) {
2666 ret = -EINVAL;
2667 }
2668
2669 /* This RCU critical section can be very long running.
2670 * When RCU reclaims in the code start to become numerous,
2671 * it will be necessary to reduce the granularity of this
2672 * critical section.
2673 */
2674 rcu_read_lock();
Dr. David Alan Gilberta7180872015-11-05 18:11:11 +00002675
2676 if (postcopy_running) {
2677 ret = ram_load_postcopy(f);
2678 }
2679
2680 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
Juan Quintela56e93d22015-05-07 19:33:31 +02002681 ram_addr_t addr, total_ram_bytes;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002682 void *host = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002683 uint8_t ch;
2684
2685 addr = qemu_get_be64(f);
2686 flags = addr & ~TARGET_PAGE_MASK;
2687 addr &= TARGET_PAGE_MASK;
2688
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002689 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2690 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
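            /* With RAM_SAVE_FLAG_CONTINUE the previous RAMBlock is reused;
             * otherwise the block's idstr precedes the page data.
             */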
zhanghailiang4c4bad42016-01-15 11:37:41 +08002691 RAMBlock *block = ram_block_from_stream(f, flags);
2692
2693 host = host_from_ram_block_offset(block, addr);
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002694 if (!host) {
2695 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2696 ret = -EINVAL;
2697 break;
2698 }
2699 }
2700
Juan Quintela56e93d22015-05-07 19:33:31 +02002701 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2702 case RAM_SAVE_FLAG_MEM_SIZE:
2703 /* Synchronize RAM block list */
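            /* 'addr' carries the total RAM size; each block record is a
             * one-byte id length, the idstr itself and a be64 used_length.
             */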
2704 total_ram_bytes = addr;
2705 while (!ret && total_ram_bytes) {
2706 RAMBlock *block;
Juan Quintela56e93d22015-05-07 19:33:31 +02002707 char id[256];
2708 ram_addr_t length;
2709
2710 len = qemu_get_byte(f);
2711 qemu_get_buffer(f, (uint8_t *)id, len);
2712 id[len] = 0;
2713 length = qemu_get_be64(f);
2714
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002715 block = qemu_ram_block_by_name(id);
2716 if (block) {
2717 if (length != block->used_length) {
2718 Error *local_err = NULL;
Juan Quintela56e93d22015-05-07 19:33:31 +02002719
Gongleifa53a0e2016-05-10 10:04:59 +08002720 ret = qemu_ram_resize(block, length,
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002721 &local_err);
2722 if (local_err) {
2723 error_report_err(local_err);
Juan Quintela56e93d22015-05-07 19:33:31 +02002724 }
Juan Quintela56e93d22015-05-07 19:33:31 +02002725 }
Dr. David Alan Gilbertef08fb32017-02-24 18:28:30 +00002726 /* For postcopy we need to check hugepage sizes match */
2727 if (postcopy_advised &&
2728 block->page_size != qemu_host_page_size) {
2729 uint64_t remote_page_size = qemu_get_be64(f);
2730 if (remote_page_size != block->page_size) {
2731 error_report("Mismatched RAM page size %s "
                2732                                      "(local) %zu != %" PRIu64,
2733 id, block->page_size,
2734 remote_page_size);
2735 ret = -EINVAL;
2736 }
2737 }
Dr. David Alan Gilberte3dd7492015-11-05 18:10:33 +00002738 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2739 block->idstr);
2740 } else {
Juan Quintela56e93d22015-05-07 19:33:31 +02002741 error_report("Unknown ramblock \"%s\", cannot "
2742 "accept migration", id);
2743 ret = -EINVAL;
2744 }
2745
2746 total_ram_bytes -= length;
2747 }
2748 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002749
Juan Quintela56e93d22015-05-07 19:33:31 +02002750 case RAM_SAVE_FLAG_COMPRESS:
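            /* Fill-byte page: ram_handle_compressed() skips the memset
             * when the byte is zero and the page is already clean.
             */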
Juan Quintela56e93d22015-05-07 19:33:31 +02002751 ch = qemu_get_byte(f);
2752 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2753 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002754
Juan Quintela56e93d22015-05-07 19:33:31 +02002755 case RAM_SAVE_FLAG_PAGE:
Juan Quintela56e93d22015-05-07 19:33:31 +02002756 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2757 break;
Juan Quintela56e93d22015-05-07 19:33:31 +02002758
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002759 case RAM_SAVE_FLAG_COMPRESS_PAGE:
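            /* zlib-compressed page: a be32 length followed by the
             * compressed data, handed to the decompression threads.
             */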
Juan Quintela56e93d22015-05-07 19:33:31 +02002760 len = qemu_get_be32(f);
2761 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2762 error_report("Invalid compressed data length: %d", len);
2763 ret = -EINVAL;
2764 break;
2765 }
Dr. David Alan Gilbertc1bc6622015-12-16 11:47:38 +00002766 decompress_data_with_multi_threads(f, host, len);
Juan Quintela56e93d22015-05-07 19:33:31 +02002767 break;
Dr. David Alan Gilberta776aa12015-11-05 18:10:39 +00002768
Juan Quintela56e93d22015-05-07 19:33:31 +02002769 case RAM_SAVE_FLAG_XBZRLE:
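            /* XBZRLE pages are deltas against the previous contents of
             * the same guest page, which 'host' must still hold.
             */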
Juan Quintela56e93d22015-05-07 19:33:31 +02002770 if (load_xbzrle(f, addr, host) < 0) {
2771 error_report("Failed to decompress XBZRLE page at "
2772 RAM_ADDR_FMT, addr);
2773 ret = -EINVAL;
2774 break;
2775 }
2776 break;
2777 case RAM_SAVE_FLAG_EOS:
2778 /* normal exit */
2779 break;
2780 default:
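                /* RAM_SAVE_FLAG_HOOK is owned by transports such as RDMA;
                 * give the registered load hook a chance to consume it.
                 */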
2781 if (flags & RAM_SAVE_FLAG_HOOK) {
Dr. David Alan Gilbert632e3a52015-06-11 18:17:23 +01002782 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
Juan Quintela56e93d22015-05-07 19:33:31 +02002783 } else {
2784 error_report("Unknown combination of migration flags: %#x",
2785 flags);
2786 ret = -EINVAL;
2787 }
2788 }
2789 if (!ret) {
2790 ret = qemu_file_get_error(f);
2791 }
2792 }
2793
Liang Li5533b2e2016-05-05 15:32:52 +08002794 wait_for_decompress_done();
Juan Quintela56e93d22015-05-07 19:33:31 +02002795 rcu_read_unlock();
Juan Quintela55c44462017-01-23 22:32:05 +01002796 trace_ram_load_complete(ret, seq_iter);
Juan Quintela56e93d22015-05-07 19:33:31 +02002797 return ret;
2798}
2799
2800static SaveVMHandlers savevm_ram_handlers = {
2801 .save_live_setup = ram_save_setup,
2802 .save_live_iterate = ram_save_iterate,
Dr. David Alan Gilbert763c9062015-11-05 18:11:00 +00002803 .save_live_complete_postcopy = ram_save_complete,
Dr. David Alan Gilberta3e06c32015-11-05 18:10:41 +00002804 .save_live_complete_precopy = ram_save_complete,
Juan Quintela56e93d22015-05-07 19:33:31 +02002805 .save_live_pending = ram_save_pending,
2806 .load_state = ram_load,
Liang Li6ad2a212015-11-02 15:37:03 +08002807 .cleanup = ram_migration_cleanup,
Juan Quintela56e93d22015-05-07 19:33:31 +02002808};
2809
2810void ram_mig_init(void)
2811{
2812 qemu_mutex_init(&XBZRLE.lock);
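    /* The stream version (4) registered here must match the version_id
     * check in ram_load().
     */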
Juan Quintela6f37bb82017-03-13 19:26:29 +01002813 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
Juan Quintela56e93d22015-05-07 19:33:31 +02002814}