blob: 59bb507189d5c14e075de2ff37a15102abe23f0e [file] [log] [blame]
/*
 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
 * (a.k.a. Fault Tolerance or Continuous Replication)
 *
 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
 * Copyright (c) 2016 FUJITSU LIMITED
 * Copyright (c) 2016 Intel Corporation
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */
12
13#include "qemu/osdep.h"
zhanghailiang0b827d52016-10-27 14:42:54 +080014#include "sysemu/sysemu.h"
Markus Armbrustere688df62018-02-01 12:18:31 +010015#include "qapi/error.h"
Markus Armbruster9af23982018-02-11 10:36:01 +010016#include "qapi/qapi-commands-migration.h"
Juan Quintela40014d82017-04-17 19:34:36 +020017#include "qemu-file-channel.h"
Juan Quintela6666c962017-04-24 20:07:27 +020018#include "migration.h"
Juan Quintela08a0aee2017-04-20 18:52:18 +020019#include "qemu-file.h"
Juan Quintela20a519a2017-04-20 14:48:46 +020020#include "savevm.h"
zhanghailiang35a6ed42016-10-27 14:42:52 +080021#include "migration/colo.h"
Juan Quintela2c9e6fe2017-04-21 14:31:22 +020022#include "block.h"
zhanghailianga91246c2016-10-27 14:42:59 +080023#include "io/channel-buffer.h"
zhanghailiang0b827d52016-10-27 14:42:54 +080024#include "trace.h"
zhanghailiang56ba83d2016-10-27 14:42:56 +080025#include "qemu/error-report.h"
zhanghailiangd89e6662016-10-27 14:43:03 +080026#include "migration/failover.h"
Zhang Chen2c9639e2017-02-28 11:01:56 -080027#include "replication.h"
Zhang Chen131b2152018-09-03 12:38:45 +080028#include "net/colo-compare.h"
29#include "net/colo.h"
Zhang Chen8e48ac92018-09-03 12:38:46 +080030#include "block/block.h"
zhanghailiang9ecff6d2018-09-03 12:38:51 +080031#include "qapi/qapi-events-migration.h"
Zhang Chenf56c0062018-09-03 12:38:53 +080032#include "qapi/qmp/qerror.h"
Zhang Chen3f6df992018-09-03 12:38:54 +080033#include "sysemu/cpus.h"
zhanghailiang35a6ed42016-10-27 14:42:52 +080034
zhanghailianga8664ba2017-01-17 20:57:44 +080035static bool vmstate_loading;
Zhang Chen131b2152018-09-03 12:38:45 +080036static Notifier packets_compare_notifier;
zhanghailianga8664ba2017-01-17 20:57:44 +080037
zhanghailianga91246c2016-10-27 14:42:59 +080038#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
39
zhanghailiang0b827d52016-10-27 14:42:54 +080040bool migration_in_colo_state(void)
41{
42 MigrationState *s = migrate_get_current();
43
44 return (s->state == MIGRATION_STATUS_COLO);
45}
46
zhanghailiang25d0c162016-10-27 14:42:55 +080047bool migration_incoming_in_colo_state(void)
48{
49 MigrationIncomingState *mis = migration_incoming_get_current();
50
51 return mis && (mis->state == MIGRATION_STATUS_COLO);
52}
53
zhanghailiangb3f7f0c2016-10-27 14:43:05 +080054static bool colo_runstate_is_stopped(void)
55{
56 return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
57}
58
zhanghailiang9d2db372016-10-27 14:43:06 +080059static void secondary_vm_do_failover(void)
60{
61 int old_state;
62 MigrationIncomingState *mis = migration_incoming_get_current();
Zhang Chen8e48ac92018-09-03 12:38:46 +080063 Error *local_err = NULL;
zhanghailiang9d2db372016-10-27 14:43:06 +080064
zhanghailianga8664ba2017-01-17 20:57:44 +080065 /* Can not do failover during the process of VM's loading VMstate, Or
66 * it will break the secondary VM.
67 */
68 if (vmstate_loading) {
69 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
70 FAILOVER_STATUS_RELAUNCH);
71 if (old_state != FAILOVER_STATUS_ACTIVE) {
72 error_report("Unknown error while do failover for secondary VM,"
Markus Armbruster977c7362017-08-24 10:46:08 +020073 "old_state: %s", FailoverStatus_str(old_state));
zhanghailianga8664ba2017-01-17 20:57:44 +080074 }
75 return;
76 }
77
zhanghailiang9d2db372016-10-27 14:43:06 +080078 migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
79 MIGRATION_STATUS_COMPLETED);
80
Zhang Chen8e48ac92018-09-03 12:38:46 +080081 replication_stop_all(true, &local_err);
82 if (local_err) {
83 error_report_err(local_err);
84 }
85
zhanghailiang9d2db372016-10-27 14:43:06 +080086 if (!autostart) {
87 error_report("\"-S\" qemu option will be ignored in secondary side");
88 /* recover runstate to normal migration finish state */
89 autostart = true;
90 }
zhanghailiangc937b9a2017-01-17 20:57:43 +080091 /*
92 * Make sure COLO incoming thread not block in recv or send,
93 * If mis->from_src_file and mis->to_src_file use the same fd,
94 * The second shutdown() will return -1, we ignore this value,
95 * It is harmless.
96 */
97 if (mis->from_src_file) {
98 qemu_file_shutdown(mis->from_src_file);
99 }
100 if (mis->to_src_file) {
101 qemu_file_shutdown(mis->to_src_file);
102 }
zhanghailiang9d2db372016-10-27 14:43:06 +0800103
104 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
105 FAILOVER_STATUS_COMPLETED);
106 if (old_state != FAILOVER_STATUS_ACTIVE) {
107 error_report("Incorrect state (%s) while doing failover for "
Markus Armbruster977c7362017-08-24 10:46:08 +0200108 "secondary VM", FailoverStatus_str(old_state));
zhanghailiang9d2db372016-10-27 14:43:06 +0800109 return;
110 }
zhanghailiangc937b9a2017-01-17 20:57:43 +0800111 /* Notify COLO incoming thread that failover work is finished */
112 qemu_sem_post(&mis->colo_incoming_sem);
zhanghailiang9d2db372016-10-27 14:43:06 +0800113 /* For Secondary VM, jump to incoming co */
114 if (mis->migration_incoming_co) {
115 qemu_coroutine_enter(mis->migration_incoming_co);
116 }
117}
118
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800119static void primary_vm_do_failover(void)
120{
121 MigrationState *s = migrate_get_current();
122 int old_state;
Zhang Chen8e48ac92018-09-03 12:38:46 +0800123 Error *local_err = NULL;
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800124
125 migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
126 MIGRATION_STATUS_COMPLETED);
127
zhanghailiangc937b9a2017-01-17 20:57:43 +0800128 /*
129 * Wake up COLO thread which may blocked in recv() or send(),
130 * The s->rp_state.from_dst_file and s->to_dst_file may use the
131 * same fd, but we still shutdown the fd for twice, it is harmless.
132 */
133 if (s->to_dst_file) {
134 qemu_file_shutdown(s->to_dst_file);
135 }
136 if (s->rp_state.from_dst_file) {
137 qemu_file_shutdown(s->rp_state.from_dst_file);
138 }
139
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800140 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
141 FAILOVER_STATUS_COMPLETED);
142 if (old_state != FAILOVER_STATUS_ACTIVE) {
143 error_report("Incorrect state (%s) while doing failover for Primary VM",
Markus Armbruster977c7362017-08-24 10:46:08 +0200144 FailoverStatus_str(old_state));
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800145 return;
146 }
Zhang Chen8e48ac92018-09-03 12:38:46 +0800147
148 replication_stop_all(true, &local_err);
149 if (local_err) {
150 error_report_err(local_err);
151 local_err = NULL;
152 }
153
zhanghailiangc937b9a2017-01-17 20:57:43 +0800154 /* Notify COLO thread that failover work is finished */
155 qemu_sem_post(&s->colo_exit_sem);
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800156}
157
Zhang Chenaad555c2018-09-03 12:38:47 +0800158COLOMode get_colo_mode(void)
159{
160 if (migration_in_colo_state()) {
161 return COLO_MODE_PRIMARY;
162 } else if (migration_incoming_in_colo_state()) {
163 return COLO_MODE_SECONDARY;
164 } else {
Zhang Chen41b6b772018-09-03 12:38:52 +0800165 return COLO_MODE_NONE;
Zhang Chenaad555c2018-09-03 12:38:47 +0800166 }
167}
168
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800169void colo_do_failover(MigrationState *s)
170{
171 /* Make sure VM stopped while failover happened. */
172 if (!colo_runstate_is_stopped()) {
173 vm_stop_force_state(RUN_STATE_COLO);
174 }
175
176 if (get_colo_mode() == COLO_MODE_PRIMARY) {
177 primary_vm_do_failover();
zhanghailiang9d2db372016-10-27 14:43:06 +0800178 } else {
179 secondary_vm_do_failover();
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800180 }
181}
182
Zhang Chen2c9639e2017-02-28 11:01:56 -0800183void qmp_xen_set_replication(bool enable, bool primary,
184 bool has_failover, bool failover,
185 Error **errp)
186{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200187#ifdef CONFIG_REPLICATION
Zhang Chen2c9639e2017-02-28 11:01:56 -0800188 ReplicationMode mode = primary ?
189 REPLICATION_MODE_PRIMARY :
190 REPLICATION_MODE_SECONDARY;
191
192 if (has_failover && enable) {
193 error_setg(errp, "Parameter 'failover' is only for"
194 " stopping replication");
195 return;
196 }
197
198 if (enable) {
199 replication_start_all(mode, errp);
200 } else {
201 if (!has_failover) {
202 failover = NULL;
203 }
204 replication_stop_all(failover, failover ? NULL : errp);
205 }
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200206#else
207 abort();
208#endif
Zhang Chen2c9639e2017-02-28 11:01:56 -0800209}
210
Zhang Chendaa33c52017-02-25 10:46:04 +0800211ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
212{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200213#ifdef CONFIG_REPLICATION
Zhang Chendaa33c52017-02-25 10:46:04 +0800214 Error *err = NULL;
215 ReplicationStatus *s = g_new0(ReplicationStatus, 1);
216
217 replication_get_error_all(&err);
218 if (err) {
219 s->error = true;
220 s->has_desc = true;
221 s->desc = g_strdup(error_get_pretty(err));
222 } else {
223 s->error = false;
224 }
225
226 error_free(err);
227 return s;
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200228#else
229 abort();
230#endif
Zhang Chendaa33c52017-02-25 10:46:04 +0800231}
232
233void qmp_xen_colo_do_checkpoint(Error **errp)
234{
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200235#ifdef CONFIG_REPLICATION
Zhang Chendaa33c52017-02-25 10:46:04 +0800236 replication_do_checkpoint_all(errp);
Markus Armbruster38bb54f2017-04-27 15:00:53 +0200237#else
238 abort();
239#endif
Zhang Chendaa33c52017-02-25 10:46:04 +0800240}
241
Zhang Chenf56c0062018-09-03 12:38:53 +0800242COLOStatus *qmp_query_colo_status(Error **errp)
243{
244 COLOStatus *s = g_new0(COLOStatus, 1);
245
246 s->mode = get_colo_mode();
247
248 switch (failover_get_state()) {
249 case FAILOVER_STATUS_NONE:
250 s->reason = COLO_EXIT_REASON_NONE;
251 break;
252 case FAILOVER_STATUS_REQUIRE:
253 s->reason = COLO_EXIT_REASON_REQUEST;
254 break;
255 default:
256 s->reason = COLO_EXIT_REASON_ERROR;
257 }
258
259 return s;
260}
261
zhanghailiang4f975582016-10-27 14:42:57 +0800262static void colo_send_message(QEMUFile *f, COLOMessage msg,
263 Error **errp)
264{
265 int ret;
266
267 if (msg >= COLO_MESSAGE__MAX) {
268 error_setg(errp, "%s: Invalid message", __func__);
269 return;
270 }
271 qemu_put_be32(f, msg);
272 qemu_fflush(f);
273
274 ret = qemu_file_get_error(f);
275 if (ret < 0) {
276 error_setg_errno(errp, -ret, "Can't send COLO message");
277 }
Markus Armbruster977c7362017-08-24 10:46:08 +0200278 trace_colo_send_message(COLOMessage_str(msg));
zhanghailiang4f975582016-10-27 14:42:57 +0800279}
280
zhanghailianga91246c2016-10-27 14:42:59 +0800281static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
282 uint64_t value, Error **errp)
283{
284 Error *local_err = NULL;
285 int ret;
286
287 colo_send_message(f, msg, &local_err);
288 if (local_err) {
289 error_propagate(errp, local_err);
290 return;
291 }
292 qemu_put_be64(f, value);
293 qemu_fflush(f);
294
295 ret = qemu_file_get_error(f);
296 if (ret < 0) {
297 error_setg_errno(errp, -ret, "Failed to send value for message:%s",
Markus Armbruster977c7362017-08-24 10:46:08 +0200298 COLOMessage_str(msg));
zhanghailianga91246c2016-10-27 14:42:59 +0800299 }
300}
301
zhanghailiang4f975582016-10-27 14:42:57 +0800302static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
303{
304 COLOMessage msg;
305 int ret;
306
307 msg = qemu_get_be32(f);
308 ret = qemu_file_get_error(f);
309 if (ret < 0) {
310 error_setg_errno(errp, -ret, "Can't receive COLO message");
311 return msg;
312 }
313 if (msg >= COLO_MESSAGE__MAX) {
314 error_setg(errp, "%s: Invalid message", __func__);
315 return msg;
316 }
Markus Armbruster977c7362017-08-24 10:46:08 +0200317 trace_colo_receive_message(COLOMessage_str(msg));
zhanghailiang4f975582016-10-27 14:42:57 +0800318 return msg;
319}
320
321static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
322 Error **errp)
323{
324 COLOMessage msg;
325 Error *local_err = NULL;
326
327 msg = colo_receive_message(f, &local_err);
328 if (local_err) {
329 error_propagate(errp, local_err);
330 return;
331 }
332 if (msg != expect_msg) {
333 error_setg(errp, "Unexpected COLO message %d, expected %d",
334 msg, expect_msg);
335 }
336}
337
zhanghailiang4291d372016-10-27 14:43:00 +0800338static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
339 Error **errp)
340{
341 Error *local_err = NULL;
342 uint64_t value;
343 int ret;
344
345 colo_receive_check_message(f, expect_msg, &local_err);
346 if (local_err) {
347 error_propagate(errp, local_err);
348 return 0;
349 }
350
351 value = qemu_get_be64(f);
352 ret = qemu_file_get_error(f);
353 if (ret < 0) {
354 error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
Markus Armbruster977c7362017-08-24 10:46:08 +0200355 COLOMessage_str(expect_msg));
zhanghailiang4291d372016-10-27 14:43:00 +0800356 }
357 return value;
358}
359
zhanghailianga91246c2016-10-27 14:42:59 +0800360static int colo_do_checkpoint_transaction(MigrationState *s,
361 QIOChannelBuffer *bioc,
362 QEMUFile *fb)
zhanghailiang4f975582016-10-27 14:42:57 +0800363{
364 Error *local_err = NULL;
zhanghailianga91246c2016-10-27 14:42:59 +0800365 int ret = -1;
zhanghailiang4f975582016-10-27 14:42:57 +0800366
367 colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
368 &local_err);
369 if (local_err) {
370 goto out;
371 }
372
373 colo_receive_check_message(s->rp_state.from_dst_file,
374 COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
375 if (local_err) {
376 goto out;
377 }
zhanghailianga91246c2016-10-27 14:42:59 +0800378 /* Reset channel-buffer directly */
379 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
380 bioc->usage = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800381
zhanghailianga91246c2016-10-27 14:42:59 +0800382 qemu_mutex_lock_iothread();
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800383 if (failover_get_state() != FAILOVER_STATUS_NONE) {
384 qemu_mutex_unlock_iothread();
385 goto out;
386 }
zhanghailianga91246c2016-10-27 14:42:59 +0800387 vm_stop_force_state(RUN_STATE_COLO);
388 qemu_mutex_unlock_iothread();
389 trace_colo_vm_state_change("run", "stop");
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800390 /*
391 * Failover request bh could be called after vm_stop_force_state(),
392 * So we need check failover_request_is_active() again.
393 */
394 if (failover_get_state() != FAILOVER_STATUS_NONE) {
395 goto out;
396 }
zhanghailianga91246c2016-10-27 14:42:59 +0800397
Zhang Chen131b2152018-09-03 12:38:45 +0800398 colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
399 if (local_err) {
400 goto out;
401 }
402
zhanghailianga91246c2016-10-27 14:42:59 +0800403 /* Disable block migration */
Juan Quintelace7c8172017-04-05 20:45:22 +0200404 migrate_set_block_enabled(false, &local_err);
zhanghailianga91246c2016-10-27 14:42:59 +0800405 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800406 replication_do_checkpoint_all(&local_err);
407 if (local_err) {
408 qemu_mutex_unlock_iothread();
409 goto out;
410 }
zhanghailiang4f975582016-10-27 14:42:57 +0800411
412 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
413 if (local_err) {
Zhang Chen3f6df992018-09-03 12:38:54 +0800414 qemu_mutex_unlock_iothread();
zhanghailiang4f975582016-10-27 14:42:57 +0800415 goto out;
416 }
Zhang Chen3f6df992018-09-03 12:38:54 +0800417 /* Note: device state is saved into buffer */
418 ret = qemu_save_device_state(fb);
419
420 qemu_mutex_unlock_iothread();
421 if (ret < 0) {
422 goto out;
423 }
424 /*
425 * Only save VM's live state, which not including device state.
426 * TODO: We may need a timeout mechanism to prevent COLO process
427 * to be blocked here.
428 */
429 qemu_savevm_live_state(s->to_dst_file);
430
431 qemu_fflush(fb);
432
zhanghailianga91246c2016-10-27 14:42:59 +0800433 /*
434 * We need the size of the VMstate data in Secondary side,
435 * With which we can decide how much data should be read.
436 */
437 colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
438 bioc->usage, &local_err);
439 if (local_err) {
440 goto out;
441 }
zhanghailiang4f975582016-10-27 14:42:57 +0800442
zhanghailianga91246c2016-10-27 14:42:59 +0800443 qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
444 qemu_fflush(s->to_dst_file);
445 ret = qemu_file_get_error(s->to_dst_file);
446 if (ret < 0) {
447 goto out;
448 }
zhanghailiang4f975582016-10-27 14:42:57 +0800449
450 colo_receive_check_message(s->rp_state.from_dst_file,
451 COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
452 if (local_err) {
453 goto out;
454 }
455
456 colo_receive_check_message(s->rp_state.from_dst_file,
457 COLO_MESSAGE_VMSTATE_LOADED, &local_err);
458 if (local_err) {
459 goto out;
460 }
461
zhanghailianga91246c2016-10-27 14:42:59 +0800462 ret = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800463
zhanghailianga91246c2016-10-27 14:42:59 +0800464 qemu_mutex_lock_iothread();
465 vm_start();
466 qemu_mutex_unlock_iothread();
467 trace_colo_vm_state_change("stop", "run");
468
zhanghailiang4f975582016-10-27 14:42:57 +0800469out:
470 if (local_err) {
471 error_report_err(local_err);
472 }
zhanghailianga91246c2016-10-27 14:42:59 +0800473 return ret;
zhanghailiang4f975582016-10-27 14:42:57 +0800474}
475
Zhang Chen131b2152018-09-03 12:38:45 +0800476static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
477{
478 colo_checkpoint_notify(data);
479}
480
zhanghailiang0b827d52016-10-27 14:42:54 +0800481static void colo_process_checkpoint(MigrationState *s)
482{
zhanghailianga91246c2016-10-27 14:42:59 +0800483 QIOChannelBuffer *bioc;
484 QEMUFile *fb = NULL;
zhanghailiang479125d2017-01-17 20:57:42 +0800485 int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
zhanghailiang4f975582016-10-27 14:42:57 +0800486 Error *local_err = NULL;
487 int ret;
488
zhanghailiangaef06082016-10-27 14:43:04 +0800489 failover_init_state();
490
zhanghailiang56ba83d2016-10-27 14:42:56 +0800491 s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
492 if (!s->rp_state.from_dst_file) {
493 error_report("Open QEMUFile from_dst_file failed");
494 goto out;
495 }
496
Zhang Chen131b2152018-09-03 12:38:45 +0800497 packets_compare_notifier.notify = colo_compare_notify_checkpoint;
498 colo_compare_register_notifier(&packets_compare_notifier);
499
zhanghailiang4f975582016-10-27 14:42:57 +0800500 /*
501 * Wait for Secondary finish loading VM states and enter COLO
502 * restore.
503 */
504 colo_receive_check_message(s->rp_state.from_dst_file,
505 COLO_MESSAGE_CHECKPOINT_READY, &local_err);
506 if (local_err) {
507 goto out;
508 }
zhanghailianga91246c2016-10-27 14:42:59 +0800509 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
510 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
511 object_unref(OBJECT(bioc));
zhanghailiang4f975582016-10-27 14:42:57 +0800512
zhanghailiang0b827d52016-10-27 14:42:54 +0800513 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800514 replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
515 if (local_err) {
516 qemu_mutex_unlock_iothread();
517 goto out;
518 }
519
zhanghailiang0b827d52016-10-27 14:42:54 +0800520 vm_start();
521 qemu_mutex_unlock_iothread();
522 trace_colo_vm_state_change("stop", "run");
523
zhanghailiang479125d2017-01-17 20:57:42 +0800524 timer_mod(s->colo_delay_timer,
525 current_time + s->parameters.x_checkpoint_delay);
526
zhanghailiang4f975582016-10-27 14:42:57 +0800527 while (s->state == MIGRATION_STATUS_COLO) {
zhanghailiangb3f7f0c2016-10-27 14:43:05 +0800528 if (failover_get_state() != FAILOVER_STATUS_NONE) {
529 error_report("failover request");
530 goto out;
531 }
532
zhanghailiang479125d2017-01-17 20:57:42 +0800533 qemu_sem_wait(&s->colo_checkpoint_sem);
zhanghailiang18cc23d2016-10-27 14:43:02 +0800534
zhanghailianga91246c2016-10-27 14:42:59 +0800535 ret = colo_do_checkpoint_transaction(s, bioc, fb);
zhanghailiang4f975582016-10-27 14:42:57 +0800536 if (ret < 0) {
537 goto out;
538 }
539 }
zhanghailiang0b827d52016-10-27 14:42:54 +0800540
zhanghailiang56ba83d2016-10-27 14:42:56 +0800541out:
zhanghailiang4f975582016-10-27 14:42:57 +0800542 /* Throw the unreported error message after exited from loop */
543 if (local_err) {
544 error_report_err(local_err);
545 }
546
zhanghailianga91246c2016-10-27 14:42:59 +0800547 if (fb) {
548 qemu_fclose(fb);
549 }
550
zhanghailiang9ecff6d2018-09-03 12:38:51 +0800551 /*
552 * There are only two reasons we can get here, some error happened
553 * or the user triggered failover.
554 */
555 switch (failover_get_state()) {
556 case FAILOVER_STATUS_NONE:
557 qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
558 COLO_EXIT_REASON_ERROR);
559 break;
560 case FAILOVER_STATUS_REQUIRE:
561 qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
562 COLO_EXIT_REASON_REQUEST);
563 break;
564 default:
565 abort();
566 }
567
zhanghailiangc937b9a2017-01-17 20:57:43 +0800568 /* Hope this not to be too long to wait here */
569 qemu_sem_wait(&s->colo_exit_sem);
570 qemu_sem_destroy(&s->colo_exit_sem);
Zhang Chen131b2152018-09-03 12:38:45 +0800571
572 /*
573 * It is safe to unregister notifier after failover finished.
574 * Besides, colo_delay_timer and colo_checkpoint_sem can't be
575 * released befor unregister notifier, or there will be use-after-free
576 * error.
577 */
578 colo_compare_unregister_notifier(&packets_compare_notifier);
579 timer_del(s->colo_delay_timer);
580 timer_free(s->colo_delay_timer);
581 qemu_sem_destroy(&s->colo_checkpoint_sem);
582
zhanghailiangc937b9a2017-01-17 20:57:43 +0800583 /*
584 * Must be called after failover BH is completed,
585 * Or the failover BH may shutdown the wrong fd that
586 * re-used by other threads after we release here.
587 */
zhanghailiang56ba83d2016-10-27 14:42:56 +0800588 if (s->rp_state.from_dst_file) {
589 qemu_fclose(s->rp_state.from_dst_file);
590 }
zhanghailiang0b827d52016-10-27 14:42:54 +0800591}
592
zhanghailiang479125d2017-01-17 20:57:42 +0800593void colo_checkpoint_notify(void *opaque)
594{
595 MigrationState *s = opaque;
596 int64_t next_notify_time;
597
598 qemu_sem_post(&s->colo_checkpoint_sem);
599 s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
600 next_notify_time = s->colo_checkpoint_time +
601 s->parameters.x_checkpoint_delay;
602 timer_mod(s->colo_delay_timer, next_notify_time);
603}
604
zhanghailiang0b827d52016-10-27 14:42:54 +0800605void migrate_start_colo_process(MigrationState *s)
606{
607 qemu_mutex_unlock_iothread();
zhanghailiang479125d2017-01-17 20:57:42 +0800608 qemu_sem_init(&s->colo_checkpoint_sem, 0);
609 s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
610 colo_checkpoint_notify, s);
611
zhanghailiangc937b9a2017-01-17 20:57:43 +0800612 qemu_sem_init(&s->colo_exit_sem, 0);
zhanghailiang0b827d52016-10-27 14:42:54 +0800613 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
614 MIGRATION_STATUS_COLO);
615 colo_process_checkpoint(s);
616 qemu_mutex_lock_iothread();
617}
zhanghailiang25d0c162016-10-27 14:42:55 +0800618
zhanghailiang4f975582016-10-27 14:42:57 +0800619static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
620 Error **errp)
621{
622 COLOMessage msg;
623 Error *local_err = NULL;
624
625 msg = colo_receive_message(f, &local_err);
626 if (local_err) {
627 error_propagate(errp, local_err);
628 return;
629 }
630
631 switch (msg) {
632 case COLO_MESSAGE_CHECKPOINT_REQUEST:
633 *checkpoint_request = 1;
634 break;
635 default:
636 *checkpoint_request = 0;
637 error_setg(errp, "Got unknown COLO message: %d", msg);
638 break;
639 }
640}
641
zhanghailiang25d0c162016-10-27 14:42:55 +0800642void *colo_process_incoming_thread(void *opaque)
643{
644 MigrationIncomingState *mis = opaque;
zhanghailiang4291d372016-10-27 14:43:00 +0800645 QEMUFile *fb = NULL;
646 QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
647 uint64_t total_size;
648 uint64_t value;
zhanghailiang4f975582016-10-27 14:42:57 +0800649 Error *local_err = NULL;
Zhang Chen3f6df992018-09-03 12:38:54 +0800650 int ret;
zhanghailiang25d0c162016-10-27 14:42:55 +0800651
Lidong Chen74637e62018-08-06 21:29:29 +0800652 rcu_register_thread();
zhanghailiangc937b9a2017-01-17 20:57:43 +0800653 qemu_sem_init(&mis->colo_incoming_sem, 0);
654
zhanghailiang25d0c162016-10-27 14:42:55 +0800655 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
656 MIGRATION_STATUS_COLO);
657
zhanghailiangaef06082016-10-27 14:43:04 +0800658 failover_init_state();
659
zhanghailiang56ba83d2016-10-27 14:42:56 +0800660 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
661 if (!mis->to_src_file) {
662 error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
663 goto out;
664 }
665 /*
666 * Note: the communication between Primary side and Secondary side
667 * should be sequential, we set the fd to unblocked in migration incoming
668 * coroutine, and here we are in the COLO incoming thread, so it is ok to
669 * set the fd back to blocked.
670 */
671 qemu_file_set_blocking(mis->from_src_file, true);
672
zhanghailiang4291d372016-10-27 14:43:00 +0800673 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
674 fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
675 object_unref(OBJECT(bioc));
676
Zhang Chen131b2152018-09-03 12:38:45 +0800677 qemu_mutex_lock_iothread();
Zhang Chen8e48ac92018-09-03 12:38:46 +0800678 replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
679 if (local_err) {
680 qemu_mutex_unlock_iothread();
681 goto out;
682 }
Zhang Chen131b2152018-09-03 12:38:45 +0800683 vm_start();
684 trace_colo_vm_state_change("stop", "run");
685 qemu_mutex_unlock_iothread();
686
zhanghailiang4f975582016-10-27 14:42:57 +0800687 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
688 &local_err);
689 if (local_err) {
690 goto out;
691 }
692
693 while (mis->state == MIGRATION_STATUS_COLO) {
Jeff Cody02ba9262016-10-31 17:50:44 -0400694 int request = 0;
zhanghailiang4f975582016-10-27 14:42:57 +0800695
696 colo_wait_handle_message(mis->from_src_file, &request, &local_err);
697 if (local_err) {
698 goto out;
699 }
700 assert(request);
zhanghailiang9d2db372016-10-27 14:43:06 +0800701 if (failover_get_state() != FAILOVER_STATUS_NONE) {
702 error_report("failover request");
703 goto out;
704 }
705
Zhang Chen131b2152018-09-03 12:38:45 +0800706 qemu_mutex_lock_iothread();
707 vm_stop_force_state(RUN_STATE_COLO);
708 trace_colo_vm_state_change("run", "stop");
709 qemu_mutex_unlock_iothread();
710
zhanghailiang4f975582016-10-27 14:42:57 +0800711 /* FIXME: This is unnecessary for periodic checkpoint mode */
712 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
713 &local_err);
714 if (local_err) {
715 goto out;
716 }
717
718 colo_receive_check_message(mis->from_src_file,
719 COLO_MESSAGE_VMSTATE_SEND, &local_err);
720 if (local_err) {
721 goto out;
722 }
723
Zhang Chen3f6df992018-09-03 12:38:54 +0800724 qemu_mutex_lock_iothread();
725 cpu_synchronize_all_pre_loadvm();
726 ret = qemu_loadvm_state_main(mis->from_src_file, mis);
727 qemu_mutex_unlock_iothread();
728
729 if (ret < 0) {
730 error_report("Load VM's live state (ram) error");
731 goto out;
732 }
733
zhanghailiang4291d372016-10-27 14:43:00 +0800734 value = colo_receive_message_value(mis->from_src_file,
735 COLO_MESSAGE_VMSTATE_SIZE, &local_err);
736 if (local_err) {
737 goto out;
738 }
739
740 /*
741 * Read VM device state data into channel buffer,
742 * It's better to re-use the memory allocated.
743 * Here we need to handle the channel buffer directly.
744 */
745 if (value > bioc->capacity) {
746 bioc->capacity = value;
747 bioc->data = g_realloc(bioc->data, bioc->capacity);
748 }
749 total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
750 if (total_size != value) {
751 error_report("Got %" PRIu64 " VMState data, less than expected"
752 " %" PRIu64, total_size, value);
753 goto out;
754 }
755 bioc->usage = total_size;
756 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
zhanghailiang4f975582016-10-27 14:42:57 +0800757
758 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
759 &local_err);
760 if (local_err) {
761 goto out;
762 }
763
zhanghailiang4291d372016-10-27 14:43:00 +0800764 qemu_mutex_lock_iothread();
zhanghailianga8664ba2017-01-17 20:57:44 +0800765 vmstate_loading = true;
Zhang Chen3f6df992018-09-03 12:38:54 +0800766 ret = qemu_load_device_state(fb);
767 if (ret < 0) {
768 error_report("COLO: load device state failed");
zhanghailiang4291d372016-10-27 14:43:00 +0800769 qemu_mutex_unlock_iothread();
770 goto out;
771 }
zhanghailianga8664ba2017-01-17 20:57:44 +0800772
Zhang Chen8e48ac92018-09-03 12:38:46 +0800773 replication_get_error_all(&local_err);
774 if (local_err) {
775 qemu_mutex_unlock_iothread();
776 goto out;
777 }
778 /* discard colo disk buffer */
779 replication_do_checkpoint_all(&local_err);
780 if (local_err) {
781 qemu_mutex_unlock_iothread();
782 goto out;
783 }
784
zhanghailianga8664ba2017-01-17 20:57:44 +0800785 vmstate_loading = false;
Zhang Chen131b2152018-09-03 12:38:45 +0800786 vm_start();
787 trace_colo_vm_state_change("stop", "run");
zhanghailiang4291d372016-10-27 14:43:00 +0800788 qemu_mutex_unlock_iothread();
zhanghailiang4f975582016-10-27 14:42:57 +0800789
zhanghailianga8664ba2017-01-17 20:57:44 +0800790 if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
791 failover_set_state(FAILOVER_STATUS_RELAUNCH,
792 FAILOVER_STATUS_NONE);
793 failover_request_active(NULL);
794 goto out;
795 }
796
zhanghailiang4f975582016-10-27 14:42:57 +0800797 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
798 &local_err);
799 if (local_err) {
800 goto out;
801 }
802 }
zhanghailiang25d0c162016-10-27 14:42:55 +0800803
zhanghailiang56ba83d2016-10-27 14:42:56 +0800804out:
zhanghailianga8664ba2017-01-17 20:57:44 +0800805 vmstate_loading = false;
zhanghailiang4f975582016-10-27 14:42:57 +0800806 /* Throw the unreported error message after exited from loop */
807 if (local_err) {
808 error_report_err(local_err);
809 }
810
zhanghailiang9ecff6d2018-09-03 12:38:51 +0800811 switch (failover_get_state()) {
812 case FAILOVER_STATUS_NONE:
813 qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
814 COLO_EXIT_REASON_ERROR);
815 break;
816 case FAILOVER_STATUS_REQUIRE:
817 qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
818 COLO_EXIT_REASON_REQUEST);
819 break;
820 default:
821 abort();
822 }
823
zhanghailiang4291d372016-10-27 14:43:00 +0800824 if (fb) {
825 qemu_fclose(fb);
826 }
827
zhanghailiangc937b9a2017-01-17 20:57:43 +0800828 /* Hope this not to be too long to loop here */
829 qemu_sem_wait(&mis->colo_incoming_sem);
830 qemu_sem_destroy(&mis->colo_incoming_sem);
831 /* Must be called after failover BH is completed */
zhanghailiang56ba83d2016-10-27 14:42:56 +0800832 if (mis->to_src_file) {
833 qemu_fclose(mis->to_src_file);
834 }
Zhang Chenaad555c2018-09-03 12:38:47 +0800835 migration_incoming_disable_colo();
zhanghailiang25d0c162016-10-27 14:42:55 +0800836
Lidong Chen74637e62018-08-06 21:29:29 +0800837 rcu_unregister_thread();
zhanghailiang25d0c162016-10-27 14:42:55 +0800838 return NULL;
839}